# Collaborative Filtering for GitHub Stars

In [1]:
# libraries
import numpy as np
from scipy import io
import pickle
import pandas as pd
import re

# files
import utils
from user import User

## Create inputs from Y_df.pickle

In [2]:
# Any data source works as long as it supplies these three inputs:
#   ITEM_LIST - list of item (repo) names
#   Y         - 2D array of shape (no. items, no. users)
#   R         - same shape as Y; R[i, j] = 1 if and only if user j rated item (repo) i
#
# Placeholders only; the next cell fills them in.
ITEM_LIST = list()
Y = R = None

In [3]:
# Fill ITEM_LIST, Y and R from the pickled DataFrame
# (rows are items/repos, columns are users).
# SECURITY: pickle.load can execute arbitrary code while unpickling;
# only load a Y_df.pickle that comes from a source you trust.
with open('Y_df.pickle', 'rb') as pickle_file:  # context manager closes the handle
    Y_df = pickle.load(pickle_file)

ITEM_LIST = Y_df.index.values  # item names are the DataFrame index
Y = Y_df.values
# R is the very same array object as Y (no copy is made). This is valid only
# because the Y values are binary (0/1) in this case.
# NOTE(review): if anything modifies Y in place, R changes with it; switch to
# Y.copy() should that ever matter.
R = Y

print("ITEM_LIST: {} of length {}".format(type(ITEM_LIST), len(ITEM_LIST)))
print("Y: {} of {} items x {} users".format(type(Y), Y.shape[0], Y.shape[1]))
print("R: {} of {} items x {} users".format(type(R), R.shape[0], R.shape[1]))

ITEM_LIST: <class 'numpy.ndarray'> of length 1000
Y: <class 'numpy.ndarray'> of 1000 items x 17047 users
R: <class 'numpy.ndarray'> of 1000 items x 17047 users


In [4]:
# Sparsity check: how much of the ratings matrix is actually filled in,
# i.e. (no. stars given) / (no. possible stars).
stars_given = np.sum(R)
possible_stars = np.size(R)
stars_given / possible_stars

0.010004282278406757

## Create a new user and choose repos to star

In [5]:
# Create a new user from the list of item (repo) names.
# NOTE(review): User comes from the local user.py module, which is not shown
# here; later cells index its `ratings` attribute by item position and call
# its train / predict / printRatings / printTop methods.
user = User(ITEM_LIST)

In [6]:
# Edit `query` to look up the items (repos) you may want to star.
# It is passed to re.search, so any regular expression works.
query = 'python'

# Collect (index, name) pairs for every item whose name matches the query.
result = []
for position, repo_name in enumerate(ITEM_LIST):
    if re.search(query, repo_name):
        result.append((position, repo_name))

# Tab-separated listing of the matches; the index column is what the
# following cell uses to pick repos to star.
print("index\tname")
for position, repo_name in result:
    print("{}\t{}".format(position, repo_name))

index	name
26	vinta/awesome-python
337	faif/python-patterns
338	python/cpython
344	kennethreitz/python-guide
359	ipython/ipython
507	donnemartin/data-science-ipython-notebooks
948	rasbt/python-machine-learning-book
999	google/python-fire


In [7]:
# `dex` holds the indexes (positions in ITEM_LIST) of the repos to star.
# As an example, star every repo matched by the current query; replace this
# with a hand-picked list of indexes to star only some of them.
dex = [match_index for match_index, _match_name in result]
dex

[26, 337, 338, 344, 359, 507, 948, 999]

In [8]:
# Starred repos are represented as 1s in the ratings matrix: write a 1 at
# every position listed in dex.
# NOTE(review): only the positions in dex are written, so re-running this cell
# with a different dex adds to the earlier stars rather than replacing them;
# recreate `user` to start from a clean slate.
user.ratings[dex] = 1
# Print the user's starred repos as a sanity check.
user.printRatings(ITEM_LIST)

User ratings:
Starred vinta/awesome-python
Starred faif/python-patterns
Starred python/cpython
Starred kennethreitz/python-guide
Starred ipython/ipython
Starred donnemartin/data-science-ipython-notebooks
Starred rasbt/python-machine-learning-book
Starred google/python-fire


## Make recommendations for the user

In [9]:
# Parameters worth playing around with:
#   lambd        - regularization
#   num_features - TODO confirm exact meaning in user.py
#   maxiter      - maximum number of optimizer iterations
# normalize stays False: all of the ratings are 1, so normalizing them is pointless.
train_options = {
    "lambd": 0.5,
    "normalize": False,
    "num_features": 16,
    "maxiter": 100,
}

# Training prints the loss every 10th iteration.
user.train(Y, R, **train_options)


Training model...
iteration 0	loss 556048.0623009756
iteration 10	loss 134538.13668888542
iteration 20	loss 75891.9873360359
iteration 30	loss 47501.2248331613
iteration 40	loss 33856.775920234235
iteration 50	loss 22668.14511342928
iteration 60	loss 13889.674561787315
iteration 70	loss 8532.045944274947
iteration 80	loss 2521.001348934327
iteration 90	loss 2282.733456726387
         Current function value: 2139.537442
         Iterations: 100
         Function evaluations: 149
         Gradient evaluations: 149


In [10]:
# Compute predictions for the user and print the top n=20 recommendations.
# normalize=False matches the setting passed to user.train above.
user.predict(ITEM_LIST, Y, R, n=20, normalize=False)
user.printTop()

# When running this algorithm repeatedly, recommendations will be different
# for the same user ratings due to random initialization of the weights.
# NOTE(review): no random seed is set in the cells shown, so the output listed
# below cannot be reproduced exactly; seeding would have to happen before
# training — confirm which random generator user.py uses.


Top 20 recommendations:
Predicting star for vurtun/nuklear
Predicting star for gitlabhq/gitlabhq
Predicting star for ruby/ruby
Predicting star for nodejs/node-v0.x-archive
Predicting star for google/guetzli
Predicting star for jquery/jquery
Predicting star for apache/incubator-mxnet
Predicting star for fchollet/keras
Predicting star for Homebrew/legacy-homebrew
Predicting star for tj/git-extras
Predicting star for 0xAX/linux-insides
Predicting star for freeCodeCamp/freeCodeCamp
Predicting star for alebcay/awesome-shell
Predicting star for gohugoio/hugo
Predicting star for rails/rails
Predicting star for requests/requests
Predicting star for NARKOZ/hacker-scripts
Predicting star for nicolargo/glances
Predicting star for twbs/bootstrap
Predicting star for open-source-society/computer-science
