In [1]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds

In [2]:
# Load the three JSON inputs (paper metadata, user profiles, user->paper ratings),
# in that order, from the notebook's working directory.
papers_df, users_df, ratings_df = (
    pd.read_json(json_path)
    for json_path in ('Papers_Metadata_6K.json', 'users.json', 'User_Ratings.json')
)

In [3]:
# Preview the first rows of the paper metadata (the 'id' column is the join key used later).
papers_df.head()

Unnamed: 0,author,day,id,link,month,summary,tag,title,year
0,"[{'name': 'Ahmed Osman'}, {'name': 'Wojciech S...",1,1802.00209v1,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",2,We propose an architecture for VQA which utili...,"[{'term': 'cs.AI', 'scheme': 'http://arxiv.org...",Dual Recurrent Attention Units for Visual Ques...,2018
1,"[{'name': 'Ji Young Lee'}, {'name': 'Franck De...",12,1603.03827v1,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",3,Recent approaches based on artificial neural n...,"[{'term': 'cs.CL', 'scheme': 'http://arxiv.org...",Sequential Short-Text Classification with Recu...,2016
2,"[{'name': 'Iulian Vlad Serban'}, {'name': 'Tim...",2,1606.00776v2,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",6,We introduce the multiresolution recurrent neu...,"[{'term': 'cs.CL', 'scheme': 'http://arxiv.org...",Multiresolution Recurrent Neural Networks: An ...,2016
3,"[{'name': 'Sebastian Ruder'}, {'name': 'Joachi...",23,1705.08142v2,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",5,Multi-task learning is motivated by the observ...,"[{'term': 'stat.ML', 'scheme': 'http://arxiv.o...",Learning what to share between loosely related...,2017
4,"[{'name': 'Iulian V. Serban'}, {'name': 'Chinn...",7,1709.02349v2,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",9,We present MILABOT: a deep reinforcement learn...,"[{'term': 'cs.CL', 'scheme': 'http://arxiv.org...",A Deep Reinforcement Learning Chatbot,2017


In [4]:
# Inspect the row index of the paper metadata; its length is the number of papers loaded.
papers_df.index

RangeIndex(start=0, stop=6000, step=1)

In [5]:
# Preview the first rows of the user profile table.
users_df.head()

Unnamed: 0,userID,Location,Age
0,1,nyc,
1,2,stockton,18.0
2,3,moscow,
3,4,porto,17.0
4,5,farnborough,


In [6]:
# Preview the first rows of the ratings table: one row per (userID, paper id, paperRating).
ratings_df.head()

Unnamed: 0,userID,id,paperRating
0,112,1802.00209v1,6
1,21,1603.03827v1,9
2,12,1606.00776v2,6
3,92,1705.08142v2,2
4,52,1709.02349v2,7


In [9]:
# Keep only the most recent rating for each (userID, paper id) pair. The pivot
# below needs unique (index, columns) pairs, so duplicates must go first.
# Rebinding instead of `inplace=True` leaves the originally loaded frame object
# unmodified and keeps this cell safe to re-run.
ratings_df = ratings_df.drop_duplicates(subset=['userID', 'id'], keep='last')

In [10]:
# Reshape the long ratings table into a wide matrix: one row per userID, one
# column per paper id, cell = paperRating. Unrated cells are filled with 0.
userItemRatingMatrix = (
    ratings_df
    .pivot(index='userID', columns='id', values='paperRating')
    .fillna(0)
)

In [11]:
# Preview the wide user x paper matrix (zeros mark papers the user has not rated).
userItemRatingMatrix.head()

id,0812.0743v2,0911.5372v1,1004.4965v1,1006.1346v2,1007.2449v1,1008.1566v5,1008.1643v2,1010.3460v2,1102.2739v1,1103.4487v1,...,1802.09914v1,1803.00094v1,1803.01686v1,1803.02544v2,1803.03232v1,1803.03692v1,1803.05407v1,1803.06959v1,1803.07679v1,1803.08240v1
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Centre each user's row before factorisation.
# NOTE: the per-user mean is taken over every column, including the cells that
# were zero-filled for unrated papers, so it is not the mean of the user's
# actual ratings.
R = userItemRatingMatrix.to_numpy()
user_ratings_mean = R.mean(axis=1)
R_demeaned = R - user_ratings_mean[:, np.newaxis]

In [13]:
# Truncated SVD of the centred rating matrix, keeping k = 50 singular triplets.
# `sigma` comes back as a 1-D array of singular values (converted to a diagonal
# matrix in the next cell).
# NOTE(review): svds needs k to be smaller than both matrix dimensions — confirm
# the matrix has more than 50 users and more than 50 papers.
U, sigma, Vt = svds(R_demeaned, k = 50)

In [14]:
# Expand the 1-D singular values into a diagonal matrix for the reconstruction.
# The ndim guard keeps this cell idempotent: calling np.diag on a matrix that is
# already 2-D would extract its diagonal back into a 1-D vector, which would
# break the matrix product in the next cell if this cell were run twice.
if sigma.ndim == 1:
    sigma = np.diag(sigma)

In [15]:
# Rebuild the low-rank approximation and add each user's mean back on.
all_user_predicted_ratings = U @ sigma @ Vt + user_ratings_mean[:, np.newaxis]
# Columns are labelled with paper ids; no index is passed, so rows get a default
# 0-based positional index rather than userID labels.
preds_df = pd.DataFrame(data=all_user_predicted_ratings, columns=userItemRatingMatrix.columns)

In [16]:
def recommend_papers(predictions_df, userID, papers_df, original_ratings_df, num_recommendations=5):
    """Return the papers a user has rated and the top predicted papers they have not.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings, one row per user and one column per paper ``id``.
        If its index is named ``'userID'`` the user's row is looked up by
        label. Otherwise row ``userID - 1`` is used, which is only correct
        when rows are ordered by userID, starting at 1, with no gaps.
    userID : int
        The user to recommend for.
    papers_df : pandas.DataFrame
        Paper metadata; must contain an ``'id'`` column.
    original_ratings_df : pandas.DataFrame
        Long-format ratings with ``'userID'``, ``'id'`` and ``'paperRating'``.
    num_recommendations : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    (user_full, recommendations) : tuple of pandas.DataFrame
        ``user_full`` is the user's ratings joined with paper metadata, highest
        rating first. ``recommendations`` is the metadata of the unrated papers
        with the highest predicted rating (the prediction column itself is not
        included). Papers without a prediction sort last.

    Raises
    ------
    IndexError
        Positional lookup only: ``userID - 1`` is not a valid row position.
    KeyError
        Label lookup only: ``userID`` is not in the ``'userID'`` index.
    """
    # Locate the user's row of predictions.
    if predictions_df.index.name == 'userID':
        user_predictions = predictions_df.loc[userID]
    else:
        user_row_number = userID - 1  # UserID starts at 1, not 0
        # Reject out-of-range positions explicitly; a negative position would
        # otherwise silently wrap around to a different user's row.
        if not 0 <= user_row_number < len(predictions_df):
            raise IndexError(
                'userID {0} is outside the {1} rows of predictions_df'.format(
                    userID, len(predictions_df)))
        user_predictions = predictions_df.iloc[user_row_number]

    # Turn the row into an (id, Predictions) table. Naming the Series and its
    # axis here avoids depending on the row label or on the column-axis name.
    user_predictions = (user_predictions.rename('Predictions')
                        .rename_axis('id')
                        .reset_index())

    # Get the user's data and merge in the paper information.
    user_data = original_ratings_df[original_ratings_df.userID == userID]
    user_full = (user_data.merge(papers_df, how='left', on='id')
                 .sort_values(['paperRating'], ascending=False))

    print(('User {0} has already rated {1} papers.').format(userID, user_full.shape[0]))
    print(('Recommending the highest {0} predicted ratings papers not already rated.').format(num_recommendations))

    # Recommend the highest predicted rating papers that the user hasn't rated yet.
    unrated_papers = papers_df[~papers_df['id'].isin(user_full['id'])]
    recommendations = (unrated_papers
                       .merge(user_predictions, how='left', on='id')
                       .sort_values('Predictions', ascending=False)
                       .iloc[:num_recommendations]
                       .drop(columns='Predictions'))

    return user_full, recommendations

In [18]:
# Ask for the 10 highest-predicted unrated papers for user 100.
already_rated, predictions = recommend_papers(
    preds_df,
    userID=100,
    papers_df=papers_df,
    original_ratings_df=ratings_df,
    num_recommendations=10,
)

User 100 has already rated 9 papers.
Recommending the highest 10 predicted ratings papers not already rated.


In [19]:
# Show up to 10 of the papers the user has already rated, highest rating first.
already_rated.head(10)

Unnamed: 0,userID,id,paperRating,author,day,link,month,summary,tag,title,year
4,100,1707.09219v4,8,"[{'name': 'Isabeau Prémont-Schwarz'}, {'name':...",28,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",7,We propose a recurrent extension of the Ladder...,"[{'term': 'cs.NE', 'scheme': 'http://arxiv.org...",Recurrent Ladder Networks,2017
6,100,1603.03827v1,8,"[{'name': 'Ji Young Lee'}, {'name': 'Franck De...",12,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",3,Recent approaches based on artificial neural n...,"[{'term': 'cs.CL', 'scheme': 'http://arxiv.org...",Sequential Short-Text Classification with Recu...,2016
7,100,1802.00209v1,7,"[{'name': 'Ahmed Osman'}, {'name': 'Wojciech S...",1,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",2,We propose an architecture for VQA which utili...,"[{'term': 'cs.AI', 'scheme': 'http://arxiv.org...",Dual Recurrent Attention Units for Visual Ques...,2018
2,100,1305.1027v2,6,"[{'name': 'Mohammad Gheshlaghi Azar'}, {'name'...",5,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",5,In some reinforcement learning problems an age...,"[{'term': 'stat.ML', 'scheme': 'http://arxiv.o...",Regret Bounds for Reinforcement Learning with ...,2013
5,100,1604.00289v3,6,"[{'name': 'Brenden M. Lake'}, {'name': 'Tomer ...",1,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",4,Recent progress in artificial intelligence (AI...,"[{'term': 'cs.AI', 'scheme': 'http://arxiv.org...",Building Machines That Learn and Think Like Pe...,2016
8,100,1606.00776v2,6,"[{'name': 'Iulian Vlad Serban'}, {'name': 'Tim...",2,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",6,We introduce the multiresolution recurrent neu...,"[{'term': 'cs.CL', 'scheme': 'http://arxiv.org...",Multiresolution Recurrent Neural Networks: An ...,2016
1,100,1402.0929v3,5,"[{'name': 'Jasper Snoek'}, {'name': 'Kevin Swe...",5,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",2,Bayesian optimization has proven to be a highl...,"[{'term': 'stat.ML', 'scheme': 'http://arxiv.o...",Input Warping for Bayesian Optimization of Non...,2014
0,100,1611.00454v1,1,"[{'name': 'Hao Wang'}, {'name': 'Xingjian Shi'...",2,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",11,Hybrid methods that utilize both content and r...,"[{'term': 'cs.LG', 'scheme': 'http://arxiv.org...",Collaborative Recurrent Autoencoder: Recommend...,2016
3,100,1206.6434v1,1,"[{'name': 'Salah Rifai'}, {'name': 'Yoshua Ben...",27,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",6,The contractive auto-encoder learns a represen...,"[{'term': 'cs.LG', 'scheme': 'http://arxiv.org...",A Generative Process for Sampling Contractive ...,2012


In [20]:
# Show the recommended papers (metadata only; the predicted score column is dropped by recommend_papers).
predictions

Unnamed: 0,author,day,id,link,month,summary,tag,title,year
187,"[{'name': 'Suraj Srinivas'}, {'name': 'R. Venk...",21,1611.06791v1,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",11,Deep Neural Networks often require good regula...,"[{'term': 'cs.LG', 'scheme': 'http://arxiv.org...",Generalized Dropout,2016
446,"[{'name': 'Shuohang Wang'}, {'name': 'Jing Jia...",30,1512.08849v2,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",12,Natural language inference (NLI) is a fundamen...,"[{'term': 'cs.CL', 'scheme': 'http://arxiv.org...",Learning Natural Language Inference with LSTM,2015
5984,"[{'name': 'Nikhil Rao'}, {'name': 'Robert Nowa...",18,1402.4512v2,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",2,Classification with a sparsity constraint on t...,"[{'term': 'cs.LG', 'scheme': 'http://arxiv.org...",Classification with Sparse Overlapping Groups,2014
422,"[{'name': 'Pierre Stock'}, {'name': 'Moustapha...",30,1711.11443v1,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",11,ConvNets and Imagenet have driven the recent s...,"[{'term': 'cs.LG', 'scheme': 'http://arxiv.org...",ConvNets and ImageNet Beyond Accuracy: Explana...,2017
33,"[{'name': 'Tsung-Hsien Wen'}, {'name': 'David ...",15,1604.04562v3,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",4,Teaching machines to accomplish tasks by conve...,"[{'term': 'cs.CL', 'scheme': 'http://arxiv.org...",A Network-based End-to-End Trainable Task-orie...,2016
223,"[{'name': 'Ido Cohen'}, {'name': 'Eli David'},...",27,1711.09663v1,"[{'rel': 'related', 'href': 'http://dx.doi.org...",11,This paper presents a novel deep learning-base...,"[{'term': 'cs.CV', 'scheme': 'http://arxiv.org...",DeepBrain: Functional Representation of Neural...,2017
55,"[{'name': 'Baolin Peng'}, {'name': 'Kaisheng Y...",31,1506.00195v1,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",5,Recurrent Neural Networks (RNNs) have become i...,"[{'term': 'cs.CL', 'scheme': 'http://arxiv.org...",Recurrent Neural Networks with External Memory...,2015
15,"[{'name': 'Jeff Donahue'}, {'name': 'Philipp K...",31,1605.09782v7,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",5,The ability of the Generative Adversarial Netw...,"[{'term': 'cs.LG', 'scheme': 'http://arxiv.org...",Adversarial Feature Learning,2016
126,"[{'name': 'Yuntian Deng'}, {'name': 'Anssi Kan...",16,1609.04938v2,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",9,We present a neural encoder-decoder model to c...,"[{'term': 'cs.CV', 'scheme': 'http://arxiv.org...",Image-to-Markup Generation with Coarse-to-Fine...,2016
5965,"[{'name': 'Gang Niu'}, {'name': 'Bo Dai'}, {'n...",3,1402.0288v1,"[{'rel': 'alternate', 'href': 'http://arxiv.or...",2,"Given a hypothesis space, the large volume pri...","[{'term': 'cs.LG', 'scheme': 'http://arxiv.org...",Transductive Learning with Multi-class Volume ...,2014
