In [48]:
import numpy as np
import pandas as pd
import graphlab

In [49]:
graphlab.product_key.get_product_key('195F-D2D7-942A-6C7F-7054-9CF4-C2B2-803A')

In [50]:
r_cols = ['user_id', 'song_id', 'rating', 'unix_timestamp']
ratings_base = pd.read_csv('ml-100k/ml-100k/ua.base', sep='\t', names=r_cols, encoding='latin-1')
ratings_test = pd.read_csv('ml-100k/ml-100k/ua.test', sep='\t', names=r_cols, encoding='latin-1')
print ratings_base.shape
print ratings_test.shape

(90570, 4)
(9430, 4)


In [51]:
display (ratings_base.head())

Unnamed: 0,user_id,song_id,rating,unix_timestamp
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712


In [52]:
train_data = graphlab.SFrame(ratings_base)
test_data = graphlab.SFrame(ratings_test)

In [53]:
train_data['rating'].mean()

3.5238268742409184

In [54]:
train_data.head()

user_id,song_id,rating,unix_timestamp
1,1,5,874965758
1,2,3,876893171
1,3,4,878542960
1,4,3,876893119
1,5,3,889751712
1,6,5,887431973
1,7,4,875071561
1,8,1,875072484
1,9,5,878543541
1,10,3,875693118


In [55]:
train_data['song_id'].sketch_summary()



+--------------------+---------------+----------+
|        item        |     value     | is exact |
+--------------------+---------------+----------+
|       Length       |     90570     |   Yes    |
|        Min         |      1.0      |   Yes    |
|        Max         |     1682.0    |   Yes    |
|        Mean        | 428.104891244 |   Yes    |
|        Sum         |   38773460.0  |   Yes    |
|      Variance      | 110946.410374 |   Yes    |
| Standard Deviation | 333.086190608 |   Yes    |
|  # Missing Values  |       0       |   Yes    |
|  # unique values   |      1679     |    No    |
+--------------------+---------------+----------+

Most frequent items:
+-------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
| value |  50 | 100 | 181 | 258 | 286 | 294 |  1  | 288 | 121 | 174 |
+-------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
| count | 495 | 443 | 439 | 412 | 400 | 398 | 392 | 386 | 384 | 379 |
+-------+-----+-----+-----+-----+-----+-----+--

In [56]:
train_data.num_rows()

90570

In [57]:
train_data.show()

In [58]:
graphlab.canvas.set_target('ipynb')
train_data['song_id'].show()

In [59]:
train_data['song_id'].show(view='Categorical')

In [60]:
train_data['rating'].show(view='Categorical')

In [61]:
num_of_users = train_data['user_id'].unique()
num_of_songs = train_data['song_id']
print "Number of Unique Users"
print len(num_of_users)
print "Number of songs"
print len(num_of_songs)

Number of Unique Users
943
Number of songs
90570


In [62]:
users = train_data['user_id'].unique()
print len(users)

943


## Prepare data to run Popularity Model and Collabarative Filtering Model

## Popularity based Recommendation

In [63]:
popularity_model = graphlab.popularity_recommender.create(train_data, user_id='user_id', item_id='song_id',target='rating')

### Prediction using popuarity Based Model

In [64]:
a = popularity_model.recommend(users=range(1,6),k=5)

In [65]:
a.print_rows(num_rows=100, num_columns=4)

+---------+---------+-------+------+
| user_id | song_id | score | rank |
+---------+---------+-------+------+
|    1    |   1467  |  5.0  |  1   |
|    1    |   1201  |  5.0  |  2   |
|    1    |   1189  |  5.0  |  3   |
|    1    |   1122  |  5.0  |  4   |
|    1    |   814   |  5.0  |  5   |
|    2    |   1467  |  5.0  |  1   |
|    2    |   1201  |  5.0  |  2   |
|    2    |   1189  |  5.0  |  3   |
|    2    |   1122  |  5.0  |  4   |
|    2    |   814   |  5.0  |  5   |
|    3    |   1467  |  5.0  |  1   |
|    3    |   1201  |  5.0  |  2   |
|    3    |   1189  |  5.0  |  3   |
|    3    |   1122  |  5.0  |  4   |
|    3    |   814   |  5.0  |  5   |
|    4    |   1467  |  5.0  |  1   |
|    4    |   1201  |  5.0  |  2   |
|    4    |   1189  |  5.0  |  3   |
|    4    |   1122  |  5.0  |  4   |
|    4    |   814   |  5.0  |  5   |
|    5    |   1467  |  5.0  |  1   |
|    5    |   1201  |  5.0  |  2   |
|    5    |   1189  |  5.0  |  3   |
|    5    |   1122  |  5.0  |  4   |
|

In [66]:
eval = popularity_model.evaluate(test_data)


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    |        0.0        |        0.0        |
|   3    | 0.000353481795688 | 0.000106044538706 |
|   4    | 0.000265111346766 | 0.000106044538706 |
|   5    | 0.000212089077413 | 0.000106044538706 |
|   6    | 0.000176740897844 | 0.000106044538706 |
|   7    | 0.000302984396304 | 0.000212089077413 |
|   8    | 0.000265111346766 | 0.000212089077413 |
|   9    | 0.000235654530458 | 0.000212089077413 |
|   10   | 0.000212089077413 | 0.000212089077413 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.041764796943981)

Per User RMSE (best)
+---------+-------+----------------+
| user_id | count |      rmse      |
+---------+-------+----------------+
|    2    |   10  | 0.390590992997 |
+-

In [67]:
popularity_model.evaluate_precision_recall(test_data)

{'precision_recall_by_user': Columns:
 	user_id	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 16974
 
 Data:
 +---------+--------+-----------+--------+-------+
 | user_id | cutoff | precision | recall | count |
 +---------+--------+-----------+--------+-------+
 |    1    |   1    |    0.0    |  0.0   |   10  |
 |    1    |   2    |    0.0    |  0.0   |   10  |
 |    1    |   3    |    0.0    |  0.0   |   10  |
 |    1    |   4    |    0.0    |  0.0   |   10  |
 |    1    |   5    |    0.0    |  0.0   |   10  |
 |    1    |   6    |    0.0    |  0.0   |   10  |
 |    1    |   7    |    0.0    |  0.0   |   10  |
 |    1    |   8    |    0.0    |  0.0   |   10  |
 |    1    |   9    |    0.0    |  0.0   |   10  |
 |    1    |   10   |    0.0    |  0.0   |   10  |
 +---------+--------+-----------+--------+-------+
 [16974 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and colum

In [68]:
popularity_model.evaluate_rmse(test_data, target='rating')

{'rmse_by_item': Columns:
 	song_id	int
 	count	int
 	rmse	float
 
 Rows: 1129
 
 Data:
 +---------+-------+----------------+
 | song_id | count |      rmse      |
 +---------+-------+----------------+
 |   118   |   28  | 0.966448015555 |
 |   1029  |   1   | 1.07692307692  |
 |   435   |   17  | 0.867256255757 |
 |   537   |   1   | 0.655172413793 |
 |   526   |   10  | 1.24223658737  |
 |   232   |   8   |  1.0906825348  |
 |   310   |   27  | 1.14912508971  |
 |    49   |   2   | 1.30818360173  |
 |    13   |   20  | 1.05746545784  |
 |   511   |   15  | 0.762901986712 |
 +---------+-------+----------------+
 [1129 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'rmse_by_user': Columns:
 	user_id	int
 	count	int
 	rmse	float
 
 Rows: 943
 
 Data:
 +---------+-------+----------------+
 | user_id | count |      rmse      |
 +---------+-------+----------------+
 |   118   |   10  |  1

In [69]:
popularity_model.get_current_options()

{'item_id': 'song_id',
 'random_seed': 1L,
 'target': 'rating',
 'user_id': 'user_id'}

In [70]:
popularity_model.get_num_items_per_user()

user_id,num_items
1,262
2,52
3,44
4,14
5,165
6,201
7,393
8,49
9,12
10,174


In [71]:
popularity_model.get_num_users_per_item()

song_id,num_users
1,392
2,121
3,85
4,198
5,79
6,23
7,346
8,194
9,268
10,82


In [72]:
popularity_model.get_similar_items()

song_id,similar,score,rank
1,663,0.999999998262,1
1,969,0.999999754391,2
1,896,0.999999754391,3
1,434,0.999999204228,4
1,61,0.999999204228,5
1,1073,0.999997117499,6
1,144,0.999994054697,7
1,500,0.999994054697,8
1,521,0.999993084995,9
1,265,0.99999095857,10


In [73]:
popularity_model.list_fields()

['data_load_time',
 'item_id',
 'item_predictions',
 'item_side_data_column_names',
 'item_side_data_column_types',
 'model_name',
 'num_features',
 'num_item_side_features',
 'num_items',
 'num_observations',
 'num_user_side_features',
 'num_users',
 'observation_data_column_names',
 'random_seed',
 'target',
 'training_rmse',
 'training_time',
 'user_id',
 'user_side_data_column_names',
 'user_side_data_column_types']

In [74]:
popularity_model.predict(test_data)

dtype: float
Rows: 9430
[3.3442622950819674, 3.460674157303371, 3.857142857142857, 3.690625, 3.09375, 3.4262295081967213, 3.9298245614035086, 4.135593220338983, 3.755813953488372, 3.868292682926829, 3.451219512195122, 4.365656565656566, 4.2439024390243905, 3.1466666666666665, 3.130081300813008, 3.077777777777778, 3.5795454545454546, 3.8974358974358974, 3.1363636363636362, 1.0, 3.0964467005076144, 3.1155778894472363, 2.9010416666666665, 3.4327731092436973, 3.5681818181818183, 3.412280701754386, 3.480769230769231, 2.764705882352941, 3.1176470588235294, 3.0476190476190474, 4.365656565656566, 2.607142857142857, 2.7228915662650603, 3.4740932642487046, 3.1155778894472363, 3.6788990825688073, 3.433333333333333, 3.359550561797753, 4.292181069958848, 2.8333333333333335, 3.8596938775510203, 3.1983471074380163, 3.0941176470588236, 4.311428571428571, 2.5517241379310347, 2.94, 2.75, 1.2941176470588236, 1.0, 2.923076923076923, 3.9565217391304346, 4.114457831325301, 3.8493150684931505, 3.934306569343

In [75]:
popularity_model.recommend()

user_id,song_id,score,rank
1,1656,5.0,1
1,1536,5.0,2
1,1293,5.0,3
1,1500,5.0,4
1,1599,5.0,5
1,1467,5.0,6
1,1201,5.0,7
1,1189,5.0,8
1,1122,5.0,9
1,814,5.0,10


In [76]:
data = train_data.select_columns(['song_id','rating'])
popularity_model.recommend_from_interactions(data)


song_id,score,rank
1582,3.52382687424,1
1653,3.52382687424,2


In [77]:
popularity_model.show()

In [78]:
popularity_model.summary()

Class                            : PopularityRecommender

Schema
------
User ID                          : user_id
Item ID                          : song_id
Target                           : rating
Additional observation features  : 0
User side features               : []
Item side features               : []

Statistics
----------
Number of observations           : 90570
Number of users                  : 943
Number of items                  : 1680

Training summary
----------------
Training time                    : 0.014

Model Parameters
----------------
Model class                      : PopularityRecommender



## Build a song recommender with personalization (personalized recommendations to each user)

In [79]:
#Train Model
personalized_model = graphlab.item_similarity_recommender.create(train_data, user_id='user_id', item_id='song_id', target='rating', similarity_type='cosine')

#Make Recommendations:
item_sim_recomm = personalized_model.recommend(users=range(1,6),k=5)
item_sim_recomm.print_rows(num_rows=25)

+---------+---------+----------------+------+
| user_id | song_id |     score      | rank |
+---------+---------+----------------+------+
|    1    |   423   | 0.98515109388  |  1   |
|    1    |   202   | 0.943134688012 |  2   |
|    1    |   655   | 0.799626473025 |  3   |
|    1    |   403   | 0.762677106694 |  4   |
|    1    |   568   | 0.748210583252 |  5   |
|    2    |    50   | 1.12562584877  |  1   |
|    2    |   181   | 1.06517731685  |  2   |
|    2    |    7    | 0.999819083856 |  3   |
|    2    |   121   | 0.941627963231 |  4   |
|    2    |   117   | 0.792605129572 |  5   |
|    3    |   313   | 0.635376662016 |  1   |
|    3    |   328   | 0.603288030083 |  2   |
|    3    |   315   | 0.542258712378 |  3   |
|    3    |   331   | 0.535507185893 |  4   |
|    3    |   332   | 0.531669611281 |  5   |
|    4    |    50   | 1.13114770821  |  1   |
|    4    |   288   | 1.04871511459  |  2   |
|    4    |   181   | 0.950599938631 |  3   |
|    4    |    7    | 0.9417778807

### Finding  similar songs to any song in the dataset

In [80]:
personalized_model.get_similar_items()

song_id,similar,score,rank
1,50,0.651777803898,1
1,181,0.636943340302,2
1,121,0.635370850563,3
1,174,0.592788994312,4
1,405,0.591268002987,5
1,237,0.586470067501,6
1,222,0.583917498589,7
1,100,0.580404996872,8
1,151,0.578688085079,9
1,117,0.578327476978,10


# Quantitative comparison between the models

We now formally compare the popularity and the personalized models using precision-recall curves. 

In [81]:
if graphlab.version[:3] >= "1.6":
    model_performance = graphlab.compare(test_data, [popularity_model, personalized_model], user_sample=0.05)
    graphlab.show_comparison(model_performance,[popularity_model, personalized_model])
else:
    %matplotlib inline
    model_performance = graphlab.recommender.util.compare_models(test_data, [popularity_model, personalized_model], user_sample=.05)

compare_models: using 47 users to estimate model performance
PROGRESS: Evaluate model M0

Precision and recall summary statistics by cutoff
+--------+----------------+-------------+
| cutoff | mean_precision | mean_recall |
+--------+----------------+-------------+
|   1    |      0.0       |     0.0     |
|   2    |      0.0       |     0.0     |
|   3    |      0.0       |     0.0     |
|   4    |      0.0       |     0.0     |
|   5    |      0.0       |     0.0     |
|   6    |      0.0       |     0.0     |
|   7    |      0.0       |     0.0     |
|   8    |      0.0       |     0.0     |
|   9    |      0.0       |     0.0     |
|   10   |      0.0       |     0.0     |
+--------+----------------+-------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1

Precision and recall summary statistics by cutoff
+--------+----------------+-----------------+
| cutoff | mean_precision |   mean_recall   |
+--------+----------------+-----------------+
|   1    | 0.31914893617  |  0.0

In [82]:
train_data.print_rows(num_rows=20, num_columns=4)

+---------+---------+--------+----------------+
| user_id | song_id | rating | unix_timestamp |
+---------+---------+--------+----------------+
|    1    |    1    |   5    |   874965758    |
|    1    |    2    |   3    |   876893171    |
|    1    |    3    |   4    |   878542960    |
|    1    |    4    |   3    |   876893119    |
|    1    |    5    |   3    |   889751712    |
|    1    |    6    |   5    |   887431973    |
|    1    |    7    |   4    |   875071561    |
|    1    |    8    |   1    |   875072484    |
|    1    |    9    |   5    |   878543541    |
|    1    |    10   |   3    |   875693118    |
|    1    |    11   |   2    |   875072262    |
|    1    |    12   |   5    |   878542960    |
|    1    |    13   |   5    |   875071805    |
|    1    |    14   |   5    |   874965706    |
|    1    |    15   |   5    |   875071608    |
|    1    |    16   |   5    |   878543541    |
|    1    |    17   |   3    |   875073198    |
|    1    |    18   |   4    |   8874320

In [83]:
eval = personalized_model.evaluate(test_data)


Precision and recall summary statistics by cutoff
+--------+----------------+-----------------+
| cutoff | mean_precision |   mean_recall   |
+--------+----------------+-----------------+
|   1    | 0.386002120891 | 0.0386002120891 |
|   2    | 0.332979851538 | 0.0665959703075 |
|   3    | 0.291975963238 | 0.0875927889714 |
|   4    | 0.265906680806 |  0.106362672322 |
|   5    | 0.24750795334  |  0.12375397667  |
|   6    | 0.228525980912 |  0.137115588547 |
|   7    | 0.214512952583 |  0.150159066808 |
|   8    | 0.202014846235 |  0.161611876988 |
|   9    | 0.19288323318  |  0.173594909862 |
|   10   | 0.184729586426 |  0.184729586426 |
+--------+----------------+-----------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 3.3607523731839795)

Per User RMSE (best)
+---------+-------+---------------+
| user_id | count |      rmse     |
+---------+-------+---------------+
|   774   |   10  | 1.62714214779 |
+---------+-------+---------------+
[1 rows x 3 columns]


Per User RMSE (w

In [84]:
personalized_model.evaluate_precision_recall(test_data)

{'precision_recall_by_user': Columns:
 	user_id	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 16974
 
 Data:
 +---------+--------+----------------+--------+-------+
 | user_id | cutoff |   precision    | recall | count |
 +---------+--------+----------------+--------+-------+
 |    1    |   1    |      0.0       |  0.0   |   10  |
 |    1    |   2    |      0.5       |  0.1   |   10  |
 |    1    |   3    | 0.333333333333 |  0.1   |   10  |
 |    1    |   4    |      0.25      |  0.1   |   10  |
 |    1    |   5    |      0.2       |  0.1   |   10  |
 |    1    |   6    | 0.166666666667 |  0.1   |   10  |
 |    1    |   7    | 0.285714285714 |  0.2   |   10  |
 |    1    |   8    |      0.25      |  0.2   |   10  |
 |    1    |   9    | 0.222222222222 |  0.2   |   10  |
 |    1    |   10   |      0.2       |  0.2   |   10  |
 +---------+--------+----------------+--------+-------+
 [16974 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can 

In [85]:
personalized_model.evaluate_rmse(test_data, target='rating')

{'rmse_by_item': Columns:
 	song_id	int
 	count	int
 	rmse	float
 
 Rows: 1129
 
 Data:
 +---------+-------+---------------+
 | song_id | count |      rmse     |
 +---------+-------+---------------+
 |   118   |   28  | 2.59556047827 |
 |   1029  |   1   |      1.0      |
 |   435   |   17  | 3.54962916373 |
 |   537   |   1   |      3.0      |
 |   526   |   10  | 3.34972002123 |
 |   232   |   8   | 3.20880854773 |
 |   310   |   27  | 3.26215122354 |
 |    49   |   2   | 4.45517242059 |
 |    13   |   20  | 2.93875508089 |
 |   511   |   15  | 4.15132274608 |
 +---------+-------+---------------+
 [1129 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'rmse_by_user': Columns:
 	user_id	int
 	count	int
 	rmse	float
 
 Rows: 943
 
 Data:
 +---------+-------+---------------+
 | user_id | count |      rmse     |
 +---------+-------+---------------+
 |   118   |   10  | 4.32895039091 |
 | 

In [86]:
personalized_model.get_current_options()

{'degree_approximation_threshold': 4096L,
 'item_id': 'song_id',
 'max_data_passes': 4096L,
 'max_item_neighborhood_size': 64L,
 'nearest_neighbors_interaction_proportion_threshold': 0.05,
 'seed_item_set_size': 50L,
 'similarity_type': 'cosine',
 'sparse_density_estimation_sample_size': 4096L,
 'target': 'rating',
 'target_memory_usage': 8589934592L,
 'threshold': 0.001,
 'training_method': 'auto',
 'user_id': 'user_id'}

In [87]:
personalized_model.get_num_users_per_item()
## An SFrame with a column containing each observed item and another column containing the corresponding number of items observed during training.


song_id,num_users
1,392
2,121
3,85
4,198
5,79
6,23
7,346
8,194
9,268
10,82


In [88]:
personalized_model.get_num_items_per_user()
##An SFrame with a column containing each observed user and another column containing the corresponding number of items observed during training.

user_id,num_items
1,262
2,52
3,44
4,14
5,165
6,201
7,393
8,49
9,12
10,174


In [89]:
nn = personalized_model.get_similar_items()
nn.show()

In [43]:
personalized_model.list_fields()
##Get the current settings of the model. The keys depend on the type of model

['data_load_time',
 'degree_approximation_threshold',
 'item_id',
 'item_side_data_column_names',
 'item_side_data_column_types',
 'max_data_passes',
 'max_item_neighborhood_size',
 'model_name',
 'nearest_neighbors_interaction_proportion_threshold',
 'num_features',
 'num_item_side_features',
 'num_items',
 'num_observations',
 'num_user_side_features',
 'num_users',
 'observation_data_column_names',
 'seed_item_set_size',
 'similarity_type',
 'sparse_density_estimation_sample_size',
 'target',
 'target_memory_usage',
 'threshold',
 'training_method',
 'training_rmse',
 'training_time',
 'user_id',
 'user_side_data_column_names',
 'user_side_data_column_types']

In [44]:
model_name = personalized_model.name()
print model_name

ItemSimilarityRecommender


In [45]:
data = train_data.select_columns(['song_id','rating'])
personalized_model.recommend_from_interactions(data)

song_id,score,rank
1582,0.0,1
1653,0.0,2


In [46]:
personalized_model.show()

In [47]:
personalized_model.summary()

Class                            : ItemSimilarityRecommender

Schema
------
User ID                          : user_id
Item ID                          : song_id
Target                           : rating
Additional observation features  : 0
User side features               : []
Item side features               : []

Statistics
----------
Number of observations           : 90570
Number of users                  : 943
Number of items                  : 1680

Training summary
----------------
Training time                    : 1.2471

Model Parameters
----------------
Model class                      : ItemSimilarityRecommender
threshold                        : 0.001
similarity_type                  : cosine
training_method                  : auto

Other Settings
--------------
degree_approximation_threshold   : 4096
sparse_density_estimation_sample_size : 4096
max_data_passes                  : 4096
target_memory_usage              : 8589934592
seed_item_set_size               : 50
near

similarity_graph = graphlab.SGraph().add_edges(similar_songs,src_field='song_id',dst_field = 'similar')
similarity_graph.summary()

## Rating Performance_Factorization recommender

In [None]:
# Train the model
factorization_model = graphlab.recommender.factorization_recommender.create(train_data, user_id='user_id', item_id='song_id', target='rating')


In [None]:

# Evaluate the model
rmse_data = factorization_model.evaluate_rmse(test_data, target="rating")

In [None]:
# Print the results
print rmse_data

## Creating a ranking_factorization_recommender (Optimising)

In [None]:
rec_model = graphlab.recommender.create(train_data,user_id='user_id', item_id='song_id', target='rating')

In [None]:
# Evaluate the model
rmse_data_new = rec_model.evaluate_rmse(test_data, target="rating")


In [None]:
# Print the results
print rmse_data_new

In [None]:
recommendations = rec_model.recommend()

In [None]:
#print recommendations()

In [None]:
rec_model['coefficients']

In [None]:
view = rec_model.views.description()
view.show()

In [None]:

newview = rec_model.views.evaluate(test_data)
newview.show()

In [None]:
items = train_data.groupby('song_id', graphlab.aggregate.MEAN('rating'))


view_1 = rec_model.views.explore(item_data=items, item_name_column='song_id')
view_1.show()

In [None]:
items = train_data.groupby('song_id', graphlab.aggregate.MEAN('rating'))

view = rec_model.views.overview(
        validation_set=test_data,
        item_data=items,
        item_name_column='song_id')
view.show()

## Recommending

In [None]:
recommendations = rec_model.recommend(users=range(1,6),k=3)

In [None]:
song_recommendations = recommendations.join(train_data, on="song_id", how="inner").sort("song_id")
# Show the results
#print song_recommendations
song_recommendations.print_rows(num_rows=30, num_columns=7)

In [None]:
if graphlab.version[:3] >= "1.6":
    model_performance = graphlab.compare(test_data, [factorization_model, rec_model], user_sample=0.05)
    graphlab.show_comparison(model_performance,[popularity_model, personalized_model])
else:
    %matplotlib inline
    model_performance = graphlab.recommender.util.compare_models(test_data, [factorization_model, rec_model], user_sample=.05)

## Content_Recommendations

In [None]:
m = graphlab.recommender.item_content_recommender.create(train_data, "song_id")
m.recommend_from_interactions([0])

In [None]:
out_sframe = graphlab.recommender.item_content_recommender.get_default_options()
print out_sframe

In [None]:
#eval = m.evaluate(song_data_test)

In [None]:
#m.evaluate_precision_recall(test_data)

In [None]:
#m.evaluate_rmse(test_data, target='rating')

In [None]:
m.get_current_options()

In [None]:
m.get_num_items_per_user()

In [None]:
m.get_num_users_per_item()

In [None]:
m.get_similar_items()

In [None]:
m.list_fields()

In [None]:
m.predict(song_data_test)

In [None]:
m.recommend()

In [None]:
m.show()

In [None]:
m.summary()

## UV 

In [None]:
R_df = train_data.pivot(index = 'user_id', columns ='song_id', values = 'rating').fillna(0)
R_df.head()

In [None]:
R = R_df.as_matrix()
user_ratings_mean = np.mean(R, axis = 1)
R_demeaned = R - user_ratings_mean.reshape(-1, 1)
#print R_df.columns

In [None]:
from scipy.sparse.linalg import svds
U, sigma, Vt = svds(R_demeaned, k = 50)

In [None]:
sigma = np.diag(sigma)

In [None]:
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = R_df.columns)
display(preds_df.head())


In [None]:
already_rated.head(10)

In [None]:
predictions

## #####################################************************