In [18]:
import graphlab
graphlab.canvas.set_target("ipynb")
# set canvas to show sframes and sgraphs in ipython notebook
import matplotlib.pyplot as plt
%matplotlib inline
In [24]:
data = graphlab.SFrame.read_csv('/Users/sara/github/data/ml-1m/ratings.dat', delimiter='\n', 
                                header=False)['X1'].apply(lambda x: x.split('::')).unpack()
for col in data.column_names():
    data[col] = data[col].astype(int)
data.rename({'X.0': 'user_id', 'X.1': 'movie_id', 'X.2': 'rating', 'X.3': 'timestamp'})
data.save('ratings')

users = graphlab.SFrame.read_csv('/Users/sara/github/data/ml-1m/users.dat', delimiter='\n', 
                                 header=False)['X1'].apply(lambda x: x.split('::')).unpack()
users.rename({'X.0': 'user_id', 'X.1': 'gender', 'X.2': 'age', 'X.3': 'occupation', 'X.4': 'zip-code'})
users['user_id'] = users['user_id'].astype(int)
users.save('users')

items = graphlab.SFrame.read_csv('/Users/sara/github/data/ml-1m/movies.dat', delimiter='\n', 
                                 header=False)['X1'].apply(lambda x: x.split('::')).unpack()
items.rename({'X.0': 'movie_id', 'X.1': 'title', 'X.2': 'genre'})
items['movie_id'] = items['movie_id'].astype(int)
items.save('items')
Finished parsing file /Users/sara/github/data/ml-1m/ratings.dat
Parsing completed. Parsed 100 lines in 0.835118 secs.
------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
Finished parsing file /Users/sara/github/data/ml-1m/ratings.dat
Parsing completed. Parsed 1000209 lines in 0.730755 secs.
Finished parsing file /Users/sara/github/data/ml-1m/users.dat
Parsing completed. Parsed 100 lines in 0.041599 secs.
------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
Finished parsing file /Users/sara/github/data/ml-1m/users.dat
Parsing completed. Parsed 6040 lines in 0.015945 secs.
Finished parsing file /Users/sara/github/data/ml-1m/movies.dat
Parsing completed. Parsed 100 lines in 0.035944 secs.
------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
Finished parsing file /Users/sara/github/data/ml-1m/movies.dat
Parsing completed. Parsed 3883 lines in 0.016024 secs.
In [25]:
data.show()
In [26]:
items.head()
Out[26]:
movie_id title genre
1 Toy Story (1995) Animation|Children's|Come
dy ...
2 Jumanji (1995) Adventure|Children's|Fant
asy ...
3 Grumpier Old Men (1995) Comedy|Romance
4 Waiting to Exhale (1995) Comedy|Drama
5 Father of the Bride Part
II (1995) ...
Comedy
6 Heat (1995) Action|Crime|Thriller
7 Sabrina (1995) Comedy|Romance
8 Tom and Huck (1995) Adventure|Children's
9 Sudden Death (1995) Action
10 GoldenEye (1995) Action|Adventure|Thriller
[10 rows x 3 columns]
In [28]:
data = data.join(items, on='movie_id')
In [29]:
data
Out[29]:
user_id movie_id rating timestamp title genre
1 1193 5 978300760 One Flew Over the
Cuckoo's Nest (1975) ...
Drama
1 661 3 978302109 James and the Giant Peach
(1996) ...
Animation|Children's|Musi
cal ...
1 914 3 978301968 My Fair Lady (1964) Musical|Romance
1 3408 4 978300275 Erin Brockovich (2000) Drama
1 2355 5 978824291 Bug's Life, A (1998) Animation|Children's|Come
dy ...
1 1197 3 978302268 Princess Bride, The
(1987) ...
Action|Adventure|Comedy|R
omance ...
1 1287 5 978302039 Ben-Hur (1959) Action|Adventure|Drama
1 2804 5 978300719 Christmas Story, A (1983) Comedy|Drama
1 594 4 978302268 Snow White and the Seven
Dwarfs (1937) ...
Animation|Children's|Musi
cal ...
1 919 4 978301368 Wizard of Oz, The (1939) Adventure|Children's|Dram
a|Musical ...
title.1 genre.1
One Flew Over the
Cuckoo's Nest (1975) ...
Drama
James and the Giant Peach
(1996) ...
Animation|Children's|Musi
cal ...
My Fair Lady (1964) Musical|Romance
Erin Brockovich (2000) Drama
Bug's Life, A (1998) Animation|Children's|Come
dy ...
Princess Bride, The
(1987) ...
Action|Adventure|Comedy|R
omance ...
Ben-Hur (1959) Action|Adventure|Drama
Christmas Story, A (1983) Comedy|Drama
Snow White and the Seven
Dwarfs (1937) ...
Animation|Children's|Musi
cal ...
Wizard of Oz, The (1939) Adventure|Children's|Dram
a|Musical ...
[1000209 rows x 8 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
In [30]:
(train_set, test_set) = data.random_split(0.95, seed=1)
In [31]:
m = graphlab.recommender.create(train_set, 'user_id', 'movie_id', 'rating')
Recsys training: model = ranking_factorization_recommender
Preparing data set.
    Data has 949852 observations with 6040 users and 3701 items.
    Data prepared in: 3.97115s
Training ranking_factorization_recommender for recommendations.
+--------------------------------+--------------------------------------------------+----------+
| Parameter                      | Description                                      | Value    |
+--------------------------------+--------------------------------------------------+----------+
| num_factors                    | Factor Dimension                                 | 32       |
| regularization                 | L2 Regularization on Factors                     | 1e-09    |
| solver                         | Solver used for training                         | adagrad  |
| linear_regularization          | L2 Regularization on Linear Coefficients         | 1e-09    |
| ranking_regularization         | Rank-based Regularization Weight                 | 0.25     |
| max_iterations                 | Maximum Number of Iterations                     | 25       |
+--------------------------------+--------------------------------------------------+----------+
  Optimizing model using SGD; tuning step size.
  Using 118731 / 949852 points for tuning the step size.
+---------+-------------------+------------------------------------------+
| Attempt | Initial Step Size | Estimated Objective Value                |
+---------+-------------------+------------------------------------------+
| 0       | 7.14286           | Not Viable                               |
| 1       | 1.78571           | Not Viable                               |
| 2       | 0.446429          | Not Viable                               |
| 3       | 0.111607          | 0.265423                                 |
| 4       | 0.0558036         | 0.408085                                 |
| 5       | 0.0279018         | 0.738093                                 |
+---------+-------------------+------------------------------------------+
| Final   | 0.111607          | 0.265423                                 |
+---------+-------------------+------------------------------------------+
Starting Optimization.
+---------+--------------+-------------------+-----------------------+-------------+
| Iter.   | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size   |
+---------+--------------+-------------------+-----------------------+-------------+
| Initial | 7.604ms      | 2.44712           | 1.11724               |             |
+---------+--------------+-------------------+-----------------------+-------------+
| 1       | 3.58s        | 1.27269           | 1.04299               | 0.111607    |
| 2       | 7.17s        | 0.797095          | 0.85519               | 0.111607    |
| 3       | 10.41s       | 0.687679          | 0.805035              | 0.111607    |
| 4       | 13.54s       | 0.635424          | 0.778665              | 0.111607    |
| 5       | 16.22s       | 0.603984          | 0.761812              | 0.111607    |
| 6       | 18.77s       | 0.582014          | 0.749613              | 0.111607    |
| 7       | 21.32s       | 0.565325          | 0.739962              | 0.111607    |
| 8       | 23.82s       | 0.552608          | 0.732597              | 0.111607    |
| 9       | 26.33s       | 0.542306          | 0.726429              | 0.111607    |
| 10      | 28.85s       | 0.533743          | 0.72121               | 0.111607    |
| 11      | 31.38s       | 0.52633           | 0.716699              | 0.111607    |
| 12      | 33.87s       | 0.519767          | 0.71265               | 0.111607    |
| 13      | 36.31s       | 0.513847          | 0.708915              | 0.111607    |
| 14      | 38.81s       | 0.509107          | 0.705975              | 0.111607    |
| 15      | 41.26s       | 0.50475           | 0.703172              | 0.111607    |
| 16      | 43.72s       | 0.501084          | 0.70088               | 0.111607    |
| 17      | 46.15s       | 0.497772          | 0.698774              | 0.111607    |
| 18      | 48.58s       | 0.494326          | 0.696493              | 0.111607    |
| 19      | 51.04s       | 0.491087          | 0.694412              | 0.111607    |
| 20      | 53.53s       | 0.488491          | 0.692727              | 0.111607    |
| 21      | 55.95s       | 0.48585           | 0.691005              | 0.111607    |
| 22      | 58.38s       | 0.483484          | 0.689435              | 0.111607    |
| 23      | 1m 0s        | 0.48171           | 0.688309              | 0.111607    |
| 24      | 1m 3s        | 0.47937           | 0.686755              | 0.111607    |
| 25      | 1m 6s        | 0.477543          | 0.685553              | 0.111607    |
+---------+--------------+-------------------+-----------------------+-------------+
Optimization Complete: Maximum number of passes through the data reached.
Computing final objective value and training RMSE.
       Final objective value: 0.455708
       Final training RMSE: 0.669415
In [32]:
m
Out[32]:
Class                            : RankingFactorizationRecommender

Schema
------
User ID                          : user_id
Item ID                          : movie_id
Target                           : rating
Additional observation features  : 5
User side features               : []
Item side features               : []

Statistics
----------
Number of observations           : 949852
Number of users                  : 6040
Number of items                  : 3701

Training summary
----------------
Training time                    : 80.0658

Model Parameters
----------------
Model class                      : RankingFactorizationRecommender
num_factors                      : 32
binary_target                    : 0
side_data_factorization          : 1
solver                           : auto
nmf                              : 0
max_iterations                   : 25

Regularization Settings
-----------------------
regularization                   : 0.0
regularization_type              : normal
linear_regularization            : 0.0
ranking_regularization           : 0.25
unobserved_rating_value          : -1.79769313486e+308
num_sampled_negative_examples    : 4
ials_confidence_scaling_type     : auto
ials_confidence_scaling_factor   : 1

Optimization Settings
---------------------
init_random_sigma                : 0.01
sgd_convergence_interval         : 4
sgd_convergence_threshold        : 0.0
sgd_max_trial_iterations         : 5
sgd_sampling_block_size          : 131072
sgd_step_adjustment_interval     : 4
sgd_step_size                    : 0.0
sgd_trial_sample_minimum_size    : 10000
sgd_trial_sample_proportion      : 0.125
step_size_decrease_rate          : 0.75
additional_iterations_if_unhealthy : 5
adagrad_momentum_weighting       : 0.9
num_tempering_iterations         : 4
tempering_regularization_start_value : 0.0
track_exact_loss                 : 0
In [33]:
m2 = graphlab.item_similarity_recommender.create(train_set, 'user_id', 'movie_id', 'rating',
                                 similarity_type='pearson')
Recsys training: model = item_similarity
Warning: Ignoring columns timestamp, title, genre, title.1, genre.1;
    To use these columns in scoring predictions, use a model that allows the use of additional features.
Preparing data set.
    Data has 949852 observations with 6040 users and 3701 items.
    Data prepared in: 1.01426s
Training model from provided data.
Gathering per-item and per-user statistics.
+--------------------------------+------------+
| Elapsed Time (Item Statistics) | % Complete |
+--------------------------------+------------+
| 32.965ms                       | 16.5       |
| 81.521ms                       | 100        |
+--------------------------------+------------+
Setting up lookup tables.
Processing data in one pass using dense lookup tables.
+-------------------------------------+------------------+-----------------+
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
+-------------------------------------+------------------+-----------------+
| 132.057ms                           | 0                | 0               |
| 1.13s                               | 14.75            | 548             |
| 2.13s                               | 32.25            | 1198            |
| 3.13s                               | 47.75            | 1768            |
| 4.13s                               | 65.25            | 2415            |
| 5.14s                               | 70.75            | 2620            |
| 6.19s                               | 74.75            | 2772            |
| 7.13s                               | 79.75            | 2960            |
| 8.14s                               | 87.75            | 3253            |
| 9.14s                               | 93.25            | 3453            |
| 10.19s                              | 99.5             | 3685            |
| 10.54s                              | 100              | 3701            |
+-------------------------------------+------------------+-----------------+
Finalizing lookup tables.
Generating candidate set for working with new users.
Finished training in 10.6912s
In [34]:
result = graphlab.recommender.util.compare_models(test_set, [m, m2],
                                            user_sample=.1, skip_set=train_set)
compare_models: using 562 users to estimate model performance
PROGRESS: Evaluate model M0

Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0871886120996 | 0.00940350918539 |
|   2    | 0.0862989323843 | 0.0169969961993  |
|   3    | 0.0788849347568 | 0.0214904134866  |
|   4    | 0.0693950177936 | 0.0286556788819  |
|   5    | 0.0658362989324 | 0.0364509581458  |
|   6    | 0.0640569395018 | 0.0416793857787  |
|   7    | 0.0620233858668 | 0.0453416604355  |
|   8    | 0.0618327402135 | 0.0505978282316  |
|   9    | 0.0589165678134 | 0.0548614094093  |
|   10   | 0.0574733096085 | 0.0576527047991  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 0.9322792765618511)

Per User RMSE (best)
+---------+-------+------------------+
| user_id | count |       rmse       |
+---------+-------+------------------+
|   5952  |   1   | 0.00274074805105 |
+---------+-------+------------------+
[1 rows x 3 columns]


Per User RMSE (worst)
+---------+-------+---------------+
| user_id | count |      rmse     |
+---------+-------+---------------+
|   913   |   3   | 3.18282554319 |
+---------+-------+---------------+
[1 rows x 3 columns]


Per Item RMSE (best)
+----------+-------+-------------------+
| movie_id | count |        rmse       |
+----------+-------+-------------------+
|   3327   |   1   | 0.000535245977211 |
+----------+-------+-------------------+
[1 rows x 3 columns]


Per Item RMSE (worst)
+----------+-------+---------------+
| movie_id | count |      rmse     |
+----------+-------+---------------+
|   3652   |   1   | 4.49104136955 |
+----------+-------+---------------+
[1 rows x 3 columns]

PROGRESS: Evaluate model M1

Precision and recall summary statistics by cutoff
+--------+----------------+-------------+
| cutoff | mean_precision | mean_recall |
+--------+----------------+-------------+
|   1    |      0.0       |     0.0     |
|   2    |      0.0       |     0.0     |
|   3    |      0.0       |     0.0     |
|   4    |      0.0       |     0.0     |
|   5    |      0.0       |     0.0     |
|   6    |      0.0       |     0.0     |
|   7    |      0.0       |     0.0     |
|   8    |      0.0       |     0.0     |
|   9    |      0.0       |     0.0     |
|   10   |      0.0       |     0.0     |
+--------+----------------+-------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 0.9588111429319057)

Per User RMSE (best)
+---------+-------+------------------+
| user_id | count |       rmse       |
+---------+-------+------------------+
|   5821  |   1   | 0.00493980551612 |
+---------+-------+------------------+
[1 rows x 3 columns]


Per User RMSE (worst)
+---------+-------+---------------+
| user_id | count |      rmse     |
+---------+-------+---------------+
|   5560  |   1   | 2.70117484953 |
+---------+-------+---------------+
[1 rows x 3 columns]


Per Item RMSE (best)
+----------+-------+------------------+
| movie_id | count |       rmse       |
+----------+-------+------------------+
|   1829   |   1   | 4.4408920985e-16 |
+----------+-------+------------------+
[1 rows x 3 columns]


Per Item RMSE (worst)
+----------+-------+---------------+
| movie_id | count |      rmse     |
+----------+-------+---------------+
|   519    |   1   | 3.08017915084 |
+----------+-------+---------------+
[1 rows x 3 columns]

In [35]:
m.get_similar_items([1287])  # movie_id is Ben-Hur
Out[35]:
movie_id similar score rank
1287 1250 0.645178675652 1
1287 1201 0.527256309986 2
1287 2299 0.52110517025 3
1287 1204 0.506636857986 4
1287 2529 0.490836203098 5
1287 1954 0.48522144556 6
1287 3737 0.484936207533 7
1287 1387 0.483054697514 8
1287 3030 0.466315209866 9
1287 2520 0.463885247707 10
[10 rows x 4 columns]
In [36]:
m.get_similar_items([1287]).join(items, on={'similar': 'movie_id'}).sort('rank')
Out[36]:
movie_id similar score rank title genre
1287 1250 0.645178675652 1 Bridge on the River Kwai,
The (1957) ...
Drama|War
1287 1201 0.527256309986 2 Good, The Bad and The
Ugly, The (1966) ...
Action|Western
1287 2299 0.52110517025 3 Battle of the Sexes, The
(1959) ...
Comedy
1287 1204 0.506636857986 4 Lawrence of Arabia (1962) Adventure|War
1287 2529 0.490836203098 5 Planet of the Apes (1968) Action|Sci-Fi
1287 1954 0.48522144556 6 Rocky (1976) Action|Drama
1287 3737 0.484936207533 7 Lonely Are the Brave
(1962) ...
Drama|Western
1287 1387 0.483054697514 8 Jaws (1975) Action|Horror
1287 3030 0.466315209866 9 Yojimbo (1961) Comedy|Drama|Western
1287 2520 0.463885247707 10 Airport (1970) Drama
[10 rows x 6 columns]
In [37]:
recs = m.recommend()
recommendations finished on 1000/6040 queries. users per second: 3715.92
recommendations finished on 2000/6040 queries. users per second: 3721.98
recommendations finished on 3000/6040 queries. users per second: 3694.54
recommendations finished on 4000/6040 queries. users per second: 3668.23
recommendations finished on 5000/6040 queries. users per second: 3749.62
recommendations finished on 6000/6040 queries. users per second: 3767.96
In [38]:
recs
Out[38]:
user_id movie_id score rank
1 318 4.74526386596 1
1 110 4.42536681033 2
1 356 4.22886579371 3
1 648 4.13073746003 4
1 590 4.10278884209 5
1 858 4.08946245051 6
1 3755 4.0492739927 7
1 380 4.02751833833 8
1 1259 4.00622065998 9
1 3471 3.99937391914 10
[60400 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
In [39]:
data[data['user_id'] == 4].join(items, on='movie_id')
Out[39]:
user_id movie_id rating timestamp title genre
4 260 5 978294199 Star Wars: Episode IV - A
New Hope (1977) ...
Action|Adventure|Fantasy
|Sci-Fi ...
4 480 4 978294008 Jurassic Park (1993) Action|Adventure|Sci-Fi
4 1036 4 978294282 Die Hard (1988) Action|Thriller
4 1097 4 978293964 E.T. the Extra-
Terrestrial (1982) ...
Children's|Drama|Fantasy
|Sci-Fi ...
4 1196 2 978294199 Star Wars: Episode V -
The Empire Strikes Back ...
Action|Adventure|Drama
|Sci-Fi|War ...
4 1198 5 978294199 Raiders of the Lost Ark
(1981) ...
Action|Adventure
4 1201 5 978294230 Good, The Bad and The
Ugly, The (1966) ...
Action|Western
4 1210 3 978293924 Star Wars: Episode VI -
Return of the Jedi (1 ...
Action|Adventure|Romance
|Sci-Fi|War ...
4 1214 4 978294260 Alien (1979) Action|Horror|Sci-
Fi|Thriller ...
4 1240 5 978294260 Terminator, The (1984) Action|Sci-Fi|Thriller
title.1 genre.1 title.2 genre.2
Star Wars: Episode IV - A
New Hope (1977) ...
Action|Adventure|Fantasy
|Sci-Fi ...
Star Wars: Episode IV - A
New Hope (1977) ...
Action|Adventure|Fantasy
|Sci-Fi ...
Jurassic Park (1993) Action|Adventure|Sci-Fi Jurassic Park (1993) Action|Adventure|Sci-Fi
Die Hard (1988) Action|Thriller Die Hard (1988) Action|Thriller
E.T. the Extra-
Terrestrial (1982) ...
Children's|Drama|Fantasy
|Sci-Fi ...
E.T. the Extra-
Terrestrial (1982) ...
Children's|Drama|Fantasy
|Sci-Fi ...
Star Wars: Episode V -
The Empire Strikes Back ...
Action|Adventure|Drama
|Sci-Fi|War ...
Star Wars: Episode V -
The Empire Strikes Back ...
Action|Adventure|Drama
|Sci-Fi|War ...
Raiders of the Lost Ark
(1981) ...
Action|Adventure Raiders of the Lost Ark
(1981) ...
Action|Adventure
Good, The Bad and The
Ugly, The (1966) ...
Action|Western Good, The Bad and The
Ugly, The (1966) ...
Action|Western
Star Wars: Episode VI -
Return of the Jedi (1 ...
Action|Adventure|Romance
|Sci-Fi|War ...
Star Wars: Episode VI -
Return of the Jedi (1 ...
Action|Adventure|Romance
|Sci-Fi|War ...
Alien (1979) Action|Horror|Sci-
Fi|Thriller ...
Alien (1979) Action|Horror|Sci-
Fi|Thriller ...
Terminator, The (1984) Action|Sci-Fi|Thriller Terminator, The (1984) Action|Sci-Fi|Thriller
[21 rows x 10 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
In [40]:
m.recommend(users=[4], k=20).join(items, on='movie_id')
Out[40]:
user_id movie_id score rank title genre
4 6 4.27746418518 18 Heat (1995) Action|Crime|Thriller
4 50 4.4049657125 9 Usual Suspects, The
(1995) ...
Crime|Thriller
4 457 4.39840461201 11 Fugitive, The (1993) Action|Thriller
4 608 4.71744032092 4 Fargo (1996) Crime|Drama|Thriller
4 750 4.89037424214 2 Dr. Strangelove or: How I
Learned to Stop Worrying ...
Sci-Fi|War
4 858 5.11956931538 1 Godfather, The (1972) Action|Crime|Drama
4 912 4.42332206554 8 Casablanca (1942) Drama|Romance|War
4 919 4.3301066 14 Wizard of Oz, The (1939) Adventure|Children's|Dram
a|Musical ...
4 1193 4.33210258074 13 One Flew Over the
Cuckoo's Nest (1975) ...
Drama
4 1204 4.27302726246 19 Lawrence of Arabia (1962) Adventure|War
[20 rows x 6 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
In [42]:
m.recommend
Out[42]:
<bound method RankingFactorizationRecommender.recommend of Class                            : RankingFactorizationRecommender

Schema
------
User ID                          : user_id
Item ID                          : movie_id
Target                           : rating
Additional observation features  : 5
User side features               : []
Item side features               : []

Statistics
----------
Number of observations           : 949852
Number of users                  : 6040
Number of items                  : 3701

Training summary
----------------
Training time                    : 80.0658

Model Parameters
----------------
Model class                      : RankingFactorizationRecommender
num_factors                      : 32
binary_target                    : 0
side_data_factorization          : 1
solver                           : auto
nmf                              : 0
max_iterations                   : 25

Regularization Settings
-----------------------
regularization                   : 0.0
regularization_type              : normal
linear_regularization            : 0.0
ranking_regularization           : 0.25
unobserved_rating_value          : -1.79769313486e+308
num_sampled_negative_examples    : 4
ials_confidence_scaling_type     : auto
ials_confidence_scaling_factor   : 1

Optimization Settings
---------------------
init_random_sigma                : 0.01
sgd_convergence_interval         : 4
sgd_convergence_threshold        : 0.0
sgd_max_trial_iterations         : 5
sgd_sampling_block_size          : 131072
sgd_step_adjustment_interval     : 4
sgd_step_size                    : 0.0
sgd_trial_sample_minimum_size    : 10000
sgd_trial_sample_proportion      : 0.125
step_size_decrease_rate          : 0.75
additional_iterations_if_unhealthy : 5
adagrad_momentum_weighting       : 0.9
num_tempering_iterations         : 4
tempering_regularization_start_value : 0.0
track_exact_loss                 : 0
>
In [43]:
recent_data = graphlab.SFrame()
recent_data['movie_id'] = [1291] 
recent_data['user_id'] = 99999
In [44]:
m2.recommend(users=[99999], new_observation_data=recent_data).join(items, on='movie_id').sort('rank')
Out[44]:
user_id movie_id score rank title genre
99999 3881 5.0 1 Bittersweet Motel (2000) Documentary
99999 1830 5.0 2 Follow the Bitch (1998) Comedy
99999 3382 5.0 3 Song of Freedom (1936) Drama
99999 3656 5.0 4 Lured (1947) Crime
99999 572 5.0 5 Foreign Student (1994) Drama
99999 3172 5.0 6 Ulysses (Ulisse) (1954) Adventure
99999 989 5.0 7 Schlafes Bruder (Brother
of Sleep) (1995) ...
Drama
99999 3233 5.0 8 Smashing Time (1967) Comedy
99999 787 5.0 9 Gate of Heavenly Peace,
The (1995) ...
Documentary
99999 3280 5.0 10 Baby, The (1973) Horror
[10 rows x 6 columns]
In [45]:
m.save('my_model')
In [46]:
m_again = graphlab.load_model('my_model')
In [47]:
m_again
Out[47]:
Class                            : RankingFactorizationRecommender

Schema
------
User ID                          : user_id
Item ID                          : movie_id
Target                           : rating
Additional observation features  : 5
User side features               : []
Item side features               : []

Statistics
----------
Number of observations           : 949852
Number of users                  : 6040
Number of items                  : 3701

Training summary
----------------
Training time                    : 80.0658

Model Parameters
----------------
Model class                      : RankingFactorizationRecommender
num_factors                      : 32
binary_target                    : 0
side_data_factorization          : 1
solver                           : auto
nmf                              : 0
max_iterations                   : 25

Regularization Settings
-----------------------
regularization                   : 0.0
regularization_type              : normal
linear_regularization            : 0.0
ranking_regularization           : 0.25
unobserved_rating_value          : -1.79769313486e+308
num_sampled_negative_examples    : 4
ials_confidence_scaling_type     : auto
ials_confidence_scaling_factor   : 1

Optimization Settings
---------------------
init_random_sigma                : 0.01
sgd_convergence_interval         : 4
sgd_convergence_threshold        : 0.0
sgd_max_trial_iterations         : 5
sgd_sampling_block_size          : 131072
sgd_step_adjustment_interval     : 4
sgd_step_size                    : 0.0
sgd_trial_sample_minimum_size    : 10000
sgd_trial_sample_proportion      : 0.125
step_size_decrease_rate          : 0.75
additional_iterations_if_unhealthy : 5
adagrad_momentum_weighting       : 0.9
num_tempering_iterations         : 4
tempering_regularization_start_value : 0.0
track_exact_loss                 : 0