In [18]:
import pandas as pd

# User columns, using to read csv
u_cols = ['userID', 'Location', 'Age']
users = pd.read_csv('BX-CSV-Dump/BX-Users.csv', sep=';', names=u_cols, encoding='latin-1')

# Book columns, using to read CSV
b_cols = ['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication',
          'Publisher', 'Image-URL-S', 'Image-URL-M', 'Image-URL-L', '1', '2', '3', '4', '5', '6'
         ]
books = pd.read_csv('BX-CSV-Dump/BX-Books.csv', sep=',', names=b_cols, encoding='utf-8')
books.loc[:, 'ISBN':'Book-Author']

# Rating column, using to read CSV
r_cols = ["userId", "ISBN", "Rating"]
ratings = pd.read_csv('BX-CSV-Dump/BX-Book-Ratings.csv', sep=';', names=r_cols, encoding='latin-1')
ratings = ratings[1:]; users = users[1:]; books = books[1:]
ratings.head()

Unnamed: 0,userId,ISBN,Rating
1,276725,034545104X,0
2,276726,0155061224,5
3,276727,0446520802,0
4,276729,052165615X,3
5,276729,0521795028,6


In [19]:
import sys
default_stdout = sys.stdout
default_stderr = sys.stderr
reload(sys)
sys.stdout = default_stdout
sys.stderr = default_stderr
sys.setdefaultencoding('utf-8')
import graphlab
ratings['userId'] = ratings['userId'].astype(str)
ratings['ISBN'] = ratings['ISBN'].astype(str)
ratings['Rating'] = ratings['Rating'].astype(int)
data = graphlab.SFrame(ratings)

In [22]:
popularity_model = graphlab.popularity_recommender.create(data, user_id='userId', item_id='ISBN', target='Rating')
popularity_recomm = popularity_model.recommend(users=[276725, 276726],k=5)
popularity_recomm.print_rows(num_rows=50)

+--------+------------+-------+------+
| userId |    ISBN    | score | rank |
+--------+------------+-------+------+
| 276725 | 8530805461 |  10.0 |  1   |
| 276725 | 3821815191 |  10.0 |  2   |
| 276725 | 3125785006 |  10.0 |  3   |
| 276725 | 0699854289 |  10.0 |  4   |
| 276725 | 8440682697 |  10.0 |  5   |
| 276726 | 8530805461 |  10.0 |  1   |
| 276726 | 3821815191 |  10.0 |  2   |
| 276726 | 3125785006 |  10.0 |  3   |
| 276726 | 0699854289 |  10.0 |  4   |
| 276726 | 8440682697 |  10.0 |  5   |
+--------+------------+-------+------+
[10 rows x 4 columns]



In [23]:
ratings.groupby(by='ISBN')['Rating'].mean().sort_values(ascending=False).head(20)

ISBN
0862418917    10.0
8440682697    10.0
8440684096    10.0
1573540706    10.0
0871352427    10.0
157356303X    10.0
0871319853    10.0
1573660949    10.0
0440585732    10.0
0140193588    10.0
0440585031    10.0
1573800759    10.0
0140193529    10.0
8440688288    10.0
1573890049    10.0
8440690509    10.0
157392010X    10.0
0140192417    10.0
1573920630    10.0
087131789     10.0
Name: Rating, dtype: float64

In [24]:
train_data, test_data = data.random_split(.6, seed=5)

In [25]:
len(train_data), len(test_data)

(689219, 460561)

In [26]:
popularity_model = graphlab.popularity_recommender.create(train_data, user_id='userId', item_id='ISBN', target='Rating')
popularity_recomm = popularity_model.recommend(users=[276725, 276726],k=5)
popularity_recomm.print_rows(num_rows=50)

+--------+------------+-------+------+
| userId |    ISBN    | score | rank |
+--------+------------+-------+------+
| 276725 | 0375901582 |  10.0 |  1   |
| 276725 | 3125785006 |  10.0 |  2   |
| 276725 | 0699854289 |  10.0 |  3   |
| 276725 | 038076041X |  10.0 |  4   |
| 276725 | 8440682697 |  10.0 |  5   |
| 276726 | 0375901582 |  10.0 |  1   |
| 276726 | 3125785006 |  10.0 |  2   |
| 276726 | 0699854289 |  10.0 |  3   |
| 276726 | 038076041X |  10.0 |  4   |
| 276726 | 8440682697 |  10.0 |  5   |
+--------+------------+-------+------+
[10 rows x 4 columns]



In [43]:
#Train Model
item_sim_model = graphlab.item_similarity_recommender.create(train_data, user_id='userId', item_id='ISBN', target='Rating', similarity_type='pearson')

In [49]:
#Make Recommendations:
item_sim_recomm = item_sim_model.recommend(users=range(36000, 37000),k=5)
item_sim_recomm.print_rows(num_rows=3000)

+--------+------------+---------------+------+
| userId |    ISBN    |     score     | rank |
+--------+------------+---------------+------+
| 36000  | 0375901582 |      10.0     |  1   |
| 36000  | 3125785006 |      10.0     |  2   |
| 36000  | 0699854289 |      10.0     |  3   |
| 36000  | 038076041X |      10.0     |  4   |
| 36000  | 8440682697 |      10.0     |  5   |
| 36001  | 0375901582 |      10.0     |  1   |
| 36001  | 3125785006 |      10.0     |  2   |
| 36001  | 0699854289 |      10.0     |  3   |
| 36001  | 038076041X |      10.0     |  4   |
| 36001  | 8440682697 |      10.0     |  5   |
| 36002  | 0375901582 |      10.0     |  1   |
| 36002  | 3125785006 |      10.0     |  2   |
| 36002  | 0699854289 |      10.0     |  3   |
| 36002  | 038076041X |      10.0     |  4   |
| 36002  | 8440682697 |      10.0     |  5   |
| 36003  | 0375901582 |      10.0     |  1   |
| 36003  | 3125785006 |      10.0     |  2   |
| 36003  | 0699854289 |      10.0     |  3   |
| 36003  | 03

In [31]:
len(train_data), len(test_data)

(689219, 460561)

In [36]:
users.head(100)

Unnamed: 0,userID,Location,Age
1,1,"nyc, new york, usa",
2,2,"stockton, california, usa",18
3,3,"moscow, yukon territory, russia",
4,4,"porto, v.n.gaia, portugal",17
5,5,"farnborough, hants, united kingdom",
6,6,"santa monica, california, usa",61
7,7,"washington, dc, usa",
8,8,"timmins, ontario, canada",
9,9,"germantown, tennessee, usa",
10,10,"albacete, wisconsin, spain",26
