In [78]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

 Note: `csr_matrix()` builds a Compressed Sparse Row (CSR) matrix


In [88]:
# Load the ratings dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("book_recom.csv")
# Preview the first rows to check the columns that were read.
data.head()

Unnamed: 0,userID,ISBN,bookRating,bookTitle,totalRatingCount,Location
0,276725,034545104X,0,Flesh Tones: A Novel,60,"tyler, texas, usa"
1,2313,034545104X,5,Flesh Tones: A Novel,60,"cincinnati, ohio, usa"
2,6543,034545104X,0,Flesh Tones: A Novel,60,"strafford, missouri, usa"
3,8680,034545104X,5,Flesh Tones: A Novel,60,"st. charles county, missouri, usa"
4,10314,034545104X,9,Flesh Tones: A Novel,60,"beaverton, oregon, usa"


In [81]:
# Per-column flag: True if that column contains at least one missing value.
data.isnull().any()

userID              False
ISBN                False
bookRating          False
bookTitle           False
totalRatingCount    False
Location            False
dtype: bool

In [80]:
# (number of rating rows, number of columns)
data.shape

(26500, 6)

In [9]:
data['bookTitle'].nunique()
# Number of distinct bookTitle strings in the dataset (198 in the recorded run).
# Titles are compared as text, so differently-titled editions of a book count separately.

198

In [10]:
# Number of rating rows per title, most-rated first.
# count() counts every non-null bookRating entry, so ratings of 0 are included.
top_rated_books = (
    data.groupby('bookTitle')['bookRating']
    .count()
    .sort_values(ascending=False)
)
# Keep the five most-rated titles as a two-column frame: title and its rating count.
top5 = (
    top_rated_books.head()
    .rename_axis('Books')
    .reset_index(name='Rating Count')
)
top5
# These are the 5 books that have received the most ratings,
# i.e. the most frequently rated ("trending") books in this dataset.

Unnamed: 0,Books,Rating Count
0,Wild Animus,1436
1,The Lovely Bones: A Novel,1052
2,The Da Vinci Code,745
3,A Painted House,736
4,The Summons,594


## Building Utility Matrix

### Taking only userid, book rating, book title
- creating a pivot table with columns as userID
- index as bookTitle
- values- bookRating
- filling 0 where there are no values for bookRating

In [12]:
# Utility matrix: one row per bookTitle, one column per userID, cell = bookRating.
# pivot_table aggregates duplicate (bookTitle, userID) pairs (default aggfunc is the mean),
# and fill_value=0 writes 0 wherever a user has no rating row for a book.
# NOTE(review): a filled 0 is indistinguishable from an explicit rating of 0 in the data.
um = data.pivot_table(index = 'bookTitle', columns = 'userID', values = 'bookRating', fill_value=0)
# Alternative kept for reference: plain pivot() does not aggregate and fails if a
# (bookTitle, userID) pair occurs more than once.
# um = data.pivot(index = 'bookTitle', columns = 'userID', values = 'bookRating').fillna(0)

In [13]:
# Preview the utility matrix: rows are book titles, columns are userIDs.
um.head()

userID,14,23,26,51,67,99,135,243,254,256,...,278483,278514,278535,278552,278582,278633,278740,278769,278773,278843
bookTitle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1st to Die: A Novel,0,0,0,0,0,0,0,0,0,0,...,0,0,10,0,0,0,0,0,0,0
"A Child Called \It\"": One Child's Courage to Survive""",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A Cold Heart: An Alex Delaware Novel,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A Kiss of Shadows (Meredith Gentry Novels (Paperback)),0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A Man in Full,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [44]:
# The row labels (book titles) are held in a pandas Index object.
type(um.index)

pandas.core.indexes.base.Index

- The utility matrix is mostly 0's with very few actual ratings, i.e. it is a sparse matrix
- A sparse matrix format stores only the non-zero values instead of every cell
- Conceptually, each stored value is a (row, column, value) triple
- Example: (0, 5, 10) means the cell at row 0, column 5 holds the value 10

In [90]:
# The utility matrix is still an ordinary (dense) pandas DataFrame at this point.
type(um)

pandas.core.frame.DataFrame

In [91]:
# (number of book titles, number of users)
um.shape

(198, 11337)

- We use csr_matrix to compress the utility matrix so that only the actual (non-zero) values are stored
- The original shape of the matrix is kept internally, but the data itself is stored in compressed form

In [66]:
# csr_matrix accepts a dense 2-D array (among other input forms);
# um.values is the DataFrame's underlying NumPy array.
book_data_matrix = csr_matrix(um.values) # dense array in, Compressed Sparse Row matrix out
# Displaying the object shows its shape, dtype and number of stored elements.
book_data_matrix

<198x11337 sparse matrix of type '<class 'numpy.int64'>'
	with 11110 stored elements in Compressed Sparse Row format>

In [92]:
# Unsupervised nearest-neighbour model over the book rows of the utility matrix.
# metric='cosine' ranks neighbours by cosine distance (smaller = more similar);
# other metrics such as 'euclidean' or 'manhattan' could be used instead.
# fit() returns the estimator itself, so construction and fitting can be chained.
# The input here is the sparse CSR matrix; a dense array would be accepted as well.
knn = NearestNeighbors(metric='cosine').fit(book_data_matrix)
knn

NearestNeighbors(algorithm='auto', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

# Recommendation for reading
- Find the 5 most similar books (maximum similarity means lowest distance) to the first book, '1st to Die: A Novel'
- The function kneighbors() is used to find the k nearest neighbors of a point
- We need to request 6 neighbors for book 1. The shortest possible distance is 0, which is the distance from book 1 to itself, so the book is returned as its own nearest neighbor. Hence we ask for 6 neighbors instead of 5 to get 5 similar books

In [93]:
# Selecting with a list keeps the result two-dimensional: a (1, n_users) array
# holding every user's rating for the first book (row 0 of the utility matrix).
# This 2-D layout is the form in which the query is later passed to kneighbors().
um.iloc[[0]].values

array([[0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [94]:
# Row position of the query book in um (0 is the first book in the index).
query_index1 = 0
# Ratings of that book by every user, reshaped to a single-row 2-D array
# because kneighbors() takes one row per query point.
query_vector = um.iloc[query_index1, :].values.reshape(1, -1)
# Ask for 6 neighbours: the query book itself is part of the fitted data and is
# expected to come back first (distance 0), leaving 5 other books.
# distances: cosine distance from the query book to each returned neighbour.
# indices:   row positions in um of those neighbours, in the same order.
distances, indices = knn.kneighbors(query_vector, n_neighbors=6)

In [95]:
# Display the cosine distances to the 6 returned neighbours (smallest first).
distances
# kneighbors returns 2-D arrays: one row per query point, one column per neighbour.

array([[0.        , 0.85554748, 0.89964868, 0.90149976, 0.90925394,
        0.92285717]])

In [96]:
# Display the row positions (in um) of the neighbours, aligned with `distances`.
indices

array([[  0, 110,  12, 188,  26,  78]], dtype=int64)

In [97]:
# Flatten the (1, 6) result into a 1-D array so it can be iterated position by position.
indices.flatten()

array([  0, 110,  12, 188,  26,  78], dtype=int64)

In [98]:
print("Recommendations for {0}\n".format(um.index[query_index1]))
# indices is 2-D (one row per query); flatten it to iterate over the neighbour positions.
# Skip position 0: that is the query book itself, returned with distance 0.
for i in indices.flatten()[1:]:
    # Look the title up in the utility matrix index; the previously used name
    # `book_names` is not defined anywhere in the notebook.
    print(um.index[i])

Recommendations for 1st to Die: A Novel

Roses Are Red (Alex Cross Novels)
Along Came a Spider (Alex Cross Novels)
Violets Are Blue
Cat &amp; Mouse (Alex Cross Novels)
Jack &amp; Jill (Alex Cross Novels)


- These are the 5 top books based on '1st to Die: A Novel'

- Find the users who have read the book '1st to Die: A Novel' and recommend the top 5 similar books to those who rated it higher than 5, i.e. the threshold rating is 5

In [70]:
# Row labels of the utility matrix: the book titles.
um.index

Index(['1st to Die: A Novel',
       'A Child Called \It\": One Child's Courage to Survive"',
       'A Cold Heart: An Alex Delaware Novel',
       'A Kiss of Shadows (Meredith Gentry Novels (Paperback))',
       'A Man in Full', 'A Painted House', 'A Pirate Looks at Fifty',
       'A Wizard of Earthsea (Earthsea Trilogy, Book 1)', 'A Wrinkle In Time',
       'About a Boy (Movie Tie-In)',
       ...
       'Violets Are Blue', 'Waiting to Exhale', 'Walk Two Moons',
       'While I Was Gone', 'Whirlwind (The X-Files)', 'Whispers',
       'White Teeth: A Novel', 'Wild Animus', 'Winter Solstice',
       'Writ of Execution'],
      dtype='object', name='bookTitle', length=198)

In [114]:
a = um.loc['1st to Die: A Novel']
# `a` holds the rating of '1st to Die: A Novel' for every user column of um
# (0 where the pivot filled in a missing rating).
# Show which distinct rating values occur for this book.
a.unique()

array([ 0,  8,  9,  7, 10,  6,  5,  4,  3], dtype=int64)

In [118]:
a[a.values>5]
# Users whose rating for '1st to Die: A Novel' is strictly greater than 5
# (index = userID, value = the rating they gave).

userID
6074       8
6543       9
9178       8
9417       7
9492      10
16795      9
17229      8
20445      6
27399      7
27472      8
28899      9
30276      8
33124      8
35859      7
37377      9
37874      7
43246      9
43626      7
45284      9
46417      7
48732      9
50784      9
51207     10
51450     10
52159      9
53220     10
55187     10
55487      7
57833      9
58224      9
          ..
227428     9
228764    10
230505     7
230949     6
232131     8
232945     8
234174     9
235282     8
236426     6
236606     9
236782     9
237089     8
240207    10
242143     9
243100     8
246513    10
250947     9
251378     9
254201     8
256915     7
256989     8
257419     9
258185     9
258534     6
261829     8
266056     7
266109    10
267642     8
273820     6
278535    10
Name: 1st to Die: A Novel, Length: 140, dtype: int64

In [120]:
index = a[a.values>5].index
index
# userIDs of the users who rated the book above 5; the 5 books similar to
# '1st to Die: A Novel' will be recommended to these users.

Int64Index([  6074,   6543,   9178,   9417,   9492,  16795,  17229,  20445,
             27399,  27472,
            ...
            256989, 257419, 258185, 258534, 261829, 266056, 266109, 267642,
            273820, 278535],
           dtype='int64', name='userID', length=140)

In [121]:
len(index)
# Number of users who will receive the 5 recommendations (140 in the recorded run).

140

In [125]:
# Header naming the query book.
query_title = um.index[query_index1]
print("Recommendations for {0}\n".format(query_title))

# Neighbour row positions as a 1-D array, without the leading entry
# (the query book itself).
neighbour_positions = indices.flatten()[1:]
for i in neighbour_positions:
    print(um.index[i])

# Audience: the users selected earlier as having rated the book above 5.
n_target_users = len(index)
print("\n To {} users: \n".format(n_target_users))
print(list(index))

Recommendations for 1st to Die: A Novel

Roses Are Red (Alex Cross Novels)
Along Came a Spider (Alex Cross Novels)
Violets Are Blue
Cat &amp; Mouse (Alex Cross Novels)
Jack &amp; Jill (Alex Cross Novels)

 To 140 users: 

[6074, 6543, 9178, 9417, 9492, 16795, 17229, 20445, 27399, 27472, 28899, 30276, 33124, 35859, 37377, 37874, 43246, 43626, 45284, 46417, 48732, 50784, 51207, 51450, 52159, 53220, 55187, 55487, 57833, 58224, 62542, 67288, 69389, 69512, 70414, 75825, 77480, 77856, 81216, 82497, 83287, 84024, 93363, 94965, 95359, 96843, 99204, 101041, 104144, 104636, 107853, 110912, 115435, 115948, 123115, 124048, 125519, 128085, 128915, 134761, 136382, 138232, 142579, 143175, 143294, 144727, 144953, 146386, 148344, 152651, 154944, 161744, 163134, 164858, 167934, 170652, 171697, 174367, 177458, 178035, 178834, 180658, 180927, 184152, 184513, 187262, 187624, 189516, 189558, 190925, 191178, 192428, 194719, 196202, 196886, 199515, 204591, 204753, 207750, 211359, 213150, 215820, 216442, 21828

## All users who rated the book 1st to Die: A Novel above 5 will be recommended these top 5 books

# Now finding the similar books to that book which has got maximum ratings

In [105]:
# Same computation as earlier in the notebook: rating rows per title, most-rated first.
# count() counts every non-null bookRating entry, so ratings of 0 are included.
top_rated_books = (
    data.groupby('bookTitle')['bookRating']
    .count()
    .sort_values(ascending=False)
)
# Five most-rated titles as a two-column frame: title and its rating count.
top5 = (
    top_rated_books.head()
    .rename_axis('Books')
    .reset_index(name='Rating Count')
)
top5
# These are the 5 books that have received the most ratings across all data,
# i.e. the most frequently rated ("trending") books.

Unnamed: 0,Books,Rating Count
0,Wild Animus,1436
1,The Lovely Bones: A Novel,1052
2,The Da Vinci Code,745
3,A Painted House,736
4,The Summons,594


## Getting similar books to these top 5 rated books

In [126]:
# Print the titles of the five most-rated books, one per line.
for i in top5['Books']:
    print(i)

Wild Animus
The Lovely Bones: A Novel
The Da Vinci Code
A Painted House
The Summons


In [134]:
# The neighbour model does not depend on which book is queried, so build and
# fit it once here instead of refitting it on every loop iteration.
knn = NearestNeighbors(metric='cosine')
knn.fit(book_data_matrix) # passing the compressed matrix

for i in top5['Books']:
    # Ratings given to this book by every user (one entry per userID column of um).
    u = um.loc[i]
    # kneighbors() takes one row per query point, hence the (1, n_users) reshape.
    top_book = u.values.reshape(1,-1)
    # 6 neighbours: the book itself is expected first, followed by 5 similar books.
    d, ind = knn.kneighbors(top_book, n_neighbors = 6)
    # Only users who rated this book above 5 receive the recommendations.
    user = u[u.values>5].index
    print("Similar books to {}: \n".format(i))
    # Skip the first neighbour (the queried book itself).
    for j in ind.flatten()[1:]:
        print(um.index[j])
    print()
    print("Number of users to which book these will be recommended are: ", len(user))
    print('*'*50)
    

Similar books to Wild Animus: 

The Law of Love
False Memory
The Bonesetter's Daughter
Back Roads
Flesh Tones: A Novel

Number of users to which book these will be recommended are:  109
**************************************************
Similar books to The Lovely Bones: A Novel: 

The Da Vinci Code
A Painted House
The Pilot's Wife : A Novel
She's Come Undone (Oprah's Book Club)
She's Come Undone (Oprah's Book Club (Paperback))

Number of users to which book these will be recommended are:  556
**************************************************
Similar books to The Da Vinci Code: 

The Lovely Bones: A Novel
Angels &amp; Demons
Harry Potter and the Order of the Phoenix (Book 5)
City of Bones
The Sweet Potato Queens' Book of Love

Number of users to which book these will be recommended are:  384
**************************************************
Similar books to A Painted House: 

The Summons
The Lovely Bones: A Novel
The Brethren
Breathing Lessons
The Chamber

Number of users to which bo