## Model-based Collaborative Filtering System
## Using SVD Matrix Factorization

In [1]:
import numpy as np
import pandas as pd

import sklearn
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the user/book ratings table.
# ISBN is read explicitly as text: values such as '034545104X' are not numeric,
# and the column is later used as a merge key and compared against a string
# literal, so its dtype should not be left to per-file type inference.
Ratings = pd.read_csv('Ratings.csv', dtype={'ISBN': str})

In [3]:
# Preview the first rows of the ratings table (User-ID, ISBN, Book-Rating).
Ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,155061224,5
2,276727,446520802,0
3,276729,052165615X,3
4,276729,521795028,6


In [4]:
# Load the book catalogue (ISBN -> title).
# ISBN is read explicitly as text so it has the same dtype as the ISBN column
# of the ratings table when the two are merged on that key.
Books = pd.read_csv('Books.csv', dtype={'ISBN': str})

In [5]:
# Preview the first rows of the book catalogue (ISBN, Book-Title).
Books.head()

Unnamed: 0,ISBN,Book-Title
0,195153448,Classical Mythology
1,2005018,Clara Callan
2,60973129,Decision in Normandy
3,374157065,Flu: The Story of the Great Influenza Pandemic...
4,393045218,The Mummies of Urumchi


In [6]:
# Attach a title to every rating by joining the two tables on their shared
# ISBN key. Inner join: ratings whose ISBN is absent from Books are dropped.
combined_data = Ratings.merge(Books, how='inner', on='ISBN')
combined_data.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,276726,155061224,5,Rites of Passage
2,276727,446520802,0,The Notebook
3,276729,052165615X,3,Help!: Level 1
4,276729,521795028,6,The Amsterdam Connection : Level 4 (Cambridge ...


In [7]:
# Count the ratings recorded for each ISBN, then show the most-rated ones first.
ratings_per_isbn = combined_data.groupby('ISBN')['Book-Rating'].count()
ratings_per_isbn.sort_values(ascending=False).head()

ISBN
971880107    7
316666343    5
440241073    3
425175405    3
385504209    3
Name: Book-Rating, dtype: int64

In [8]:
# Look up the title(s) stored for the most-rated ISBN.
# The boolean mask gets a descriptive name instead of `filter`, which would
# shadow Python's built-in filter() for the rest of the kernel session.
is_top_isbn = combined_data['ISBN'] == '971880107'
# Select rows and column in one .loc call rather than chained indexing.
combined_data.loc[is_top_isbn, 'Book-Title'].unique()

array(['Wild Animus'], dtype=object)

In [9]:
# Build utility matrix: one row per User-ID, one column per Book-Title.
# Several ratings for the same user/title pair are averaged (pivot_table's
# default aggregation, spelled out here); pairs with no rating are filled with 0.
rating_crosstab = combined_data.pivot_table(
    index='User-ID',
    columns='Book-Title',
    values='Book-Rating',
    aggfunc='mean',
    fill_value=0,
)
rating_crosstab.head()

Book-Title,007 El Mundo Nunca Es Suficiente,01-01-00: The Novel of the Millennium,101 Dalmatians,101 Great Resumes,1421 : The Year China Discovered America,1st to Die: A Novel,24 Hours,"28 Barbary Lane : A \Tales of the City\"" Omnibus""",2nd Chance,31 Days of Praise : Enjoying God Anew,...,World's End,Writ of Execution,Writer's Digest Handbook of Magazine Article Writing,Year of Wonders: A Novel of the Plague,Your Finances in Changing Times (The Christian Financial Concepts Series),"Your Life in Your Hands: Understanding, Preventing and Overcoming Breast Cancer",Your Personal Trainer,Your Second Child,Zen Essence : The Science of Freedom (Shambhala Pocket Classics),Â¡No bajes al sÃ³tano! (EscalofrÃ­os No. 2)
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
276725,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
276726,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
276727,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
276729,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
276733,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# (number of User-ID rows, number of Book-Title columns)
rating_crosstab.shape

(214, 1604)

In [11]:
# Flip the utility matrix so that each row is a book title and each column a
# user; the decomposition below then produces one vector per title.
X = rating_crosstab.transpose()
X.shape

(1604, 214)

In [12]:
# Decompose Matrix
# Reduce every title's row of user ratings to a short vector of components.

N_COMPONENTS = 12   # length of the vector kept for each title
RANDOM_STATE = 17   # fixed seed so the decomposition is repeatable

SVD = TruncatedSVD(n_components=N_COMPONENTS, random_state=RANDOM_STATE)
resultant_matrix = SVD.fit_transform(X)

resultant_matrix.shape

(1604, 12)

In [13]:
# Generate correlation matrix

# Pearson correlation between every pair of rows of resultant_matrix.
# A row that is constant across its components has zero standard deviation, so
# np.corrcoef divides by zero for it: it emits RuntimeWarnings and leaves NaN
# in that row and column. Silence the expected warnings and store 0.0 ("no
# measurable correlation") instead, so NaN does not leak into later cells.
with np.errstate(divide='ignore', invalid='ignore'):
    corr_mat = np.corrcoef(resultant_matrix)
corr_mat[np.isnan(corr_mat)] = 0.0
corr_mat.shape

  c /= stddev[:, None]
  c /= stddev[None, :]


(1604, 1604)

In [14]:
# NOTE: the variable names say "movie", but the columns of rating_crosstab are
# book titles; the names are kept because later cells refer to them.
movie_names = rating_crosstab.columns
movies_list = movie_names.tolist()

# Position of the reference title among the columns (and therefore its row in
# the transposed matrix and in the correlation matrix).
movie = movies_list.index('Wild Animus')
movie

1578

In [15]:
# Correlation of the reference title with every title.
# Index with the position looked up in the previous cell instead of a
# hard-coded row number, so this stays correct if the data (and therefore the
# column order) changes.
corr_movie = corr_mat[movie]
corr_movie.shape

(1604,)

In [16]:
# Titles whose correlation with the reference title is above 0.95.
# The `< 1.0` bound presumably drops the title's own self-correlation; it also
# drops any other title whose correlation is exactly 1.0.
strong_match = (corr_movie > 0.95) & (corr_movie < 1.0)
movie_names[strong_match].tolist()


['A Walk to Remember',
 'Bridget Jones : The Edge of Reason',
 'Das zerstÃ?Â¶rte Nest.',
 'Der Tod in Venedig',
 'Die WÃ¼rde des Menschen ist antastbar: AufsÃ¤tze und Polemiken (Wagenbachs TaschenbÃ¼cherei)',
 'Dreamcatcher',
 'Go Ask Alice',
 'Heat and Dust',
 'How to be good.',
 'Im Luftschloss meines Vaters: ErzÃ¤hlung',
 'La vuelta al mundo en 80 dÃ\xadas',
 "Northanger Abbey, Lady Susan, the Watsons and Sanditon (World's Classics)",
 'Pandora: New Tales of the Vampires (New Tales of the Vampires)',
 'People of the Lightning (The First North Americans series, Book 7)',
 'People of the Sea (The First North Americans series, Book 5)',
 'People of the Silence (The First North Americans series, Book 8)',
 'Rites of Passage',
 'She Said Yes: The Unlikely Martyrdom of Cassie Bernall',
 'The Bell Jar : A Novel (Perennial Classics)',
 "The Bonesetter's Daughter",
 'The Circle And The Cross 1: Playing Of',
 'The Clear Light of Day (King Penguin S.)',
 'The Devil Wears Prada : A Novel',
 "Th

In [17]:
# Same selection with a tighter cut-off: keep only titles whose correlation
# with the reference title lies strictly between 0.99 and 1.0.
near_identical = (corr_movie > 0.99) & (corr_movie < 1.0)
list(movie_names[near_identical])

['Go Ask Alice',
 'Pandora: New Tales of the Vampires (New Tales of the Vampires)',
 'She Said Yes: The Unlikely Martyrdom of Cassie Bernall',
 'The Bell Jar : A Novel (Perennial Classics)',
 "The Idiot Girls' Action Adventure Club"]