In [37]:
# Colab-only: mount Google Drive at /content/drive/ so the CSV paths used below resolve.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [38]:
# Both CSVs live in the same Drive folder (the space after "AlmaBetter" is part of the folder name).
DATA_DIR = '/content/drive/MyDrive/AlmaBetter /Capstone Project/Book Recommendation System'
books_url = DATA_DIR + '/unique_books.csv'   # table loaded into `books`
url = DATA_DIR + '/Books_data.csv'           # table loaded into `df`

In [39]:
import pickle
import re

import numpy as np
import pandas as pd
import scipy
import sklearn
from scipy.sparse.linalg import svds
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

In [40]:
# Force identifier columns to text and ratings to integers when reading the ratings table.
rating_dtypes = {'ISBN': str, 'Rating': int, 'Uid': str}
df = pd.read_csv(url, dtype=rating_dtypes)
# The books table is read with pandas' inferred dtypes.
books = pd.read_csv(books_url)

In [41]:
# Preview the first rows of the books table.
books.head()

Unnamed: 0.1,Unnamed: 0,Uid,ISBN,Rating,Title,Author,Pub_Year,Publisher,Image_Url,Location,Age,age_segment
0,3,278418,446520802,0,the notebook,nicholas sparks,1996,warner books,http://images.amazon.com/images/p/0446520802.0...,"omaha, nebraska, usa",37.394772,Adult
1,4,278418,671537458,0,waiting to exhale,terry mcmillan,1995,pocket,http://images.amazon.com/images/p/0671537458.0...,"omaha, nebraska, usa",37.394772,Adult
2,5,278418,786817070,0,"artemis fowl (artemis fowl, book 1)",eoin colfer,2002,miramax kids,http://images.amazon.com/images/p/0786817070.0...,"omaha, nebraska, usa",37.394772,Adult
3,6,278418,446364193,0,along came a spider (alex cross novels),james patterson,1993,warner books,http://images.amazon.com/images/p/0446364193.0...,"omaha, nebraska, usa",37.394772,Adult
4,7,278418,449217264,0,alaska,james a. michener,1994,fawcett books,http://images.amazon.com/images/p/0449217264.0...,"omaha, nebraska, usa",37.394772,Adult


In [42]:
# (rows, columns) of the books table.
books.shape

(1144, 12)

In [43]:
# NOTE(review): same preview as the earlier `books.head()` cell; `books` is unchanged in between.
books.head()

Unnamed: 0.1,Unnamed: 0,Uid,ISBN,Rating,Title,Author,Pub_Year,Publisher,Image_Url,Location,Age,age_segment
0,3,278418,446520802,0,the notebook,nicholas sparks,1996,warner books,http://images.amazon.com/images/p/0446520802.0...,"omaha, nebraska, usa",37.394772,Adult
1,4,278418,671537458,0,waiting to exhale,terry mcmillan,1995,pocket,http://images.amazon.com/images/p/0671537458.0...,"omaha, nebraska, usa",37.394772,Adult
2,5,278418,786817070,0,"artemis fowl (artemis fowl, book 1)",eoin colfer,2002,miramax kids,http://images.amazon.com/images/p/0786817070.0...,"omaha, nebraska, usa",37.394772,Adult
3,6,278418,446364193,0,along came a spider (alex cross novels),james patterson,1993,warner books,http://images.amazon.com/images/p/0446364193.0...,"omaha, nebraska, usa",37.394772,Adult
4,7,278418,449217264,0,alaska,james a. michener,1994,fawcett books,http://images.amazon.com/images/p/0449217264.0...,"omaha, nebraska, usa",37.394772,Adult


In [44]:
 # Preview the first rows of the ratings table that the model is fitted on.
 df.head()

Unnamed: 0.1,Unnamed: 0,Uid,ISBN,Rating,Title,Author,Pub_Year,Publisher,Image_Url,Location,Age
0,3,278418,446520802,0,the notebook,nicholas sparks,1996,warner books,http://images.amazon.com/images/p/0446520802.0...,"omaha, nebraska, usa",37.394772
1,4,278418,671537458,0,waiting to exhale,terry mcmillan,1995,pocket,http://images.amazon.com/images/p/0671537458.0...,"omaha, nebraska, usa",37.394772
2,5,278418,786817070,0,"artemis fowl (artemis fowl, book 1)",eoin colfer,2002,miramax kids,http://images.amazon.com/images/p/0786817070.0...,"omaha, nebraska, usa",37.394772
3,6,278418,446364193,0,along came a spider (alex cross novels),james patterson,1993,warner books,http://images.amazon.com/images/p/0446364193.0...,"omaha, nebraska, usa",37.394772
4,7,278418,449217264,0,alaska,james a. michener,1994,fawcett books,http://images.amazon.com/images/p/0449217264.0...,"omaha, nebraska, usa",37.394772


In [45]:
def preprocess_and_model(df, n_factors=15):
    """Build a low-rank (truncated SVD) approximation of the user x book rating matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Ratings table with at least the columns 'Uid', 'Title' and 'Rating'.
        The frame is only read; it is never modified.
    n_factors : int, default 15
        Number of latent factors (singular values) to keep. ``svds`` needs
        ``k < min(n_users, n_books)``, so the value is capped at that limit.

    Returns
    -------
    pandas.DataFrame
        Reconstructed ratings with one row per book title and one column per
        user id (both converted to ``str``).

    Raises
    ------
    ValueError
        If the pivoted matrix has fewer than two users or fewer than two books,
        in which case no truncated SVD can be computed.
    """
    # Only these columns feed the model; selecting them leaves the caller's frame untouched.
    rating_df = df[['Uid', 'Title', 'Rating']]

    # Users as rows, titles as columns. Several ratings of the same title by the
    # same user are averaged; user/title pairs without a rating become 0.
    ratings_pivot_df = rating_df.pivot_table(
        values='Rating', index='Uid', columns='Title', aggfunc='mean'
    ).fillna(0)
    ratings_values = ratings_pivot_df.to_numpy(dtype=float)

    max_factors = min(ratings_values.shape) - 1
    if max_factors < 1:
        raise ValueError(
            'need at least 2 users and 2 books to factorize, got a '
            f'{ratings_values.shape[0]} x {ratings_values.shape[1]} rating matrix'
        )
    k = min(n_factors, max_factors)

    # Matrix factorization of the user-item matrix.
    U, sigma, Vt = svds(ratings_values, k=k)

    # Scaling the columns of U by sigma equals U @ diag(sigma) without building
    # the dense diagonal matrix.
    predicted_rating_for_users = (U * sigma) @ Vt

    # Back to a labelled frame, transposed so that each row is one book.
    user_ids = [str(uid) for uid in ratings_pivot_df.index]
    book_names = [str(title) for title in ratings_pivot_df.columns]
    pred_df = pd.DataFrame(
        predicted_rating_for_users, index=user_ids, columns=book_names
    ).transpose()

    return pred_df


In [46]:
# Fit the SVD model on the ratings table; the result has one row per book title and one column per user.
pred_df = preprocess_and_model(df)

In [47]:
# Show a single row of the books table as a reminder of its columns.
books.head(1)

Unnamed: 0.1,Unnamed: 0,Uid,ISBN,Rating,Title,Author,Pub_Year,Publisher,Image_Url,Location,Age,age_segment
0,3,278418,446520802,0,the notebook,nicholas sparks,1996,warner books,http://images.amazon.com/images/p/0446520802.0...,"omaha, nebraska, usa",37.394772,Adult


In [48]:
# (number of book titles, number of users) in the reconstructed rating matrix.
pred_df.shape

(1144, 1148)

In [49]:
def recommend_items_by_item(book_name, predictions_df, items_df=None ,topn=10, verbose=False):
    """Rank books by the cosine similarity of their predicted-rating rows to ``book_name``.

    Parameters
    ----------
    book_name : str
        Label of the query book in ``predictions_df.index``.
    predictions_df : pandas.DataFrame
        One row per book; each row holds that book's predicted ratings.
    items_df : pandas.DataFrame, optional
        Book details, required when ``verbose`` is true. Must contain 'ISBN',
        'Title', 'Pub_Year', 'Publisher', 'Image_Url' and 'Author'. It is only
        read; the lower-casing of titles happens on a private copy.
    topn : int, default 10
        Number of most similar rows to keep.
    verbose : bool, default False
        When true, attach the book details from ``items_df``.

    Returns
    -------
    pandas.DataFrame
        Sorted by descending 'recStrength'. The query book is compared with
        itself as well, so it is part of the ranking. Non-verbose: a single
        'recStrength' column indexed by title. Verbose: 'recStrength', 'ISBN',
        'Title', 'Pub_Year', 'Publisher', 'Image_Url', 'Author', one row per title.

    Raises
    ------
    ValueError
        If ``verbose`` is true and ``items_df`` is None.
    KeyError
        If ``book_name`` is not a label of ``predictions_df``.
    """
    if verbose and items_df is None:
        raise ValueError('"items_df" is required in verbose mode')

    # Read the query row directly instead of transposing the whole matrix.
    query_vector = predictions_df.loc[book_name].to_numpy().reshape(1, -1)
    similarities = cosine_similarity(query_vector, predictions_df)[0]

    recommended_df = (
        pd.DataFrame({'recStrength': similarities}, index=predictions_df.index)
        .sort_values(by='recStrength', ascending=False)
        .iloc[:topn]
    )
    if not verbose:
        return recommended_df

    detail_columns = ['ISBN', 'Title', 'Pub_Year', 'Publisher', 'Image_Url', 'Author']
    # Lower-case titles on a copy (the merge key is compared case-sensitively) and
    # keep the first row per title *before* merging, so the join does not expand
    # to one row per rating only to be de-duplicated afterwards.
    catalogue = items_df[detail_columns]
    catalogue = catalogue.assign(Title=catalogue['Title'].str.lower())
    catalogue = catalogue.drop_duplicates(subset='Title', keep='first')

    # merge recommended items with other details
    recommended_df = recommended_df.merge(
        catalogue, how='left', left_index=True, right_on='Title'
    )[['recStrength'] + detail_columns]
    return recommended_df

In [50]:


def recommend(name):
    """Recommend five books similar to the first title in ``df`` that contains ``name``.

    The search is a literal, case-insensitive substring match. Besides the text
    as typed, the stripped text, the text without commas and the stripped text
    without colons are tried as alternatives.

    Parameters
    ----------
    name : str
        Full or partial book title.

    Returns
    -------
    pandas.DataFrame or str
        The verbose output of ``recommend_items_by_item`` on success, otherwise
        a message starting with "Oops! Book Not Found.....".
    """
    # NOTE(review): the original cell exposed the match through this notebook-level
    # global; it is kept so that any cell reading `title` keeps working.
    global title

    def _not_found(reason):
        return f"Oops! Book Not Found.....\n Try Again\n{reason}"

    lowered = name.lower()
    stripped = lowered.strip()
    candidates = (lowered, stripped, lowered.replace(',', ''), stripped.replace(':', ''))
    # Drop duplicates and blank variants: an empty alternative matches every title.
    variants = [text for text in dict.fromkeys(candidates) if text.strip()]
    if not variants:
        return _not_found('search text is empty')

    # Escape the user text so characters such as "(", "+" or "?" are matched
    # literally instead of being interpreted as regular-expression syntax.
    pattern = '|'.join(re.escape(text) for text in variants)
    # na=False keeps missing titles out of the mask instead of raising on NaN.
    title = df.loc[df['Title'].str.contains(pattern, case=False, na=False), 'Title'].values
    try:
        title = title[0]
        books_recommendations = recommend_items_by_item(title, pred_df, df, topn=5, verbose=True)
        return books_recommendations
    except Exception as error:
        return _not_found(error)

In [51]:
# Example query: a partial title is enough, the first matching title in `df` is used.
recommendation_df = recommend("notebook") 
recommendation_df

Unnamed: 0,recStrength,ISBN,Title,Pub_Year,Publisher,Image_Url,Author
0,1.0,446520802,the notebook,1996,warner books,http://images.amazon.com/images/p/0446520802.0...,nicholas sparks
14,0.90257,446608955,a walk to remember,2000,warner books,http://images.amazon.com/images/p/0446608955.0...,nicholas sparks
384,0.895498,440211263,circle of friends,1991,dell,http://images.amazon.com/images/p/0440211263.0...,maeve binchy
204,0.890645,446525502,the rescue,2000,warner books,http://images.amazon.com/images/p/0446525502.0...,nicholas sparks
949,0.881946,688170528,the pact: a love story,1999,quill,http://images.amazon.com/images/p/0688170528.0...,jodi picoult


In [None]:
# Save pred_df as a pickle file (`pickle` is imported in the imports cell).
# The `with` block closes the handle, so the pickle is fully flushed to disk
# before the file is copied or downloaded.
file = 'pred_df.pkl'
with open(file, 'wb') as file_opn:
    pickle.dump(pred_df, file_opn)