In [1]:
# instructions : 1. provide user id input and query input in the end of the file to check recommendation
#                2. Model is divide into two parts:
#                   1. Model that generates recommendations based on similar content of the book
#                   2. Model that generates recommendations based on similar books rated by the user

In [2]:
# importing all required libraries

import numpy as np
import pandas as pd
import scipy.stats
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

## FIRST PART OF THE MODEL 

In [3]:
# reading books dataset and storing it in dataframe named df
df = pd.read_csv('books.csv')
df.head()

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   book_id                    10000 non-null  int64  
 1   goodreads_book_id          10000 non-null  int64  
 2   best_book_id               10000 non-null  int64  
 3   work_id                    10000 non-null  int64  
 4   books_count                10000 non-null  int64  
 5   isbn                       9300 non-null   object 
 6   isbn13                     9415 non-null   float64
 7   authors                    10000 non-null  object 
 8   original_publication_year  9979 non-null   float64
 9   original_title             9415 non-null   object 
 10  title                      10000 non-null  object 
 11  language_code              8916 non-null   object 
 12  average_rating             10000 non-null  float64
 13  ratings_count              10000 non-null  int6

In [5]:
# reading tags scv file 
tags = pd.read_csv('tags.csv')
tags.head()

Unnamed: 0,tag_id,tag_name
0,0,-
1,1,--1-
2,2,--10-
3,3,--12-
4,4,--122-


In [6]:
# reading and stroing book_tags csv file
goodbook = pd.read_csv('book_tags.csv')
goodbook.head()

Unnamed: 0,goodreads_book_id,tag_id,count
0,1,30574,167697
1,1,11305,37174
2,1,11557,34173
3,1,8717,12986
4,1,33114,12716


In [7]:
# merging goodbook and tags dataframes on the basis of tag_id and applying inner join
df1 = pd.merge(goodbook, tags, on = 'tag_id', how ='inner')
df1.head()

Unnamed: 0,goodreads_book_id,tag_id,count,tag_name
0,1,30574,167697,to-read
1,2,30574,24549,to-read
2,3,30574,496107,to-read
3,5,30574,11909,to-read
4,6,30574,298,to-read


In [8]:
# df1 dataframe contains merged goodbook and tags dataframes 
df1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 999912 entries, 0 to 999911
Data columns (total 4 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   goodreads_book_id  999912 non-null  int64 
 1   tag_id             999912 non-null  int64 
 2   count              999912 non-null  int64 
 3   tag_name           999912 non-null  object
dtypes: int64(3), object(1)
memory usage: 38.1+ MB


## EXPLORING DATA ANALYSIS OF FIRST PART OF MODEL 

In [9]:
# merging df and df1 dataframes and storing it into df2 dataframe
df2 = pd.merge(df, df1, on= 'goodreads_book_id', how = 'inner')
df2.head()

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,tag_id,count,tag_name
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...,30574,11314,to-read
1,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...,11305,10836,fantasy
2,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...,11557,50755,favorites
3,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...,8717,35418,currently-reading
4,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...,33114,25968,young-adult


In [10]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 999912 entries, 0 to 999911
Data columns (total 26 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   book_id                    999912 non-null  int64  
 1   goodreads_book_id          999912 non-null  int64  
 2   best_book_id               999912 non-null  int64  
 3   work_id                    999912 non-null  int64  
 4   books_count                999912 non-null  int64  
 5   isbn                       929994 non-null  object 
 6   isbn13                     941494 non-null  float64
 7   authors                    999912 non-null  object 
 8   original_publication_year  997812 non-null  float64
 9   original_title             941462 non-null  object 
 10  title                      999912 non-null  object 
 11  language_code              891518 non-null  object 
 12  average_rating             999912 non-null  float64
 13  ratings_count              99

In [11]:
# extracting important features ie. title, authors and tags required for the model from dataframe named df
# stroing that data into dataframe named final_dataframe
final_dataframe = df2[['title','authors','tag_name']]
final_dataframe.head()

Unnamed: 0,title,authors,tag_name
0,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,to-read
1,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,fantasy
2,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,favorites
3,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,currently-reading
4,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,young-adult


In [12]:
# aggregating data of tags pf same books and storing in df3 dataframe
final_dataframe['tag_name'].str.lower()
aggregate_funcs = {
    'tag_name': lambda s: ',' .join(set(s))
}
df3 = final_dataframe.groupby(final_dataframe['title']).aggregate(aggregate_funcs)

In [13]:
df3.head()

Unnamed: 0_level_0,tag_name
title,Unnamed: 1_level_1
"Angels (Walsh Family, #3)","english,read-in-2010,reread,paperback,re-read,..."
"""حكايات فرغلي المستكاوي ""حكايتى مع كفر السحلاوية","e-books,كتب-ساخرة,comedy,أدب_ساخر,مجموعة-قصص,s..."
#GIRLBOSS,"english,e-books,management,nonfiction-to-read,..."
'Salem's Lot,"english,sk,not-interested,suspense,the-king,ha..."
"'Tis (Frank McCourt, #2)","english,classics,all-time-favorites,coming-of-..."


In [14]:
# renaming tag_name to tag in dataframe df3
df3 = df3.rename(columns={'tag_name': 'tag'})

In [15]:
df3.head()

Unnamed: 0_level_0,tag
title,Unnamed: 1_level_1
"Angels (Walsh Family, #3)","english,read-in-2010,reread,paperback,re-read,..."
"""حكايات فرغلي المستكاوي ""حكايتى مع كفر السحلاوية","e-books,كتب-ساخرة,comedy,أدب_ساخر,مجموعة-قصص,s..."
#GIRLBOSS,"english,e-books,management,nonfiction-to-read,..."
'Salem's Lot,"english,sk,not-interested,suspense,the-king,ha..."
"'Tis (Frank McCourt, #2)","english,classics,all-time-favorites,coming-of-..."


In [16]:
# getting information about df3 
df3.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9964 entries,  Angels (Walsh Family, #3) to 美少女戦士セーラームーン新装版 1 [Bishōjo Senshi Sailor Moon Shinsōban 1]
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tag     9964 non-null   object
dtypes: object(1)
memory usage: 155.7+ KB


In [17]:
# extrating columns from dataframe df and stroing in dataframe named df
check = df[['title','authors']]
check.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    10000 non-null  object
 1   authors  10000 non-null  object
dtypes: object(2)
memory usage: 156.4+ KB


In [18]:
# merging check and df3 dataframes on the basis of title and applying inner join and stroing in merge dataframe
merge = pd.merge(check,df3,on = 'title', how = 'inner')

In [19]:
merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10000 entries, 0 to 9999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    10000 non-null  object
 1   authors  10000 non-null  object
 2   tag      10000 non-null  object
dtypes: object(3)
memory usage: 312.5+ KB


In [20]:
merge.head()

Unnamed: 0,title,authors,tag
0,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,"english,suzanne-collins,suspense,completed-ser..."
1,Harry Potter and the Sorcerer's Stone (Harry P...,"J.K. Rowling, Mary GrandPré","english,children-s,england,magic,classics,rere..."
2,"Twilight (Twilight, #1)",Stephenie Meyer,"english,completed-series,séries,reread,on-my-s..."
3,To Kill a Mockingbird,Harper Lee,"english,bookclub,banned-books,race,realistic-f..."
4,The Great Gatsby,F. Scott Fitzgerald,"english,bookclub,banned-books,realistic-fictio..."


In [21]:
# defining a functions that takes dataframe and combine the features of title, authors and tags in one column 
def combine_features(data):
    features = []
    for i in range(0,data.shape[0]):
        features.append(data['title'][i] + ' ' + data['authors'][i] + ' ' + data['tag'][i])
    return features


In [22]:
# adding one more column named combined_features to get all features at one place while using model
merge['combined_features'] = combine_features(merge)

In [23]:
merge.head()

Unnamed: 0,title,authors,tag,combined_features
0,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,"english,suzanne-collins,suspense,completed-ser...","The Hunger Games (The Hunger Games, #1) Suzann..."
1,Harry Potter and the Sorcerer's Stone (Harry P...,"J.K. Rowling, Mary GrandPré","english,children-s,england,magic,classics,rere...",Harry Potter and the Sorcerer's Stone (Harry P...
2,"Twilight (Twilight, #1)",Stephenie Meyer,"english,completed-series,séries,reread,on-my-s...","Twilight (Twilight, #1) Stephenie Meyer englis..."
3,To Kill a Mockingbird,Harper Lee,"english,bookclub,banned-books,race,realistic-f...","To Kill a Mockingbird Harper Lee english,bookc..."
4,The Great Gatsby,F. Scott Fitzgerald,"english,bookclub,banned-books,realistic-fictio...","The Great Gatsby F. Scott Fitzgerald english,b..."


In [24]:
# convert text from new column to a new matrix of word count
cm = CountVectorizer().fit_transform(merge['combined_features'])

In [25]:
# generating cosine similarity matrix
cs = cosine_similarity(cm)

In [26]:
# generated cosine similarity matrix
print(cs)

[[1.         0.74890498 0.74044859 ... 0.53078386 0.57914156 0.21861219]
 [0.74890498 1.         0.71965814 ... 0.49902814 0.57754028 0.23134404]
 [0.74044859 0.71965814 1.         ... 0.42142676 0.42068231 0.18927245]
 ...
 [0.53078386 0.49902814 0.42142676 ... 1.         0.45705677 0.34608903]
 [0.57914156 0.57754028 0.42068231 ... 0.45705677 1.         0.32743662]
 [0.21861219 0.23134404 0.18927245 ... 0.34608903 0.32743662 1.        ]]


In [27]:
# building the model bu using title and book id
title = df['title'][5]
title

'The Fault in Our Stars'

In [28]:
book_id = df[df.title == title]['book_id'].values[0]
book_id

6

In [29]:
# listing all cosine similarity scores and book id 
# creating a list of tuples in the form of (book id and similarity score)
scores = list(enumerate(cs[book_id]))
print(scores)

[(0, 0.7506524529170511), (1, 0.8446332807890714), (2, 0.7169524248514892), (3, 0.7082422084825127), (4, 0.7223771149717381), (5, 0.6769022535733206), (6, 1.0000000000000004), (7, 0.7076425723487434), (8, 0.49463963651056125), (9, 0.7545840750715936), (10, 0.6864197014607737), (11, 0.761357768985388), (12, 0.8119136846411826), (13, 0.7618957948907634), (14, 0.5424640328823783), (15, 0.5580198642617026), (16, 0.7822408145464871), (17, 0.8339902974452235), (18, 0.8958634055490271), (19, 0.7634345553457631), (20, 0.7998295175735526), (21, 0.5775446787283185), (22, 0.8481833168378242), (23, 0.836389894701809), (24, 0.8264517174339594), (25, 0.513878017704744), (26, 0.8233453021751698), (27, 0.7492785982528475), (28, 0.6197119244734878), (29, 0.5852615909400696), (30, 0.6491264361178811), (31, 0.6908632989916191), (32, 0.6508192595829829), (33, 0.5481207451156825), (34, 0.715499705208843), (35, 0.7386544964435532), (36, 0.8277936191991724), (37, 0.6716057289087678), (38, 0.814712430219874),

In [30]:
# sorting scores and extracting books other than itself
sorted_scores = sorted(scores,key = lambda x:x[1], reverse = True)
sorted_scores = sorted_scores[1:]
sorted_scores

[(190, 0.9066173995361744),
 (18, 0.8958634055490271),
 (155, 0.8727688032017613),
 (616, 0.870963604365417),
 (53, 0.8693557668258532),
 (161, 0.860459322654348),
 (1328, 0.8548916959580847),
 (1373, 0.8483046903585889),
 (22, 0.8481833168378242),
 (193, 0.8474206911416954),
 (69, 0.8465318577774774),
 (942, 0.846447971748878),
 (334, 0.8452673499435012),
 (1, 0.8446332807890714),
 (3371, 0.8445901739874917),
 (822, 0.843636586746895),
 (371, 0.843364951885228),
 (969, 0.8417605612249233),
 (299, 0.8416484832886705),
 (5124, 0.8394454149324793),
 (1766, 0.839403912340374),
 (109, 0.838262653600784),
 (566, 0.8376892664992299),
 (23, 0.836389894701809),
 (751, 0.8357113619478717),
 (2317, 0.8348990768101447),
 (569, 0.8344712065828647),
 (4015, 0.8340646016286284),
 (17, 0.8339902974452235),
 (2132, 0.8335547727092728),
 (380, 0.8326431077538153),
 (308, 0.8321689225908518),
 (323, 0.8320811221884793),
 (5646, 0.8318097212795245),
 (4016, 0.8301895536430778),
 (2432, 0.8300670530159765

In [31]:
# creating loop to print top 5 books 
j = 0
for item in sorted_scores:
    book_title = df[df.book_id == item[0]]['title'].values[0]
    print(j+1,book_title)
    j = j+1
    if(j == 5):
        break


1 Wild: From Lost to Found on the Pacific Crest Trail
2 Harry Potter and the Prisoner of Azkaban (Harry Potter, #3)
3 The Two Towers (The Lord of the Rings, #2)
4 Plain Truth
5 Eragon (The Inheritance Cycle, #1)


In [32]:
# defining a function that uses book_ids list and generate result os similar books of the ith book id
def user_id_func(book_ids):
    j = 0
    for i in range(len(book_ids)):
        scores = list(enumerate(cs[int(book_ids[i])]))
        sorted_scores = sorted(scores,key = lambda x:x[1], reverse = True)
        sorted_scores = sorted_scores[1:]
        for item in sorted_scores:
            book_title = df[df.book_id == item[0]]['title'].values[0]
            print(j+1,book_title)
            j = j+1
            if(j%5 == 0):
                break
        

In [76]:
# defining a function that uses title of the book provided and generate recommendations based on the item
def user_title(title):
    book_id = df[df['title'] == title]['book_id'].values[0]
    j = 0
    scores = list(enumerate(cs[int(book_id)]))
    sorted_scores = sorted(scores,key = lambda x:x[1], reverse = True)
    sorted_scores = sorted_scores[1:]
    for item in sorted_scores:
        book_title = df[df.book_id == item[0]]['title'].values[0]
        print(j+1,book_title)
        j = j+1
        if(j%5 == 0):
            break
    

## SECOND PART OF MODEL 

In [34]:
# reding and stroing ratings csv file
ratings = pd.read_csv('ratings.csv')
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5976479 entries, 0 to 5976478
Data columns (total 3 columns):
 #   Column   Dtype
---  ------   -----
 0   user_id  int64
 1   book_id  int64
 2   rating   int64
dtypes: int64(3)
memory usage: 136.8 MB


In [35]:
# creating other dataframe named books that stores information about books
books = df

In [36]:
books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   book_id                    10000 non-null  int64  
 1   goodreads_book_id          10000 non-null  int64  
 2   best_book_id               10000 non-null  int64  
 3   work_id                    10000 non-null  int64  
 4   books_count                10000 non-null  int64  
 5   isbn                       9300 non-null   object 
 6   isbn13                     9415 non-null   float64
 7   authors                    10000 non-null  object 
 8   original_publication_year  9979 non-null   float64
 9   original_title             9415 non-null   object 
 10  title                      10000 non-null  object 
 11  language_code              8916 non-null   object 
 12  average_rating             10000 non-null  float64
 13  ratings_count              10000 non-null  int6

In [37]:
# using book_id as matching key we can merge the two tables
dataframe = pd.merge(ratings, books, on='book_id', how='inner')
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5976479 entries, 0 to 5976478
Data columns (total 25 columns):
 #   Column                     Dtype  
---  ------                     -----  
 0   user_id                    int64  
 1   book_id                    int64  
 2   rating                     int64  
 3   goodreads_book_id          int64  
 4   best_book_id               int64  
 5   work_id                    int64  
 6   books_count                int64  
 7   isbn                       object 
 8   isbn13                     float64
 9   authors                    object 
 10  original_publication_year  float64
 11  original_title             object 
 12  title                      object 
 13  language_code              object 
 14  average_rating             float64
 15  ratings_count              int64  
 16  work_ratings_count         int64  
 17  work_text_reviews_count    int64  
 18  ratings_1                  int64  
 19  ratings_2                  int64  
 20  ra

## EXPLORING DATA ANALYSIS OF SECOND PART OF MODEL 

In [38]:
# aggregating data of book_id and rating of same books and storing in dataframe named dataframe
aggregate_funcs = {
    'book_id': lambda s: ',' .join(map(str, s)),
    'rating': lambda t: ',' .join(map(str, t))
}
dataframe = ratings.groupby(['user_id']).aggregate(aggregate_funcs)

In [39]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   book_id                    10000 non-null  int64  
 1   goodreads_book_id          10000 non-null  int64  
 2   best_book_id               10000 non-null  int64  
 3   work_id                    10000 non-null  int64  
 4   books_count                10000 non-null  int64  
 5   isbn                       9300 non-null   object 
 6   isbn13                     9415 non-null   float64
 7   authors                    10000 non-null  object 
 8   original_publication_year  9979 non-null   float64
 9   original_title             9415 non-null   object 
 10  title                      10000 non-null  object 
 11  language_code              8916 non-null   object 
 12  average_rating             10000 non-null  float64
 13  ratings_count              10000 non-null  int6

In [40]:
# coverting rating columns data into strings
dataframe['rating'] = dataframe['rating'].explode().astype(str)
dataframe.head()

Unnamed: 0_level_0,book_id,rating
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,"258,268,5556,3638,1796,867,47,2738,4691,238,20...","5,3,3,3,5,3,3,3,4,2,4,3,1,3,5,5,3,5,4,3,5,4,3,..."
2,"4081,260,9296,2318,26,315,33,301,2686,3753,851...","4,5,5,3,4,3,4,5,5,5,5,5,4,4,4,5,5,5,5,4,5,5,3,..."
3,"8452,522,4788,150,9962,9790,1283,9049,9215,4,6...","1,1,2,2,1,1,1,1,1,3,2,3,1,1,1,1,1,1,1,1,1,1,3,..."
4,"70,264,388,18,27,21,2,23,24,964,103,255,35,287...","4,3,4,5,5,5,5,5,5,4,5,2,5,3,4,3,4,4,3,3,2,4,4,..."
5,"7601,441,6344,4335,1485,1145,2592,431,326,887,...","4,4,4,4,4,4,4,3,3,3,3,4,3,4,3,4,5,4,4,4,3,4,4,..."


In [41]:
# checking with user id 1 and extrating information of picked user id
picked_user_id = 1
choosed_frame = dataframe.loc[picked_user_id]

In [42]:
choosed_frame.info()

<class 'pandas.core.series.Series'>
Index: 2 entries, book_id to rating
Series name: 1
Non-Null Count  Dtype 
--------------  ----- 
2 non-null      object
dtypes: object(1)
memory usage: 140.0+ bytes


In [43]:
# extrating rating and book_id columns from choosed_frame dataframe and
# storing in user_rating and user_book respectively
user_rating = choosed_frame['rating']
user_book = choosed_frame['book_id']

In [44]:
user_rating

'5,3,3,3,5,3,3,3,4,2,4,3,1,3,5,5,3,5,4,3,5,4,3,3,4,1,5,2,3,5,5,3,3,4,2,3,4,4,4,2,5,4,3,5,5,2,3,4,3,3,4,3,3,2,2,3,3,3,4,3,3,4,3,4,3,4,4,3,3,4,5,3,4,5,4,4,3,4,3,2,1,4,3,4,3,4,3,4,3,3,3,4,4,4,4,4,3,5,5,4,5,5,5,3,4,4,5,4,3,4,5,5,4,4,4,3,3'

In [45]:
user_book

'258,268,5556,3638,1796,867,47,2738,4691,238,2063,916,4614,111,11,1644,3889,136,6665,150,35,33,60,148,10,94,4,492,57,1521,70,42,103,36,138,119,32,13,66,3406,2002,43,287,1041,45,38,67,46,22,115,31,16,132,40,407,256,273,378,329,98,216,1176,140,869,2679,1310,414,54,85,219,177,109,131,102,95,225,76,100,171,179,255,485,325,498,323,162,72,233,496,306,354,1030,1055,2770,198,1761,1942,128,81,5191,1187,2535,3294,4893,1180,6285,2133,1011,262,437,421,143,142,642,901,212,231'

In [46]:
type(user_rating)

str

In [47]:
user_rating = user_rating.split(',')

In [48]:
type(user_rating)

list

In [49]:
user_rating = list(map(int, user_rating))

In [50]:
user_rating

[5,
 3,
 3,
 3,
 5,
 3,
 3,
 3,
 4,
 2,
 4,
 3,
 1,
 3,
 5,
 5,
 3,
 5,
 4,
 3,
 5,
 4,
 3,
 3,
 4,
 1,
 5,
 2,
 3,
 5,
 5,
 3,
 3,
 4,
 2,
 3,
 4,
 4,
 4,
 2,
 5,
 4,
 3,
 5,
 5,
 2,
 3,
 4,
 3,
 3,
 4,
 3,
 3,
 2,
 2,
 3,
 3,
 3,
 4,
 3,
 3,
 4,
 3,
 4,
 3,
 4,
 4,
 3,
 3,
 4,
 5,
 3,
 4,
 5,
 4,
 4,
 3,
 4,
 3,
 2,
 1,
 4,
 3,
 4,
 3,
 4,
 3,
 4,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 3,
 5,
 5,
 4,
 5,
 5,
 5,
 3,
 4,
 4,
 5,
 4,
 3,
 4,
 5,
 5,
 4,
 4,
 4,
 3,
 3]

In [51]:
type(user_rating)

list

In [52]:
type(user_book)

str

In [53]:
user_book = user_book.split(',')

In [54]:
type(user_book)

list

In [55]:
# making list of user_rating named list1 and list of user_book named list2
list1 = user_rating
list2 = user_book

index = list(range(len(list1)))
print(index)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116]


In [56]:
# soring lists based on user_rating and stored in index dataset
index.sort(reverse = True, key = list1.__getitem__)
print(index)

[0, 4, 14, 15, 17, 20, 26, 29, 30, 40, 43, 44, 70, 73, 97, 98, 100, 101, 102, 106, 110, 111, 8, 10, 18, 21, 24, 33, 36, 37, 38, 41, 47, 50, 58, 61, 63, 65, 66, 69, 72, 74, 75, 77, 81, 83, 85, 87, 91, 92, 93, 94, 95, 99, 104, 105, 107, 109, 112, 113, 114, 1, 2, 3, 5, 6, 7, 11, 13, 16, 19, 22, 23, 28, 31, 32, 35, 42, 46, 48, 49, 51, 52, 55, 56, 57, 59, 60, 62, 64, 67, 68, 71, 76, 78, 82, 84, 86, 88, 89, 90, 96, 103, 108, 115, 116, 9, 27, 34, 39, 45, 53, 54, 79, 12, 25, 80]


In [57]:
# getting highest rated books that user likes previously
list1[:] = [list1[i] for i in index]
list2[:] = [list2[i] for i in index]
print(list1)

[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1]


In [58]:
print(list2)

['258', '1796', '11', '1644', '136', '35', '4', '1521', '70', '2002', '1041', '45', '177', '102', '128', '81', '1187', '2535', '3294', '2133', '421', '143', '4691', '2063', '6665', '33', '10', '36', '32', '13', '66', '43', '46', '31', '329', '1176', '869', '1310', '414', '219', '131', '95', '225', '100', '485', '498', '162', '233', '1030', '1055', '2770', '198', '1761', '5191', '1180', '6285', '1011', '437', '142', '642', '901', '268', '5556', '3638', '867', '47', '2738', '916', '111', '3889', '150', '60', '148', '57', '42', '103', '119', '287', '67', '22', '115', '16', '132', '256', '273', '378', '98', '216', '140', '2679', '54', '85', '109', '76', '171', '325', '323', '72', '496', '306', '354', '1942', '4893', '262', '212', '231', '238', '492', '138', '3406', '38', '40', '407', '179', '4614', '94', '255']


In [59]:
# generating list of book_id of favourite books liked by the user 
fav_book_id = []

if(len(list2) >=3 ):
    fav_book_id.append(list2[0])
    fav_book_id.append(list2[1])
    fav_book_id.append(list2[2])
elif(len(list2) ==2 ):
    fav_book_id.append(list2[0])
    fav_book_id.append(list2[1])
elif(len(list2) ==1 ):
    fav_book_id.append(list2[0])

print(fav_book_id)

['258', '1796', '11']


In [60]:
#
user_id_func(fav_book_id)

1 The Desert Spear (Demon Cycle, #2)
2 We Were Liars
3 Persuasion
4 Cloud Atlas
5 Interview with the Vampire (The Vampire Chronicles, #1)
6 The Body Farm (Kay Scarpetta, #5)
7 com نسيان
8 Heir of Fire (Throne of Glass, #3)
9 NARUTO -ナルト- 巻ノ四十三
10 Leave Me Breathless (Ross Siblings, #3)
11 The Perks of Being a Wallflower
12 The Road
13 The Girl with the Dragon Tattoo (Millennium, #1)
14 Nineteen Minutes
15 The Children of Húrin


In [61]:
# defining a function that uses user id to generate a recommendation and print list of similar books read by the user 
def picking_user(picked_user_id):
    choosed_frame = dataframe.loc[picked_user_id]
    user_rating = choosed_frame['rating']
    user_book = choosed_frame['book_id']
    user_rating = list(map(int, user_rating.split(',')))
    user_book = user_book.split(',')
    list1 = user_rating
    list2 = user_book
    index = list(range(len(list1)))
    index.sort(reverse = True, key = list1.__getitem__)
    list1[:] = [list1[i] for i in index]
    list2[:] = [list2[i] for i in index]
    
    fav_book_id = []
    if(len(list2) >=3 ):
        fav_book_id.append(list2[0])
        fav_book_id.append(list2[1])
        fav_book_id.append(list2[2])
    elif(len(list2) ==2 ):
        fav_book_id.append(list2[0])
        fav_book_id.append(list2[1])
    elif(len(list2) ==1 ):
        fav_book_id.append(list2[0])
    user_id_func(fav_book_id)

## Taking user id input 

In [68]:
# taking user id input form the user
u_id = int(input('Type user id'))

Type user id3


In [69]:
# picking_user(u_id)
print(type(u_id))

<class 'int'>


In [70]:
# picking_user(u_id)
print(type(user_rating))

<class 'list'>


In [71]:
# generating recommendation by user_id 
picking_user(u_id)

1 Ariel
2 Lone Wolf
3 Down and Out in Paris and London
4 The Chronicles of Narnia (Chronicles of Narnia, #1-7)
5 1st to Die (Women's Murder Club, #1)
6 The Royal We
7 Finding Nemo
8 The Murder on the Links (Hercule Poirot, #2)
9 The Keeping (Law of the Lycans, #3)
10 Wayside School Is Falling Down (Wayside School #2)
11 The Hobbit
12 The Help
13 Twilight (Twilight, #1)
14 Harry Potter and the Half-Blood Prince (Harry Potter, #6)
15 1984


## Taking query of title as input

In [80]:
# Example title = "Plain Truth"
query = input("Title of the book")

Title of the bookThe Help


In [81]:
# generating output based on similar books
user_title(query)

1 The Hobbit
2 To Kill a Mockingbird
3 Harry Potter and the Half-Blood Prince (Harry Potter, #6)
4 Twilight (Twilight, #1)
5 The Grapes of Wrath
