In [1]:
import dynamodb_mapper
import boto3
import numpy as np
import matplotlib.pyplot as plt
import csv
import json
import pandas as pd
import os
import shutil
from boto3 import resource
from boto3.dynamodb.conditions import Key, Attr
from sklearn.neighbors import NearestNeighbors

In [2]:
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')

In [3]:
def download_and_convert_data(table , filename):
    data_downloaded = download_data(table)
    convert_to_csv(data_downloaded, filename)

In [4]:
def download_data(table_name):
    table = dynamodb.Table(table_name)
    key = []
    for obj in table.attribute_definitions:
        key.append(obj['AttributeName'])
    set_of_key = set(key)
    item_count = table.item_count

    data_downloaded = table.scan()
    if data_downloaded is None:
        return None

    items = data_downloaded['Items']
    column_names = set([])
    for item in items:
        column_names = column_names.union(set(item.keys()))
    print(column_names)
    print("Total downloaded records: ",len(items))

    for c_name in column_names:
        if c_name not in set_of_key:
            key.append(c_name)
    return {'items': items, 'keys': key}

In [5]:
def convert_to_csv(data_downloaded, filename):
    if data_downloaded is None:
        return
    if os.path.exists(filename):
        shutil.rmtree(filename, ignore_errors=True)
    with open(filename, 'w' ,encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, delimiter=',', fieldnames=data_downloaded['keys'],quotechar='"')
        writer.writeheader()
        writer.writerows(data_downloaded['items'])

In [6]:
download_and_convert_data("hungrymind-mobilehub-593518188-Customer_table" , "user.csv")

{'custPass', 'custName', 'custNumber', 'custEmail', 'userId', 'custAddress'}
Total downloaded records:  450


In [7]:
download_and_convert_data("hungrymind-mobilehub-593518188-Books" , "books.csv")

{'Price', 'Genre', 'ImageUrl', 'Author', 'ISBN', 'PublicationYear', 'Rating', 'BookName'}
Total downloaded records:  5687


In [8]:
download_and_convert_data("hungrymind-mobilehub-593518188-BookBorrow" , "borrowed.csv")

{'ActualRetDate', 'DateOfBorrow', 'BookID', 'CustID', 'BorrowId', 'DateClaimToRet', 'Rating', 'SupplierID'}
Total downloaded records:  1211


In [9]:
books = pd.read_csv('books.csv',sep = ',' ,error_bad_lines = False , encoding = 'latin-1')
users = pd.read_csv('user.csv',sep = ',' ,error_bad_lines = False , encoding = 'latin-1')
borrow = pd.read_csv('borrowed.csv',sep = ',' ,error_bad_lines = False , encoding = 'latin-1')

In [10]:
borrow.head()

Unnamed: 0,BorrowId,ActualRetDate,DateOfBorrow,BookID,CustID,DateClaimToRet,Rating,SupplierID
0,251.0,16-6-1907,12-6-1907,1843914,1350,19-6-1907,0.797226,1
1,1838.0,2-11-1974,18-10-1974,6898682,1395,6-11-1974,2.316953,1
2,4119.0,21-2-2016,16-2-2016,1439148015,1462,22-2-2016,3.667239,1
3,2289.0,16-10-1986,13-10-1986,380815583,1469,17-10-1986,0.680867,1
4,4158.0,6-4-2011,20-3-2011,9799731232,1082,5-4-2011,1.505543,1


In [11]:
books.head()

Unnamed: 0,ISBN,Price,Genre,ImageUrl,Author,PublicationYear,Rating,BookName
0,803731558,813.0,Fantasy,https://images.gr-assets.com/books/1354846350m...,Frank Beddor,2007.0,3.85,Seeing Redd
1,385333218,1084.0,Literature,https://images.gr-assets.com/books/1320533768m...,Homer Hickam,1998.0,4.18,Rocket Boys
2,394837363,1049.0,Children,https://images.gr-assets.com/books/1344369033m...,"""Stan Berenstain, Jan Berenstain""",1978.0,4.28,The Big Honey Hunt
3,78511477,248.0,Horror,https://s.gr-assets.com/assets/nophoto/book/11...,"""Roy Thomas, Dick Giordano, Mark D. Beazley, B...",2005.0,3.93,Stoker's Dracula
4,345403959,297.0,Fantasy,https://images.gr-assets.com/books/1333692194m...,"""David Eddings, Leigh Eddings""",1995.0,4.07,Belgarath the Sorcerer


In [12]:
users.head()

Unnamed: 0,custEmail,custPass,custName,custNumber,userId,custAddress
0,lburnard@burnard.com,WY82501,LauranBurnard,3073427795,1241,"Riverton,Fremont,WY"
1,sheron@aol.com,NY11101,SheronLouissant,7189768610,1370,"Long Island City,Queens,NY"
2,lizbeth@yahoo.com,CA90248,LizbethKohl,3106991222,1389,"Gardena,Los Angeles,CA"
3,mpelkowski@pelkowski.org,CA94080,MozellPelkowski,6509471215,1208,"South San Francisco,San Mateo,CA"
4,kayleigh.lace@yahoo.com,LA70508,KayleighLace,3377409323,1297,"Lafayette,Lafayette,LA"


In [13]:
new_book = books.sort_values('Rating' , ascending = False)
new_book.head()

Unnamed: 0,ISBN,Price,Genre,ImageUrl,Author,PublicationYear,Rating,BookName
1805,74074847,439.0,,https://images.gr-assets.com/books/1473064526m...,Bill Watterson,2005.0,4.82,The Complete Calvin and Hobbes
4232,1569319626,167.0,,https://images.gr-assets.com/books/1473064526m...,"Volume 1"",,Rumiko Takahashi",1988.0,4.82,"""Ranma Â½"
3910,740748475,167.0,,https://images.gr-assets.com/books/1473064526m...,Bill Watterson,2005.0,4.82,The Complete Calvin and Hobbes
4397,1433502410,1514.0,,https://images.gr-assets.com/books/1410151002m...,"""Anonymous, Lane T. Dennis, Wayne A. Grudem""",2002.0,4.76,
2340,842339523,331.0,,https://images.gr-assets.com/books/1349032180m...,Francine Rivers,1993.0,4.76,Mark of the Lion Trilogy


In [14]:
book_without_genre = new_book.drop(['Genre'] , axis = 1)

In [15]:
valid_books = book_without_genre.dropna()

In [16]:
top_rated = valid_books.head(10)
top_rated_id = top_rated.ISBN

In [17]:
print ('Top Rated books are :')
top_rated.BookName

Top Rated books are :


1805                       The Complete Calvin and Hobbes
4232                                            "Ranma Â½
3910                       The Complete Calvin and Hobbes
2340                             Mark of the Lion Trilogy
4441    There's Treasure Everywhere: A Calvin and Hobb...
3520                      Complete Harry Potter Boxed Set
2231               "Harry Potter Collection (Harry Potter
827                   The Authoritative Calvin and Hobbes
3962    The Days Are Just Packed: A Calvin and Hobbes ...
3091                                    "The Way of Kings
Name: BookName, dtype: object

In [18]:
borrow.head()

Unnamed: 0,BorrowId,ActualRetDate,DateOfBorrow,BookID,CustID,DateClaimToRet,Rating,SupplierID
0,251.0,16-6-1907,12-6-1907,1843914,1350,19-6-1907,0.797226,1
1,1838.0,2-11-1974,18-10-1974,6898682,1395,6-11-1974,2.316953,1
2,4119.0,21-2-2016,16-2-2016,1439148015,1462,22-2-2016,3.667239,1
3,2289.0,16-10-1986,13-10-1986,380815583,1469,17-10-1986,0.680867,1
4,4158.0,6-4-2011,20-3-2011,9799731232,1082,5-4-2011,1.505543,1


In [19]:
ratings = borrow.drop(['BorrowId','DateClaimToRet','SupplierID','DateOfBorrow','ActualRetDate'] , axis = 1)

In [20]:
ratings.head()

Unnamed: 0,BookID,CustID,Rating
0,1843914,1350,0.797226
1,6898682,1395,2.316953
2,1439148015,1462,3.667239
3,380815583,1469,0.680867
4,9799731232,1082,1.505543


In [21]:
ratings.shape

(1211, 3)

In [22]:
rat_new = ratings[ratings.BookID.isin(books.ISBN)]
rat_new = rat_new[rat_new.CustID.isin(users.userId)]
rat_new.shape

(1052, 3)

In [23]:
ratings_ex = rat_new[rat_new.Rating != 0]
rat_count = pd.DataFrame(ratings_ex.groupby('BookID')['Rating'].count())
rat_count.sort_values('Rating' , ascending = False ,inplace = True)
rat_count.head()

Unnamed: 0_level_0,Rating
BookID,Unnamed: 1_level_1
806527285,4
59041602,4
446696129,4
743297334,3
439995442,3


In [24]:
most_rated = books[books.ISBN.isin(rat_count.index)]
most_r = most_rated.head(10)
most_rat_Id = most_r.ISBN

In [25]:
print ('Most Rated Books are : ')
most_r.BookName

Most Rated Books are : 


1                                           Rocket Boys
4                                Belgarath the Sorcerer
5                                                  East
9     The Knowledge of the Holy: The Attributes of G...
12                                The Phantom Tollbooth
13                                        Death du Jour
15                                     Fates and Furies
27                                      Aloha from Hell
28                                      The Ruby Knight
31                                   The Caves of Steel
Name: BookName, dtype: object

In [26]:
ratings_ex.head()

Unnamed: 0,BookID,CustID,Rating
2,1439148015,1462,3.667239
3,380815583,1469,0.680867
4,9799731232,1082,1.505543
5,1420925229,1200,1.859964
6,689865406,1115,1.903698


In [27]:
r_matrix = ratings_ex.pivot_table(
    index='CustID',
    columns='BookID',
    values='Rating',
    aggfunc = np.mean
).fillna(0)

In [28]:
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=5, n_jobs=-1)

In [29]:
model_knn.fit(r_matrix.T)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
         metric_params=None, n_jobs=-1, n_neighbors=5, p=2, radius=1.0)

In [30]:
def item_similarity(isbn , rat):
    similarities = []
    indices = []
    rat = rat.T
    loc = rat.index.get_loc(isbn)
    distances ,indices = model_knn.kneighbors(rat.iloc[loc , :].values.reshape(1,-1))
    similarities = 1 - distances.flatten()
    return similarities, indices

In [31]:
def predict_item(uid , isbn , rat):
    prediction = w_sum = 0
    uloc = rat.index.get_loc(uid)
    itloc = rat.columns.get_loc(isbn)
    similarities , indices = item_similarity(isbn , rat)
    sum_of_similarities = np.sum(similarities) - 1
    if sum_of_similarities == 0:
        sum_of_similarities = 1
    product = 1
    for i in range(0 , len(indices.flatten())):
        if indices.flatten()[i] == itloc:
            continue;
        else:
            product = rat.iloc[uloc,indices.flatten()[i]]
            product = product*(similarities[i])
            w_sum = w_sum + product
            
    prediction = round(w_sum/sum_of_similarities)
    if prediction <=0:
        prediction = 1
    elif prediction >10:
        prediction = 10

    return prediction

In [32]:
#Takes user input
#In the app it will be the id of logged in user
uid = int(input(""))
rat = r_matrix
predi = []
for i in range(rat.shape[1]):
    if (rat[rat.columns[i]][uid] != 0):
        predi.append(predict_item(uid , rat.columns[i], rat))
    else:
        predi.append(-1)
predi = pd.Series(predi)
predi = predi.sort_values(ascending = False)
recommend = predi[:10]
print ("Recommended books are: ")
for o in range(len(recommend)):
    print ( o+1 ,'.' , books.BookName[recommend.index[o]])

1200
Recommended books are: 
1 . Habibi
2 . The Year of Living Biblically: One Man's Humble Quest to Follow the Bible As Literally As Possible
3 . Valis
4 . Under the Banner of Heaven: A Story of Violent Faith
5 . Plateforme
6 . The Defining Decade: Why Your Twenties Matter--And How to Make the Most of Them Now
7 . Triptych
8 . Il Principe
9 . Finale
10 . The Wedding Girl


In [33]:
recommend_id = books.ISBN[recommend.index]

In [34]:
#All the recommended book ids stored in arrays

In [35]:
recommend_id.head()

36       37542414
834     743291476
416     812971892
401     330419129
850    1400030269
Name: ISBN, dtype: int64

In [36]:
most_rat_Id.head()

1     385333218
4     345403959
5      15205221
9      60684127
12    394820371
Name: ISBN, dtype: int64

In [37]:
top_rated_id.head()

1805      74074847
4232    1569319626
3910     740748475
2340     842339523
4441     836213122
Name: ISBN, dtype: int64