## Import necessary modules

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import nltk
import string
import re
from nltk.corpus import stopwords
import networkx as nx
from stemming.porter2 import stem
import csv
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from operator import itemgetter

## Using processed Books file

In [2]:
# Load the pre-processed book metadata into `Books`, a dict keyed by ASIN.
# Each row is tab-separated; columns are read by position (0 = Id, 1 = ASIN, 2 = Title,
# 3 = Categories, 4 = Group, 5 = SalesRank, 6 = TotalReviews, 7 = AvgRating,
# 8 = DegreeCentrality, 9 = ClusteringCoeff).
Books = {}
# The context manager closes the file even if a row fails to parse.
with open('./Books.txt', 'r', encoding='utf-8', errors='ignore') as Books_file:
    Books_file.readline()  # discard the first line (presumably the column header -- TODO confirm)
    for line in Books_file:
        if not line.strip():
            # A blank (e.g. trailing) line has no columns; skip it instead of raising IndexError.
            continue
        item = line.split('\t')
        MetaData = {}
        MetaData['Id'] = item[0].strip()
        ASIN = item[1].strip()
        MetaData['Title'] = item[2].strip()
        MetaData['Categories'] = item[3].strip()
        MetaData['Group'] = item[4].strip()
        MetaData['SalesRank'] = int(item[5].strip())
        MetaData['TotalReviews'] = int(item[6].strip())
        MetaData['AvgRating'] = float(item[7].strip())
        MetaData['DegreeCentrality'] = int(item[8].strip())
        MetaData['ClusteringCoeff'] = float(item[9].strip())
        Books[ASIN] = MetaData

## Importing Books Edge List
The edge-list file (`Books.edgelist`) has been uploaded along with this notebook.

In [3]:
# Read the weighted edge list into a graph. The context manager closes the file
# even if parsing raises.
with open("Books.edgelist", 'rb') as Books_edgelist:
    Books_Graph = nx.read_weighted_edgelist(Books_edgelist)

## Taking input of the purchased item's ASIN

In [6]:
# Read the purchased item's ASIN. Surrounding whitespace is stripped so that a stray
# space does not make the later dictionary and graph lookups fail.
Item_ASIN = input().strip()

0842328327


## Printing its details

In [7]:
# Echo the chosen ASIN, then every stored metadata field of that book, one per line.
print("ASIN = ", Item_ASIN)
selected_book = Books[Item_ASIN]
for field in ('Title', 'SalesRank', 'TotalReviews', 'AvgRating',
              'DegreeCentrality', 'ClusteringCoeff'):
    print(field + " = ", selected_book[field])

ASIN =  0842328327
Title =  Life Application Bible Commentary: 1 and 2 Timothy and Titus
SalesRank =  631289
TotalReviews =  1
AvgRating =  4.0
DegreeCentrality =  6
ClusteringCoeff =  0.79


## Building the radius-1 ego network of the node

In [8]:
# Despite its name, `degree` holds a graph rather than a count: the radius-1 ego
# network of Item_ASIN extracted from Books_Graph.
degree = nx.ego_graph(Books_Graph,Item_ASIN,radius=1)

## Setting similarity threshold

In [9]:
# Minimum edge weight an edge must reach to be kept.
threshold = 0.5
# Empty undirected graph that will receive the edges passing the threshold.
Out_Graph = nx.Graph()

## Finding weights and neighbours

In [10]:
# Copy every edge of the ego network whose weight reaches `threshold` into Out_Graph,
# and record in `Weights` the weight of each kept edge that touches the purchased item.
Weights = {}

for source, destination, edge in degree.edges(data=True):
    if edge['weight'] >= threshold:
        Out_Graph.add_edge(source, destination, edge=edge['weight'])
        # The graph is undirected, so the purchased item may be reported at either end
        # of an edge. Checking only `source` silently skipped some of its neighbours.
        if source == Item_ASIN:
            Weights[destination] = edge['weight']
        elif destination == Item_ASIN:
            Weights[source] = edge['weight']

In [11]:
# ASINs directly connected to the purchased item in the thresholded graph.
Neighbors = list(Out_Graph.neighbors(Item_ASIN))

In [12]:
# Parallel lists holding one metadata value per neighbour, in `Neighbors` order.
SalesRank, TotalReviews, AvgRating = [], [], []
DegreeCentrality, ClusteringCoeff = [], []

for ASIN in Neighbors:
    neighbor_meta = Books[ASIN]  # look the record up once per neighbour
    SalesRank.append(neighbor_meta['SalesRank'])
    TotalReviews.append(neighbor_meta['TotalReviews'])
    AvgRating.append(neighbor_meta['AvgRating'])
    DegreeCentrality.append(neighbor_meta['DegreeCentrality'])
    ClusteringCoeff.append(neighbor_meta['ClusteringCoeff'])

## Creating dataset of all its neighbours

In [13]:
# Names of the metadata fields collected for every neighbour.
columns = [
    'SalesRank',
    'TotalReviews',
    'AvgRating',
    'DegreeCentrality',
    'ClusteringCoeff',
]
# Show at most 10 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 10)

In [14]:
# One row per neighbour (indexed by ASIN), one column per collected metadata value.
df = pd.DataFrame(
    {
        'Sales': SalesRank,
        'Reviews': TotalReviews,
        'Avg': AvgRating,
        'DC': DegreeCentrality,
        'CC': ClusteringCoeff,
    },
    index=Neighbors,
)
# Degree centrality raised to the power of the clustering coefficient.
df['DcCC'] = df['DC'] ** df['CC']

In [15]:
# Display the neighbour feature table built in the previous cell.
df

Unnamed: 0,Sales,Reviews,Avg,DC,CC,DcCC
842328130,180843,0,0.0,8,0.81,5.388934
842330313,447658,0,0.0,11,0.72,5.62086
842328610,203202,1,5.0,14,0.68,6.016821
842328572,339372,0,0.0,12,0.75,6.44742
842328629,737882,0,0.0,3,0.83,2.488918
842329749,621548,1,5.0,5,0.69,3.035913


In [16]:
def logtrans(n):
    """Return the natural logarithm of ``n`` rounded to two decimal places.

    When ``n`` equals 0 the function returns 0 instead of evaluating ``log(0)``.
    Note that ``log(1)`` is also 0, so inputs 0 and 1 both map to zero.
    """
    if n != 0:
        return round(np.log(n), 2)
    return 0

## Extracting features of neighbours

In [17]:
# Log-transform the review count, average rating and sales rank into new columns.
for log_column, raw_column in (('log_Review', 'Reviews'),
                               ('log_Avg', 'Avg'),
                               ('log_Sales', 'Sales')):
    df[log_column] = df[raw_column].apply(logtrans)

# Combined scores: product of the two log columns, and DC raised to the power CC.
df['Rating&Review_Score'] = df['log_Review'] * df['log_Avg']
df['DC&ClusterCoef_Score'] = df['DC'] ** df['CC']

In [18]:
# Rescale the three raw scores to [0, 1] and attach them to the feature table.
minmax = MinMaxScaler()
raw_scores = df[['DC&ClusterCoef_Score', 'log_Sales', 'Rating&Review_Score']]
scaled_scores = pd.DataFrame(
    minmax.fit_transform(raw_scores),
    columns=['DcCC_Score', 'minmax_Sales', 'R&R_Score'],
    index=df.index,
)
dftrans = pd.concat([df, scaled_scores], axis=1)

# Invert the scaled sales column so that a lower sales rank gives a higher score.
dftrans['Sales_Score'] = (1 - dftrans['minmax_Sales']).round(2)
dftrans = dftrans.drop(
    columns=['DC&ClusterCoef_Score', 'minmax_Sales', 'Rating&Review_Score']
)
# Composite score: the sales component is weighted by one fifth.
dftrans['composite_score'] = (
    dftrans['DcCC_Score'] + dftrans['R&R_Score'] + dftrans['Sales_Score'] / 5
)

In [19]:
# Display the feature table together with the scaled scores and the composite score.
dftrans

Unnamed: 0,Sales,Reviews,Avg,DC,CC,...,log_Sales,DcCC_Score,R&R_Score,Sales_Score,composite_score
842328130,180843,0,0.0,8,0.81,...,12.11,0.732605,0.0,1.0,0.932605
842330313,447658,0,0.0,11,0.72,...,13.01,0.791194,0.0,0.36,0.863194
842328610,203202,1,5.0,14,0.68,...,12.22,0.891222,0.0,0.92,1.075222
842328572,339372,0,0.0,12,0.75,...,12.73,1.0,0.0,0.56,1.112
842328629,737882,0,0.0,3,0.83,...,13.51,0.0,0.0,0.0,0.0
842329749,621548,1,5.0,5,0.69,...,13.34,0.138182,0.0,0.12,0.162182


In [20]:
# Map each neighbour ASIN to its composite score rounded to two decimals.
composite_column = dftrans['composite_score']
compositemeasure = {
    asin: round(composite_column.loc[asin], 2) for asin in dftrans.index
}

# Keep the five highest-scoring neighbours, best first.
ranked_pairs = sorted(compositemeasure.items(), key=lambda pair: pair[1], reverse=True)
compositemeasure_sorted = dict(ranked_pairs[:5])

## Top Recommendations

In [23]:
# Print the stored metadata of each recommended book, best score first.
print("The top recommendations are:")
items = ['Title', 'SalesRank', 'TotalReviews', 'AvgRating', 'DegreeCentrality', 'ClusteringCoeff']
for asin in compositemeasure_sorted:
    print("----------------------")
    print("recommendation:", asin)
    recommended_book = Books[asin]
    for field in items:
        print(field, ":", recommended_book[field])

The top recommendations are:
----------------------
recommendation: 0842328572
Title : 1, 2, & 3 John (Life Application Bible Commentary)
SalesRank : 339372
TotalReviews : 0
AvgRating : 0.0
DegreeCentrality : 12
ClusteringCoeff : 0.75
----------------------
recommendation: 0842328610
Title : Acts (Life Application Bible Commentary)
SalesRank : 203202
TotalReviews : 1
AvgRating : 5.0
DegreeCentrality : 14
ClusteringCoeff : 0.68
----------------------
recommendation: 0842328130
Title : Ephesians (Life Application Bible Commentary)
SalesRank : 180843
TotalReviews : 0
AvgRating : 0.0
DegreeCentrality : 8
ClusteringCoeff : 0.81
----------------------
recommendation: 0842330313
Title : 1 Peter 2 Peter Jude (Life Application Bible Commentary)
SalesRank : 447658
TotalReviews : 0
AvgRating : 0.0
DegreeCentrality : 11
ClusteringCoeff : 0.72
----------------------
recommendation: 0842329749
Title : Life Application Bible Commentary: Philippians Colossians and Philemon (Life Application Bible Comm

## Compiled code

In [22]:
# Read the purchased item's ASIN. Surrounding whitespace is stripped so that a stray
# space does not make the later dictionary and graph lookups fail.
Item_ASIN = input().strip()

# Despite its name, `degree` holds a graph: the radius-1 ego network of the item.
degree = nx.ego_graph(Books_Graph, Item_ASIN, radius=1)
threshold = 0.5  # minimum edge weight an edge must reach to be kept
Out_Graph = nx.Graph()

# Weight of every kept edge that touches the purchased item, keyed by the other endpoint.
Weights = {}

for source, destination, edge in degree.edges(data=True):
    if edge['weight'] >= threshold:
        Out_Graph.add_edge(source, destination, edge=edge['weight'])
        # The graph is undirected, so the purchased item may be reported at either end
        # of an edge. Checking only `source` silently skipped some of its neighbours.
        if source == Item_ASIN:
            Weights[destination] = edge['weight']
        elif destination == Item_ASIN:
            Weights[source] = edge['weight']

# ASINs directly connected to the purchased item in the thresholded graph.
Neighbors = list(Out_Graph.neighbors(Item_ASIN))

# Parallel lists holding one metadata value per neighbour, in `Neighbors` order.
SalesRank = []
TotalReviews = []
AvgRating = []
DegreeCentrality = []
ClusteringCoeff = []

for ASIN in Neighbors:
    SalesRank.append(Books[ASIN]['SalesRank'])
    TotalReviews.append(Books[ASIN]['TotalReviews'])
    AvgRating.append(Books[ASIN]['AvgRating'])
    DegreeCentrality.append(Books[ASIN]['DegreeCentrality'])
    ClusteringCoeff.append(Books[ASIN]['ClusteringCoeff'])

columns = ['SalesRank', 'TotalReviews', 'AvgRating', 'DegreeCentrality', 'ClusteringCoeff']

# One row per neighbour (indexed by ASIN), one column per collected metadata value.
df = pd.DataFrame(
    data={
        'Sales': SalesRank,
        'Reviews': TotalReviews,
        'Avg': AvgRating,
        'DC': DegreeCentrality,
        'CC': ClusteringCoeff,
    },
    index=Neighbors,
)
# Degree centrality raised to the power of the clustering coefficient.
df['DcCC'] = pow(df['DC'], df['CC'])

def logtrans(n):
    """Return the natural logarithm of ``n`` rounded to two decimal places.

    When ``n`` equals 0 the function returns 0 instead of evaluating ``log(0)``.
    Note that ``log(1)`` is also 0, so inputs 0 and 1 both map to zero.
    """
    if n != 0:
        return round(np.log(n), 2)
    return 0

# Log-transform the review count, average rating and sales rank into new columns.
for log_column, raw_column in (('log_Review', 'Reviews'),
                               ('log_Avg', 'Avg'),
                               ('log_Sales', 'Sales')):
    df[log_column] = df[raw_column].apply(logtrans)

# Combined scores: product of the two log columns, and DC raised to the power CC.
df['Rating&Review_Score'] = df['log_Review'] * df['log_Avg']
df['DC&ClusterCoef_Score'] = df['DC'] ** df['CC']

# Rescale the three raw scores to [0, 1] and attach them to the feature table.
minmax = MinMaxScaler()
raw_scores = df[['DC&ClusterCoef_Score', 'log_Sales', 'Rating&Review_Score']]
scaled_scores = pd.DataFrame(
    minmax.fit_transform(raw_scores),
    columns=['DcCC_Score', 'minmax_Sales', 'R&R_Score'],
    index=df.index,
)
dftrans = pd.concat([df, scaled_scores], axis=1)

# Invert the scaled sales column so that a lower sales rank gives a higher score.
dftrans['Sales_Score'] = (1 - dftrans['minmax_Sales']).round(2)
dftrans = dftrans.drop(
    columns=['DC&ClusterCoef_Score', 'minmax_Sales', 'Rating&Review_Score']
)
# Composite score: the sales component is weighted by one fifth.
dftrans['composite_score'] = (
    dftrans['DcCC_Score'] + dftrans['R&R_Score'] + dftrans['Sales_Score'] / 5
)

# Map each neighbour ASIN to its composite score rounded to two decimals.
composite_column = dftrans['composite_score']
compositemeasure = {
    asin: round(composite_column.loc[asin], 2) for asin in dftrans.index
}

# Keep the five highest-scoring neighbours, best first.
ranked_pairs = sorted(compositemeasure.items(), key=lambda pair: pair[1], reverse=True)
compositemeasure_sorted = dict(ranked_pairs[:5])

# Print the stored metadata of each recommended book, best score first.
print("The top recommendations are:")
items = ['Title', 'SalesRank', 'TotalReviews', 'AvgRating', 'DegreeCentrality', 'ClusteringCoeff']
for asin in compositemeasure_sorted:
    print("----------------------")
    print("recommendation:", asin)
    recommended_book = Books[asin]
    for field in items:
        print(field, ":", recommended_book[field])

0842328327
The top recommendations are:
----------------------
recommendation: 0842328572
Title : 1, 2, & 3 John (Life Application Bible Commentary)
SalesRank : 339372
TotalReviews : 0
AvgRating : 0.0
DegreeCentrality : 12
ClusteringCoeff : 0.75
----------------------
recommendation: 0842328610
Title : Acts (Life Application Bible Commentary)
SalesRank : 203202
TotalReviews : 1
AvgRating : 5.0
DegreeCentrality : 14
ClusteringCoeff : 0.68
----------------------
recommendation: 0842328130
Title : Ephesians (Life Application Bible Commentary)
SalesRank : 180843
TotalReviews : 0
AvgRating : 0.0
DegreeCentrality : 8
ClusteringCoeff : 0.81
----------------------
recommendation: 0842330313
Title : 1 Peter 2 Peter Jude (Life Application Bible Commentary)
SalesRank : 447658
TotalReviews : 0
AvgRating : 0.0
DegreeCentrality : 11
ClusteringCoeff : 0.72
----------------------
recommendation: 0842329749
Title : Life Application Bible Commentary: Philippians Colossians and Philemon (Life Application