In [1]:
import networkx
from operator import itemgetter
import matplotlib.pyplot
import pandas as pd

In [2]:
amazonBooks = pd.read_csv('./amazon-books.csv', index_col=0)

In [3]:
# Read the data from amazon-books-copurchase.adjlist;
# assign it to copurchaseGraph weighted Graph;
fhr=open("amazon-books-copurchase.edgelist", 'rb')
copurchaseGraph=networkx.read_weighted_edgelist(fhr)
fhr.close()

In [4]:
# Now let's assume a person is considering buying the following book;
# what else can we recommend to them based on copurchase behavior 
# we've seen from other users?
print ("Looking for Recommendations for Customer Purchasing this Book:")
purchasedAsin = '0805047905'

Looking for Recommendations for Customer Purchasing this Book:


In [5]:
# Let's first get some metadata associated with this book
print ("ASIN = ", purchasedAsin) 
print ("Title = ", amazonBooks.loc[purchasedAsin,'Title'])
print ("SalesRank = ", amazonBooks.loc[purchasedAsin,'SalesRank'])
print (" TotalReviews ", amazonBooks.loc[purchasedAsin,'TotalReviews'])
print ("AvgRating = ", amazonBooks.loc[purchasedAsin,'AvgRating'])
print ("DegreeCentrality = ", amazonBooks.loc[purchasedAsin,'DegreeCentrality'])
print ("ClusteringCoeff = ", amazonBooks.loc[purchasedAsin,'ClusteringCoeff'])

ASIN =  0805047905
Title =  Brown Bear, Brown Bear, What Do You See?
SalesRank =  171
 TotalReviews  172
AvgRating =  5.0
DegreeCentrality =  216
ClusteringCoeff =  0.01976744186046512


In [6]:
# Now let's look at the ego network associated with purchasedAsin in the copurchaseGraph - 
# which is esentially comprised of all the books that have been copurchased with this book in the past

purchasedAsinEgoGraph = networkx.Graph()
purchasedAsinEgoGraph = networkx.ego_graph(copurchaseGraph, purchasedAsin, radius=1)

In [7]:
# Next, recall that the edge weights in the copurchaseGraph is a measure of the similarity between the books connected by the edge.
#So we can use the island method to only retain those books that are highly simialr to the purchasedAsin

threshold = 0.5
purchasedAsinEgoTrimGraph = networkx.Graph()

for f, t, e in purchasedAsinEgoGraph.edges(data=True):
    if e['weight'] >= threshold:
        purchasedAsinEgoTrimGraph.add_edge(f,t,weight=e['weight'])

In [8]:
# Next, recall that given the purchasedAsinEgoTrimGraph you constructed above, you can get at the list of nodes connected to the purchasedAsin by a single 
        
purchasedAsinNeighbors = [i for i in purchasedAsinEgoTrimGraph.neighbors(purchasedAsin)] 

In [9]:
# Next, let's pick the Top Five book recommendations from among the purchasedAsinNeighbors based on one or more of the following data of the 
# neighboring nodes: SalesRank, AvgRating, TotalReviews, DegreeCentrality, and ClusteringCoeff

dfAB=pd.DataFrame(amazonBooks)

from sklearn.preprocessing import MinMaxScaler

mms = MinMaxScaler()
dfmms = pd.DataFrame(mms.fit_transform(dfAB[['TotalReviews','DegreeCentrality']]),
                       index = dfAB.index,
                       columns=['mms_TotalReviews', 'mms_DegreeCentrality']) 
df = pd.concat([dfAB, dfmms], axis=1)

df['AvgRating'] = df['AvgRating']/5

df['Composite_Measure'] = df['mms_TotalReviews']*df['AvgRating']+df['ClusteringCoeff']*df['mms_DegreeCentrality']

df = df.loc[purchasedAsinNeighbors]

df.sort_values(by=['Composite_Measure'], ascending=False, inplace = True)
df.drop(['Composite_Measure','mms_DegreeCentrality','mms_TotalReviews', 'Id'], axis = 1 , inplace = True)


Top_5_reco = df.iloc[0:5]
print('MY top 5 recommandations: ', Top_5_reco)

MY top 5 recommandations:                                                          Title  \
0399208534                        The Very Hungry Caterpillar   
0399220496  The Very Hungry Caterpillar Book with Plush Ca...   
0399237720  Very Hungry Caterpillar Giant Board Book and P...   
0399213015    The Very Hungry Caterpillar (MINIATURE EDITION)   
0399216596       Very Hungry Caterpillar Plush Toy (Toy Only)   

                                                   Categories  SalesRank  \
0399208534  stores animals specialty z favorites subjects ...       2117   
0399220496  stores animals specialty z subjects illustrato...     181105   
0399237720  movable stores animals specialty z subjects il...     112356   
0399213015  stores animals specialty z subjects illustrato...     431501   
0399216596  stores animals specialty z subjects illustrato...     467296   

            TotalReviews  AvgRating  DegreeCentrality  ClusteringCoeff  
0399208534           164        0.9                 