## Import necessary modules

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import nltk
import string
import re
from nltk.corpus import stopwords
import networkx as nx
from stemming.porter2 import stem
import csv
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from operator import itemgetter

## Extracting features using Music.txt
The `Music.txt` file used here is also uploaded.

In [7]:
# Parse the tab-separated product metadata in Music.txt into `Music`, a dict
# keyed by ASIN. Fields are read by position:
#   0 Id, 1 ASIN, 2 Title, 3 Categories, 4 Group, 5 SalesRank,
#   6 TotalReviews, 7 AvgRating, 8 DegreeCentrality, 9 ClusteringCoeff
Music = {}
# The context manager closes the handle even when a row fails to parse
# (e.g. int()/float() raising ValueError on a malformed field).
with open('./Music.txt', 'r', encoding='utf-8', errors='ignore') as Music_file:
    # Discard the first line -- presumably a column header; confirm against the file.
    next(Music_file, None)
    for line in Music_file:
        if not line.strip():
            # Tolerate blank lines (such as a trailing newline at end of file)
            # instead of failing with IndexError on item[1].
            continue
        item = line.split('\t')
        ASIN = item[1].strip()
        MetaData = {
            'Id': item[0].strip(),
            'Title': item[2].strip(),
            'Categories': item[3].strip(),
            'Group': item[4].strip(),
            'SalesRank': int(item[5].strip()),
            'TotalReviews': int(item[6].strip()),
            'AvgRating': float(item[7].strip()),
            'DegreeCentrality': int(item[8].strip()),
            'ClusteringCoeff': float(item[9].strip()),
        }
        # A later row with the same ASIN overwrites the earlier one.
        Music[ASIN] = MetaData

## Using Music CDs edge list

In [8]:
# Load the weighted edge list into a NetworkX graph. The file is opened in
# binary mode, as in the original cell, and the context manager guarantees
# the handle is released even if parsing the edge list raises.
with open("Music.edgelist", 'rb') as Music_edgelist:
    Music_Graph = nx.read_weighted_edgelist(Music_edgelist)

## Predicting co-purchased Music CDs
Using constraints such as the composite measure and node degrees, we predict the top co-purchased Music CDs.

In [9]:
# ASIN of the product to find recommendations for. Surrounding whitespace is
# stripped so a pasted value still matches a node label in the graph.
Item_ASIN = input().strip()

# Radius-1 ego network: the chosen item, its direct neighbours, and the edges
# between those nodes.
degree = nx.ego_graph(Music_Graph, Item_ASIN, radius=1)
threshold = 0.1  # edges with a weight below this are dropped
Out_Graph = nx.Graph()

# Weight of every retained edge that touches Item_ASIN, keyed by the neighbour.
Weights = {}

for source, destination, edge in degree.edges(data=True):
    weight = edge['weight']
    if weight >= threshold:
        # The weight is stored under the attribute name 'edge', as in the
        # original cell (NOTE(review): 'weight' may have been intended).
        Out_Graph.add_edge(source, destination, edge=weight)
        # In an undirected graph the item may appear as either endpoint of the
        # edge tuple, so both orientations must be checked; testing only
        # `source` silently misses some neighbours.
        if source == Item_ASIN:
            Weights[destination] = weight
        elif destination == Item_ASIN:
            Weights[source] = weight

# Direct neighbours of the item that survived the threshold filter.
Neighbors = list(Out_Graph.neighbors(Item_ASIN))

# Look up the metadata record of every neighbour once, then pull each feature
# out into its own list (kept under the original names).
neighbor_records = [Music[neighbor] for neighbor in Neighbors]

SalesRank = [record['SalesRank'] for record in neighbor_records]
Group = [record['Group'] for record in neighbor_records]
TotalReviews = [record['TotalReviews'] for record in neighbor_records]
AvgRating = [record['AvgRating'] for record in neighbor_records]
DegreeCentrality = [record['DegreeCentrality'] for record in neighbor_records]
ClusteringCoeff = [record['ClusteringCoeff'] for record in neighbor_records]

columns = ['SalesRank', 'TotalReviews', 'AvgRating', 'DegreeCentrality', 'ClusteringCoeff']

# One row per neighbour ASIN, with short column names for the numeric features.
feature_columns = {
    'Sales': SalesRank,
    'Reviews': TotalReviews,
    'Avg': AvgRating,
    'DC': DegreeCentrality,
    'CC': ClusteringCoeff,
}
df = pd.DataFrame(data=feature_columns, index=Neighbors)

# Degree centrality raised to the power of the clustering coefficient.
df['DcCC'] = df['DC'] ** df['CC']

def logtrans(n):
    """Return the natural log of ``n`` rounded to two decimals.

    Non-positive inputs return 0 rather than being passed to ``np.log``:
    ``np.log(0)`` is ``-inf`` and ``np.log`` of a negative number is ``nan``,
    either of which would propagate into the scaled scores computed from
    this value.

    Parameters
    ----------
    n : int or float
        Value to transform.

    Returns
    -------
    int or float
        ``0`` when ``n <= 0``; otherwise ``round(np.log(n), 2)``.
    """
    if n <= 0:
        return 0
    return round(np.log(n), 2)

# Log-transform the review count, average rating and sales rank.
for raw_name, log_name in (('Reviews', 'log_Review'),
                           ('Avg', 'log_Avg'),
                           ('Sales', 'log_Sales')):
    df[log_name] = df[raw_name].apply(logtrans)

# Raw (unscaled) component scores.
df['Rating&Review_Score'] = df['log_Review'] * df['log_Avg']
df['DC&ClusterCoef_Score'] = df['DC'] ** df['CC']

# Rescale the three components to [0, 1] across the neighbours.
minmax = MinMaxScaler()
scaled_values = minmax.fit_transform(
    df[['DC&ClusterCoef_Score', 'log_Sales', 'Rating&Review_Score']]
)
scaled_frame = pd.DataFrame(
    scaled_values,
    columns=['DcCC_Score', 'minmax_Sales', 'R&R_Score'],
    index=df.index,
)
dftrans = pd.concat([df, scaled_frame], axis=1)

# Invert the scaled sales rank so that the smallest rank receives the largest score.
dftrans['Sales_Score'] = (1 - dftrans['minmax_Sales']).round(2)
dftrans = dftrans.drop(
    columns=['DC&ClusterCoef_Score', 'minmax_Sales', 'Rating&Review_Score']
)

# Composite score: the sales component contributes at one fifth of its value.
dftrans['composite_score'] = (
    dftrans['DcCC_Score'] + dftrans['R&R_Score'] + dftrans['Sales_Score'] / 5
)

# Composite score per neighbour ASIN, rounded to two decimals.
composite_scores = dftrans['composite_score']
compositemeasure = {
    asin: round(composite_scores.loc[asin], 2) for asin in dftrans.index
}

# Keep the five highest-scoring neighbours, best first.
ranked_pairs = sorted(compositemeasure.items(), key=lambda pair: pair[1], reverse=True)
compositemeasure_sorted = dict(ranked_pairs[:5])

print("The top recommendations are:")
items = ['Title', 'SalesRank', 'Group', 'TotalReviews', 'AvgRating', 'DegreeCentrality', 'ClusteringCoeff']
for asin in compositemeasure_sorted:
    print("----------------------")
    print("recommendation:", asin)
    record = Music[asin]
    # Print each metadata field of the recommended product on its own line.
    for field in items:
        print(field, ":", record[field])

B00000AU3R
The top recommendations are:
----------------------
recommendation: B0000261KX
Title : Solo Concert
SalesRank : 72076
Group : Music
TotalReviews : 9
AvgRating : 5.0
DegreeCentrality : 9
ClusteringCoeff : 0.47
----------------------
recommendation: B000059OB9
Title : Anthem
SalesRank : 56048
Group : Music
TotalReviews : 9
AvgRating : 4.5
DegreeCentrality : 3
ClusteringCoeff : 0.83
----------------------
recommendation: B00006AM8D
Title : Songs Without End
SalesRank : 174906
Group : Music
TotalReviews : 1
AvgRating : 5.0
DegreeCentrality : 2
ClusteringCoeff : 0.0
