## Import necessary modules

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import nltk
import string
import re
from nltk.corpus import stopwords
import networkx as nx
from stemming.porter2 import stem
import csv
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from operator import itemgetter

## Extracting features using Videos.txt
The `Videos.txt` file is uploaded alongside this notebook.

In [2]:
# Parse the tab-separated Videos.txt file into `Videos`, a dict that maps each
# ASIN to a dict of that item's metadata fields.
Videos = {}
# The context manager closes the file even when a row fails to parse.
with open('./Videos.txt', 'r', encoding='utf-8', errors='ignore') as Videos_file:
    # The first line is discarded (presumably a column-header row -- confirm
    # against the data file).
    Videos_file.readline()
    for line in Videos_file:
        # A blank line (e.g. a trailing newline at the end of the file) has no
        # columns to read, so skip it instead of failing on item[1].
        if not line.strip():
            continue
        item = line.split('\t')
        MetaData = {}
        MetaData['Id'] = item[0].strip()
        ASIN = item[1].strip()
        MetaData['Title'] = item[2].strip()
        MetaData['Categories'] = item[3].strip()
        MetaData['Group'] = item[4].strip()
        # Numeric columns are converted here so later cells can do arithmetic on them.
        MetaData['SalesRank'] = int(item[5].strip())
        MetaData['TotalReviews'] = int(item[6].strip())
        MetaData['AvgRating'] = float(item[7].strip())
        MetaData['DegreeCentrality'] = int(item[8].strip())
        MetaData['ClusteringCoeff'] = float(item[9].strip())
        Videos[ASIN] = MetaData

## Using Videos edge list

In [3]:
# Load the weighted edge list into a NetworkX graph. The context manager
# guarantees the file handle is closed even if parsing the edge list fails.
with open("Videos.edgelist", 'rb') as Videos_edgelist:
    Videos_Graph = nx.read_weighted_edgelist(Videos_edgelist)

## Predicting co-purchased videos
Using measures such as the composite score and degree centrality, we predict the top co-purchased videos.

In [4]:
# ASIN of the item to recommend for. Surrounding whitespace typed at the prompt
# is dropped so the value can match a node label in the graph.
Item_ASIN = input().strip()

# Fail early with a readable message instead of an opaque NetworkX error.
if Item_ASIN not in Videos_Graph:
    raise ValueError("ASIN {!r} is not a node in Videos_Graph".format(Item_ASIN))

# NOTE: despite its name, `degree` holds the radius-1 ego graph of the item,
# i.e. the item, its direct neighbours, and the edges among them.
degree = nx.ego_graph(Videos_Graph, Item_ASIN, radius=1)
# Edges whose weight is below this value are ignored.
threshold = 0.1
Out_Graph = nx.Graph()

# Maps each direct neighbour of the item to the weight of the connecting edge.
Weights = {}

for source, destination, attrs in degree.edges(data=True):
    weight = attrs['weight']
    if weight >= threshold:
        # The weight is stored under the attribute name 'edge' (not 'weight').
        Out_Graph.add_edge(source, destination, edge=weight)
        # The graph is undirected, so the item can show up at either end of
        # the reported edge tuple; handle both orientations.
        if source == Item_ASIN:
            Weights[destination] = weight
        elif destination == Item_ASIN:
            Weights[source] = weight

# If no edge touching the item passed the threshold, the item is absent from
# Out_Graph and there is nothing to recommend.
if Item_ASIN not in Out_Graph:
    raise ValueError(
        "ASIN {!r} has no neighbours with edge weight >= {}".format(Item_ASIN, threshold)
    )

Neighbors = list(Out_Graph.neighbors(Item_ASIN))

# One list per metadata field. Each list is filled in Neighbors order, so the
# values line up row-for-row with the DataFrame index built below.
SalesRank, TotalReviews, AvgRating = [], [], []
DegreeCentrality, ClusteringCoeff, Group = [], [], []

# Metadata key -> list that collects that key's value for every neighbour.
field_lists = {
    'SalesRank': SalesRank,
    'Group': Group,
    'TotalReviews': TotalReviews,
    'AvgRating': AvgRating,
    'DegreeCentrality': DegreeCentrality,
    'ClusteringCoeff': ClusteringCoeff,
}

for ASIN in Neighbors:
    meta = Videos[ASIN]
    for field, values in field_lists.items():
        values.append(meta[field])

columns = ['SalesRank', 'TotalReviews', 'AvgRating', 'DegreeCentrality', 'ClusteringCoeff']

# Feature table indexed by neighbour ASIN, using short column names.
df = pd.DataFrame(
    {
        'Sales': SalesRank,
        'Reviews': TotalReviews,
        'Avg': AvgRating,
        'DC': DegreeCentrality,
        'CC': ClusteringCoeff,
    },
    index=Neighbors,
)
# Degree centrality raised to the power of the clustering coefficient.
df['DcCC'] = df['DC'] ** df['CC']

def logtrans(n):
    """Return the natural log of ``n`` rounded to two decimals.

    Parameters
    ----------
    n : int or float
        Value to transform.

    Returns
    -------
    number
        ``0`` when ``n`` is zero or negative (the logarithm is undefined
        there), otherwise ``round(np.log(n), 2)``.
    """
    # Negative input would make np.log return NaN, which then spreads through
    # every score derived from it; treat it like zero instead.
    if n <= 0:
        return 0
    return round(np.log(n), 2)

# Log-transform the review count, average rating and sales rank columns.
for raw_col, log_col in (('Reviews', 'log_Review'), ('Avg', 'log_Avg'), ('Sales', 'log_Sales')):
    df[log_col] = df[raw_col].apply(logtrans)

# Two intermediate scores: (log reviews * log rating) and DC ** CC.
df['Rating&Review_Score'] = df['log_Review'] * df['log_Avg']
df['DC&ClusterCoef_Score'] = df['DC'] ** df['CC']

# Min-max scale the three raw score columns and attach them under new names.
minmax = MinMaxScaler()
raw_score_cols = ['DC&ClusterCoef_Score', 'log_Sales', 'Rating&Review_Score']
scaled_values = minmax.fit_transform(df[raw_score_cols])
scaled_scores = pd.DataFrame(
    scaled_values,
    columns=['DcCC_Score', 'minmax_Sales', 'R&R_Score'],
    index=df.index,
)
dftrans = pd.concat([df, scaled_scores], axis=1)

# Invert the scaled sales value so that a smaller SalesRank number gives a
# larger Sales_Score.
dftrans['Sales_Score'] = (1 - dftrans['minmax_Sales']).round(2)
dftrans = dftrans.drop(columns=['DC&ClusterCoef_Score', 'minmax_Sales', 'Rating&Review_Score'])

# Composite score: the sales component contributes at one fifth of its value.
dftrans['composite_score'] = (
    dftrans['DcCC_Score'] + dftrans['R&R_Score'] + dftrans['Sales_Score'] / 5
)

# ASIN -> composite score rounded to two decimals.
compositemeasure = {
    asin: round(dftrans['composite_score'].loc[asin], 2)
    for asin in dftrans.index
}

# Keep the five highest-scoring ASINs, best first.
ranked = sorted(compositemeasure.items(), key=lambda pair: pair[1], reverse=True)
compositemeasure_sorted = dict(ranked[:5])

print("The top recommendations are:")
# Metadata fields printed for every recommended item, in this order.
items = ['Title', 'SalesRank', 'Group', 'TotalReviews', 'AvgRating', 'DegreeCentrality', 'ClusteringCoeff']
for asin in compositemeasure_sorted:
    meta = Videos[asin]
    print("----------------------")
    print("recommendation:", asin)
    for i in items:
        print(i, ":", meta[i])

630290899X
The top recommendations are:
----------------------
recommendation: 6300181308
Title : State of the Union
SalesRank : 858
Group : Video
TotalReviews : 13
AvgRating : 4.0
DegreeCentrality : 4
ClusteringCoeff : 0.77
----------------------
recommendation: 6301278437
Title : Mr. Lucky
SalesRank : 17598
Group : Video
TotalReviews : 10
AvgRating : 4.5
DegreeCentrality : 4
ClusteringCoeff : 0.43
----------------------
recommendation: B00000ICYF
Title : The Farmer's Daughter
SalesRank : 473
Group : Video
TotalReviews : 8
AvgRating : 4.5
DegreeCentrality : 4
ClusteringCoeff : 0.0
----------------------
recommendation: B00004TX2H
Title : The Sea of Grass
SalesRank : 25268
Group : Video
TotalReviews : 5
AvgRating : 4.0
DegreeCentrality : 4
ClusteringCoeff : 0.6
----------------------
recommendation: 6303360041
Title : None But the Lonely Heart
SalesRank : 13524
Group : Video
TotalReviews : 5
AvgRating : 4.0
DegreeCentrality : 6
ClusteringCoeff : 0.3
