In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split 
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from IPython.display import display


In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/aakashgoel12/blogs/master/input/product_ratings_final.csv',\
                      encoding='latin-1')

In [3]:
df.head()

Unnamed: 0,userId,rating,prod_name
0,joshua,5,Pink Friday: Roman Reloaded Re-Up (w/dvd)
1,dorothy w,5,Lundberg Organic Cinnamon Toast Rice Cakes
2,dorothy w,5,Lundberg Organic Cinnamon Toast Rice Cakes
3,rebecca,1,K-Y Love Sensuality Pleasure Gel
4,walker557,1,K-Y Love Sensuality Pleasure Gel


In [4]:
df.tail()

Unnamed: 0,userId,rating,prod_name
29561,laurasnchz,5,L'or233al Paris Elvive Extraordinary Clay Reba...
29562,scarlepadilla,5,L'or233al Paris Elvive Extraordinary Clay Reba...
29563,liviasuexo,5,L'or233al Paris Elvive Extraordinary Clay Reba...
29564,ktreed95,5,L'or233al Paris Elvive Extraordinary Clay Reba...
29565,kcoopxoxo,5,L'or233al Paris Elvive Extraordinary Clay Reba...


In [5]:
df.isnull().sum()

userId       0
rating       0
prod_name    0
dtype: int64

In [6]:
df.duplicated().sum()

2194

In [7]:
display(df.sample(n=5, random_state=42)) # this find the easy way random pick the materials

Unnamed: 0,userId,rating,prod_name
28266,daitaliana23,5,"Storkcraft Tuscany Glider and Ottoman, Beige C..."
15603,beverly,5,"Lysol Concentrate Deodorizing Cleaner, Origina..."
7839,amy77,5,Clorox Disinfecting Wipes Value Pack Scented 1...
4850,dmann10101,5,The Resident Evil Collection 5 Discs (blu-Ray)
4699,morenito021582,5,The Resident Evil Collection 5 Discs (blu-Ray)


In [8]:
# data preprocessing
def apply_pivot(df1,fillby=None):
    if fillby is not None:
        return df1.pivot_table(index='userId', columns='prod_name',values='rating').fillna(fillby)
    return df1.pivot_table(index='userId', columns='prod_name',values='rating')

In [9]:
train,test=train_test_split(df,test_size=0.30,random_state=42)
test=test[test.userId.isin(train.userId )]

In [10]:
test

Unnamed: 0,userId,rating,prod_name
15603,beverly,5,"Lysol Concentrate Deodorizing Cleaner, Origina..."
7839,amy77,5,Clorox Disinfecting Wipes Value Pack Scented 1...
21203,meme,1,Nexxus Exxtra Gel Style Creation Sculptor
12965,shear12,5,Clorox Disinfecting Wipes Value Pack Scented 1...
2943,sandy,5,My Big Fat Greek Wedding 2 (blu-Ray + Dvd + Di...
...,...,...,...
21159,mike yhe,1,Nexxus Exxtra Gel Style Creation Sculptor
9293,ecyoung105,5,Clorox Disinfecting Wipes Value Pack Scented 1...
9539,deedee88,5,Clorox Disinfecting Wipes Value Pack Scented 1...
26195,sharon,4,Aveeno Baby Continuous Protection Lotion Sunsc...


In [11]:
train

Unnamed: 0,userId,rating,prod_name
10144,couponinglady,5,Clorox Disinfecting Wipes Value Pack Scented 1...
7003,gabyh760,4,Clorox Disinfecting Wipes Value Pack Scented 1...
23956,mikey,4,Godzilla 3d Includes Digital Copy Ultraviolet ...
12052,tay247,5,Clorox Disinfecting Wipes Value Pack Scented 1...
22680,phoenixablaze,3,Godzilla 3d Includes Digital Copy Ultraviolet ...
...,...,...,...
21575,karlagw94,4,Nexxus Exxtra Gel Style Creation Sculptor
5390,cliff,5,100:Complete First Season (blu-Ray)
860,joli89,5,Windex Original Glass Cleaner Refill 67.6oz (2...
15795,tropicalaqua,4,"Burt's Bees Lip Shimmer, Raisin"


In [12]:
#3.2 Apply pivot operation and fillna used to replace NaN values with 0 i.e. where user didn't made any rating
df_train_pivot=apply_pivot(df1= train,fillby=0)
df_test_pivot=apply_pivot(df1= test, fillby=0)

In [13]:
#3.3 dummy dataset (train and test)
dummy_train=train.copy()


In [14]:
dummy_train

Unnamed: 0,userId,rating,prod_name
10144,couponinglady,5,Clorox Disinfecting Wipes Value Pack Scented 1...
7003,gabyh760,4,Clorox Disinfecting Wipes Value Pack Scented 1...
23956,mikey,4,Godzilla 3d Includes Digital Copy Ultraviolet ...
12052,tay247,5,Clorox Disinfecting Wipes Value Pack Scented 1...
22680,phoenixablaze,3,Godzilla 3d Includes Digital Copy Ultraviolet ...
...,...,...,...
21575,karlagw94,4,Nexxus Exxtra Gel Style Creation Sculptor
5390,cliff,5,100:Complete First Season (blu-Ray)
860,joli89,5,Windex Original Glass Cleaner Refill 67.6oz (2...
15795,tropicalaqua,4,"Burt's Bees Lip Shimmer, Raisin"


In [15]:
dummy_train.isnull().sum()

userId       0
rating       0
prod_name    0
dtype: int64

In [16]:
dummy_train['rating']=dummy_train['rating'].apply(lambda x:0 if x >=1 else 1)

In [17]:
dummy_train=apply_pivot(df1=dummy_train, fillby=1)

In [18]:
dummy_test=test.copy()
dummy_test['rating']= dummy_test['rating'].apply(lambda x:1 if x >=1 else 0)
dummy_test=apply_pivot(df1=dummy_test, fillby=0)

In [19]:
dummy_train.isnull().sum()


prod_name
0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest        0
100:Complete First Season (blu-Ray)                                  0
2017-2018 Brownline174 Duraflex 14-Month Planner 8 1/2 X 11 Black    0
2x Ultra Era with Oxi Booster, 50fl oz                               0
42 Dual Drop Leaf Table with 2 Madrid Chairs"                        0
                                                                    ..
Wedding Wishes Wedding Guest Book                                    0
Weleda Everon Lip Balm                                               0
Windex Original Glass Cleaner Refill 67.6oz (2 Liter)                0
Yes To Carrots Nourishing Body Wash                                  0
Yes To Grapefruit Rejuvenating Body Wash                             0
Length: 231, dtype: int64

In [20]:
dummy_test.isnull().sum()

prod_name
0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest                                              0
100:Complete First Season (blu-Ray)                                                                        0
Aussie Aussome Volume Shampoo, 13.5 Oz                                                                     0
Australian Gold Exotic Blend Lotion, SPF 4                                                                 0
Aveeno Baby Continuous Protection Lotion Sunscreen with Broad Spectrum SPF 55, 4oz                         0
                                                                                                          ..
Vaseline Intensive Care Lip Therapy Cocoa Butter                                                           0
Vicks Vaporub, Regular, 3.53oz                                                                             0
Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime Guarantee    0
Windex Or

In [21]:
dummy_test

prod_name,0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest,100:Complete First Season (blu-Ray),"Aussie Aussome Volume Shampoo, 13.5 Oz","Australian Gold Exotic Blend Lotion, SPF 4","Aveeno Baby Continuous Protection Lotion Sunscreen with Broad Spectrum SPF 55, 4oz","Avery174 Ready Index Contemporary Table Of Contents Divider, 1-8, Multi, Letter",Axe Dry Anti-Perspirant Deodorant Invisible Solid Phoenix,"Banana Boat Sunless Summer Color Self Tanning Lotion, Light To Medium",Bisquick Original Pancake And Baking Mix - 40oz,Black Front Loading Frame Set (8.5x11) Set Of 12,...,Tresemme Kertatin Smooth Infusing Conditioning,Various - Country's Greatest Gospel:Gold Ed (cd),Various - Red Hot Blue:Tribute To Cole Porter (cd),Various Artists - Choo Choo Soul (cd),Vaseline Intensive Care Healthy Hands Stronger Nails,Vaseline Intensive Care Lip Therapy Cocoa Butter,"Vicks Vaporub, Regular, 3.53oz","Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime Guarantee",Windex Original Glass Cleaner Refill 67.6oz (2 Liter),Yes To Carrots Nourishing Body Wash
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
143st,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1witch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
37f5p,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 rooms 1 dog lotsa fur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8ellie24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
yummy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zach,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zburt5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zitro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### STEP 04: User-User Similarity

In [22]:
mean=np.nanmean(apply_pivot(df1=train),axis=1)

In [23]:
df_train_subtracted=(apply_pivot(df1=train).T -mean).T

In [24]:
# Make rating=0 where user hasn't given any rating
df_train_subtracted.fillna(0,inplace=True)

In [25]:
 #Creating the User Similarity Matrix using pairwise_distance function. shape of user_correlation is userXuser i.e. 2071X2071
user_correlation = 1 - pairwise_distances(df_train_subtracted, metric='cosine')
user_correlation[np.isnan(user_correlation)] = 0
 

In [26]:
# user_correlation[user_correlation<0] = 0
# Convert the user_correlation matrix into dataframe
user_correlation_df = pd.DataFrame(user_correlation)
user_correlation_df['userId'] = df_train_subtracted.index
user_correlation_df.set_index('userId',inplace=True)
user_correlation_df.columns = df_train_subtracted.index.tolist()
    

In [33]:
user_final_rating=np.dot(user_correlation,df_train_pivot)
user_final_rating_df = pd.DataFrame(user_final_rating, index=df_train_pivot.index, columns=df_train_pivot.columns)
    

In [34]:
user_final_rating_df.head()

prod_name,0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest,100:Complete First Season (blu-Ray),2017-2018 Brownline174 Duraflex 14-Month Planner 8 1/2 X 11 Black,"2x Ultra Era with Oxi Booster, 50fl oz","42 Dual Drop Leaf Table with 2 Madrid Chairs""",4C Grated Parmesan Cheese 100% Natural 8oz Shaker,Africa's Best No-Lye Dual Conditioning Relaxer System Super,Alberto VO5 Salon Series Smooth Plus Sleek Shampoo,"All,bran Complete Wheat Flakes, 18 Oz.",Ambi Complexion Cleansing Bar,...,"Vicks Vaporub, Regular, 3.53oz",Voortman Sugar Free Fudge Chocolate Chip Cookies,Wagan Smartac 80watt Inverter With Usb,"Wallmount Server Cabinet (450mm, 9 RU)","Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime Guarantee",Wedding Wishes Wedding Guest Book,Weleda Everon Lip Balm,Windex Original Glass Cleaner Refill 67.6oz (2 Liter),Yes To Carrots Nourishing Body Wash,Yes To Grapefruit Rejuvenating Body Wash
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00sab00,0.0,5.218276,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.816497,...,0.546994,0.0,0.816497,0.0,-2.828762,0.0,0.0,-0.202787,0.0,-3.627807
02deuce,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0325home,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10ten,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
user_correlation_df

Unnamed: 0_level_0,00sab00,02deuce,0325home,1085,10ten,11677j,1234,1234567,123cat123,123charlie,...,zpalma,zt313,zubb,zuttle,zwithanx,zxcsdfd,zxjki,zyiah4,zzdiane,zzz1127
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00sab00,1.0,0.0,0.0,0.0,0.0,0.0,-0.612372,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
02deuce,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0325home,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1085,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10ten,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zxcsdfd,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zxjki,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zyiah4,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zzdiane,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##STEP 06: Find Top N recommendation for User (User-User

In [41]:
def find_top_recommendations(pred_rating_df, userid, topn):
    recommendation = pred_rating_df.loc[userid].sort_values(ascending=False)[:topn]
    recommendation = pd.DataFrame(recommendation).reset_index()
    recommendation.columns=['prod_name', 'predicted_ratings']
    
    
    return recommendation


user_input = str(input("Enter your user id"))
recommendation_user_user = find_top_recommendations(user_final_rating_df, user_input, 5)
recommendation_user_user['userId'] = user_input

print("Earlier rated products by user id:{} as below".format(user_input))
display(train[train['userId']==user_input].sort_values(['rating'],ascending=False))
print("Earlier rated products by user id:{} as below".format(user_input))
display(train[train['userId']==user_input].sort_values(['rating'],ascending=False))

Earlier rated products by user id:joshua as below


Unnamed: 0,userId,rating,prod_name
0,joshua,5,Pink Friday: Roman Reloaded Re-Up (w/dvd)
17718,joshua,5,Smead174 Recycled Letter Size Manila File Back...
22379,joshua,5,Cheetos Crunchy Flamin' Hot Cheese Flavored Sn...
1541,joshua,3,Dark Shadows (includes Digital Copy) (ultravio...


Earlier rated products by user id:joshua as below


Unnamed: 0,userId,rating,prod_name
0,joshua,5,Pink Friday: Roman Reloaded Re-Up (w/dvd)
17718,joshua,5,Smead174 Recycled Letter Size Manila File Back...
22379,joshua,5,Cheetos Crunchy Flamin' Hot Cheese Flavored Sn...
1541,joshua,3,Dark Shadows (includes Digital Copy) (ultravio...


## TEP 07: Evaluation (User-User) on test

In [42]:
##Filter user correlation only for user which is in test, test is subset/equal of train in terms of userId

In [43]:
user_correlation_test_df = user_correlation_df[user_correlation_df.index.isin(test.userId)]
user_correlation_test_df = user_correlation_test_df[list(set(test.userId))]

In [44]:
# user_correlation_test_df[user_correlation_test_df<0]=0
#Get test user predicted rating
test_user_predicted_ratings = np.dot(user_correlation_test_df, df_test_pivot)
test_user_predicted_ratings = np.multiply(test_user_predicted_ratings,dummy_test)

In [45]:
    #Get NaN where user never rated as it shouldn't contribute in calculating RMSE
test_user_predicted_ratings = test_user_predicted_ratings[test_user_predicted_ratings>0]

In [46]:
scaler = MinMaxScaler(feature_range=(1, 5))

In [47]:
scaler.fit(test_user_predicted_ratings)
test_user_predicted_ratings = scaler.transform(test_user_predicted_ratings)

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


In [48]:
total_non_nan = np.count_nonzero(~np.isnan(test_user_predicted_ratings))
rmse = (np.sum(np.sum((apply_pivot(df1 = test) - test_user_predicted_ratings)**2))/total_non_nan)**0.5
print(rmse)

2.692867512942061


  return reduction(axis=axis, out=out, **passkwargs)


In [51]:
pickle.dump(user_final_rating_df,open('E:\\model\\user_final_rating.pkl','wb'))

## Sentiment analysis

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split 
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from IPython.display import display


In [None]:
df = pd.read_csv('https://raw.githubusercontent.com/aakashgoel12/blogs/master/input/product_ratings_final.csv',\
                      encoding='latin-1')

In [None]:
df.head()

Unnamed: 0,userId,rating,prod_name
0,joshua,5,Pink Friday: Roman Reloaded Re-Up (w/dvd)
1,dorothy w,5,Lundberg Organic Cinnamon Toast Rice Cakes
2,dorothy w,5,Lundberg Organic Cinnamon Toast Rice Cakes
3,rebecca,1,K-Y Love Sensuality Pleasure Gel
4,walker557,1,K-Y Love Sensuality Pleasure Gel


In [None]:
df.tail()

Unnamed: 0,userId,rating,prod_name
29561,laurasnchz,5,L'or233al Paris Elvive Extraordinary Clay Reba...
29562,scarlepadilla,5,L'or233al Paris Elvive Extraordinary Clay Reba...
29563,liviasuexo,5,L'or233al Paris Elvive Extraordinary Clay Reba...
29564,ktreed95,5,L'or233al Paris Elvive Extraordinary Clay Reba...
29565,kcoopxoxo,5,L'or233al Paris Elvive Extraordinary Clay Reba...


In [None]:
df.isnull().sum()

userId       0
rating       0
prod_name    0
dtype: int64

In [None]:
df.duplicated().sum()

2194

In [None]:
display(df.sample(n=5, random_state=42)) # this find the easy way random pick the materials

Unnamed: 0,userId,rating,prod_name
28266,daitaliana23,5,"Storkcraft Tuscany Glider and Ottoman, Beige C..."
15603,beverly,5,"Lysol Concentrate Deodorizing Cleaner, Origina..."
7839,amy77,5,Clorox Disinfecting Wipes Value Pack Scented 1...
4850,dmann10101,5,The Resident Evil Collection 5 Discs (blu-Ray)
4699,morenito021582,5,The Resident Evil Collection 5 Discs (blu-Ray)


In [None]:
# data preprocessing
def apply_pivot(df1,fillby=None):
    if fillby is not None:
        return df1.pivot_table(index='userId', columns='prod_name',values='rating').fillna(fillby)
    return df1.pivot_table(index='userId', columns='prod_name',values='rating')

In [None]:
train,test=train_test_split(df,test_size=0.30,random_state=42)
test=test[test.userId.isin(train.userId )]

In [None]:
test

Unnamed: 0,userId,rating,prod_name
15603,beverly,5,"Lysol Concentrate Deodorizing Cleaner, Origina..."
7839,amy77,5,Clorox Disinfecting Wipes Value Pack Scented 1...
21203,meme,1,Nexxus Exxtra Gel Style Creation Sculptor
12965,shear12,5,Clorox Disinfecting Wipes Value Pack Scented 1...
2943,sandy,5,My Big Fat Greek Wedding 2 (blu-Ray + Dvd + Di...
...,...,...,...
21159,mike yhe,1,Nexxus Exxtra Gel Style Creation Sculptor
9293,ecyoung105,5,Clorox Disinfecting Wipes Value Pack Scented 1...
9539,deedee88,5,Clorox Disinfecting Wipes Value Pack Scented 1...
26195,sharon,4,Aveeno Baby Continuous Protection Lotion Sunsc...


In [None]:
train

Unnamed: 0,userId,rating,prod_name
10144,couponinglady,5,Clorox Disinfecting Wipes Value Pack Scented 1...
7003,gabyh760,4,Clorox Disinfecting Wipes Value Pack Scented 1...
23956,mikey,4,Godzilla 3d Includes Digital Copy Ultraviolet ...
12052,tay247,5,Clorox Disinfecting Wipes Value Pack Scented 1...
22680,phoenixablaze,3,Godzilla 3d Includes Digital Copy Ultraviolet ...
...,...,...,...
21575,karlagw94,4,Nexxus Exxtra Gel Style Creation Sculptor
5390,cliff,5,100:Complete First Season (blu-Ray)
860,joli89,5,Windex Original Glass Cleaner Refill 67.6oz (2...
15795,tropicalaqua,4,"Burt's Bees Lip Shimmer, Raisin"


In [None]:
#3.2 Apply pivot operation and fillna used to replace NaN values with 0 i.e. where user didn't made any rating
df_train_pivot=apply_pivot(df1= train,fillby=0)
df_test_pivot=apply_pivot(df1= test, fillby=0)

In [None]:
#3.3 dummy dataset (train and test)
dummy_train=train.copy()


In [None]:
dummy_train

Unnamed: 0,userId,rating,prod_name
10144,couponinglady,5,Clorox Disinfecting Wipes Value Pack Scented 1...
7003,gabyh760,4,Clorox Disinfecting Wipes Value Pack Scented 1...
23956,mikey,4,Godzilla 3d Includes Digital Copy Ultraviolet ...
12052,tay247,5,Clorox Disinfecting Wipes Value Pack Scented 1...
22680,phoenixablaze,3,Godzilla 3d Includes Digital Copy Ultraviolet ...
...,...,...,...
21575,karlagw94,4,Nexxus Exxtra Gel Style Creation Sculptor
5390,cliff,5,100:Complete First Season (blu-Ray)
860,joli89,5,Windex Original Glass Cleaner Refill 67.6oz (2...
15795,tropicalaqua,4,"Burt's Bees Lip Shimmer, Raisin"


In [None]:
dummy_train.isnull().sum()

userId       0
rating       0
prod_name    0
dtype: int64

In [None]:
dummy_train['rating']=dummy_train['rating'].apply(lambda x:0 if x >=1 else 1)

In [None]:
dummy_train=apply_pivot(df1=dummy_train, fillby=1)

In [None]:
dummy_test=test.copy()
dummy_test['rating']= dummy_test['rating'].apply(lambda x:1 if x >=1 else 0)
dummy_test=apply_pivot(df1=dummy_test, fillby=0)

In [None]:
dummy_train.isnull().sum()


prod_name
0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest        0
100:Complete First Season (blu-Ray)                                  0
2017-2018 Brownline174 Duraflex 14-Month Planner 8 1/2 X 11 Black    0
2x Ultra Era with Oxi Booster, 50fl oz                               0
42 Dual Drop Leaf Table with 2 Madrid Chairs"                        0
                                                                    ..
Wedding Wishes Wedding Guest Book                                    0
Weleda Everon Lip Balm                                               0
Windex Original Glass Cleaner Refill 67.6oz (2 Liter)                0
Yes To Carrots Nourishing Body Wash                                  0
Yes To Grapefruit Rejuvenating Body Wash                             0
Length: 231, dtype: int64

In [None]:
dummy_test.isnull().sum()

prod_name
0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest                                              0
100:Complete First Season (blu-Ray)                                                                        0
Aussie Aussome Volume Shampoo, 13.5 Oz                                                                     0
Australian Gold Exotic Blend Lotion, SPF 4                                                                 0
Aveeno Baby Continuous Protection Lotion Sunscreen with Broad Spectrum SPF 55, 4oz                         0
                                                                                                          ..
Vaseline Intensive Care Lip Therapy Cocoa Butter                                                           0
Vicks Vaporub, Regular, 3.53oz                                                                             0
Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime Guarantee    0
Windex Or

In [None]:
dummy_test

prod_name,0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest,100:Complete First Season (blu-Ray),"Aussie Aussome Volume Shampoo, 13.5 Oz","Australian Gold Exotic Blend Lotion, SPF 4","Aveeno Baby Continuous Protection Lotion Sunscreen with Broad Spectrum SPF 55, 4oz","Avery174 Ready Index Contemporary Table Of Contents Divider, 1-8, Multi, Letter",Axe Dry Anti-Perspirant Deodorant Invisible Solid Phoenix,"Banana Boat Sunless Summer Color Self Tanning Lotion, Light To Medium",Bisquick Original Pancake And Baking Mix - 40oz,Black Front Loading Frame Set (8.5x11) Set Of 12,...,Tresemme Kertatin Smooth Infusing Conditioning,Various - Country's Greatest Gospel:Gold Ed (cd),Various - Red Hot Blue:Tribute To Cole Porter (cd),Various Artists - Choo Choo Soul (cd),Vaseline Intensive Care Healthy Hands Stronger Nails,Vaseline Intensive Care Lip Therapy Cocoa Butter,"Vicks Vaporub, Regular, 3.53oz","Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime Guarantee",Windex Original Glass Cleaner Refill 67.6oz (2 Liter),Yes To Carrots Nourishing Body Wash
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
143st,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1witch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
37f5p,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 rooms 1 dog lotsa fur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8ellie24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
yummy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zach,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zburt5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zitro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### STEP 04: User-User Similarity

In [None]:
mean=np.nanmean(apply_pivot(df1=train),axis=1)

In [None]:
df_train_subtracted=(apply_pivot(df1=train).T -mean).T

In [None]:
# Make rating=0 where user hasn't given any rating
df_train_subtracted.fillna(0,inplace=True)

In [None]:
 #Creating the User Similarity Matrix using pairwise_distance function. shape of user_correlation is userXuser i.e. 2071X2071
user_correlation = 1 - pairwise_distances(df_train_subtracted, metric='cosine')
user_correlation[np.isnan(user_correlation)] = 0
 

In [None]:
# user_correlation[user_correlation<0] = 0
# Convert the user_correlation matrix into dataframe
user_correlation_df = pd.DataFrame(user_correlation)
user_correlation_df['userId'] = df_train_subtracted.index
user_correlation_df.set_index('userId',inplace=True)
user_correlation_df.columns = df_train_subtracted.index.tolist()


In [None]:
user_correlation_df

Unnamed: 0_level_0,00sab00,02deuce,0325home,1085,10ten,11677j,1234,1234567,123cat123,123charlie,...,zpalma,zt313,zubb,zuttle,zwithanx,zxcsdfd,zxjki,zyiah4,zzdiane,zzz1127
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00sab00,1.0,0.0,0.0,0.0,0.0,0.0,-0.612372,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
02deuce,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0325home,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1085,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10ten,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zxcsdfd,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zxjki,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zyiah4,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zzdiane,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##STEP 06: Find Top N recommendation for User (User-User

In [None]:
def find_top_recommendations(pred_rating_df, userid, topn):
    recommendation = pred_rating_df.loc[userid].sort_values(ascending=False)[:topn]
    recommendation = pd.DataFrame(recommendation).reset_index()
    recommendation.columns=['prod_name', 'predicted_ratings']
    
    
    return recommendation


user_input = str(input("Enter your user id"))
recommendation_user_user = find_top_recommendations(user_final_rating_df, user_input, 5)
recommendation_user_user['userId'] = user_input

print("Earlier rated products by user id:{} as below".format(user_input))
display(train[train['userId']==user_input].sort_values(['rating'],ascending=False))
print("Earlier rated products by user id:{} as below".format(user_input))
display(train[train['userId']==user_input].sort_values(['rating'],ascending=False))

Earlier rated products by user id:joshua as below


Unnamed: 0,userId,rating,prod_name
0,joshua,5,Pink Friday: Roman Reloaded Re-Up (w/dvd)
17718,joshua,5,Smead174 Recycled Letter Size Manila File Back...
22379,joshua,5,Cheetos Crunchy Flamin' Hot Cheese Flavored Sn...
1541,joshua,3,Dark Shadows (includes Digital Copy) (ultravio...


Earlier rated products by user id:joshua as below


Unnamed: 0,userId,rating,prod_name
0,joshua,5,Pink Friday: Roman Reloaded Re-Up (w/dvd)
17718,joshua,5,Smead174 Recycled Letter Size Manila File Back...
22379,joshua,5,Cheetos Crunchy Flamin' Hot Cheese Flavored Sn...
1541,joshua,3,Dark Shadows (includes Digital Copy) (ultravio...


In [None]:
##Filter user correlation only for user which is in test, test is subset/equal of train in terms of userId