In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter("ignore")

%matplotlib inline

In [None]:
# Load the raw order records.
# NOTE(review): because `names` is supplied, pandas treats the first line of the
# file as data, not as a header — confirm the CSV has no header row.
DATA_PATH = "/content/drive/MyDrive/Akshat/products - Orders - products - Orders.csv"
recomd = pd.read_csv(DATA_PATH, names=['userId', 'productId', 'quantity', 'timestamp'])
# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap is needed.
recomd.head()


In [None]:
# The timestamp column is not used by any later cell, so remove it.
# Rebinding with errors='ignore' (instead of an in-place drop) keeps this cell
# re-runnable: executing it a second time no longer raises KeyError.
recomd = recomd.drop(columns=['timestamp'], errors='ignore')


In [None]:
# Count missing values in every column
missing_per_column = recomd.isna().sum()
missing_per_column
#there are no blanks in data

In [None]:
# Number of distinct customers and distinct products in the data
n_customers = recomd['userId'].nunique()
n_products = recomd['productId'].nunique()
print("Unique Customers: ", n_customers)
print("Unique Products: ", n_products)

In [None]:
# Top 20 users ranked by number of rows, ties broken by mean quantity (both descending)
data_user_rate = (
    recomd.groupby('userId')['quantity']
    .agg(['mean', 'count'])
    .sort_values(by=['count', 'mean'], ascending=False)
    .head(20)
)
data_user_rate

In [None]:

# Word cloud of userIds, each sized by its number of rows
from wordcloud import WordCloud

data_user_word = recomd.groupby('userId').size().sort_values(ascending=False)
# WordCloud needs string labels, so stringify the ids
data_user_word.index = data_user_word.index.map(str)

cloud_builder = WordCloud(background_color='white', random_state=1)
wordcloud = cloud_builder.generate_from_frequencies(data_user_word)

fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(wordcloud)
ax.axis("off")
ax.set_title("Word Cloud UserId", size=30)
plt.show()

In [None]:
# Top 20 products ranked by number of rows, ties broken by mean quantity (both descending)
data_prod_rate = (
    recomd.groupby('productId')['quantity']
    .agg(['mean', 'count'])
    .sort_values(by=['count', 'mean'], ascending=False)
    .head(20)
)
data_prod_rate

In [None]:
# Top 20 products: row count as grey bars (left axis), mean quantity as an orange line (right axis)
fig, ax1 = plt.subplots(figsize=(15, 6))
product_labels = data_prod_rate.index
ax1.bar(product_labels, data_prod_rate['count'], color='grey')
plt.xticks(rotation=90)

# Second y-axis sharing the same x-axis for the mean line
ax2 = ax1.twinx()
ax2.plot(product_labels, data_prod_rate['mean'], color='orange')
ax2.set_ylim(0, 5)
ax2.set_title("Top 20 productId and its mean quantity")
plt.show()

In [None]:
# Word cloud of productIds, each sized by its number of rows
data_prod_word = recomd.groupby('productId').size().sort_values(ascending=False)
# WordCloud needs string labels, so stringify the ids
data_prod_word.index = data_prod_word.index.map(str)

cloud_builder = WordCloud(background_color='white', random_state=2)
wordcloud = cloud_builder.generate_from_frequencies(data_prod_word)

fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(wordcloud)
ax.axis("off")
ax.set_title("Word Cloud productId", size=30)
plt.show()

In [None]:
# Distribution of the quantity column: frequency table followed by a count plot.
# NOTE(review): the original note claimed most values are 4 or 5 — confirm against the output.
print(recomd['quantity'].value_counts())
# The data variable must be passed as `x=`: seaborn >= 0.12 no longer accepts it positionally.
sns.catplot(x="quantity", data=recomd, kind='count')

In [None]:
# Mean and median of the quantity column over the full data set
quantity_all = recomd['quantity']
print("Mean of Rating:", np.mean(quantity_all))
print("Median of Rating:", np.median(quantity_all))

In [None]:
# Number of rows per userId, most frequent first
data_flt = recomd['userId'].value_counts()

In [None]:
# Shape of the index holding the users with at least 10 rows (i.e. how many such users exist)
data_flt[data_flt >= 10].index.shape

In [None]:
# Keep the rows whose userId occurs in data_flt's index.
# NOTE(review): the whole data_flt index is used here, so no minimum-row
# threshold (such as >= 10) is applied — confirm this is intended.
user_in_counts = recomd['userId'].isin(data_flt.index)
data_final = recomd[user_in_counts]

In [None]:
# (rows, columns) of the user-filtered frame
data_final.shape

In [None]:
# Share of the original rows kept in data_final, as a percentage.
# The earlier label spoke of "users" and "atleast 40 products", but the value
# printed is a row percentage and this expression involves no such threshold.
print("Percentage of rows retained after user filtering:", (len(data_final) / len(recomd)) * 100)

In [None]:

# Frequency of each quantity value in the user-filtered data
quantity_counts = data_final['quantity'].value_counts()
print(quantity_counts)


In [None]:
# Number of rows per productId in the user-filtered data, most frequent first
data_flt_prod = data_final['productId'].value_counts()

In [None]:
# Mean and median of the quantity column after the user filter
quantity_filtered = data_final['quantity']
print("Mean of Rating:", np.mean(quantity_filtered))
print("Median of Rating:", np.median(quantity_filtered))

In [None]:

# Keep the rows whose product has at least 1 row in data_flt_prod
kept_products = data_flt_prod[data_flt_prod >= 1].index
data_final_1 = data_final[data_final['productId'].isin(kept_products)]

In [None]:
# Rows per product in the filtered data, largest first
rows_per_product = data_final_1.groupby('productId').size()
rows_per_product.sort_values(ascending=False)

In [None]:
# Preview the first 10 rows of the filtered data
data_final_1.head(10)


In [None]:

# Share of the original rows kept in data_final_1, as a percentage.
# The earlier label quoted "atleast 50 times" / "atleast 5 times" thresholds,
# which do not appear in this expression; the label now states what is printed.
print("Percentage of rows retained after user and product filtering:",
      (len(data_final_1) / len(recomd)) * 100)

In [None]:
# Mean and median of the quantity column after both filters
quantity_final = data_final_1['quantity']
print("Mean of Rating:", np.mean(quantity_final))
print("Median of Rating:", np.median(quantity_final))

In [None]:
# Frequency of each quantity value in the final filtered data
final_quantity_counts = data_final_1['quantity'].value_counts()
print(final_quantity_counts)


In [None]:
# User x product matrix of quantity (pivot_table's default aggregation);
# user/product pairs with no rows are filled with 0
final_ratings_matrix = (
    data_final_1
    .pivot_table(index='userId', columns='productId', values='quantity')
    .fillna(0)
)

In [None]:

# Item-based CF view: one row per product, one column per user
final_ratings_matrix_T = final_ratings_matrix.T
final_ratings_matrix_T.head()

In [None]:
from sklearn.model_selection import train_test_split

# Random 70:30 train/test split of the filtered rows; the fixed seed makes it repeatable
train_data, test_data = train_test_split(
    data_final_1,
    test_size=0.3,
    random_state=10,
)
train_data.head()

In [None]:
# Shapes of the train and test partitions, in that order
for split_frame in (train_data, test_data):
    print(split_frame.shape)

In [None]:
# Per product in the training data: number of rows ('count', the recommendation
# score) and mean quantity ('mean')
train_data_grouped = (
    train_data.groupby('productId')
    .agg(count=('userId', 'count'), mean=('quantity', 'mean'))
    .reset_index()
)
train_data_grouped.head()

In [None]:
# Order the products by recommendation score (row count), highest first, and
# attach a rank; method='first' gives tied scores distinct ranks in row order
train_data_sort = (
    train_data_grouped
    .sort_values(['count'], ascending=False)
    .assign(Rank=lambda scored: scored['count'].rank(ascending=False, method='first'))
)

# The five highest-scoring products are the popularity recommendations
popularity_recommendations = train_data_sort.head(5)
popularity_recommendations

In [None]:

# Use popularity based recommender model to make predictions
def recommend(user_id, recommendations=None):
    """Return the popularity recommendations labelled with ``user_id``.

    Parameters
    ----------
    user_id
        Identifier written into every row of the new leading 'userId' column.
    recommendations : pandas.DataFrame, optional
        Table of recommended products. When omitted, the notebook-level
        ``popularity_recommendations`` frame is used.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'userId' as the first column followed by the
        columns of ``recommendations``. The input frame is left untouched.
    """
    if recommendations is None:
        recommendations = popularity_recommendations

    # drop() returns a new frame, so the shared table is never mutated (the
    # previous version added the column to the global head() slice in place).
    # errors='ignore' also tolerates a table that already carries 'userId'.
    user_recommendations = recommendations.drop(columns=['userId'], errors='ignore')

    # Put the user id in front of the product columns
    user_recommendations.insert(0, 'userId', user_id)

    return user_recommendations

In [None]:
# Users to show popularity recommendations for
find_recom = [1267, 385, 40]   # This list is user choice.
for chosen_user in find_recom:
    print("Here is the recommendation for the userId: ", chosen_user)
    print("The products are most sold along with there mean rating:\n")
    print(recommend(chosen_user))
    print("\n")

In [None]:
# User x product quantity matrices for the train and test partitions;
# user/product pairs with no rows are filled with 0
train_final_ratings_matrix = (
    train_data
    .pivot_table(index='userId', columns='productId', values='quantity')
    .fillna(0)
)
test_final_ratings_matrix = (
    test_data
    .pivot_table(index='userId', columns='productId', values='quantity')
    .fillna(0)
)

In [None]:

# Add a 0-based running row number as a 'user_index' column to the complete
# and the training matrices
final_ratings_matrix['user_index'] = np.arange(len(final_ratings_matrix))
train_final_ratings_matrix['user_index'] = np.arange(len(train_final_ratings_matrix))
train_final_ratings_matrix.head(10)

In [None]:

# Test data: add the same 0-based running row number as a 'user_index' column
test_final_ratings_matrix['user_index'] = np.arange(len(test_final_ratings_matrix))
test_final_ratings_matrix.head()

In [None]:

# Replace the userId index by the positional 'user_index' column, in place,
# on the complete, train and test matrices (in that order)
for ratings_matrix in (final_ratings_matrix,
                       train_final_ratings_matrix,
                       test_final_ratings_matrix):
    ratings_matrix.set_index(['user_index'], inplace=True)

test_final_ratings_matrix.head()

In [None]:
from scipy.sparse.linalg import svds

# Truncated SVD of the training matrix, keeping k = 10 singular triplets
U, singular_values, Vt = svds(train_final_ratings_matrix, k=10)
# Turn the 1-D singular values into a diagonal matrix for the reconstruction
sigma = np.diag(singular_values)


In [None]:
# Reconstruct the training matrix from its truncated factors: (U . sigma) . Vt
train_predicted_ratings = (U @ sigma) @ Vt

# Predicted values, labelled with the product columns of the training matrix
train_preds_df = pd.DataFrame(
    train_predicted_ratings,
    columns=train_final_ratings_matrix.columns,
)
train_preds_df.head()

In [None]:

# Recommend the items with the highest predicted ratings

def recommend_items(userID, final_ratings_matrix, preds_df, num_recommendations):
    """Print the highest-predicted items the given user has no recorded value for.

    Parameters
    ----------
    userID : int
        1-based row position of the user; row ``userID - 1`` of both frames is read.
    final_ratings_matrix : pandas.DataFrame
        Observed values, one row per user and one column per item. A value of 0
        is treated as "not yet interacted with".
    preds_df : pandas.DataFrame
        Predicted values, read at the same row position and aligned with the
        observed values by column label.
    num_recommendations : int
        Maximum number of rows to print.

    Raises
    ------
    IndexError
        If ``userID`` is outside ``1..n`` where ``n`` is the smaller row count
        of the two frames. Without this check ``userID <= 0`` would silently
        select a row counted from the end (negative ``iloc``).
    """
    n_users = min(len(final_ratings_matrix), len(preds_df))
    if not 1 <= userID <= n_users:
        raise IndexError(
            'userID {} is out of range; expected a value between 1 and {}'.format(userID, n_users)
        )

    user_idx = userID - 1  # index starts at 0

    # Get and sort the user's observed and predicted values
    sorted_user_ratings = final_ratings_matrix.iloc[user_idx].sort_values(ascending=False)
    sorted_user_predictions = preds_df.iloc[user_idx].sort_values(ascending=False)

    # Align both series on the item label, side by side
    temp = pd.concat([sorted_user_ratings, sorted_user_predictions], axis=1)
    temp.index.name = 'Recommended Items'
    temp.columns = ['user_ratings', 'user_predictions']

    # Only items without an observed value are candidates; best prediction first
    temp = temp.loc[temp.user_ratings == 0]
    temp = temp.sort_values('user_predictions', ascending=False)
    print('\nBelow are the recommended items for user(user_id = {}):\n'.format(userID))
    print(temp.head(num_recommendations))

In [None]:

# Choose the user (userID) and how many items to list (num_recommendations),
# then print recommendations from the training matrix and its reconstruction
userID, num_recommendations = 3, 5
recommend_items(userID, train_final_ratings_matrix, train_preds_df, num_recommendations)

In [None]:
# Average ACTUAL rating for each item: column means of the test matrix (first 5 shown).
# NOTE(review): the test matrix is built with fillna(0), so these means include
# the zero-filled cells — confirm that is intended.
test_final_ratings_matrix.mean().head()

In [None]:

# Truncated SVD of the test matrix, keeping k = 10 singular triplets
U_t, singular_values_t, Vt_t = svds(test_final_ratings_matrix, k=10)
# Turn the 1-D singular values into a diagonal matrix for the reconstruction
sigma_t = np.diag(singular_values_t)

In [None]:
# Reconstruct the test matrix from its truncated factors: (U_t . sigma_t) . Vt_t
test_predicted_ratings = (U_t @ sigma_t) @ Vt_t

# Predicted values, labelled with the product columns of the test matrix
test_preds_df = pd.DataFrame(
    test_predicted_ratings,
    columns=test_final_ratings_matrix.columns,
)
test_preds_df.head()

In [None]:

# Average PREDICTED rating for each item: column means of the reconstructed
# test matrix (first 5 shown)
test_preds_df.mean().head()

In [None]:
# Per-item average of the actual and of the predicted test values, side by side
avg_actual = test_final_ratings_matrix.mean()
avg_predicted = test_preds_df.mean()
rmse_df = pd.concat([avg_actual, avg_predicted], axis=1)
rmse_df.columns = ['Avg_actual_quantity', 'Avg_predicted_quantity']
print(rmse_df.shape)

# 0-based running number for every item row
rmse_df['item_index'] = np.arange(rmse_df.shape[0])
rmse_df.head()

In [None]:
# Truncated SVD of the complete matrix, keeping k = 10 singular triplets
U, singular_values, Vt = svds(final_ratings_matrix, k=10)
# Turn the 1-D singular values into a diagonal matrix for the reconstruction
sigma = np.diag(singular_values)

In [None]:

# Reconstruct the complete matrix from its truncated factors: (U . sigma) . Vt
predicted_ratings = (U @ sigma) @ Vt

# Predicted values, labelled with the product columns of the complete matrix
preds_df = pd.DataFrame(
    predicted_ratings,
    columns=final_ratings_matrix.columns,
)

In [None]:
# Print 5 recommendations from the complete matrix and its SVD reconstruction.
# NOTE(review): recommend_items reads row position userID - 1, while 1267 also
# appears as a userId in find_recom; row 1266 is not necessarily that user, and
# the call fails if the matrix has fewer than 1267 rows — confirm which is meant.
userID = 1267
num_recommendations = 5
recommend_items(userID, final_ratings_matrix, preds_df, num_recommendations)