### <font color='grey'>**Import the libraries**</font>  


In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Basically, the base :)
import numpy as np
import pandas as pd
import math
import json
import time


# scikit-learn: similarity metrics, train/test splitting and nearest-neighbour search
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

import joblib
import warnings; warnings.simplefilter('ignore')

# Scipy uses NumPy underneath: functions for optimization, stats and signal processing
import scipy.sparse
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds


# Data viz libraries
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.figure_factory as ff
import plotly.graph_objects as go



#  <font color='orange'>**Extract, Transformation and Load**</font>    
  
### <font color='grey'>**Import the data**</font>  

In [None]:
# Column labels to apply to the CSV
cols_name = ['USER_ID', 'PRODUCT_ID', 'RATINGS', 'TIMESTAMP']
# Load the ratings file; header=None spells out what pandas already does when
# explicit names are given (the first line is read as data, not as a header)
amz_kindle_store = pd.read_csv(
    'Kindle_Store.csv',
    header=None,
    names=cols_name,
)
# Preview the first five rows of the frame
amz_kindle_store.head(5)

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes)
amz_kindle_store.info()


#### <font color='red'>Check the number of rows and columns</font>  


In [None]:
# DataFrame.shape is a (number of rows, number of columns) tuple
rows = amz_kindle_store.shape[0]
columns = amz_kindle_store.shape[1]
print('The number of rows is: ', rows)
print('Number of columns is: ', columns)

#  <font color='orange'>**Exploratory Data Analysis**</font>    

#### <font color='red'>Count of NaNs</font>  


There are no null values in the dataset.

In [None]:
# Missing values per column, printed from the smallest count to the largest
counts = amz_kindle_store.isnull().sum()
print(counts.sort_values())

# The same information as a percentage of rows, rounded to one decimal place
percentages = (amz_kindle_store.isnull().mean() * 100).round(1)
print(percentages.sort_values())

#### <font color='red'>Some summary indicators</font>  


In [None]:
# Descriptive statistics of the RATINGS column, rounded to two decimals
amz_kindle_store['RATINGS'].describe().T.round(2)

In [None]:
# Histogram of the RATINGS column; each bar is labelled through the "%{x}" text template
fig = go.Figure(
    data=[
        go.Histogram(
            x=amz_kindle_store['RATINGS'],
            texttemplate="%{x}",
            textfont={"size": 12},
        )
    ]
)
fig.show()

#### <font color='red'>How many Users and Products?</font>  


In [None]:
# Count the distinct user ids present in the data
print('Number of distinct users is = ', amz_kindle_store.loc[:, 'USER_ID'].nunique())
# Count the distinct product ids present in the data
print('Number of distinct product is = ', amz_kindle_store.loc[:, 'PRODUCT_ID'].nunique())

In [None]:
# Five users with the most rating rows, and five products with the most rating rows
userid_most_rated = (
    amz_kindle_store
    .groupby('USER_ID')
    .size()
    .sort_values(ascending=False)
    .head(5)
)
productid_most_rated = (
    amz_kindle_store
    .groupby('PRODUCT_ID')
    .size()
    .sort_values(ascending=False)
    .head(5)
)
print('Top 5 users based on ratings: \n', userid_most_rated)
print('Top 5 product based on ratings: \n', productid_most_rated)

## <font color='orange'>**Model Preparation**</font>   

Split the data randomly into train and test datasets in a 70:30 ratio.




In [None]:
# Hold out 30% of the rows for testing; random_state=0 fixes the shuffle so the split is reproducible
amz_kindle_train_data, amz_kindle_test_data = train_test_split(amz_kindle_store, test_size = 0.3, random_state=0)


In [None]:
# Report the size of each partition produced by the train/test split
print('Shape of training data: ',amz_kindle_train_data.shape)
# The test shape must come from the held-out frame, not from the full dataset
print('Shape of testing data: ',amz_kindle_test_data.shape)

# **Model 1**: <font color='orange'>Dev Popularity Recommender model</font>  


This is a type of recommendation system that works on the principle of popularity, i.e. whatever is currently trending. Such systems identify the products or movies that are trending or most popular among users and recommend those directly.

In [None]:

#* Recommendation score = number of USER_ID entries each product has in the training set;
#* products are ordered by score (highest first), ties broken by PRODUCT_ID ascending
amz_kindle_train_data_grouped = (
    amz_kindle_train_data
    .groupby('PRODUCT_ID')
    .agg({'USER_ID': 'count'})
    .reset_index()
    .rename(columns={'USER_ID': 'SCORE'})
    .sort_values(['SCORE', 'PRODUCT_ID'], ascending=[False, True])
)
#* Rank by score: rank 1 is the highest score; method='first' ranks equal scores in row order
amz_kindle_train_data_grouped['RANK'] = amz_kindle_train_data_grouped['SCORE'].rank(method='first', ascending=False)
#* Keep only the first five rows of the sorted table
amz_kindle_train_data_grouped = amz_kindle_train_data_grouped.head(5)



In [None]:

#* Use popularity based recommender model to make predictions
def populariry_recommender_model(USER_ID, top_products=None):
    """Return the popularity table labelled with the user it is served to.

    The recommendations are the same for every user; the function only adds a
    leading ``USER_ID`` column filled with the requested id.

    Parameters
    ----------
    USER_ID
        Identifier written into every row of the returned frame.
    top_products : pandas.DataFrame, optional
        Popularity table to serve. When omitted, the notebook-level
        ``amz_kindle_train_data_grouped`` frame is used.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``USER_ID`` as the first column, followed by the
        columns of the popularity table in their existing order.
    """
    if top_products is None:
        top_products = amz_kindle_train_data_grouped

    # Work on a copy: assigning the column on the shared table would leave a
    # USER_ID column behind in it after every call.
    user_recommendations = top_products.copy()

    # Add USER_ID column for which the recommendations are being generated
    user_recommendations['USER_ID'] = USER_ID

    # Bring USER_ID column to the front, keeping the other columns in order
    other_columns = [col for col in user_recommendations.columns if col != 'USER_ID']
    return user_recommendations[['USER_ID'] + other_columns]

In [None]:
# Show the top-5 recommended Kindle products for users 11 and 17.
find_recommendation = [11,17]   
for user_id in find_recommendation:
    print("The list of recomended kindle for the user: %d\n" %(user_id))
    # Print the table once per user (it was previously printed twice in a row)
    print(populariry_recommender_model(user_id)) 
    print("\n") 

# **Model 2**: <font color='orange'>Dev Collaborative Filtering recommender model.</font>  
