# Interest Score Calculation
- In this a basic algorithm is proposed which is used to calculate the Interest score of user for each Product Category
- This score is calculated on the basis of user previous interaction with products and their average rating to any products in that specific category
- Different Weights are given to Different Interaction Type 
- A product Purchased is weighted more than product that are added to cart, and these are weighted more than just viewed products.
- The final Interest Score is calculated by Combining the score of Interaction Type and average rating score to any category

In [11]:
import pandas as pd

# Load data into dataframes
users = pd.read_csv('user.csv')
products = pd.read_csv('product.csv')
interactions = pd.read_csv('interactions.csv')
purchases = pd.read_csv('purchases.csv')

In [12]:
# Merge dataframes to get all relevant information in one place
df = pd.merge(interactions, users, on='User ID')
df = pd.merge(df, products, on='Product ID')
df = pd.merge(df, purchases, on=['User ID', 'Product ID'], how='left')

# Defining weights for different interaction types
weights = {'Viewed': 1, 'Added to Cart': 2, 'Purchased': 3}

In [16]:
# Calculating weighted interest score for each user and product category
df['Weighted Rating'] = df['Rating'] * df['Interaction Type'].map(weights)
interest = df.groupby(['User ID', 'Category'])['Weighted Rating'].mean().reset_index()
interest.columns = ['User ID', 'Category', 'Interest Score']

In [17]:
# Calculating average rating for each user and product category from purchases
purchases_df = pd.merge(purchases, products, on='Product ID')
purchases_df = purchases_df.groupby(['User ID', 'Category'])['Rating'].mean().reset_index()
purchases_df.columns = ['User ID', 'Category', 'Avg Purchase Rating']

In [18]:
# Merge interest scores and average purchase ratings
interest = pd.merge(interest, purchases_df, on=['User ID', 'Category'], how='left')

# Fill missing values with 0
interest['Avg Purchase Rating'] = interest['Avg Purchase Rating'].fillna(0)

# Calculate final interest score by taking the average of interest score and average purchase rating
interest['Final Interest Score'] = (interest['Interest Score'] + interest['Avg Purchase Rating']) / 2

# Save interest scores to file
interest.to_csv('interest_scores.csv', index=False)

In [19]:
#Interest scores for user with ID 
user_interest = interest[interest['User ID'] == 0]
print(user_interest)

   User ID     Category  Interest Score  Avg Purchase Rating  \
0        0     Clothing           11.64                 3.88   
1        0  Electronics             NaN                 0.00   
2        0         Home             NaN                 0.00   

   Final Interest Score  
0                  7.76  
1                   NaN  
2                   NaN  


# Interest Score Data Exploration

In [20]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [21]:
df_interest = pd.read_csv('interest_scores.csv')

In [22]:
df_interest.head()

Unnamed: 0,User ID,Category,Interest Score,Avg Purchase Rating,Final Interest Score
0,0,Clothing,11.64,3.88,7.76
1,0,Electronics,,0.0,
2,0,Home,,0.0,
3,1,Beauty,9.625,3.618,6.6215
4,1,Books,10.53,3.51,7.02


In [23]:
df_interest.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3610 entries, 0 to 3609
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   User ID               3610 non-null   int64  
 1   Category              3610 non-null   object 
 2   Interest Score        2321 non-null   float64
 3   Avg Purchase Rating   3610 non-null   float64
 4   Final Interest Score  2321 non-null   float64
dtypes: float64(3), int64(1), object(1)
memory usage: 141.1+ KB


In [24]:
df_interest.describe()

Unnamed: 0,User ID,Interest Score,Avg Purchase Rating,Final Interest Score
count,3610.0,2321.0,3610.0,2321.0
mean,497.998338,7.994842,1.927876,5.496695
std,288.773116,2.711448,1.599567,1.759525
min,0.0,2.02,0.0,1.515
25%,246.0,6.03,0.0,4.3
50%,496.0,7.59,2.355357,5.34
75%,749.0,9.6975,3.23,6.586667
max,999.0,15.0,5.0,10.0
