In [None]:
!pip install sentence-transformers

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

from sentence_transformers import SentenceTransformer

In [None]:
# Read the ratings file without treating its first line as a header.
# The column names are assigned by hand in this notebook, which implies the
# CSV ships without a header row; with the default header=0 the first rating
# record would be consumed as column labels and silently lost.
# NOTE(review): confirm the raw file really has no header line.
data = pd.read_csv(
    'ratings_Electronics.csv',
    header=None,
    names=['reviewerID','asin','rating','unixReviewTime'],
)

In [None]:
# Inspect the column labels of the frame as loaded.
data.columns

In [None]:
# Give the four columns descriptive names (assigned positionally).
rating_columns = ['reviewerID','asin','rating','unixReviewTime']
data.columns = rating_columns

In [None]:
# Print a summary of the frame (columns, dtypes, memory usage).
data.info()

In [None]:
# Count missing values per column.
data.isna().sum()

In [None]:
# Per-user features used as KMeans input:
#   avg_rating   - mean of the user's ratings
#   rating_count - number of ratings the user has given
#   recency      - largest unixReviewTime seen for the user (latest review)
ratings_by_user = data.groupby('reviewerID')

user_feature = ratings_by_user.agg(
    avg_rating=pd.NamedAgg(column='rating', aggfunc='mean'),
    rating_count=pd.NamedAgg(column='rating', aggfunc='count'),
    recency=pd.NamedAgg(column='unixReviewTime', aggfunc='max'),
).reset_index()

In [None]:
# Display the per-user feature table.
user_feature

In [None]:
# List the columns of the per-user feature table.
user_feature.columns

In [None]:
# (number of users, number of columns) in the feature table.
user_feature.shape

In [None]:
# Standardize the three numeric user features before clustering so that no
# single feature dominates purely because of its scale.
feature_columns = ['avg_rating','rating_count','recency']

scaler = StandardScaler()
X_user = scaler.fit_transform(user_feature[feature_columns])

In [None]:
# Elbow method: fit KMeans for every candidate cluster count and record the
# resulting inertia so the "elbow" can be read off the plot.
k_range = range(2,10)
inertia = []

for k in k_range:
    # `km` remains bound to the last model fitted here once the loop ends.
    km = KMeans(n_clusters=k, init="k-means++", random_state=42, n_init=10)
    km.fit(X_user)
    inertia.append(km.inertia_)

fig, ax = plt.subplots()
ax.plot(k_range, inertia, marker='o')
ax.set_title('Elbow Method')
ax.set_xlabel('No. of clusters')
ax.set_ylabel('Inertia')
plt.show()

In [None]:
# Fit the final 5-cluster model and assign a cluster label to every user.
# The labels must come from `kmeans`: `km` is only the leftover model from the
# elbow loop (fitted with the last cluster count tried there), not the
# 5-cluster model configured on the line below.
kmeans = KMeans(n_clusters=5,init='k-means++',random_state=42,n_init=10)
user_feature['cluster'] = kmeans.fit_predict(X_user)

In [None]:
# Attach each review's user cluster to the ratings frame.
# A 'cluster' column left over from an earlier run of this cell is dropped
# first; otherwise re-running would produce 'cluster_x'/'cluster_y' columns
# and later lookups of data['cluster'] would fail.
# validate='many_to_one' asserts that user_feature has one row per reviewerID,
# so the merge can never duplicate rating rows.
data = (
    data.drop(columns='cluster', errors='ignore')
    .merge(
        user_feature[['reviewerID','cluster']],
        on='reviewerID',
        how='left',
        validate='many_to_one',
    )
)

In [None]:
# Check the column labels of the ratings frame after the merge.
data.columns

In [None]:
# Item "text" for embedding: one row per product (asin) whose text is the
# space-separated string of every rating that product received.
# NOTE(review): the text consists only of numeric rating values, not review
# content - confirm this is the intended input for a sentence encoder.
ratings_as_text = data['rating'].astype(str)

product_text = (
    ratings_as_text.groupby(data['asin'])
    .agg(' '.join)
    .reset_index()
)

In [None]:
# Display the per-product text table.
product_text

In [None]:
# Load the sentence-embedding model used for both products and users.

model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

In [None]:
# Encode every product's text into a vector. The inputs are passed in
# product_text row order, so embeddings are looked up by row position later.
product_sentences = product_text['rating'].tolist()
product_embedding = model.encode(product_sentences, show_progress_bar=True)

Batches:   0%|          | 0/14876 [00:00<?, ?it/s]

In [None]:
def get_user_embedding(user_id,last_n=3):
    """Embed a user's most recent ratings as a single vector.

    Selects the rows of ``data`` belonging to ``user_id``, keeps the
    ``last_n`` rows with the largest ``unixReviewTime``, joins their rating
    values into one space-separated string and encodes it with ``model``.

    Args:
        user_id: Value matched against ``data['reviewerID']``.
        last_n: How many of the latest ratings to include.

    Returns:
        The result of ``model.encode`` on the joined rating string, or
        ``None`` when no rows are selected for the user.
    """
    is_user = data['reviewerID'] == user_id
    history = data.loc[is_user].sort_values('unixReviewTime')
    recent = history.tail(last_n)

    if not len(recent):
        return None

    rating_tokens = recent['rating'].astype(str)
    return model.encode(' '.join(rating_tokens))

In [None]:
def hybrid_recommend(user_id, top_k=5):
    """Recommend products for a user from within the user's cluster.

    Candidates are all products rated by at least one user in the same
    KMeans cluster as ``user_id``. They are ranked by cosine similarity
    between the user's embedding (see ``get_user_embedding``) and each
    candidate's row in ``product_embedding``.

    Args:
        user_id: Value matched against ``user_feature['reviewerID']``.
        top_k: Maximum number of products to return. Values <= 0 yield an
            empty result.

    Returns:
        A DataFrame with a single ``asin`` column, best match first, or the
        string ``"User not found"`` / ``"No interaction history"`` when the
        user is unknown or has no ratings to embed.

    NOTE(review): products the user has already rated are not excluded from
    the candidates - confirm whether that is intended.
    """
    user_cluster = user_feature.loc[
        user_feature['reviewerID'] == user_id, 'cluster'
    ].values

    if len(user_cluster) == 0:
        return "User not found"

    cluster_id = user_cluster[0]

    user_vec = get_user_embedding(user_id)

    if user_vec is None:
        return "No interaction history"

    # every product rated by any user in the same cluster
    cluster_products = data.loc[data['cluster'] == cluster_id, 'asin'].unique()

    # Row *positions* of the candidates in product_text. product_embedding is
    # aligned with product_text by position, so positions (not index labels)
    # are the safe way to pick the matching embedding rows.
    candidate_pos = np.flatnonzero(
        product_text['asin'].isin(cluster_products).to_numpy()
    )

    # A non-positive top_k must not fall through to the slice below (the
    # original `[-0:]` returned every candidate), and cosine_similarity
    # rejects an empty candidate matrix.
    if top_k <= 0 or candidate_pos.size == 0:
        return product_text.iloc[0:0][['asin']]

    sims = cosine_similarity([user_vec], product_embedding[candidate_pos])[0]

    # indices of the top_k highest similarities, best first
    top_idx = np.argsort(sims)[::-1][:top_k]

    return product_text.iloc[candidate_pos[top_idx]][['asin']]


In [None]:
# Smoke test: recommend for the reviewer of the first row in the ratings frame.
sample_user = data['reviewerID'].iat[0]
hybrid_recommend(user_id=sample_user)