In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('ecommerce_dataset.csv')

In [3]:
df.head()

Unnamed: 0,Product_ID,Name,Category,Price,Rating,Reviews,Stock_Availability
0,P1000,Doll,Fashion,80.17,2.0,"Value for money, satisfied with the purchase.",Out of Stock
1,P1001,Smartphone,Electronics,162.39,3.4,"Quality is okay, could be better.",Out of Stock
2,P1002,T-Shirt,Fashion,184.34,4.5,"Value for money, satisfied with the purchase.",Available
3,P1003,Board Game,Fashion,308.1,4.1,"Not as described, a bit disappointed.",Available
4,P1004,Laptop,Home Appliances,479.9,2.6,"Value for money, satisfied with the purchase.",Available


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Product_ID          100 non-null    object 
 1   Name                100 non-null    object 
 2   Category            100 non-null    object 
 3   Price               100 non-null    float64
 4   Rating              100 non-null    float64
 5   Reviews             100 non-null    object 
 6   Stock_Availability  100 non-null    object 
dtypes: float64(2), object(5)
memory usage: 5.6+ KB


In [5]:
 from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds
import numpy as np

In [6]:
df.fillna("",inplace=True)

In [7]:
df["Combined_Features"] = df["Category"].astype(str) + " " + df["Name"].astype(str) + " " + df["Reviews"].astype(str)


In [8]:
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(df["Combined_Features"])



In [9]:
content_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [10]:
preds_df = None
if {'User_ID', 'Product_ID', 'Rating'}.issubset(df.columns):
    user_item_matrix = df.pivot_table(index='User_ID', columns='Product_ID', values='Rating', aggfunc='mean').fillna(0)

    if user_item_matrix.shape[0] > 0 and user_item_matrix.shape[1] > 0:
        # Normalize and perform Singular Value Decomposition (SVD)
        user_ratings_mean = np.mean(user_item_matrix, axis=1)
        rating_matrix_demeaned = user_item_matrix - user_ratings_mean.values.reshape(-1, 1)
        
        # Ensure k is not larger than the smallest dimension
        k_value = min(50, min(user_item_matrix.shape) - 1)
        U, sigma, Vt = svds(rating_matrix_demeaned, k=k_value)
        sigma = np.diag(sigma)
        
        # Reconstruct predictions
        predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.values.reshape(-1, 1)
        preds_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)
    else:
        preds_df = None
else:
    preds_df = None

    

In [11]:
def hybrid_recommend(product_name, user_id=None, num_recommendations=5):
    if product_name.lower() not in df["Name"].str.lower().values:
        return "Product not found in the dataset."
    
    # Content-Based Recommendations
    idx = df[df["Name"].str.lower() == product_name.lower()].index[0]
    sim_scores = list(enumerate(content_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:num_recommendations+1]
    content_based_recommendations = df.iloc[[i[0] for i in sim_scores]]['Name'].values.tolist()
    
    # Collaborative Filtering Recommendations
    collaborative_recommendations = []
    if user_id is not None and preds_df is not None and user_id in preds_df.index:
        user_ratings = preds_df.loc[user_id].sort_values(ascending=False)
        top_product_ids = user_ratings.head(num_recommendations).index
        collaborative_recommendations = df[df['Product_ID'].isin(top_product_ids)]['Name'].values.tolist()
    
    # Combine both recommendations
    final_recommendations = list(set(content_based_recommendations + collaborative_recommendations))
    return final_recommendations[:num_recommendations]

print(hybrid_recommend("Smartphone", user_id=101, num_recommendations=5))


['Smartphone', 'T-Shirt', 'Board Game', 'Doll']
