# Hybrid Based Filtering

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split, GridSearchCV, cross_validate
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import MinMaxScaler
from nltk.stem import PorterStemmer
import string

# Rich-display helpers; display and HTML are used right below to inject CSS.
from IPython.display import display, clear_output, HTML
# Widen the notebook container, menubar and toolbar so wide table outputs fit.
# NOTE(review): these element ids look like the classic Notebook UI — confirm
# the rule still has an effect in the front end actually used.
display(HTML(data="""
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))
# NOTE(review): installs an 'ignore' filter with no category restriction, so
# every warning raised after this line is hidden for the rest of the session.
import warnings; warnings.simplefilter('ignore')

In [2]:
# Load the three CSV inputs; the paths are relative to the working directory.
# NOTE(review): customer_data is not referenced again in the cells shown here —
# confirm it is still needed.
customer_data = pd.read_csv('../Dataset/customer_data_final.csv')
# Rating records that are later passed to train_svd_model_with_tuning.
purchase_history = pd.read_csv('../Dataset/purchase_history.csv')
# Item table used for the text similarity and the name/ID lookups.
df = pd.read_csv('../Dataset/Item_data2.csv')

### Preprocessing

In [3]:
# Build one free-text field per product: start from the name and append each
# further descriptive column, separated by a single space. Missing values are
# replaced by empty strings before concatenation.
combined_text = df['Name'].fillna('')
for text_column in ('Description', 'Tags', 'Brand', 'Category'):
    combined_text = combined_text + ' ' + df[text_column].fillna('')
df['Combined_Text'] = combined_text

## Content-Based Filtering

In [4]:
def compute_cosine_similarity(data):
    """Return the item-to-item similarity matrix of ``data['Combined_Text']``.

    The text is vectorised with TF-IDF over word unigrams and bigrams with
    English stop words removed, and the matrix of pairwise dot products between
    all rows is returned. TfidfVectorizer L2-normalises rows by default, so
    these dot products are cosine similarities.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'Combined_Text'`` column.

    Returns
    -------
    numpy.ndarray
        Square matrix with one row and one column per row of ``data``.
    """
    vectorizer = TfidfVectorizer(
        stop_words='english',
        ngram_range=(1, 2),
        analyzer='word',
    )
    text_features = vectorizer.fit_transform(data['Combined_Text'])
    return linear_kernel(text_features, text_features)


# Pairwise product similarity, computed once and reused by every recommendation call.
cosine_sim = compute_cosine_similarity(data=df)

## Collaborative Filtering (SVD)

In [5]:
def train_svd_model_with_tuning(ground_truth_clean):
    """Tune an SVD recommender by grid search and return the fitted best model.

    Steps: grid-search SVD hyper-parameters with 5-fold CV, report the best
    RMSE configuration, report 5-fold RMSE/MAE for that configuration, then fit
    it on all available ratings.

    Parameters
    ----------
    ground_truth_clean : pandas.DataFrame
        Must contain the columns ``'ID'``, ``'ProdID_List'`` and ``'Rating'``;
        they are handed to Surprise in that order (user, item, rating).

    Returns
    -------
    surprise.SVD
        The best-RMSE estimator, fitted on every rating in
        ``ground_truth_clean``.
    """
    # Reader() keeps Surprise's default rating scale.
    # NOTE(review): assumes 'Rating' lies within that default range — confirm.
    reader = Reader()
    data = Dataset.load_from_df(ground_truth_clean[['ID', 'ProdID_List', 'Rating']], reader)

    param_grid = {
        'n_factors': [50, 100, 150],
        'n_epochs': [10, 20, 30],
        'lr_all': [0.005, 0.01, 0.02],
        'reg_all': [0.02, 0.05, 0.1]
    }

    grid_search = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=5, n_jobs=-1)
    grid_search.fit(data)

    best_params = grid_search.best_params['rmse']
    print("Best RMSE score:", grid_search.best_score['rmse'])
    print("Best parameters:", best_params)

    # Unfitted SVD instance configured with the best-RMSE parameters.
    best_svd = grid_search.best_estimator['rmse']

    # Report generalisation error first. cross_validate refits best_svd on each
    # fold, so whatever it was fitted on beforehand would be overwritten.
    results = cross_validate(best_svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    mean_rmse = results['test_rmse'].mean()
    mean_mae = results['test_mae'].mean()

    print(f"Mean RMSE across folds: {mean_rmse}")
    print(f"Mean MAE across folds: {mean_mae}")

    # Final fit on the full rating set. Without this the returned model would
    # carry the weights of the last CV fold, i.e. only part of the data.
    best_svd.fit(data.build_full_trainset())

    return best_svd


# Tune and fit the collaborative-filtering model on the purchase ratings.
svd = train_svd_model_with_tuning(ground_truth_clean=purchase_history)

Best RMSE score: 1.2693900549412913
Best parameters: {'n_factors': 150, 'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.02}
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.2715  1.2684  1.2794  1.2829  1.2635  1.2731  0.0071  
MAE (testset)     1.0001  0.9970  0.9913  1.0011  0.9858  0.9951  0.0057  
Fit time          0.05    0.06    0.05    0.04    0.04    0.05    0.01    
Test time         0.00    0.00    0.01    0.00    0.00    0.00    0.00    
Mean RMSE across folds: 1.2731418427633945
Mean MAE across folds: 0.9950809222002128


## Mapping Product Names to Row Indices and Product IDs

In [6]:
def map_product_indices(data):
    """Build lookup tables between product names, row positions and product IDs.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'Name'`` and ``'ProdID'``.

    Returns
    -------
    indices : pandas.Series
        Maps product name -> positional row number (0..n-1) in ``data``. When a
        name occurs more than once only its first row is kept, so a lookup by
        name always yields a single scalar.
    indices_map : pandas.DataFrame
        Indexed by ``'ProdID'`` with a single ``'Name'`` column. ProdID values
        are not deduplicated, so ``.loc`` may return several rows.
    """
    # Renumber rows 0..n-1 so the stored positions line up with a similarity
    # matrix built from the same frame.
    data = data.reset_index(drop=True)

    indices = pd.Series(data.index, index=data['Name'])
    # Series.drop_duplicates() would compare the values (the unique row
    # numbers) and remove nothing; duplicates must be dropped on the index.
    indices = indices[~indices.index.duplicated(keep='first')]

    # Select both columns in one step; chaining set_index('Name') and then
    # set_index('ProdID') would throw the names away and leave no columns.
    indices_map = data[['ProdID', 'Name']].set_index('ProdID')

    return indices, indices_map

indices, indices_map = map_product_indices(data=df)
# Print a short preview of each lookup table.
for lookup_preview in (indices.head(10), indices_map.head()):
    print(lookup_preview)

Name
Totally Products Apple Cider Vinegar High Potency (120 Capsules)                                                                                                         0
LOreal Paris Excellence Creme Triple Protection Color Creame 100% Gray Coverage Natural Ash Black Cooler 3A Hair Color, 1 Kt                                             1
Garnier Nutrisse Nourishing Hair Color Creme (Browns), 41 Dark Nude Brown, 1 kit                                                                                         2
Sensationail Gel Nail Color Polish &quotFuchsia Fab&quot, Red Gel Color, 0.25 fl oz (7.39 mL) (Packaging May Vary)                                                       3
2 Pack Air Purifying Bag Bamboo Charcoal Air Freshener Deodorizer Odor Eliminator Odor Absorber for Car and Closet 0.11lb/Pack                                           4
Wet Dry Unisex Painless Stainless Steel Blade Men Women s Fashion Personal Face Grooming Care Electric Nose Ear Face Nose Hair Removal Trimm

## Hybrid Recommendation

In [7]:
def hybrid_recommendation(UserID, product_name, indices, data_cleaned, cosine_sim, svd, display=10, n_candidates=25):
    """Recommend products similar to ``product_name``, ranked for ``UserID``.

    The products most similar to the query (by ``cosine_sim``) are selected as
    candidates, then re-ranked by the rating ``svd`` predicts for the user.

    Parameters
    ----------
    UserID
        User identifier passed to ``svd.predict``.
    product_name : str
        Name of the query product; must be a label of ``indices``.
    indices : pandas.Series
        Maps product name -> row position in ``data_cleaned`` / ``cosine_sim``.
    data_cleaned : pandas.DataFrame
        Product table with ``'ProdID'``, ``'Name'``, ``'Rating'``, ``'Price'``.
    cosine_sim : 2-D array-like
        ``cosine_sim[i][j]`` is the similarity between rows ``i`` and ``j``.
    svd
        Fitted model exposing ``predict(user, item)`` whose result has ``.est``.
    display : int, default 10
        Number of rows to return. Inside this function the name shadows
        ``IPython.display.display``.
    n_candidates : int, default 25
        Number of most-similar products considered before re-ranking.

    Returns
    -------
    (pandas.DataFrame, list of int)
        The top ``display`` candidates with an added ``'est'`` column, sorted by
        ``'est'`` descending, and the candidate row positions in similarity
        order.

    Raises
    ------
    ValueError
        If ``product_name`` is unknown or no candidate row is available.
    """
    if product_name not in indices:
        raise ValueError(f"Product '{product_name}' not found in indices.")

    idx = indices[product_name]
    # A name that occurs more than once yields a Series; use its first row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    max_index = len(data_cleaned) - 1
    # Exclude the query row explicitly. Skipping "position 0" of the sorted list
    # would drop the wrong row whenever another product ties with it.
    sim_scores = [
        (row, score)
        for row, score in enumerate(cosine_sim[idx])
        if row != idx and row <= max_index
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    product_indices = [int(row) for row, _ in sim_scores[:n_candidates]]

    if not product_indices:
        raise ValueError(f"No valid product indices found for '{product_name}'.")

    # Read from the frame that was passed in, not a notebook-level global, and
    # copy so that adding the 'est' column does not write into a slice.
    products = data_cleaned.iloc[product_indices][['ProdID', 'Name', 'Rating', 'Price']].copy()

    # NOTE(review): assumes 'ProdID' uses the same raw item ids the model was
    # trained on — confirm against the training data.
    products['est'] = products['ProdID'].apply(lambda x: svd.predict(UserID, x).est)

    products = products.sort_values('est', ascending=False)

    return products.head(display), product_indices


### Example 1

In [8]:
# Example 1, user 1: candidates similar to a LOreal hair colour product.
recommendations, product_indices = hybrid_recommendation(
    UserID=1,
    product_name='LOreal Paris Excellence Creme Triple Protection Color Creame 100% Gray Coverage Natural Ash Black Cooler 3A Hair Color, 1 Kt',
    indices=indices,
    data_cleaned=df,
    cosine_sim=cosine_sim,
    svd=svd,
    display=10,
)

# Show the ranked recommendations as the cell output.
recommendations

Unnamed: 0,ProdID,Name,Rating,Price,est
2966,7.0,Loreal Loreal Healthy Look Sublime Mousse Perm...,3.1,15.0,5.0
3653,7.0,LOreal Paris Excellence Creme Permanent Triple...,4.0,7.91,5.0
3711,35.0,LOreal Superior Preference - 6 Light Brown (Na...,3.3,33.36,4.302913
2786,5.0,"LOreal Superior Preference Les Blondissimes, L...",3.0,24.76,4.022856
1493,68.0,"Garnier Nutrisse Haircolor Creme, Black [10] 1...",2.7,47.78,3.626445
643,8.0,"LOreal Excellence Creme, Medium Brown [5] 1 Ea...",2.9,23.47,3.292576
2745,8.0,"Garnier Nutrisse Nourishing Hair Color Creme, ...",4.0,6.97,3.292576
1226,8.0,LOreal Paris Excellence Creme Triple Protectio...,5.0,62.81,3.292576
2802,9.0,LOreal Superior Preference - 8-1/2A Champagne ...,2.9,32.97,3.083447
59,6418.0,"Creme of Nature Colors Hair Color, 1 each",4.0,9.99,3.069644


In [9]:
# Example 1, user 2: same query product, different user.
recommendations, product_indices = hybrid_recommendation(
    UserID=2,
    product_name='LOreal Paris Excellence Creme Triple Protection Color Creame 100% Gray Coverage Natural Ash Black Cooler 3A Hair Color, 1 Kt',
    indices=indices,
    data_cleaned=df,
    cosine_sim=cosine_sim,
    svd=svd,
    display=10,
)

# Show the ranked recommendations as the cell output.
recommendations

Unnamed: 0,ProdID,Name,Rating,Price,est
2755,2.0,JUST FOR MEN Color Gel Mustache & Beard M-35 M...,3.2,40.68,5.0
2,1.0,Garnier Nutrisse Nourishing Hair Color Creme (...,3.9,4.44,5.0
2811,1.0,"Garnier Nutrisse Nourishing Hair Color Creme, ...",4.0,6.97,5.0
2666,77.0,Garnier Nutrisse Nourishing Hair Color Creme (...,4.0,6.63,4.293477
1903,6.0,LOreal Paris Superior Preference Fade-Defying ...,3.7,8.97,4.157747
2786,5.0,"LOreal Superior Preference Les Blondissimes, L...",3.0,24.76,4.002225
731,4.0,LOreal Superior Preference - 9-1/2A Lightest A...,3.2,41.96,3.998054
1703,61.0,LOreal Excellence Creme - 9-1/2NB Lightest Nat...,5.0,34.24,3.235086
2802,9.0,LOreal Superior Preference - 8-1/2A Champagne ...,2.9,32.97,3.078569
3711,35.0,LOreal Superior Preference - 6 Light Brown (Na...,3.3,33.36,3.027807


### Example 2

In [10]:
# Example 2, user 1: candidates similar to a shower cap product.
recommendations, product_indices = hybrid_recommendation(
    UserID=1,
    product_name='Luxury Shower Cap for Women- Most Comfortable Fit, Waterproof & Mold Resistant, Reusable Shower Caps by Kitsch (Floral)',
    indices=indices,
    data_cleaned=df,
    cosine_sim=cosine_sim,
    svd=svd,
    display=10,
)

# Show the ranked recommendations as the cell output.
recommendations

Unnamed: 0,ProdID,Name,Rating,Price,est
440,7.0,Thierry Mugler Angel Womens 7-ounce Shower Gel...,3.3,48.49,5.0
3272,7.0,Cedrat (Citron) Fresh Shower Gel-200ml/6.6oz,3.3,26.31,5.0
578,3.0,Neutrogena Rainbath Replenishing Shower/Bath G...,4.6,9.97,5.0
2205,1.111104e+39,Dove Shower Foam Body Wash Sensitive Skin 13.5 oz,4.5,5.94,3.729867
3676,477.0,"Alpha Keri Shower & Bath Moisture Rich Oil, 16 oz",4.0,15.16,3.576456
3904,1.001111e+42,(2 pack) Caress Botanicals Midnight Iris and V...,4.4,10.54,3.48755
353,60105.0,Wrapables Stylish Double Layer Waterproof Show...,4.9,14.99,3.384938
38,48.0,Zodaca Baby Kid Children Soft Shampoo Bath Sho...,2.0,5.99,3.288553
4021,472.0,"Le Male All-Over Shower Gel, 6.8 Oz",2.0,36.99,3.113573
3971,4819.0,Bvlgari Aqva Divina Bath & Shower Gel 3.4 oz -...,3.0,19.76,3.111748


In [11]:
# Example 2, user 2: same query product, different user.
recommendations, product_indices = hybrid_recommendation(
    UserID=2,
    product_name='Luxury Shower Cap for Women- Most Comfortable Fit, Waterproof & Mold Resistant, Reusable Shower Caps by Kitsch (Floral)',
    indices=indices,
    data_cleaned=df,
    cosine_sim=cosine_sim,
    svd=svd,
    display=10,
)

# Show the ranked recommendations as the cell output.
recommendations

Unnamed: 0,ProdID,Name,Rating,Price,est
1063,2.0,Molton Brown Relaxing Ylang-Ylang Bath & Showe...,3.2,32.0,5.0
2977,7.940046e+39,AXE Apollo Body Wash 32 oz,4.7,6.97,4.438592
2224,6.0,Crabtree & Evelyn Rosewater Bath and Shower Ge...,3.0,29.97,4.157747
2840,1.005509e+42,"The Right To Shower Body Wash Joy, 16 oz",5.0,8.99,3.336826
3971,4819.0,Bvlgari Aqva Divina Bath & Shower Gel 3.4 oz -...,3.0,19.76,3.260731
1735,475.0,Bvlgari au the blanc (white tea) Shampoo and S...,3.0,36.97,3.173027
3904,1.001111e+42,(2 pack) Caress Botanicals Midnight Iris and V...,4.4,10.54,3.143117
2205,1.111104e+39,Dove Shower Foam Body Wash Sensitive Skin 13.5 oz,4.5,5.94,3.142302
3485,683.0,Kiehls Bath & Shower Liquid Body Cleanser - Co...,2.5,25.76,3.116806
1102,9.0,Neutrogena Rainbath Refreshing and Cleansing S...,4.7,9.97,3.078569


### Example 3

In [12]:
# Example 3, user 1: candidates similar to a Clairol hair colour product.
recommendations, product_indices = hybrid_recommendation(
    UserID=1,
    product_name='Clairol Natural Instincts Demi-Permanent Hair Color Creme 5R Medium Auburn, 1 Application',
    indices=indices,
    data_cleaned=df,
    cosine_sim=cosine_sim,
    svd=svd,
    display=10,
)

# Show the ranked recommendations as the cell output.
recommendations

Unnamed: 0,ProdID,Name,Rating,Price,est
3731,5.0,Clairol Nicen Easy Permanent Hair Color Crème ...,3.1,6.92,4.022856
12,289.0,Clairol Professional Beautiful Collection Semi...,5.0,8.42,3.48784
1503,9886928000.0,Clairol Natural Instincts Hair Color 12A Light...,3.9,6.49,3.389619
3956,13.0,Clairol Natural Instincts Demi-Permanent Hair ...,3.6,6.99,3.352004
1399,8.0,Clairol Nice N Easy Permanent Color 7/106A Nat...,4.5,7.99,3.292576
2877,299080.0,"Clairol Natural Instincts Hair Color, 7GR Ligh...",5.0,18.99,3.285941
903,110.0,Clairol Natural Instincts Demi-Permanent Hair ...,3.6,6.92,3.230588
2819,6489.0,LOreal Paris Superior Preference Fade-Defying ...,3.7,8.97,3.108005
329,9.0,"Clairol Nice n Easy Permanent Hair Color, Natu...",4.5,7.29,3.083447
4062,2268.0,Clairol Nice n Easy Permanent Hair Color Creme...,4.5,6.92,3.069644


In [13]:
# Example 3, user 2: same query product, different user.
recommendations, product_indices = hybrid_recommendation(
    UserID=2,
    product_name='Clairol Natural Instincts Demi-Permanent Hair Color Creme 5R Medium Auburn, 1 Application',
    indices=indices,
    data_cleaned=df,
    cosine_sim=cosine_sim,
    svd=svd,
    display=10,
)

# Show the ranked recommendations as the cell output.
recommendations

Unnamed: 0,ProdID,Name,Rating,Price,est
2319,2.0,Clairol Natural Instincts Demi-Permanent Hair ...,3.7,8.58,5.0
1918,1.0,Clairol Natural Instincts Demi-Permanent Hair ...,3.7,6.99,5.0
2666,77.0,Garnier Nutrisse Nourishing Hair Color Creme (...,4.0,6.63,4.293477
4033,88.0,Clairol Nice N Easy Permanent Hair Color Creme...,4.5,6.92,4.177925
1029,6.0,Clairol Nicen Easy Permanent Hair Color Crème ...,4.5,6.92,4.157747
3731,5.0,Clairol Nicen Easy Permanent Hair Color Crème ...,3.1,6.92,4.002225
2993,76.0,LOreal Paris Superior Preference Fade-Defying ...,3.8,8.97,3.984637
1503,9886928000.0,Clairol Natural Instincts Hair Color 12A Light...,3.9,6.49,3.633868
12,289.0,Clairol Professional Beautiful Collection Semi...,5.0,8.42,3.505557
239,53.0,Clairol nice n easy permanent hair color 4/120...,4.5,7.1,3.390772


In [15]:
import pickle

# Persist the fitted SVD model next to the notebook for later reuse.
# NOTE(review): a pickle file should only be loaded again from a trusted source.
with open('hybrid_svd_model.pkl', 'wb') as model_file:
    pickle.dump(svd, model_file)