In [55]:
import pandas as pd
import scipy.sparse as sparse
import numpy as np
import random
import implicit
import joblib
from sklearn.preprocessing import MinMaxScaler

In [56]:
# Product catalogue; the first CSV column becomes the DataFrame index
products_df = pd.read_csv(
    "../IP2022_Spring_8451/data/products.csv",
    index_col=0,
)
# Transactions come from the final merged CSV file
transactions = pd.read_csv("FinalMergedData.csv")

In [57]:
#Performs data-preprocessing on the input dataframe
#One mandatory parameter: df
#Optional parameters: cap_value, bins, weights, byBasket
def generateRequiredDataframe(df,cap_value = 30, bins = [0,1,5,30],weights = [1,2,3], byBasket=True):
    """Aggregate transactions into one weighted row per (household, product) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Transaction rows. Must contain ``household_id`` and ``product_id``,
        plus ``basket_id`` when ``byBasket`` is True or ``quantity`` when it
        is False. The input frame is not modified.
    cap_value : number, default 30
        Aggregated values greater than or equal to this are set to it.
    bins : sequence of numbers, default [0, 1, 5, 30]
        Bin edges passed to ``pandas.cut``. With pandas' default right-closed
        intervals, an aggregated value at or below the first edge or above the
        last edge gets a missing (NaN) weight, so the last edge should be at
        least ``cap_value``.
    weights : sequence, default [1, 2, 3]
        Labels passed to ``pandas.cut``, one per bin.
    byBasket : bool, default True
        True: count the non-null ``basket_id`` entries per pair
        (``times_purchased``). False: sum ``quantity`` per pair
        (``total_purchase``), treating a recorded quantity of 0 as 1.

    Returns
    -------
    pandas.DataFrame
        Columns ``household_id``, ``product_id``, the aggregated measure,
        ``weight`` (categorical), ``household_id_cat`` and ``product_id_cat``
        (integer category codes of the two ids).

    The list defaults for ``bins`` and ``weights`` are only read, never
    mutated, so sharing them between calls is safe.
    """
    group_keys = ["household_id", "product_id"]

    if byBasket:
        measure, source, how = "times_purchased", "basket_id", "count"
        # Only the columns this branch actually uses are required.
        purchases = df.loc[:, group_keys + [source]]
    else:
        measure, source, how = "total_purchase", "quantity", "sum"
        purchases = df.loc[:, group_keys + [source]]
        # A recorded quantity of 0 counts as a single unit. Build a new column
        # rather than assigning into the slice, which avoids pandas'
        # chained-assignment (SettingWithCopy) ambiguity.
        purchases = purchases.assign(
            quantity=purchases["quantity"].mask(purchases["quantity"] == 0, 1)
        )

    df_preprocessed = (
        purchases.groupby(group_keys)
        .agg(**{measure: (source, how)})
        .reset_index()
    )

    # Cap heavy purchasers, then bucket the capped measure into weights.
    df_preprocessed.loc[df_preprocessed[measure] >= cap_value, measure] = cap_value
    df_preprocessed["weight"] = pd.cut(df_preprocessed[measure], bins=bins, labels=weights)

    # Converting household_id and product_id into dense integer category codes
    for key in group_keys:
        df_preprocessed[key + "_cat"] = df_preprocessed[key].astype("category").cat.codes
    return df_preprocessed

# Alternative (basket-count defaults): result = generateRequiredDataframe(transactions)
# Quantity-based variant: cap summed quantity at 500 and bucket it into weights 1-3
result = generateRequiredDataframe(
    transactions,
    cap_value=500,
    bins=[0, 1, 5, 501],
    weights=[1, 2, 3],
    byBasket=False,
)
result

Unnamed: 0,household_id,product_id,total_purchase,weight,household_id_cat,product_id_cat
0,1,819312.0,1,1,0,1599
1,1,820165.0,4,2,0,1670
2,1,821815.0,2,2,0,1794
3,1,821867.0,3,2,0,1803
4,1,823721.0,1,1,0,1942
...,...,...,...,...,...,...
408760,2500,16059046.0,2,2,1531,47542
408761,2500,16223114.0,1,1,1531,47689
408762,2500,16223404.0,3,2,1531,47710
408763,2500,16809391.0,6,3,1531,47914


In [58]:
#Creating 2 sparse matrix with weight as the input data
def createSparseMatrix(df):
    """Build the two CSR interaction matrices from the preprocessed frame.

    Reads the ``weight``, ``product_id_cat`` and ``household_id_cat`` columns
    of ``df`` and returns a tuple ``(product x household, household x product)``
    whose stored values are the weights cast to float.
    """
    values = df['weight'].astype(float)
    product_idx = df['product_id_cat'].astype(int)
    household_idx = df['household_id_cat'].astype(int)

    # Same data, opposite orientation: rows are products in the first matrix
    # and households in the second.
    by_product = sparse.csr_matrix((values, (product_idx, household_idx)))
    by_household = sparse.csr_matrix((values, (household_idx, product_idx)))
    return (by_product, by_household)

#training an ALS model for recommending new products to the household
def trainModel(sparse_product_household,latent_factors =40,regularization_rate = 0.1,n_iterations = 100,alpha_val = 15,model_path = "model"):
    """Fit an implicit-feedback ALS model and persist it with joblib.

    Parameters
    ----------
    sparse_product_household : scipy sparse matrix
        Product x household weight matrix (first element of the tuple
        returned by ``createSparseMatrix``).
    latent_factors : int, default 40
        Passed as ``factors`` to ``AlternatingLeastSquares``.
    regularization_rate : float, default 0.1
        Passed as ``regularization`` to ``AlternatingLeastSquares``.
    n_iterations : int, default 100
        Passed as ``iterations`` to ``AlternatingLeastSquares``.
    alpha_val : number, default 15
        Multiplier applied to every weight before fitting. Previously a
        hard-coded constant inside the function.
    model_path : str, default "model"
        File the fitted model is dumped to. The default is the file the
        recommendation helpers in this notebook load from.

    Returns
    -------
    None
        The fitted model is only available through ``model_path``.
    """
    # NOTE(review): the matrix is passed item-major (products as rows). That
    # orientation is what older implicit releases expect in fit(); newer
    # releases are understood to expect user-major input - confirm against the
    # installed implicit version.
    model = implicit.als.AlternatingLeastSquares(factors=latent_factors, regularization=regularization_rate, iterations=n_iterations)
    # Intensifying the weights
    data_conf = (sparse_product_household * alpha_val).astype('double')
    # Training the ALS model using the scaled sparse matrix
    model.fit(data_conf)
    joblib.dump(model, model_path)

#Recommending new products
#input: preprocessed dataframe, household_id, household_product matrix
def getRecommendations(df,household_id,sparse_household_product,n_recommendations = 10,model_path = "model"):
    """Recommend products for one household using the persisted ALS model.

    Parameters
    ----------
    df : pandas.DataFrame
        Preprocessed frame holding ``household_id``, ``household_id_cat``,
        ``product_id`` and ``product_id_cat``.
    household_id
        Raw household identifier to recommend for.
    sparse_household_product : scipy sparse matrix
        Household x product matrix handed to ``model.recommend``.
    n_recommendations : int, default 10
        Passed as ``N`` to ``model.recommend``.
    model_path : str, default "model"
        joblib file to load the model from.

    Returns
    -------
    pandas.DataFrame
        ``product_id`` and ``recommendation_score`` per recommendation,
        left-joined with the notebook-level ``products_df`` on ``product_id``.

    Raises
    ------
    IndexError
        If ``household_id`` does not occur in ``df``.
    """
    # joblib.load unpickles the file: only load model files you wrote yourself.
    model = joblib.load(model_path)

    household_codes = df.loc[df.household_id == household_id, "household_id_cat"]
    if household_codes.empty:
        # Same exception type as the bare .iloc[0] would raise, with a usable message.
        raise IndexError("household_id %r is not present in df" % (household_id,))
    userid_cat = household_codes.iloc[0]

    # Iterated below as (item code, score) pairs.
    recommended = model.recommend(userid_cat, sparse_household_product, N=n_recommendations)

    # One code -> product_id lookup table (first occurrence per code) instead
    # of scanning the whole frame once per recommended item.
    code_to_product = df.drop_duplicates(subset="product_id_cat").set_index("product_id_cat")["product_id"]

    recommended_products = [code_to_product.loc[idx] for idx, _ in recommended]
    recommendation_score = [score for _, score in recommended]

    result = pd.DataFrame(data = {"product_id":recommended_products,"recommendation_score":recommendation_score})
    result = pd.merge(result,products_df,on="product_id",how="left")
    return result

# Returns a dataframe with "n" similar products
def getSimilarProducts(df,product_id,n_similar=10,model_path = "model"):
    """Return the products the persisted ALS model rates most similar to one product.

    Parameters
    ----------
    df : pandas.DataFrame
        Preprocessed frame holding ``product_id`` and ``product_id_cat``.
    product_id
        Raw product identifier to find neighbours for.
    n_similar : int, default 10
        Number of items requested from ``model.similar_items``. In this
        notebook's sample output the queried product itself is the first row.
    model_path : str, default "model"
        joblib file to load the model from.

    Returns
    -------
    pandas.DataFrame
        ``product_id`` and ``similarity_score`` per item, left-joined with the
        notebook-level ``products_df`` on ``product_id``.

    Raises
    ------
    IndexError
        If ``product_id`` does not occur in ``df``.
    """
    # joblib.load unpickles the file: only load model files you wrote yourself.
    model = joblib.load(model_path)

    product_codes = df.loc[df.product_id == product_id, "product_id_cat"]
    if product_codes.empty:
        # Same exception type as the bare .iloc[0] would raise, with a usable message.
        raise IndexError("product_id %r is not present in df" % (product_id,))
    item_id = product_codes.iloc[0]

    # Iterated below as (item code, score) pairs.
    similar = model.similar_items(item_id, n_similar)

    # One code -> product_id lookup table (first occurrence per code) instead
    # of scanning the whole frame once per similar item.
    code_to_product = df.drop_duplicates(subset="product_id_cat").set_index("product_id_cat")["product_id"]

    product_ids = [code_to_product.loc[idx] for idx, _ in similar]
    similarity_score = [score for _, score in similar]

    result = pd.DataFrame(data = {"product_id":product_ids,"similarity_score":similarity_score})
    result = pd.merge(result,products_df,on="product_id",how="left")
    return result

In [59]:
# Build both sparse interaction matrices from the preprocessed dataframe
(sparse_product_household, sparse_household_product) = createSparseMatrix(df=result)

In [60]:
# Fit the ALS model on the product x household matrix (saved to disk by trainModel)
trainModel(sparse_product_household=sparse_product_household)

  0%|          | 0/100 [00:00<?, ?it/s]

In [61]:
# Ask the saved ALS model for recommendations for household 2479
recommendations = getRecommendations(
    df=result,
    household_id=2479,
    sparse_household_product=sparse_household_product,
)
recommendations

Unnamed: 0,product_id,recommendation_score,manufacturer_id,department,brand,product_category,product_type,package_size
0,9881593.0,1.863199,1102,GROCERY,National,BAKED SWEET GOODS,SNACK CAKE - MULTI PACK,8 OZ
1,9878513.0,1.828372,69,GROCERY,Private,FACIAL TISS/DNR NAPKIN,FACIAL TISSUE & PAPER HANDKE,85 CT
2,7441498.0,1.714761,69,GROCERY,Private,ICE CREAM/MILK/SHERBTS,PREMIUM,56 OZ
3,844818.0,1.639786,544,GROCERY,National,BAG SNACKS,CORN CHIPS,10 OZ
4,9553397.0,1.626923,764,GROCERY,National,BATH TISSUES,TOILET TISSUE,
5,991268.0,1.595737,1457,GROCERY,National,BAG SNACKS,PRETZELS,12 OZ
6,963226.0,1.583489,69,GROCERY,Private,FRZN POTATOES,FRZN FRENCH FRIES,28 OZ
7,838842.0,1.580335,586,GROCERY,National,COOKIES/CONES,TRAY PACK/CHOC CHIP COOKIES,15 OZ
8,13671759.0,1.572609,531,DRUG GM,National,CANDY - PACKAGED,CANDY BARS (MULTI PACK),6.5 OZ
9,9707240.0,1.562341,665,MEAT-PCKGD,National,HOT DOGS,PREMIUM - MEAT,14 OZ


In [62]:
# Recommendations weighted by how many times a product was purchased across transactions
result = generateRequiredDataframe(df=transactions)
(sparse_product_household, sparse_household_product) = createSparseMatrix(df=result)
trainModel(sparse_product_household=sparse_product_household)
recommendations = getRecommendations(
    df=result,
    household_id=1,
    sparse_household_product=sparse_household_product,
)
recommendations

  0%|          | 0/100 [00:00<?, ?it/s]

Unnamed: 0,product_id,recommendation_score,manufacturer_id,department,brand,product_category,product_type,package_size
0,8090532.0,1.552644,103,GROCERY,National,SOFT DRINKS,SOFT DRINKS 12/18&15PK CAN CAR,12 OZ
1,948953.0,1.344214,917,DRUG GM,National,BATTERIES,ALKALINE BATTERIES,
2,5568447.0,1.232502,764,GROCERY,National,PAPER TOWELS,PAPER TOWELS & HOLDERS,141.1 SQ FT
3,960732.0,1.210364,194,GROCERY,National,COLD CEREAL,ALL FAMILY CEREAL,14 OZ
4,5572828.0,1.200409,2224,GROCERY,National,SOFT DRINKS,SFT DRNK SNGL SRV BTL CARB (EX,20 OZ
5,1051211.0,1.186922,1266,GROCERY,National,PNT BTR/JELLY/JAMS,PEANUT BUTTER,18 OZ
6,856345.0,1.173014,1884,GROCERY,National,MILK BY-PRODUCTS,SOUR CREAMS,16 OZ
7,1059527.0,1.163446,1646,PRODUCE,National,VALUE ADDED VEGETABLES,CUT VEGETABLES ALL OTHER,10 OZ
8,1043128.0,1.150819,827,GROCERY,National,FRUIT - SHELF STABLE,PINEAPPLE,20 OZ
9,17104444.0,1.13511,693,DRUG GM,National,CANDY - PACKAGED,SEASONAL CANDY BAGS-CHOCOLATE,12 OZ


In [63]:
# Look up 10 similar products for the soft drink with id 8090532.0
similar_products = getSimilarProducts(
    df=result,
    product_id=8090532.0,
    n_similar=10,
)
similar_products

Unnamed: 0,product_id,similarity_score,manufacturer_id,department,brand,product_category,product_type,package_size
0,8090532.0,1.0,103,GROCERY,National,SOFT DRINKS,SOFT DRINKS 12/18&15PK CAN CAR,12 OZ
1,12605292.0,0.788604,2551,DRUG GM,National,CHRISTMAS SEASONAL,CHRISTMAS LIGHTS,20 CT
2,5589752.0,0.788604,69,GROCERY,Private,WATER - CARBONATED/FLVRD DRINK,DISTILLED WATER,GAL
3,9190629.0,0.788604,2551,DRUG GM,National,ELECTRICAL SUPPPLIES,EXTENSION CORDS,
4,12604493.0,0.788604,484,DRUG GM,National,CHRISTMAS SEASONAL,DECOR,
5,12605734.0,0.788604,2551,DRUG GM,National,CHRISTMAS SEASONAL,CHRISTMAS LIGHTS,
6,5568419.0,0.788604,1208,GROCERY,National,SOFT DRINKS,SOFT DRINKS 12/18&15PK CAN CAR,12 OZ
7,12606012.0,0.788604,2551,DRUG GM,National,CHRISTMAS SEASONAL,CHRISTMAS LIGHTS,
8,8090536.0,0.710805,103,GROCERY,National,SOFT DRINKS,SOFT DRINKS 12/18&15PK CAN CAR,12 OZ
9,8090513.0,0.653257,2224,GROCERY,National,SOFT DRINKS,SOFT DRINKS 12/18&15PK CAN CAR,12 OZ
