In [164]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

from math import sqrt

In [27]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity




In [2]:
# load the dataset
df = pd.read_csv('data.csv',encoding= 'unicode_escape')

In [3]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [4]:
df = df[df.Country == 'United Kingdom']
df = df[df.Quantity>0]

In [171]:
train_data, test_data = train_test_split(df, test_size=0.2)


In [None]:
ratings_matrix = pd.pivot_table(train_data, index='CustomerID', columns='Description', values='Quantity', aggfunc="sum" ,fill_value=0)


In [169]:
# Compute the cosine similarity between each pair of users
user_similarity = cosine_similarity(ratings_matrix)

# Create a dictionary to store the recommendations for each user
recommendations = {}

# For each user, find the most similar users and get their product recommendations
for user in ratings_matrix.index:
    # Get the index of the user in the similarity matrix
    user_index = ratings_matrix.index.get_loc(user)

    # Get the indices of the most similar users
    similar_user_indices = user_similarity[user_index].argsort()[::-1][1:6]

    # Get the product ratings for the most similar users
    similar_user_ratings = ratings_matrix.iloc[similar_user_indices]

    # Get the average ratings for each product across the most similar users, excluding the user's own ratings
    product_ratings = similar_user_ratings.mean(axis=0) - ratings_matrix.loc[user]

    # Get the top 5 products with the highest average ratings
    top_products = product_ratings.sort_values(ascending=False)[:5]

    # Store the recommendations for the user
    recommendations[user] = list(top_products.index)
    
    #if I want to see results in a string
    #recommendations[user] = ",".join(top_products.index)


# Create a DataFrame with the recommendations for each user
recommendations_df = pd.DataFrame.from_dict(recommendations, orient='index')
recommendations_df.columns = ['recommended_product1', 'recommended_product2', 'recommended_product3', 'recommended_product4', 'recommended_product5']

In [None]:
test_data = test_data.groupby[["CustomerID","Description"]].unique()

In [178]:
# Group the data by customer ID and concatenate the product names into a single string
test_products_df = test_data.groupby('CustomerID')['Description'].agg(','.join).reset_index().rename(columns={'Description': 'names'})

# Rename the column to show the products in a single cell
#test_products_df.columns = ['products']

# Print the resulting DataFrame
test_products_df


Unnamed: 0,CustomerID,names
0,12747.0,"DOORMAT UNION FLAG,ASSORTED COLOUR BIRD ORNAME..."
1,12748.0,"HEART SHAPED HOLLY WREATH,LUNCH BAG PAISLEY PA..."
2,12749.0,"SINGLE ANTIQUE ROSE HOOK IVORY,LETTER HOLDER H..."
3,12820.0,"T-LIGHT GLASS FLUTED ANTIQUE,RED RETROSPOT WRA..."
4,12822.0,"GIANT 50'S CHRISTMAS CRACKER,LUNCH BAG DOLLY G..."
...,...,...
3670,18280.0,VINTAGE UNION JACK SHOPPING BAG
3671,18281.0,PENNY FARTHING BIRTHDAY CARD
3672,18282.0,"FAIRY CAKE FLANNEL ASSORTED COLOUR,REGENCY CAK..."
3673,18283.0,"LUNCH BAG WOODLAND,LUNCH BAG DOLLY GIRL DESIGN..."


In [180]:
# Merge the recommendations with the test ratings data
merged_df = test_products_df.merge(recommendations_df, how='inner', left_on='CustomerID', right_index=True)

# Compute the RMSE for the recommendations
merged_df

Unnamed: 0,CustomerID,names,recommended_product1,recommended_product2,recommended_product3,recommended_product4,recommended_product5
0,12747.0,"DOORMAT UNION FLAG,ASSORTED COLOUR BIRD ORNAME...",FLUTED ANTIQUE CANDLE HOLDER,REGENCY CAKESTAND 3 TIER,LAVENDER TOILETTE BOTTLE,T-LIGHT GLASS FLUTED ANTIQUE,FRENCH WC SIGN BLUE METAL
1,12748.0,"HEART SHAPED HOLLY WREATH,LUNCH BAG PAISLEY PA...",TOOTHPASTE TUBE PEN,CHRISTMAS CARD SCREEN PRINT,ANTIQUE SILVER TEA GLASS ENGRAVED,MINI PAINT SET VINTAGE,LAVENDER SCENTED FABRIC HEART
2,12749.0,"SINGLE ANTIQUE ROSE HOOK IVORY,LETTER HOLDER H...",JAM MAKING SET PRINTED,SET OF 20 KIDS COOKIE CUTTERS,VINTAGE CHRISTMAS CAKE FRILL,REGENCY MILK JUG PINK,SMALL GLASS HEART TRINKET POT
3,12820.0,"T-LIGHT GLASS FLUTED ANTIQUE,RED RETROSPOT WRA...",60 CAKE CASES VINTAGE CHRISTMAS,PACK OF 60 PINK PAISLEY CAKE CASES,PACK OF 72 SKULL CAKE CASES,72 SWEETHEART FAIRY CAKE CASES,60 TEATIME FAIRY CAKE CASES
4,12822.0,"GIANT 50'S CHRISTMAS CRACKER,LUNCH BAG DOLLY G...",FUNKY DIVA PEN,WHITE HANGING HEART T-LIGHT HOLDER,TRAVEL CARD WALLET KEEP CALM,HOMEMADE JAM SCENTED CANDLES,SET/6 COLLAGE PAPER CUPS
...,...,...,...,...,...,...,...
3670,18280.0,VINTAGE UNION JACK SHOPPING BAG,GUMBALL COAT RACK,PAPER CHAIN KIT EMPIRE,ALARM CLOCK BAKELIKE GREEN,SMALL RED RETROSPOT WINDMILL,ALARM CLOCK BAKELIKE RED
3671,18281.0,PENNY FARTHING BIRTHDAY CARD,CARD MOTORBIKE SANTA,"FANCY FONT BIRTHDAY CARD,",BOX OF 6 ASSORTED COLOUR TEASPOONS,CHRISTMAS METAL TAGS ASSORTED,JINGLE BELL HEART ANTIQUE SILVER
3672,18282.0,"FAIRY CAKE FLANNEL ASSORTED COLOUR,REGENCY CAK...",CARD CHRISTMAS VILLAGE,CARD MOTORBIKE SANTA,WRAP BILLBOARD FONTS DESIGN,IVORY HANGING DECORATION HEART,PINK BLUE FELT CRAFT TRINKET BOX
3673,18283.0,"LUNCH BAG WOODLAND,LUNCH BAG DOLLY GIRL DESIGN...",LUNCH BAG BLACK SKULL.,LUNCH BAG APPLE DESIGN,LUNCH BAG SUKI DESIGN,LUNCH BAG PINK POLKADOT,LUNCH BAG RED RETROSPOT


In [183]:
sum(merged_df[['recommended_product1', 'recommended_product2', 'recommended_product3','recommended_product4','recommended_product5']].isin(merged_df["names"]).any(axis=1))

37