In [224]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re


# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [225]:
# Project-local helper module (data_processor); it must be importable from the
# kernel's working directory for this cell to run.
from data_processor import load_and_process_data


# Run configuration.
# Get less data; set to None if you want entire datasets
user_path = '../../data/'     # relative path, passed as the first argument to the loader
products_slice = 1000         # size limit for the products data
sessions_slice = 1000         # size limit for the training sessions
test_slice = 1000             # size limit for the test sessions
task = 'task1'                # presumably selects the task variant -- confirm in data_processor


# Load and process data.
# The three returned frames are used by the cells below; argument order follows
# the positional call (path, products slice, sessions slice, test slice, task).
products_train, sessions_train, sessions_test = load_and_process_data(user_path, products_slice, sessions_slice, test_slice, task)

In [226]:
# Sanity check: (rows, columns) of each loaded frame
products_train.shape, sessions_train.shape, sessions_test.shape

((1000, 11), (1000, 3), (1000, 2))

In [227]:
# Preview the first two product rows to check the available columns
products_train.head(2)

Unnamed: 0,id,locale,title,price,brand,color,size,model,material,author,desc
0,B005ZSSN10,DE,red dragon amberjack 3 steel tip 22 gramm wolf...,30.95,red dragon,unknown,unknown,rdd0089,unknown,unknown,amberjacks steel dartpfeile sind verf gbar in ...
1,B08PRYN6LD,DE,simply keto lower carb schokodrops ohne zucker...,17.9,simply keto,unknown,750 g 1er pack,unknown,unknown,unknown,nat rliche s sse durch erythrit wir stellen oh...


In [228]:
# Preview the first two training sessions (prev_items is a comma-separated string of product IDs)
sessions_train.head(2)

Unnamed: 0,prev_items,next_item,locale
0,"B09W9FND7K,B09JSPLN1M",B09M7GY217,DE
1,"B076THCGSG,B007MO8IME,B08MF65MLV,B001B4TKA0",B001B4THSA,DE


In [229]:
# Preview the first two test sessions (same layout as training, without next_item)
sessions_test.head(2)

Unnamed: 0,prev_items,locale
0,"B08V12CT4C,B08V1KXBQD,B01BVG1XJS,B09VC5PKN5,B0...",DE
1,"B00R9R5ND6,B00R9RZ9ZS,B00R9RZ9ZS",DE


### Recommendations using Co-Occurrences
This recommendation system is based on a product co-occurrence matrix. The matrix is built by analyzing the previous items (`prev_items`) purchased by customers in their sessions. For each session, every pair of products that were purchased together is identified, and each pair is assigned a score counting how frequently the two products were purchased together across all sessions. The matrix is created from all of the scores for all of the product pairs and is stored as a table with one row per product pair.

Once the matrix is created, the function `recommend_products` takes a product ID as input and returns the top N recommendations for that product. The function looks for all pairs in the matrix that contain the given product ID, and then identifies the related product in each pair. It sorts the related products by score in descending order, and returns the top N products as recommendations for the given product ID.

In [222]:
# Function to create co-occurrence matrix
def cooccurrence_matrix(df):
    """Count how often each pair of products appears in the same session.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``prev_items`` column whose values are comma-separated
        strings of product IDs (one string per session).

    Returns
    -------
    pandas.DataFrame
        One row per distinct pair, with columns ``product_pair`` (a sorted
        2-tuple of product IDs) and ``score`` (number of times the pair was
        seen). Rows appear in the order the pairs were first encountered.
    """
    # Split every session up front, exactly once
    item_lists = [raw.split(',') for raw in df['prev_items'].tolist()]

    pair_counts = {}
    for items in item_lists:
        for position, first in enumerate(items):
            # Only look ahead, so each positional pair is visited a single time
            for second in items[position + 1:]:
                # Sorting makes (a, b) and (b, a) share one key
                key = tuple(sorted((first, second)))
                pair_counts[key] = pair_counts.get(key, 0) + 1

    return pd.DataFrame(list(pair_counts.items()), columns=['product_pair', 'score'])


# Create co-occurrence matrix from the training sessions.
# Every pair of positions within a session is counted, so the work grows
# quadratically with session length.
train_cooccurrence = cooccurrence_matrix(sessions_train)


# Function to recommend products
def recommend_products(product_id, cooccurrence_df, top_n=10):
    """Return the products that co-occur most often with ``product_id``.

    Parameters
    ----------
    product_id : str
        Product to look up.
    cooccurrence_df : pandas.DataFrame
        Table with a ``product_pair`` column (2-element pairs of product IDs)
        and a ``score`` column, as produced by ``cooccurrence_matrix``.
        It is not modified.
    top_n : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``related_product`` and ``score``, sorted by ``score`` in
        descending order and keeping the index labels of ``cooccurrence_df``.
        Empty when ``product_id`` appears in no pair (including when the
        table itself is empty).
    """
    # Cast to bool so the mask is always a boolean row indexer; on an empty
    # table the mapped Series is object-dtype and would otherwise be read as
    # a (empty) column selection.
    contains_product = (
        cooccurrence_df['product_pair']
        .map(lambda pair: product_id in pair)
        .astype(bool)
    )

    # .assign builds a new frame rather than writing a column into a filtered
    # slice, which avoids chained assignment (SettingWithCopyWarning).
    pairs = cooccurrence_df.loc[contains_product].assign(
        related_product=lambda frame: frame['product_pair'].map(
            lambda pair: pair[0] if pair[1] == product_id else pair[1]
        )
    )
    recommendations = pairs[['related_product', 'score']].sort_values(by='score', ascending=False)

    # Exclude input product from recommendations: a pair holding the product
    # twice (repeated item within one session) resolves to the product itself.
    recommendations = recommendations[recommendations['related_product'] != product_id].head(top_n)

    return recommendations

In [223]:
# Get recommendations for a single product from the training co-occurrence table
# NOTE(review): the saved output below shows index labels above 13 million, which
# suggests it was produced from a larger train_cooccurrence than the 1000-session
# slice configured in the loading cell -- re-run with Restart & Run All to confirm.
prod_id = 'B08V1KXBQD'                  # Product ID to get recommendations for
prod_rec = 100                          # Number of recommendations for product_id
recommendations = recommend_products(prod_id, train_cooccurrence, top_n=prod_rec)
print(f'Recommendations for {prod_id}:')
recommendations

Recommendations for B08V1KXBQD:


Unnamed: 0,related_product,score
257,B08V12CT4C,651
118398,B07TV364MZ,122
543134,B07TV22X9M,118
394226,B07JG9TFSB,116
254,B08QYYBTMC,93
...,...,...
1206883,B07RSQXF2F,5
13808238,B0B6ZYPL7R,5
469158,B07TRL83YX,5
298809,B07GPT8HPY,5
