
##### 1) Recommending Top 12 Articles
##### 2) Recommend items that are bought together with previous purchases

# If you find this notebook useful or interesting, please support it with an upvote 😊

In [None]:
import sys
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import random
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
import implicit
import plotly.express as px

#Importing the necessary libraries
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline


from collections import Counter
from PIL import Image
from pathlib import Path

# Load data

In [None]:
%%time
# Root folder of the competition dataset.
path = Path("/kaggle/input/h-and-m-personalized-fashion-recommendations/")

# Id columns are read as strings rather than letting pandas infer a numeric dtype.
articles_df = pd.read_csv(path / "articles.csv", dtype={"article_id": str})
cust_df = pd.read_csv(path / "customers.csv", dtype={"customer_id": str})
trans_df = pd.read_csv(
    path / "transactions_train.csv",
    dtype={"article_id": str, "customer_id": str},
)

# Parse the purchase date once so the date filters below compare datetimes.
trans_df["t_dat"] = pd.to_datetime(trans_df["t_dat"])

# Recent-activity windows: every transaction on or after the cutoff date.
monthly_df = trans_df.query("'2020-9-1' <= t_dat")
weekly_df = trans_df.query("'2020-9-16' <= t_dat")


In [None]:
# Print the full schema (every column, dtype and non-null count) before trimming.
articles_df.info()

# Keep only the attributes used below. `.copy()` makes the result an independent
# frame, so the sales-count columns that are added to it later do not trigger
# SettingWithCopyWarning.
articles_df = articles_df[
    ['article_id', 'product_type_name', 'product_group_name', 'colour_group_code']
].copy()

# A bare expression is rendered only when it is the last statement of a cell,
# so the preview goes at the end (it was silently discarded mid-cell before).
articles_df.head()

In [None]:
cust_df.info()

# Narrow the customer table to the attributes used below.
cust_df = cust_df[
    [
        'customer_id',
        'club_member_status',
        'fashion_news_frequency',
        'age',
    ]
]

# Number of distinct customer ids in customers.csv
# (a missing id, if present, is counted as one value).
n_cust = cust_df['customer_id'].nunique(dropna=False)
print("No.of.unique values :", n_cust)

In [None]:
# Customer count per (age, newsletter frequency) pair. groupby drops rows whose
# age or fashion_news_frequency is missing (pandas default).
dfg = (
    cust_df[['age', 'fashion_news_frequency', 'customer_id']]
    .groupby(['age', 'fashion_news_frequency'])
    .count()
    .reset_index()
    .rename(columns={"customer_id": "count"})
)

# Stacked bars: one bar per age, coloured by newsletter frequency.
fig = px.bar(
    dfg,
    x="age",
    y="count",
    color='fashion_news_frequency',
    color_discrete_sequence=px.colors.diverging.PRGn,
    template="plotly_white",
)
fig.update_layout(
    title="Number of customer by age",
    xaxis_title="Age",
    yaxis_title="Count",
    legend_title_text='fashion_news_frequency',
)

fig.show()

In [None]:
trans_df.info()

# Derive calendar fields from the purchase date.
# The conversion is a no-op when t_dat is already datetime64; it keeps this
# cell runnable on its own.
trans_df['t_dat'] = pd.to_datetime(trans_df['t_dat'])

# Zero-pad the month so the key really has the YYYY_MM shape ('2020_09', not
# '2020_9') and sorts chronologically when compared as a string.
trans_df['YYYY_MM'] = (
    trans_df['t_dat'].dt.year.astype(str)
    + '_'
    + trans_df['t_dat'].dt.month.astype(str).str.zfill(2)
)
trans_df['year'] = trans_df['t_dat'].dt.year
trans_df['month'] = trans_df['t_dat'].dt.month

# Date range covered by the transactions: earliest, then latest purchase date.
print(trans_df['t_dat'].min())
print(trans_df['t_dat'].max())

In [None]:
# Attach the customer attributes to every transaction. A left join from the
# transactions keeps all transaction rows, so customers that have no
# transaction do not appear in `df`.
df = trans_df.merge(cust_df, how='left', on='customer_id')

In [None]:
# Distinct customers that appear in at least one transaction.
n = df['customer_id'].nunique(dropna=False)
print("No.of.unique customer that have transaction in transactions_train.csv :", n)

# Customers listed in customers.csv but absent from the transactions
# (assumes every transacting customer is also listed in customers.csv).
n_cust_notintran = n_cust - n
print("No.of.customer that have no transaction in transactions_train.csv :", n_cust_notintran)

In [None]:
# Attach the article attributes to every transaction row. A left join keeps
# each transaction even when its article_id has no match in articles_df.
df = df.merge(articles_df, how='left', on='article_id')

In [None]:
# One row per (purchase date, customer): `quantity` is the number of
# transaction rows that customer has on that date.
count_df = (
    df[['t_dat', 'customer_id', 'article_id']]
    .groupby(['t_dat', 'customer_id'])
    .size()
    .reset_index(name='quantity')
)
count_df.head()

In [None]:
# Headline numbers: distinct customers and articles, then the spread of
# per-(date, customer) basket sizes. The average is truncated to an integer.
print(f'Number of unique customers: {count_df["customer_id"].nunique()}')
print(f'Number of unique items: {df["article_id"].nunique()}')

print(f'Average purchase quantity per interaction: {int(count_df["quantity"].mean())}')
print(f'Minimum purchase quantity per interaction: {count_df["quantity"].min()}')
print(f'Maximum purchase quantity per interaction: {count_df["quantity"].max()}')


### Find the Monthly Top 12 Articles 
For customers who have no transaction history (so nothing can be learned about them), I would recommend the Top 12 best-selling items of the latest month.
Idea from https://www.kaggle.com/negoto/best-selling-items-catalog-like-eda-of-articles

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import Counter
from PIL import Image
from pathlib import Path


def show_images(article_ids, cols=1, rows=-1,
                image_root="/kaggle/input/h-and-m-personalized-fashion-recommendations/images"):
    """Plot the product photo of each article in a grid of subplots.

    Parameters
    ----------
    article_ids : int, str, or iterable of int/str
        One article id or several. Each id is rendered as a 10-character
        string, left-padded with zeros; longer strings keep their last 10
        characters.
    cols : int, default 1
        Number of grid columns.
    rows : int, default -1
        Number of grid rows. A negative value means "as many rows as needed
        for ``len(article_ids)`` images".
    image_root : str or path-like
        Folder that contains the ``<first 3 id chars>/<id>.jpg`` image tree.

    Notes
    -----
    An article whose image file is missing or unreadable keeps its titled,
    empty tile, so one absent photo does not break the whole grid. Errors
    other than ``OSError`` are no longer swallowed.
    """
    if isinstance(article_ids, (int, str)):
        article_ids = [article_ids]
    # Materialise to a list so positions are used, not labels: indexing a
    # pandas Series with [i] looks up the index label, not the i-th element.
    article_ids = list(article_ids)
    article_count = len(article_ids)

    if rows < 0:
        # Ceiling division: exactly enough rows, with no spare empty row when
        # the count is a multiple of `cols`.
        rows = max(1, -(-article_count // cols))

    plt.figure(figsize=(3 + 3.5 * cols, 3 + 5 * rows))
    for position, raw_id in enumerate(article_ids, start=1):
        article_id = str(raw_id).zfill(10)[-10:]
        plt.subplot(rows, cols, position)
        plt.axis('off')
        plt.title(article_id)
        try:
            # The context manager closes the file handle once the pixels have
            # been handed to matplotlib.
            with Image.open(f"{image_root}/{article_id[:3]}/{article_id}.jpg") as image:
                plt.imshow(image)
        except OSError:
            # Missing or unreadable image: leave the tile blank.
            continue


# Units sold per article over three horizons: the whole history, the monthly
# window and the weekly window.
sales_counts = Counter(trans_df.article_id)
monthly_sales_counts = Counter(monthly_df.article_id)
weekly_sales_counts = Counter(weekly_df.article_id)

# Look the counts up for all articles at once instead of writing them row by
# row with `.at[i, ...]`, which was slow and assumed index labels 0..n-1.
# Series.map honours the Counter's default for missing keys, so an article
# with no sale in a window gets 0 (not NaN) and the columns are integers.
# `assign` returns a new frame, so nothing is written into a slice of the
# original articles table.
article_ids = articles_df["article_id"]
articles_df = articles_df.assign(
    sales_count=article_ids.map(sales_counts),
    monthly_sales_count=article_ids.map(monthly_sales_counts),
    weekly_sales_count=article_ids.map(weekly_sales_counts),
)

In [None]:
# Rank the articles by units sold in the monthly window, best seller first.
articles_df = articles_df.sort_values(by="monthly_sales_count", ascending=False)

# Show the 12 best sellers in a grid that is 6 images wide.
temp = articles_df["article_id"].head(12)
show_images(temp.tolist(), 6)

## Work in progress.

## Please upvote if you like it. Thanks!