# Configurations

### Import

In [1]:
import os
import joblib

from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd; pd.set_option("display.max_columns", None)
import matplotlib.pyplot as plt
from IPython.display import clear_output
from sklearn.model_selection import train_test_split

import torch
from PIL import Image
from torchvision import transforms
from transformers import SwinModel
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


### Params

In [2]:
# General params
random_state = 0  # seed handed to train_test_split so the train/valid split is repeatable

# Dataset params
# When False, the Read and Preprocess cells run and (re)write ./df_prep.parquet from the raw CSVs.
# When True, both cells are skipped and the parquet file already on disk is loaded as-is.
is_data_exist = True

### Read

In [3]:
if not is_data_exist:
    # Transactions: keep article_id as a string and parse the transaction date.
    df_raw = pd.read_csv(
        "../HnM/transactions_train.csv",
        parse_dates=["t_dat"],
        dtype={"article_id": str},
    )

    # Article metadata: keep only the id and the columns whose name contains "name".
    df_article = pd.read_csv("../HnM/articles.csv", dtype={"article_id": str})
    name_cols = [col for col in df_article.columns if "name" in col]
    df_article = df_article[["article_id"] + name_cols]

### Preprocess

In [4]:
def expand_daily_dates(x):
    """Put one article's daily rows onto a gap-free daily calendar.

    Args:
        x: rows of a single article with at least the columns
            ``article_id``, ``t_dat``, ``sales`` and ``price``.

    Returns:
        A frame with one row per calendar day between the article's first and
        last ``t_dat``. Days without a transaction get ``sales == 0`` and the
        most recent earlier ``price``.
    """
    article_id = x["article_id"].iloc[0]

    # Every calendar day between the first and the last observed sale.
    date_ref = pd.DataFrame(
        pd.date_range(x["t_dat"].min(), x["t_dat"].max(), freq="D"),
        columns=["t_dat"],
    )
    x = pd.merge(x, date_ref, on="t_dat", how="right")

    # Fill the rows that the right-merge introduced.
    x["article_id"] = x["article_id"].fillna(article_id)
    x["sales"] = x["sales"].fillna(0)
    x["price"] = x["price"].ffill()
    return x


def add_week_columns(df):
    """Return a copy of ``df`` with ``year``, ``month``, ``week`` and ``week_id`` columns.

    ``year`` and ``week`` are the ISO-8601 calendar year and week of ``t_dat``
    (as strings, ``week`` zero-padded to two digits), ``month`` is the
    zero-padded calendar month, and ``week_id`` is the integer
    ``ISO year * 100 + ISO week``.

    The ISO year is taken from ``isocalendar()`` instead of being derived from
    month/week comparisons: the hand-written rule moved every January date in
    weeks 02-05 into the previous year, which put those weeks in the wrong
    position once the series was sorted by ``week_id``.
    """
    iso = df["t_dat"].dt.isocalendar()
    out = df.copy()
    out["year"] = iso["year"].astype(str)
    out["month"] = out["t_dat"].dt.month.astype(str).str.zfill(2)
    out["week"] = iso["week"].astype(str).str.zfill(2)
    out["week_id"] = iso["year"].astype(int) * 100 + iso["week"].astype(int)
    return out


# NOTE: this cell only runs when is_data_exist is False. A ./df_prep.parquet
# written by an earlier version of this cell keeps its old contents until the
# cell is re-run.
if not is_data_exist:
    df_prep = df_raw.copy()

    # Make daily sales: number of transactions and mean price per article and day.
    df_prep1 = df_prep.groupby(["article_id", "t_dat"], as_index=False).agg(
        sales=("customer_id", "count"), price=("price", "mean")
    )
    df_prep1["avg"] = df_prep1.groupby("article_id")["sales"].transform("mean")

    # Expand dates so that days without sales appear as explicit zero rows.
    df_prep2 = df_prep1.groupby("article_id").apply(expand_daily_dates).reset_index(drop=True)

    # Make week columns
    df_prep3 = add_week_columns(df_prep2)

    # Aggregate by week
    df_prep4 = (
        df_prep3.groupby(["article_id", "week_id"], as_index=False)
        .agg({"sales": "sum", "price": "mean", "week": "max"})
        .sort_values(["article_id", "week_id"])
    )
    # Replace exact zeros with a tiny positive value.
    df_prep4["sales"] = df_prep4["sales"].mask(df_prep4["sales"] == 0, 1e-5)

    # Group by article_id and collect each column into one array per article
    df_sales = df_prep4.groupby("article_id", as_index=False)["sales"].apply(np.array)
    df_price = df_prep4.groupby("article_id", as_index=False)["price"].apply(np.array)
    df_week = df_prep4.groupby("article_id", as_index=False)["week"].apply(np.array)

    df_prep5 = pd.merge(df_sales, df_price, on="article_id")
    df_prep5 = pd.merge(df_prep5, df_week, on="article_id")

    # Generate image path and keep only the articles whose image file exists
    df_prep6 = df_prep5.copy()
    df_prep6["img_path"] = df_prep6["article_id"].apply(lambda x: f'../HnM/images/{x[:3]}/{x}.jpg')
    has_image = df_prep6["img_path"].apply(os.path.isfile)
    df_prep6 = df_prep6[has_image]

    # Join with article info
    df_prep7 = pd.merge(df_prep6, df_article, on="article_id", how="left")

    # Calculate average weekly sales: over weeks with sales > 1, and over all weeks
    df_prep8 = df_prep7.copy()
    df_prep8["avg_abv1"] = df_prep8["sales"].apply(lambda x: np.mean([i for i in x if i > 1]))
    df_prep8["avg"] = df_prep8["sales"].apply(np.mean)

    df_prep8.to_parquet("./df_prep.parquet")

# Make dataset

In [5]:
# Load the preprocessed per-article table written by the Preprocess cell.
df_read = pd.read_parquet("./df_prep.parquet")
print(f"df_prep.shape: {df_read.shape}")

df_prep.shape: (104106, 19)


In [33]:
# Filter thresholds, named so that the log labels below cannot drift from the filters.
min_weeks = 52      # keep articles with more than this many weekly observations
min_avg_sales = 10  # keep articles whose average weekly sales exceed this value

df_prep = df_read.copy()
# Filter
df_prep["size"] = df_prep["sales"].str.len()  # number of weekly observations per article
df_prep = df_prep[df_prep["size"] > min_weeks]; print(f"df_prep size>{min_weeks} shape: {df_prep.shape}")
df_prep = df_prep[df_prep["avg"] > min_avg_sales]; print(f"df_prep avg>{min_avg_sales} shape: {df_prep.shape}")

# Split train valid (split is by article, 70/30)
df_train, df_valid = train_test_split(df_prep, test_size=0.3, random_state=random_state)
print(f"df_train.shape: {df_train.shape}"); print(f"df_valid.shape: {df_valid.shape}")

df_prep size>52 shape: (27791, 20)
df_prep avg>50 shape: (7228, 20)
df_train.shape: (5059, 20)
df_valid.shape: (2169, 20)


In [34]:
# Show a single training row to check the columns and the per-article array layout.
df_train.head(1)

Unnamed: 0,article_id,sales,price,week,img_path,prod_name,product_type_name,product_group_name,graphical_appearance_name,colour_group_name,perceived_colour_value_name,perceived_colour_master_name,department_name,index_name,index_group_name,section_name,garment_group_name,avg_abv1,avg,size
2600,417951001,"[19.0, 14.0, 14.0, 11.0, 9.0, 40.0, 27.0, 42.0...","[0.013228046811945076, 0.013291796379568744, 0...","[02, 03, 04, 05, 38, 39, 40, 41, 42, 43, 44, 4...",../HnM/images/041/0417951001.jpg,Support 20 den 1p tights,Underwear Tights,Socks & Tights,Solid,Beige,Medium Dusty,Beige,Tights basic,Lingeries/Tights,Ladieswear,"Womens Nightwear, Socks & Tigh",Socks and Tights,24.196262,24.196262,107


In [29]:
class Dataset(torch.utils.data.Dataset):
    """Per-article samples of (sales history, product image, future sales).

    Args:
        data: DataFrame with a ``sales`` column (one 1-D numeric sequence per
            row) and an ``img_path`` column (path to the article's image).
        pred_len: number of trailing time steps held out as the target.
            Defaults to 12, the value previously hard-coded.

    Each item is a tuple ``(sales, img, y)``:
        * ``sales``: float tensor ``(len - pred_len, 1)``, the history.
        * ``img``: float tensor ``(3, 224, 224)``, resized and normalised image.
        * ``y``: float tensor ``(pred_len, 1)``, the held-out tail.
    """

    def __init__(self, data, pred_len=12):
        if pred_len <= 0:
            raise ValueError("pred_len must be a positive integer")
        self.data = data
        self.pred_len = pred_len
        # Built once here rather than on every __getitem__ call.
        self.transform = transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]) # Transform image based on ImageNet standard

    def _split_sales(self, idx):
        """Return ``(history, target)`` for row ``idx``, each shaped ``(steps, 1)``."""
        # torch.tensor copies the data, so a read-only source array is never
        # shared with the returned tensors.
        series = torch.tensor(self.data["sales"].values[idx], dtype=torch.float32)
        history = series[:-self.pred_len].unsqueeze(-1)
        target = series[-self.pred_len:].unsqueeze(-1)
        return history, target

    def __getitem__(self, idx):
        sales, y = self._split_sales(idx)

        img_path = self.data["img_path"].values[idx]
        # The context manager closes the file handle as soon as the pixels are converted.
        with Image.open(img_path) as raw_img:
            img = self.transform(raw_img.convert("RGB"))

        return sales, img, y

    def __len__(self):
        return len(self.data)

def func(data):
    """Collate a list of ``(sales, img, y)`` samples into three batch tensors.

    Each of the three fields is gathered across the samples and combined with
    ``pad_sequence(..., batch_first=True)``, zero-padding along the first
    dimension of every element where lengths differ.
    """
    sales_seqs, images, targets = map(list, zip(*data))

    pad = torch.nn.utils.rnn.pad_sequence
    sales = pad(sales_seqs, padding_value=0, batch_first=True)
    img = pad(images, padding_value=0, batch_first=True)
    y = pad(targets, padding_value=0, batch_first=True)
    return sales, img, y
    
train_dataset = Dataset(df_train)
valid_dataset = Dataset(df_valid)

# Pass the collate function itself rather than wrapping it in a lambda: the
# wrapper adds nothing, and a lambda cannot be pickled for worker processes.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=func)
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=2, shuffle=False, collate_fn=func)

# Sanity check: shape of the image tensor in one training batch.
next(iter(train_dataloader))[1].shape

  sales = torch.FloatTensor(self.data["sales"].values[idx][:-12]).unsqueeze(-1)


torch.Size([64, 3, 224, 224])

# Make model

In [30]:
class TokenEmbedding(torch.nn.Module):
    """Embedding lookup whose output is scaled by ``sqrt(d_model)``.

    Args:
        vocab_size: number of rows in the embedding table.
        d_model: embedding dimension.
    """

    def __init__(self, vocab_size, d_model):
        super().__init__()
        self.d_model = d_model
        self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=d_model)

    def forward(self, x):
        """Return ``embedding(x) * sqrt(d_model)`` for integer index tensor ``x``."""
        embedded = self.embedding(x)
        # ``** 0.5`` instead of math.sqrt: the ``math`` module is not imported
        # in this notebook, so the previous call raised NameError.
        return embedded * (self.d_model ** 0.5)

class PositionalEncoding(torch.nn.Module):
    """Add fixed sinusoidal position encodings to a batch-first sequence, then apply dropout.

    PE(pos, 2i)   = sin(pos / 10000^{2i/d_model})
    PE(pos, 2i+1) = cos(pos / 10000^{2i/d_model})

    Args:
        max_len: number of positions in the precomputed table.
        d_model: feature dimension of the sequences.
        dropout: dropout probability applied to the sum.

    The table is stored as the non-persistent buffer ``pos_encoded`` of shape
    ``(max_len, d_model)``. It therefore moves with the module on ``.to(...)``
    and is not written to ``state_dict``.
    """

    def __init__(self, max_len, d_model, dropout):
        super().__init__()
        self.dropout = torch.nn.Dropout(dropout)

        position = torch.arange(max_len, dtype=torch.float32).reshape(-1, 1)
        pair_index = torch.arange(d_model) // 2  # columns 2i and 2i+1 share exponent 2i/d_model
        div_term = torch.pow(10000.0, 2 * pair_index / d_model).reshape(1, -1)
        angles = position / div_term

        table = torch.empty_like(angles)
        table[:, 0::2] = torch.sin(angles[:, 0::2])
        table[:, 1::2] = torch.cos(angles[:, 1::2])
        self.register_buffer("pos_encoded", table, persistent=False)

    def forward(self, x):
        """Return ``dropout(x + PE[:seq_len])`` for ``x`` indexed as ``(batch, seq_len, d_model)``."""
        # .to() is a no-op when the buffer already lives on x's device; it only
        # matters if the module itself was never moved.
        pos = self.pos_encoded[:x.shape[1], :].to(device=x.device)
        return self.dropout(x + pos)

class Mask(torch.nn.Module):
    """Build additive attention masks (0 = attend, -inf = blocked) from a 2-D tensor.

    Both masks are created on the device of the input tensor, so the module
    does not rely on any global device setting.
    """

    def __init__(self):
        super().__init__()

    def get_padding_mask(self, arr):
        """Return a float mask shaped like ``arr``: -inf where ``arr == 0``, else 0."""
        mask = torch.zeros(arr.shape, dtype=torch.float32, device=arr.device)
        return mask.masked_fill(arr == 0, float("-inf"))

    def get_lookahead_mask(self, arr):
        """Return a ``(seq_len, seq_len)`` mask with -inf strictly above the diagonal.

        ``seq_len`` is ``arr.shape[1]``, so ``arr`` must have at least two dimensions.
        """
        seq_len = arr.shape[1]
        return torch.full((seq_len, seq_len), float("-inf"), device=arr.device).triu(1)

    def forward(self, arr):
        """Return ``(padding_mask, lookahead_mask)`` for ``arr``."""
        padding_mask = self.get_padding_mask(arr)
        lookahead_mask = self.get_lookahead_mask(arr)
        return padding_mask, lookahead_mask

In [31]:
import gc
# Collect unreachable Python objects first so that any CUDA tensors they still
# reference are freed; only then can empty_cache() release those cached blocks.
gc.collect()
torch.cuda.empty_cache()

class Transformer(torch.nn.Module):
    """Sales forecaster: a Transformer encoder over the sales history and a
    Transformer decoder whose target sequence is the Swin image tokens.

    Args:
        d_model: width of the encoder/decoder.
        input_size: number of features per sales time step.
        output_size: accepted for interface compatibility; not used by this class.
        max_seq_len: number of positions in the positional-encoding table.
        nhead: attention heads per layer.
        num_layers: number of encoder layers and of decoder layers.
        d_ff: feed-forward width inside each layer.
        dropout: dropout probability.
        pred_len: number of values predicted per sample (default 11, the
            value previously hard-coded in the output layer).
    """

    def __init__(self, d_model, input_size, output_size, max_seq_len, nhead, num_layers, d_ff=512, dropout=0.3, pred_len=11):
        super().__init__()
        self.enc_mask = Mask()
        self.enc_linear_embedding = torch.nn.Linear(input_size, d_model)
        self.enc_pos_encoding = PositionalEncoding(max_seq_len, d_model, dropout)
        self.encoder = torch.nn.TransformerEncoder(torch.nn.TransformerEncoderLayer(d_model, nhead, d_ff, dropout, batch_first=True), num_layers)

        self.dec_mask = Mask()
        self.swin_transformer = SwinModel.from_pretrained("microsoft/swin-tiny-patch4-window7-224") # Get pre-trained SwinTransformer
        self.swin_linear = torch.nn.Linear(self.swin_transformer.config.hidden_size, d_model)
        self.decoder = torch.nn.TransformerDecoder(torch.nn.TransformerDecoderLayer(d_model, nhead, d_ff, dropout, batch_first=True), num_layers)

        # 49 is assumed to be the number of tokens in Swin's last_hidden_state
        # for this checkpoint at 224x224 input - TODO confirm if the backbone
        # or image size changes.
        self.linear1 = torch.nn.Linear(d_model*49, d_model)
        self.linear2 = torch.nn.Linear(d_model, pred_len)

    def forward(self, enc_sales, dec_input):
        """Predict ``pred_len`` values per sample.

        Args:
            enc_sales: sales history, indexed as ``(batch, seq_len, input_size)``;
                time steps equal to 0 are treated as padding.
            dec_input: image batch passed to the Swin backbone.

        Returns:
            Tensor of shape ``(batch, pred_len)``; values are non-negative
            because of the final ReLU.
        """
        # squeeze(-1) removes only the trailing feature axis. A bare squeeze()
        # would also drop the batch axis when the batch holds a single sample,
        # and the mask builder needs a 2-D input.
        enc_padding_mask, _ = self.enc_mask(enc_sales.squeeze(-1))
        enc_output = torch.relu(self.enc_linear_embedding(enc_sales))

        enc_output = self.enc_pos_encoding(enc_output)
        enc_output = self.encoder(enc_output, src_key_padding_mask=enc_padding_mask)

        # Image tokens act as the decoder's target sequence; the encoded sales
        # history is the memory they attend to.
        dec_output = self.swin_transformer(dec_input).last_hidden_state
        dec_output = torch.relu(self.swin_linear(dec_output))
        dec_output = self.decoder(tgt=dec_output, memory=enc_output)

        dec_output = dec_output.flatten(1)
        dec_output = torch.relu(self.linear1(dec_output))
        dec_output = torch.relu(self.linear2(dec_output))

        return dec_output

In [32]:
from IPython.display import clear_output
import matplotlib.pyplot as plt

# Train valid compare
# Build the model and move it to the selected device.
model = Transformer(
    d_model=128,
    input_size=1,
    output_size=1,
    max_seq_len=150,
    nhead=4,
    num_layers=4,
)
model.to(device)

# Adam over all model parameters, trained against mean squared error.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.MSELoss()

def train(epoch):
    """Run one training pass over ``train_dataloader``.

    Args:
        epoch: epoch index, used only in the progress line.

    Returns:
        Mean of the per-batch losses, or NaN if the dataloader yields no batch.
    """
    model.train()
    total_len = len(train_dataloader)
    total_loss = 0
    mean_loss = float("nan")  # defined up front so an empty loader cannot raise UnboundLocalError
    for n, (sales, img, y) in enumerate(train_dataloader):
        # Train
        optimizer.zero_grad()
        pred = model(sales.to(device), img.to(device))
        # The first held-out step is dropped before comparing with the prediction
        # - NOTE(review): confirm this is intended. squeeze(-1) removes only the
        # trailing feature axis, so a batch of one sample keeps its batch axis.
        loss = loss_fn(pred, y[:, 1:].squeeze(-1).to(device))
        loss.backward()
        optimizer.step()

        # Report
        total_loss += loss.item()
        mean_loss = total_loss / (n+1)
        print(f"\r {epoch}:{n}/{total_len} mean_loss: {mean_loss}", end="")

    print()
    return mean_loss

def val():
    """Evaluate the model on every batch of ``valid_dataloader``.

    Returns:
        Mean of the per-batch losses, or NaN if the dataloader yields no batch.
    """
    model.eval()
    total_loss = 0.0
    n_batches = 0
    with torch.no_grad():
        for sales, img, y in valid_dataloader:
            # Pred
            pred = model(sales.to(device), img.to(device))
            # squeeze(-1) removes only the trailing feature axis, so a final
            # batch holding a single sample keeps its batch axis.
            loss = loss_fn(pred, y[:, 1:].squeeze(-1).to(device))

            # Report
            total_loss += loss.item()
            n_batches += 1

    # Return only after the whole loader has been consumed. Returning from
    # inside the loop reported the loss of the first batch alone.
    # NOTE: every batch now reaches the model, including a trailing batch of
    # one sample, so model.forward must accept a batch dimension of 1.
    return total_loss / n_batches if n_batches else float("nan")
 
def plot(train_loss_li, val_loss_li):
    """Redraw the train/valid loss curves, replacing the cell's previous output."""
    clear_output(wait=True)
    for curve_label, losses in (("train", train_loss_li), ("valid", val_loss_li)):
        plt.plot(losses, label=curve_label)
    plt.legend()
    plt.show()

# Train for a fixed number of epochs, validating and redrawing the loss curves after each one.
epoch = 100
train_loss_li, val_loss_li = [], []
for e in range(epoch):
    train_loss, val_loss = train(e), val()  # train first, then validate

    train_loss_li.append(train_loss)
    val_loss_li.append(val_loss)
    plot(train_loss_li, val_loss_li)

 0:20/158 mean_loss: 394.18178957984563

KeyboardInterrupt: 