In [None]:
!pip install timm
!pip install torchsummary

In [None]:
import numpy as np
import pandas as pd

import torch
import os
from tqdm import tqdm

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,accuracy_score,recall_score, precision_score,classification_report
from sklearn.preprocessing import MinMaxScaler

import random
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary

import pickle

import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

In [None]:
import shutil

# Extract the pooled-feature archive into the local 'fv' directory.
shutil.unpack_archive(filename='pooled_features.zip', extract_dir='fv')

## Load data

In [None]:
# Restore the pickled customer list.
# NOTE: unpickling can execute arbitrary code - only load a trusted custlist.pkl.
with open('custlist.pkl', 'rb') as pkl_file:
    cust_list = pickle.load(pkl_file)

In [None]:
# Column '0' is read as text: it is later matched against the article ids
# taken from `articles_string`, which are strings.
pooledfeatures = pd.read_csv(
    'fv/pooled_features.csv',
    dtype={'0': str},
)
pooledfeatures.head()

In [None]:
# Min-max scale every column from the third one onwards, writing the result
# back into the same frame (the first two columns are left untouched).
scaler = MinMaxScaler().fit(pooledfeatures.iloc[:, 2:])
pooledfeatures.iloc[:, 2:] = scaler.transform(pooledfeatures.iloc[:, 2:])

In [None]:
# Load latest3.csv with every column as text, keep only the rows whose
# customer_id appears in cust_list, and renumber the index from 0.
latest3 = (
    pd.read_csv('latest3.csv', dtype=str)
    .loc[lambda frame: frame['customer_id'].isin(cust_list)]
    .reset_index(drop=True)
)
latest3.head()

## Data Loader

In [None]:
class CustomerData:
    """Map-style dataset returning the feature rows of the three articles of one entry.

    Parameters
    ----------
    pooledfeatures : pandas.DataFrame
        Feature table. Column ``'0'`` holds the article id and every column
        from the third one onwards is used as a feature.
    data : pandas.DataFrame
        Table with an ``'articles_string'`` column of comma-separated article
        ids; the first three ids of each entry are used.

    Notes
    -----
    An article-id -> row-position index is built once in ``__init__`` so that
    ``__getitem__`` no longer scans the whole feature table three times per
    item. Rows of ``pooledfeatures`` must therefore not be added, removed or
    reordered after the dataset has been created (feature *values* may still
    change, they are read at access time).
    """

    def __init__(self,pooledfeatures,data):
        self.pooledfeatures=pooledfeatures
        self.data=data
        # article id -> list of positional row numbers carrying that id.
        self._rows_by_article = {}
        for position, article_id in enumerate(pooledfeatures['0'].to_numpy()):
            self._rows_by_article.setdefault(article_id, []).append(position)

    def __len__(self):
        """Number of entries in ``data``."""
        return len(self.data)

    def _features_for(self, article_id):
        """Return the feature rows of ``article_id`` as a 2-D array (rows, features).

        Raises
        ------
        KeyError
            If the id does not occur in column ``'0'`` of ``pooledfeatures``.
        """
        try:
            positions = self._rows_by_article[article_id]
        except KeyError:
            raise KeyError(
                f"article id {article_id!r} not found in pooledfeatures['0']"
            ) from None
        return self.pooledfeatures.iloc[positions, 2:].to_numpy()

    def __getitem__(self,item):
        """Build the sample for entry ``item``.

        Returns
        -------
        dict
            ``"transaction"``: float tensor stacking the feature rows of the
            three articles, shape (3, rows_per_article, n_features).
            ``"targets"``: float tensor with the feature rows of the third
            article, shape (rows_per_article, n_features).

        Raises
        ------
        IndexError
            If the entry lists fewer than three article ids.
        KeyError
            If one of the ids is missing from the feature table.
        ValueError
            If the three articles do not match the same number of rows.
        """
        splits = self.data['articles_string'][item].split(',')
        articles = [self._features_for(splits[k]) for k in range(3)]
        # The third article doubles as the target.
        target = articles[2]
        return { "transaction": torch.tensor(np.stack(articles), dtype=torch.float),
                 "targets":torch.tensor(target,dtype=torch.float)}

In [None]:
# Wrap the filtered table in the dataset and iterate it in its original
# order, 32 items per batch, using two loader workers.
dataset = CustomerData(data=latest3, pooledfeatures=pooledfeatures)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

## Create Seq2Seq model

In [None]:
class Encoder_Decoder(nn.Module):
    """Encode three 1280-d article vectors separately and decode their concatenation.

    The same ``encoder`` (1280 -> 320 -> 160 -> 30) is applied to each of the
    three articles, the three 30-d codes are concatenated into a 90-d vector,
    and ``decoder`` (90 -> 512 -> 1024 -> 1280, sigmoid output) maps it back to
    a 1280-d vector.

    The class name and the ``encoder`` / ``decoder`` attribute names and layer
    order are kept unchanged so previously saved checkpoints still load.
    """

    def __init__(self):
        super().__init__()

        self.encoder = nn.Sequential(nn.Linear(in_features=1280, out_features=320),
                                     nn.LeakyReLU(),
                                     nn.Dropout(p=0.5),
                                     nn.Linear(in_features=320, out_features=160),
                                     nn.LeakyReLU(),
                                     nn.Dropout(p=0.5),
                                     nn.Linear(in_features=160, out_features=30)
                                   )
        
        self.decoder = nn.Sequential(nn.Linear(in_features=90, out_features=512),
                                     nn.LeakyReLU(),
                                     nn.Dropout(p=0.5),
                                     nn.Linear(in_features=512, out_features=1024),
                                     nn.LeakyReLU(),
                                     nn.Dropout(p=0.5),
                                     nn.Linear(in_features=1024, out_features=1280),
                                     nn.Sigmoid()
                                   )

    def forward(self, x):
        """Run the whole batch through encoder and decoder in one pass.

        Parameters
        ----------
        x : torch.Tensor
            4-D tensor of shape (batch, articles, rows, 1280). Only the first
            three entries along the ``articles`` axis are used.

        Returns
        -------
        torch.Tensor
            Shape (batch * rows, 1280), ordered sample by sample, exactly like
            concatenating the per-sample decoder outputs along dim 0.
            An empty batch yields an empty (0, 1280) tensor.
        """
        # nn.Linear acts on the last axis only, so every article of every
        # sample can be encoded at once instead of looping over the batch.
        encoded = self.encoder(x[:, :3])  # (batch, 3, rows, 30)
        batch, n_items, rows, code = encoded.shape
        # Bring the article axis next to the code axis and merge them so each
        # row becomes [code(article 1), code(article 2), code(article 3)].
        merged = encoded.permute(0, 2, 1, 3).reshape(batch, rows, n_items * code)
        decoded = self.decoder(merged)  # (batch, rows, 1280)
        # Explicit sizes (no -1) keep the reshape valid for empty batches.
        return decoded.reshape(batch * rows, decoded.shape[-1])

In [None]:
# torch.load restores the complete pickled module, so constructing a fresh
# Encoder_Decoder() first is unnecessary (it would be overwritten right away);
# the Encoder_Decoder class only has to be defined before this cell runs.
# map_location moves the stored tensors to the GPU regardless of the device
# the checkpoint was saved from.
# NOTE(review): unpickling executes arbitrary code - only load a trusted
# checkpoint. Recent PyTorch releases default torch.load to weights_only=True,
# which rejects whole-module pickles; pass weights_only=False there.
model = torch.load('Encoder_Decoder.pt', map_location='cuda')
model.eval()
model.to('cuda')

## Get outputs

In [None]:
# Run the model over every batch without gradient tracking and collect one
# output row per dataset item, in loader order, as plain Python lists.
outputs_user = []
with torch.no_grad():
    for data in tqdm(dataloader):
        inputs = data["transaction"].to('cuda', dtype=torch.float)
        # Call the module itself rather than .forward() so registered hooks run;
        # no .detach() is needed because gradients are disabled here.
        outputs = model(inputs)
        outputs_user.extend(outputs.cpu().numpy().tolist())

In [None]:
pd.DataFrame(np.hstack([latest3[['customer_id',]].values, np.array(outputs_user)])).to_csv('user_features.csv',index=False)

In [None]:
pd.DataFrame(np.hstack([latest3[['customer_id',]].values, np.array(outputs_user)])).shape