In [1]:
# (Re)load IPython's autoreload extension and select mode 2, so edits to imported
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import json
import math
import numpy as np
import pickle
import random
import time
from tqdm import tqdm
from datetime import datetime

from stgnn import Model
from stgnn_transformer import Model as Model2
# from stgnn_handler import ForecastDataset, train, test
# from utils import convert_df_wavelet_input, dl_preprocess_data_test
# from utils import rmse, mape, nrmse, nrmse2

import torch
import torch.utils.data as torch_data
import torch.nn as nn
from tensorflow.random import set_seed
from numpy.random import seed
import shap

%matplotlib inline

In [3]:
# --- Dataset locations ------------------------------------------------------
data_type = "nondisjoint"  # one of: "nondisjoint", "disjoint"
base_dir = "/recsys_data/RecSys/fashion/polyvore-dataset/polyvore_outfits"
embed_dir = "/recsys_data/RecSys/fashion/polyvore-dataset/precomputed"
# Split-specific files sit in a sub-directory named after the split variant;
# images sit in an "images" sub-directory of the same root.
train_dir, image_dir = (os.path.join(base_dir, sub) for sub in (data_type, "images"))

# --- File names -------------------------------------------------------------
train_json, valid_json, test_json = (
    f"{split}.json" for split in ("train", "valid", "test")
)
train_file, valid_file, test_file = (
    f"compatibility_{split}.txt" for split in ("train", "valid", "test")
)
item_file = "polyvore_item_metadata.json"
outfit_file = "polyvore_outfit_titles.json"

# --- Initial model settings -------------------------------------------------
# NOTE(review): model_type / include_text / batch_size are assigned again in a
# later configuration cell of this notebook; the later values win once it runs.
model_type = "rnn"  # alternative: "set-transformer"
include_text, batch_size = True, 32

## Read all the required files

In [4]:
def _read_json(directory, filename):
    """Return the parsed contents of the JSON file ``directory/filename``."""
    with open(os.path.join(directory, filename), 'r') as handle:
        return json.load(handle)


# Per-split outfit lists are read from the split-variant directory.
train_pos = _read_json(train_dir, train_json)
valid_pos = _read_json(train_dir, valid_json)
test_pos = _read_json(train_dir, test_json)

# Item metadata and outfit titles are read from the dataset root.
pv_items = _read_json(base_dir, item_file)
pv_outfits = _read_json(base_dir, outfit_file)

print(f"Total {len(train_pos)}, {len(valid_pos)}, {len(test_pos)} outfits in train, validation and test split, respectively")

Total 53306, 5000, 10000 outfits in train, validation and test split, respectively


In [5]:
def _outfit_size_stats(column, outfits):
    """Summarise (via ``describe()``) the number of items per outfit in a column named ``column``."""
    sizes = [len(outfit['items']) for outfit in outfits]
    return pd.DataFrame({column: sizes}).describe()


df1 = _outfit_size_stats("train", train_pos)
df2 = _outfit_size_stats("valid", valid_pos)
df3 = _outfit_size_stats("test", test_pos)

# Put the three one-column summaries side by side, aligned on the statistic name.
df = df1.join(df2).join(df3)
df

Unnamed: 0,train,valid,test
count,53306.0,5000.0,10000.0
mean,5.342119,5.3562,5.3506
std,1.604385,1.624146,1.624729
min,2.0,2.0,2.0
25%,4.0,4.0,4.0
50%,5.0,5.0,5.0
75%,6.0,6.0,6.0
max,19.0,14.0,17.0


In [6]:
def _read_compatibility_file(path):
    """Read a whitespace-separated compatibility file.

    Each line is split on whitespace: the first field is kept (as a string) as
    the label and the remaining fields as that example's item tokens.

    Returns:
        (examples, labels): two parallel lists, one entry per line.
    """
    examples, labels = [], []
    with open(path, 'r') as handle:
        for row in handle:
            fields = row.strip().split()
            labels.append(fields[0])
            examples.append(fields[1:])
    return examples, labels


train_X, train_y = _read_compatibility_file(os.path.join(train_dir, train_file))
valid_X, valid_y = _read_compatibility_file(os.path.join(train_dir, valid_file))
test_X, test_y = _read_compatibility_file(os.path.join(train_dir, test_file))

print(f"Total {len(train_X)}, {len(valid_X)}, {len(test_X)} examples in train, validation and test split, respectively")

Total 106612, 10000, 20000 examples in train, validation and test split, respectively


### Create a dict that maps each compatibility-file item token to its real item ID

In [7]:
# Map every item token from the compatibility files to the 'item_id' of the
# item at the same position in the matching JSON outfit.
# NOTE(review): this pairs row i of a compatibility file with outfit i of the
# JSON split purely by position — confirm the two files are aligned that way.
item_dict = {}
for outfits, token_rows in ((train_pos, train_X), (valid_pos, valid_X), (test_pos, test_X)):
    for row_idx, outfit in enumerate(outfits):
        tokens = token_rows[row_idx]
        item_dict.update(
            {token: item['item_id'] for token, item in zip(tokens, outfit['items'])}
        )
    # Running size of the mapping after each split has been merged in.
    print(len(item_dict))

284767
311548
365054


In [8]:
# --- Model / input configuration ---------------------------------------------
model_type = "transformer"  # alternatives: "set-transformer", "rnn"
include_text = True
include_item_categories = True
use_graphsage = False
batch_size = 32
max_seq_len = 12
d_model_rnn = 512
image_data_type = "original"  # one of: "original", "embedding", "both"
image_encoder = "resnet18"  # alternatives: "resnet50", "vgg16", "inception"

# --- Precomputed embedding files (dimension, path) ----------------------------
if use_graphsage:
    image_embedding_dim = 50
    image_embedding_file = os.path.join(embed_dir, "graphsage_dict2_polyvore.pkl")
    # alternative: 256-d, "graphsage_dict2_polyvore_nondisjoint.pkl"
else:
    image_embedding_dim = 1280
    image_embedding_file = os.path.join(embed_dir, "effnet_tuned_polyvore.pkl")
    # alternative: 256-d, "triplet_polyvore_image.pkl"

text_embedding_dim = 768
text_embedding_file = os.path.join(embed_dir, "bert_polyvore.pkl")


In [9]:
# Named hyper-parameter presets. 'set1' is spelled out in full; the other
# presets are fresh copies that override only the entries that differ, so every
# preset keeps the same keys in the same order.
params_set = {
    'set1': {
        'order': 4,
        'factor': 3,
        'dropout': 0.05,
        'd_model': 32,
        'num_heads': 4,
        'e_layers': 1,
        'dff': 128,
        'only_transformer': True,
        'use_fft_transformer': True,
        'observe': 'rmse',
        'patience': 15,
        'lr': 1e-05,
    },
}

# set2: higher dropout, wider model, two encoder layers.
params_set['set2'] = {
    **params_set['set1'],
    'dropout': 0.43655976679400943,
    'd_model': 96,
    'e_layers': 2,
    'dff': 180,
}

# set3: same as set1 with the FFT transformer switched off.
params_set['set3'] = {**params_set['set1'], 'use_fft_transformer': False}

# set4: same as set3 with two encoder layers.
params_set['set4'] = {**params_set['set3'], 'e_layers': 2}


In [10]:
from stgnn_transformer import BaseTransformer

# Preset name used by the (currently commented-out) Model2 construction below.
setnum = 'set4'
# print(params_set[setnum])

# Run settings. Only 'device' is read in this cell.
model_params = {
                'lookback': 16,
                'horizon': 1,
                'batch_size': 32,
                'lr': 1e-04,
                'epochs': 100,
                'scheduler': None,
                'small_epochs': 1,
                'output_dir': './',
                'norm_method': 'min_max_m1',
                'exponential_decay_step':5,
                'validate_freq':1,
                'early_stop':True,
                # Fall back to the CPU so the cell does not crash on machines
                # without a usable CUDA device.
                'device': 'cuda' if torch.cuda.is_available() else 'cpu',
            }
node_cnt = 1280
device = model_params['device']

# Alternative model, kept for reference (Model2 is imported in the top import cell):
# model = Model2(units=node_cnt,
#                stack_cnt=1,
#                time_step=model_params['lookback'],
#                multi_layer=2,
#                out_feature=1,
#                horizon=model_params['horizon'],
#                encoder2='fc',
#                order=params_set[setnum]['order'],  # tune parameter
#                factor=params_set[setnum]['factor'],
#                dropout=params_set[setnum]['dropout'],  # tune parameter, 0.05
#                d_model=params_set[setnum]['d_model'],
#                n_heads=params_set[setnum]['num_heads'],
#                e_layers=params_set[setnum]['e_layers'],
#                d_ff=params_set[setnum]['dff'],
#                only_transformer=params_set[setnum]['only_transformer'],
#                use_fft_transformer=params_set[setnum]['use_fft_transformer'],
#              )

model = BaseTransformer(num_layers=1,
                        d_model=64,
                        num_heads=1,
                        dff=32,
                        rate=0.0,
                        num_classes=2,
                        lstm_dim=32,
                        device=device,
                        image_data_type=image_data_type,
                        include_text=include_text,
                        include_item_categories=include_item_categories,
                        num_categories=154,
                        embedding_activation="linear",
                        encoder_activation="relu",
                        lstm_activation="linear",
                        final_activation="sigmoid")

# Count only the parameters that will receive gradients.
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total Trainable Params: {total_params}")
model.to(device)

Total Trainable Params: 3077729


BaseTransformer(
  (image_embedder): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [11]:
from utils_torch import CustomDataset, train

# Keyword arguments common to both splits; only the (X, y) pair differs.
_dataset_kwargs = dict(
    image_dir=image_dir,
    batch_size=batch_size,
    max_len=max_seq_len,
    only_image=not include_text,
    image_embedding_dim=image_embedding_dim,
    image_embedding_file=image_embedding_file,
    text_embedding_file=text_embedding_file,
    number_items_in_batch=150,
    variable_length_input=True,
    text_embedding_dim=text_embedding_dim,
    include_item_categories=include_item_categories,
    image_data=image_data_type,
    input_size=(3, 224, 224),
)

train_set = CustomDataset(train_X, train_y, item_dict, pv_items, **_dataset_kwargs)
valid_set = CustomDataset(valid_X, valid_y, item_dict, pv_items, **_dataset_kwargs)


In [14]:
# Number of full `batch_size`-sized batches in the training set (integer division
# drops any remainder); assumes len(train_set) counts examples — TODO confirm.
# NOTE(review): the train() call in this notebook passes batch_size=128 rather
# than this `batch_size` — confirm which one is intended.
len(train_set)//batch_size

3331

In [12]:
# Train on the datasets built above (`train` is imported together with
# CustomDataset in the dataset-construction cell). The device comes from the
# notebook-level `device` variable — the same one the model was moved to —
# instead of a second hard-coded 'cuda' literal.
# NOTE(review): epochs, batch_size and learning_rate here differ from the
# values stored in `model_params` and from the notebook-level `batch_size`.
train(model, train_set, valid_set, device=device, epochs=10, batch_size=128, learning_rate=1e-05)

119it [10:41,  5.39s/it]


KeyboardInterrupt: 

In [3]:
from torchvision.models import resnet18, resnet50
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.models.detection.mask_rcnn import MaskRCNN
from torchvision.models.detection.backbone_utils import LastLevelMaxPool
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork

In [6]:
# MaskRCNN requires a backbone with an attached FPN
class Resnet50WithFPN(torch.nn.Module):
    """ResNet-50 feature extractor followed by a feature pyramid network.

    Attributes:
        body: feature extractor returning the outputs of ``layer1``..``layer4``
            under the keys ``'0'``..``'3'``.
        out_channels: set to 256 and passed to the FPN as its ``out_channels``.
        fpn: ``FeaturePyramidNetwork`` with a ``LastLevelMaxPool`` extra block.
    """

    def __init__(self):
        super().__init__()
        # Backbone built by calling resnet50() with no arguments.
        resnet = resnet50()
        # Expose the four main stages (MaskRCNN needs this particular name
        # mapping for return nodes).
        stage_to_key = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
        self.body = create_feature_extractor(resnet, return_nodes=stage_to_key)
        # Dry run on a dummy batch to read each stage's channel count for the FPN.
        dummy_batch = torch.randn(2, 3, 224, 224)
        with torch.no_grad():
            stage_outputs = self.body(dummy_batch)
        in_channels_list = [feature.shape[1] for feature in stage_outputs.values()]
        # Build the FPN on top of the extracted stages.
        self.out_channels = 256
        self.fpn = FeaturePyramidNetwork(
            in_channels_list,
            out_channels=self.out_channels,
            extra_blocks=LastLevelMaxPool(),
        )

    def forward(self, x):
        """Run ``x`` through the feature extractor, then the FPN, and return the FPN output."""
        return self.fpn(self.body(x))


# Build the detector on top of the custom backbone (44 million parameters) and
# switch it to evaluation mode.
# NOTE(review): this rebinds the notebook-level name `model`, replacing whatever
# earlier cells stored under it.
model = MaskRCNN(backbone=Resnet50WithFPN(), num_classes=91)
model = model.eval()

In [7]:
# Sum the element counts of every parameter that requires gradients.
total_params = sum(
    weights.numel() for weights in model.parameters() if weights.requires_grad
)
print(f"Total Trainable Params: {total_params}")
# Keep the model on the CPU; as the last expression this also displays its repr.
model.to('cpu')

Total Trainable Params: 44454513


MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Resnet50WithFPN(
    (body): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Module(
        (0): Module(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [8]:
# Instantiate a ResNet-18 (no arguments passed) and ask torchvision for its graph
# node names; the result is unpacked into two collections, presumably the
# train-mode and eval-mode names — TODO confirm against the torchvision docs.
m = resnet18()
train_nodes, eval_nodes = get_graph_node_names(m)

In [10]:
# Display the second collection of node names returned by get_graph_node_names
# for the ResNet-18 instance.
eval_nodes

['x',
 'conv1',
 'bn1',
 'relu',
 'maxpool',
 'layer1.0.conv1',
 'layer1.0.bn1',
 'layer1.0.relu',
 'layer1.0.conv2',
 'layer1.0.bn2',
 'layer1.0.add',
 'layer1.0.relu_1',
 'layer1.1.conv1',
 'layer1.1.bn1',
 'layer1.1.relu',
 'layer1.1.conv2',
 'layer1.1.bn2',
 'layer1.1.add',
 'layer1.1.relu_1',
 'layer2.0.conv1',
 'layer2.0.bn1',
 'layer2.0.relu',
 'layer2.0.conv2',
 'layer2.0.bn2',
 'layer2.0.downsample.0',
 'layer2.0.downsample.1',
 'layer2.0.add',
 'layer2.0.relu_1',
 'layer2.1.conv1',
 'layer2.1.bn1',
 'layer2.1.relu',
 'layer2.1.conv2',
 'layer2.1.bn2',
 'layer2.1.add',
 'layer2.1.relu_1',
 'layer3.0.conv1',
 'layer3.0.bn1',
 'layer3.0.relu',
 'layer3.0.conv2',
 'layer3.0.bn2',
 'layer3.0.downsample.0',
 'layer3.0.downsample.1',
 'layer3.0.add',
 'layer3.0.relu_1',
 'layer3.1.conv1',
 'layer3.1.bn1',
 'layer3.1.relu',
 'layer3.1.conv2',
 'layer3.1.bn2',
 'layer3.1.add',
 'layer3.1.relu_1',
 'layer4.0.conv1',
 'layer4.0.bn1',
 'layer4.0.relu',
 'layer4.0.conv2',
 'layer4.0.bn2',

In [27]:
import flash
from flash.core.data.utils import download_data
from flash.image import ImageClassificationData, ImageEmbedder