# Install Neptune, Faiss, pydantic and timm

In [1]:
# Third-party dependencies for the training run.
# `%pip` (instead of `!pip`) installs into the environment of the running kernel,
# so the packages are guaranteed to be importable from this notebook.
# NOTE(review): versions are unpinned, so results may drift between sessions —
# pin them once known-good versions are confirmed.
%pip install neptune-client -q
%pip install faiss-cpu -q
%pip install pydantic -q
%pip install timm -q

# Get Tokens & Load GitHub Repo

In [2]:
import os
from kaggle_secrets import UserSecretsClient

# Read both credentials from the Kaggle secrets client so that no token is
# hard-coded in the notebook.
secrets = UserSecretsClient()
GITHUB_TOKEN, NEPTUNE_TOKEN = (
    secrets.get_secret(label) for label in ("GITHUB_TOKEN", "NEPTUNE_TOKEN")
)

# Export the Neptune token and project name as environment variables
# (presumably read by the training script — verify against train.py).
os.environ.update(
    NEPTUNE_TOKEN=NEPTUNE_TOKEN,
    PROJECT_NAME='chewzy/shopee-price-match',
)

In [3]:
# Clone the private repo using the GitHub token from the secrets cell.
# The guard keeps the cell re-runnable: cloning into an existing, non-empty
# directory would only print a "destination path already exists" error.
if not os.path.isdir('price_match'):
    !git clone https://{GITHUB_TOKEN}@github.com/Toukenize/price_match.git
    # git stores the clone URL (token included) in price_match/.git/config;
    # reset the remote to the token-free URL so the secret is not left on disk.
    !git -C price_match remote set-url origin https://github.com/Toukenize/price_match.git

Cloning into 'price_match'...
remote: Enumerating objects: 399, done.[K
remote: Counting objects: 100% (399/399), done.[K
remote: Compressing objects: 100% (247/247), done.[K
remote: Total 399 (delta 193), reused 316 (delta 121), pack-reused 0[K
Receiving objects: 100% (399/399), 73.58 KiB | 1.23 MiB/s, done.
Resolving deltas: 100% (193/193), done.


# Overwrite the configurations in constants.py

In [4]:
%%writefile price_match/src/config/constants.py
import os
from pathlib import Path
from src.config.base_model_config import NLPConfig, IMGConfig

# Data paths (relative to the working directory the training script runs from)
DATA_FOLDER = Path('../input/shopee-price-match-datasplit/')
IMG_FOLDER = Path('../input/shopee-product-matching/')
TRAIN_IMG_FOLDER = IMG_FOLDER / 'train_images'
DATA_SPLIT_PATH = DATA_FOLDER / 'train_split_v3.csv'

# Pretrained model paths
# The NLP entries are left empty in this notebook; only PRETRAINED_IMG is set.
MODEL_FOLDER = Path('model')
PRETRAINED_NLP_MLM = ''
PRETRAINED_TOKENIZER = ''
PRETRAINED_IMG = '../input/efficient-net-b0-b3/efficientnet_b3.pth'

# Output paths
NLP_MODEL_PATH = MODEL_FOLDER / 'indobert_lite_p2' / 'emb_model_v3'
# NOTE(review): the folder is named 'efficient_net_b0' while PRETRAINED_IMG points
# at efficientnet_b3.pth — confirm whether the name is intentional before renaming,
# since downstream code may look for the model under this exact path.
IMG_MODEL_PATH = MODEL_FOLDER / 'efficient_net_b0' / 'emb_model_v1'

# Create the output directories at import time. `exist_ok=True` makes this safe
# to run repeatedly and avoids the check-then-create race of a separate
# `exists()` test.
for path in (NLP_MODEL_PATH, IMG_MODEL_PATH):
    path.mkdir(parents=True, exist_ok=True)

# Dataloader Config
NUM_WORKER = 4

# KNN Chunksize
KNN_CHUNKSIZE = 1024

# NLP Configs
# The scheduler's warm-up and training epochs (5 + 45) add up to `epochs` (50).
NLP_CONFIG = NLPConfig(
    epochs=50,
    dropout_prob=0.1,
    learning_rate=3e-5,
    train_batch_size=64,
    val_batch_size=128,
    scheduler='cosine_decay_w_warmup',
    scheduler_params=dict(
        num_warmup_epochs=5,
        num_training_epochs=45,
        num_cycles=0.4,
    ),
    optimizer='adamw',
    loss_fn='arcmargin',
    loss_params=dict(m=0.5, s=30.0, easy_margin=False),
    model_max_length=48,
    # Both pretrained locations come from the constants defined above.
    pretrained_model_folder=PRETRAINED_NLP_MLM,
    pretrained_tokenizer_folder=PRETRAINED_TOKENIZER,
)

# IMG Configs
# The scheduler's warm-up and training epochs (4 + 23) add up to `epochs` (27).
IMG_CONFIG = IMGConfig(
    epochs=27,
    dropout_prob=0.15,
    learning_rate=9e-5,
    train_batch_size=16,
    val_batch_size=64,
    scheduler='cosine_decay_w_warmup',
    scheduler_params=dict(
        num_warmup_epochs=4,
        num_training_epochs=23,
        num_cycles=0.4,
    ),
    optimizer='adamw',
    loss_fn='arcmargin',
    loss_params=dict(m=0.5, s=30.0, easy_margin=False),
    # Pretrained weights location comes from the constant defined above.
    pretrained_model_path=PRETRAINED_IMG,
    img_dim=512,
    feature_dim=512,
)

Overwriting price_match/src/config/constants.py


In [5]:
import sys

# Make the cloned repo importable from this notebook. The membership check
# keeps re-runs of this cell from piling up duplicate sys.path entries.
if 'price_match/' not in sys.path:
    sys.path.append('price_match/')

In [6]:
# Launch the repo's train.py as a separate Python process with
# env=kaggle, model_type=img, cv_type=group and trainfolds=3.
# NOTE(review): the log below reports "Fold 4" for `--trainfolds 3` — presumably
# the flag is 0-based while the progress bar is 1-based; confirm in train.py.
!python price_match/train.py --env kaggle --model_type img --cv_type group --trainfolds 3

2021-05-08 16:40:05.154429: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.2
https://ui.neptune.ai/chewzy/shopee-price-match/e/SHOPEE-200
>> Fold 4, Epoch 1/27 - Train Loss : 23.7380 LR : 2.2e-05: 100%|█| 1598/1598 [15
>> Generating embeddings: 100%|███████████████| 135/135 [02:33<00:00,  1.14s/it]
>> Finding Neighbours: 100%|██████████████████████| 9/9 [00:00<00:00, 11.26it/s]
>> Finding Best Thres: 100%|████████████████████| 36/36 [00:23<00:00,  1.51it/s]
>> Fold 4, Epoch 2/27 - Train Loss : 21.3622 LR : 4.5e-05: 100%|█| 1598/1598 [15
>> Generating embeddings: 100%|███████████████| 135/135 [02:34<00:00,  1.14s/it]
>> Finding Neighbours: 100%|██████████████████████| 9/9 [00:00<00:00, 11.53it/s]
>> Finding Best Thres: 100%|████████████████████| 36/36 [00:24<00:00,  1.48it/s]
>> Fold 4, Epoch 3/27 - Train Loss : 19.7058 LR : 6.7e-05: 100%|█| 1598/1598 [15
>> Generating embeddings: 100%|███████████████| 135/1