In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Not connected to a GPU')
else:
    print(gpu_info)
     
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

Tue Jul  4 08:45:02 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.41.03              Driver Version: 530.41.03    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3070         Off| 00000000:01:00.0 Off |                  N/A |
|  0%   50C    P8               17W / 220W|      6MiB /  8192MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
# Set to True to make the install cell below pip-install the extra packages.
need_to_install = False

# Install and Import

In [6]:
if need_to_install:
  !pip install wandb==0.13.3
  !pip install transformers
  !pip install 'git+https://github.com/katsura-jp/pytorch-cosine-annealing-with-warmup'

In [7]:
import sys

# Directory holding the project-local packages imported later in the notebook.
code_path = 'code/'

# Keep the entry first on sys.path, but drop any copy left by a previous run of
# this cell so that re-running does not accumulate duplicates.
if code_path in sys.path:
    sys.path.remove(code_path)
sys.path.insert(0, code_path)

In [11]:
import os
import warnings
warnings.filterwarnings("ignore")

import wandb
import torch
import pandas as pd

import matplotlib.pyplot as plt

from GISLR_utils.utils import get_logger, class2dict
from GISLR_utils.pipeline import train_loop, full_train_loop

# Config

In [6]:
# ====================================================
# Config
# ====================================================
class CFG:
    # Plain namespace of experiment settings, read and mutated as class attributes.
    # `objective()` overwrites several of them on every Optuna trial (seed, train_bs,
    # epochs, dataset, scheduler, num_cycles, optimizer, new_size, trn_fold, exp_name).
    # Deliberately documented with comments only: a class docstring would end up in
    # the `CFG.__dict__` dump that is written to CFG.txt.

    ####################
    # MAIN
    ####################
    wandb = False
    wandb_project = 'sign_translate20230704'
    competition = 'sign_translate20230704'
    wb_group = None
    exp_name = 'exp1'
    # Root directory for data and results; must end with '/' because paths are
    # built from it by plain string concatenation.
    base_path = '/home/jeff/project/poc-project/sign_translate/holistic/basepath/'

    seed = 333

    train = True
    LOOP = False
    # NOTE(review): `full_train` (True) and `FULL_TRAIN` (False, below) both exist with
    # different values - confirm which one the pipeline actually reads.
    full_train = True
    debug = False

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ####################
    # DATA
    ####################
    dataset = 'img_80_mixup'

    num_workers = 12
    train_bs = 64
    valid_bs = 256
    n_fold = 8
    trn_fold = [0]
    # True -> folds are built with StratifiedGroupKFold grouped by participant_id,
    # False -> plain StratifiedKFold on the sign.
    fold_group = False

    ####################
    # TRAIN
    ####################

    early_stopping = 40  # None for dont use
    use_early_break = None # None for dont use

    FULL_TRAIN = False
    apex = True

    eval_after = 0
    eval_every = 1
    eval_always_after = 1

    finetune = False
    finetune_path = 'PATH/TO/CKPT'
    finetune_fold = 0
    finetune_sched_opt = True
    finetune_epoch = 2
    finetune_change_seed = True

    # Scheduler step 1

    scheduler = 'onecycle'
    onecycle_start = 0.1
    onecycle_m = 1.
    num_cycles = 0.5
    num_warmup_steps = 333

    # Loop step 1

    epochs = 180

    # LR, optimizer step 1

    eps = 1e-8
    betas = (0.9, 0.999)
    weight_decay = 0.01
    gradient_accumulation_steps = 1
    optimizer = "RAdam"

    # base_path already ends with '/', so no leading slash here (the previous
    # '/asl_signs/' produced '...basepath//asl_signs/').
    data_dir = base_path + 'asl_signs/'
    BREAK_EPOCH = 100000
    fill_nan_value = 0.

    new_size= (160, 80, 3)
    encoder = 'rexnet_100'

    COLAB = False

# Disabled (`if False`) one-off setup: would create the results/checkpoints
# directories for CFG.exp_name and dump the CFG attributes to CFG.txt.
if False:
    exp_dir = CFG.base_path + 'results/' + CFG.exp_name
    # Creating the deepest directory also creates 'results/' and the experiment dir.
    os.makedirs(exp_dir + '/checkpoints', exist_ok=True)
    CFG.save_path = exp_dir + '/checkpoints/'
    with open(exp_dir + '/CFG.txt', 'w') as cfg_file:
        cfg_file.writelines(
            '%s:%s\n' % (key, value) for key, value in CFG.__dict__.items()
        )
     

# Load and Prepare Data

In [7]:
import json
def read_dict(file_path):
    """
    Load a JSON file and return the decoded object.

    :param file_path:(str) path to the JSON file; a leading ``~`` is expanded
    :return: the object produced by ``json.load`` (a dict for a JSON object)
    """
    path = os.path.expanduser(file_path)
    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        dic = json.load(f)
    return dic

# Training manifest plus the sign -> index mapping shipped next to it.
train = pd.read_csv(CFG.base_path + 'asl_signs/train.csv')
label_index = read_dict(f"{CFG.base_path}/asl_signs/sign_to_prediction_index_map.json")

# Reverse lookup (index -> sign) and a numeric `label` column for every row.
# A sign missing from the mapping raises KeyError here.
index_label = {idx: sign for sign, idx in label_index.items()}
train["label"] = train["sign"].map(label_index.__getitem__)

print(train.shape)
display(train.head())

import numpy as np
from sklearn.model_selection import StratifiedGroupKFold, StratifiedKFold

# Assign every row of `train` to one of CFG.n_fold folds, stratified by sign
# (and kept group-disjoint by participant when CFG.fold_group is set).
if CFG.fold_group:
    print('FOLD SPLIT USING GROUPS')
    split = StratifiedGroupKFold(CFG.n_fold, random_state=42, shuffle=True) #rs = 42
    groups = train.participant_id
else:
    print('FOLD SPLIT ONLY ON SIGN')
    split = StratifiedKFold(CFG.n_fold, random_state=42, shuffle=True) #rs = 42
    groups = None  # StratifiedKFold accepts and ignores `groups`

# `split()` yields positional row indices, so collect the fold ids in a positional
# array instead of writing through label-based `.loc`; this stays correct even if
# `train` does not have a default RangeIndex and keeps the column integer-typed.
fold_ids = np.full(len(train), -1, dtype=int)
for k, (_, test_idx) in enumerate(split.split(train, train.sign, groups=groups)):
    fold_ids[test_idx] = k
assert (fold_ids >= 0).all(), 'every row must belong to exactly one fold'

train['fold'] = fold_ids
display(train.groupby('fold').size())
     

(94477, 5)


Unnamed: 0,path,participant_id,sequence_id,sign,label
0,train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow,25
1,train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait,232
2,train_landmark_files/16069/100015657.parquet,16069,100015657,cloud,48
3,train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird,23
4,train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie,164


FOLD SPLIT ONLY ON SIGN


fold
0    11810
1    11810
2    11810
3    11810
4    11810
5    11809
6    11809
7    11809
dtype: int64

# Training

In [None]:
import optuna

def objective(trial):
    """
    Run one Optuna trial: sample hyper-parameters, copy them onto the global
    ``CFG`` class, train fold ``CFG.trn_fold[0]`` with ``train_loop`` and return
    the first value it yields (printed below as Accuracy).

    Side effects: mutates ``CFG``, creates ``<base_path>results/<exp_name>/checkpoints``,
    writes ``CFG.txt`` into the experiment directory and starts (and finishes) a wandb run.

    :param trial:(optuna instance) iteration
    :return:(float) metric of model iteration
    """

    # CFG.new_size = (128, 120, 3)
    param = {
        'seed': np.random.randint(20, 10000), #6374,
        'aug_prob': trial.suggest_float('aug_prob', 0.15, 0.25), # do_random_affine prob
        'invert_prob': trial.suggest_float('invert_prob', 0.25, 0.32), # it flips all points (hands, lips, pose)
        'scale_prob': trial.suggest_float('scale_prob', 0.17, 0.3), # prob to rescale some parts (e.g. one hand or hand and lips)
        'lr': trial.suggest_float('lr', 2e-3, 2.8e-3),  # LR
        'train_bs': trial.suggest_categorical('train_bs', [32, 64]),  # BS
        'drop_rate': 0.2, #trial.suggest_float('drop_rate', 0.18, 0.22), # model dropout
        'epochs': trial.suggest_int('epochs', 100, 210), # epochs
        'img_masking': 0.98, # prob to use torchaudio masks
        'model': 'img_v0', # model name
        'dataset': 'img_80_mixup', # img_80_onehand for one hand new_size=(T, 64, 3), img_80_mixup for 2 hands new_size=(T, 80, 3)
        'freq_m': trial.suggest_int('freq_m', 66, 80), # mask for time axis  (yes name freq but mask time :) )
        'time_m': trial.suggest_int('time_m', 3, 12), # mask some points. the max possible masked points
        'use_loss_wgt': True, # use weighted loss
        'pw_bad': 1.45, # power to bad predicted classes
        'pw_com': 0.81, # power to classes which has common classes (e.g. dad, grandpa, grandma)
        "label_smooth": trial.suggest_float('label_smooth', 0.50, 0.65), # loss label smoothing
        'shift_prob': trial.suggest_float('shift_prob', 0.12, 0.29),  # shift random parts to random value (one or few)
        'mixup_prob': trial.suggest_float('mixup_prob', 0.3, 0.42),  # mixup prob
        'zero_prob': 0., # pixel dropout prob. It didn't work
        'rotate_prob': trial.suggest_float('rotate_prob', 0.18, 0.26),  # rotate one or few parts
        'replace_prob': trial.suggest_float('replace_prob', 0.08, 0.17),  # replace one or two parts from another element with same class
        'deep_supervision': False, # DSV
        'interpol_prob': trial.suggest_float('interpol_prob', 0.15, 0.4), # interpolation as in Carno's code
        'normalize': True, # mean std normalization
        'tree_rot_prob': trial.suggest_float('tree_rot_prob', 0.25, 0.55), # finger tree augmentation from Carno's Code
        'interp_nearest_random': trial.suggest_float('interp_nearest_random', 0.35, 0.5),
        'lookahed_k':trial.suggest_int('lookahed_k', 2, 7),
        'lookahed_alpha':trial.suggest_float('lookahed_alpha', 0.3, 0.6),
    }
    print(param)

    # Publish every entry on CFG under its own key. Previously the values were copied
    # one by one and 'interp_nearest_random', 'lookahed_k' and 'lookahed_alpha' were
    # sampled but never reached CFG, so the search tuned values that had no effect.
    # NOTE(review): assumes the pipeline reads these three under the same attribute
    # names as the keys above - confirm against GISLR_utils.
    for key, value in param.items():
        setattr(CFG, key, value)

    # Settings that are the same for every trial.
    CFG.scheduler = 'onecycle'
    CFG.new_size = (160, 80, 3)
    CFG.loss = 'ce'
    CFG.alpha = 0.3
    #CFG.encoder = 'rexnet_100'  # if model == 'timm'
    CFG.num_cycles = 0.5 # param['num_cycles']
    CFG.optimizer = 'Lookahead_RAdam'  # param['optimizer']
    CFG.motion = False
    CFG.use_swa = False
    CFG.swa_start = 0

    CFG.trn_fold = [0]
    fold_ = CFG.trn_fold[0]

    CFG.exp_name = f'EXP_NAME_f{fold_}_{trial.number}_bs{CFG.train_bs}_sm{CFG.label_smooth:.2f}_norm{int(CFG.normalize)}_inter{CFG.interpol_prob:.2f}_time{CFG.time_m}_freq{CFG.freq_m}_repl{CFG.replace_prob:.2f}_lr{CFG.lr:8f}_mixup{CFG.mixup_prob:.2f}_aug{CFG.aug_prob:.2f}_rot{CFG.rotate_prob:.2f}_zero{CFG.zero_prob:.2f}_shift{CFG.shift_prob:.2f}_scale{CFG.scale_prob:.2f}_inver{CFG.invert_prob:.2f}_ep{CFG.epochs}'

    # One makedirs on the deepest directory also creates 'results/' and the
    # experiment directory.
    exp_dir = CFG.base_path + 'results/' + CFG.exp_name
    os.makedirs(exp_dir + '/checkpoints', exist_ok=True)
    CFG.save_path = exp_dir + '/checkpoints/'

    # Snapshot of the configuration used by this trial.
    with open(exp_dir + '/CFG.txt', 'w') as f:
        for key, value in CFG.__dict__.items():
            f.write('%s:%s\n' % (key, value))

    # NOTE(review): the run is started regardless of CFG.wandb and uses the literal
    # project 'PROJECT_NAME' rather than CFG.wandb_project - confirm this is intended.
    wandb.init(project='PROJECT_NAME',
            name=CFG.exp_name,
            config=class2dict(CFG),
            group=CFG.wb_group,
            job_type="train",
            dir=CFG.base_path)

    try:
        LOGGER = get_logger(exp_dir + f'/train_f{fold_}')
        acc, topk = train_loop(CFG, train, fold_, LOGGER)
    finally:
        # Close this trial's run so trials do not share or leak an open wandb run;
        # a no-op if the run was already finished.
        wandb.finish()

    print(f'FOR PARAMS: {param}')
    print(f'Accuracy: {acc}')
    print(f'TOPK: {topk}')
    print()
    return acc

# Announce the search, silence Optuna's default log handler and run 16 trials
# that maximize the value returned by `objective`.
print('Starting train parameters optimization process.')
print('With main metric Accuracy')
optuna.logging.disable_default_handler()
direct = 'maximize'
study = optuna.create_study(direction=direct)
study.optimize(objective, n_trials=16)

# Show the sampled parameters of the best trial.
model_params = study.best_trial.params
print('Best params:')
print(f'{model_params} \n')

Starting train parameters optimization process.
With main metric Accuracy
{'seed': 8978, 'aug_prob': 0.23083607583383073, 'invert_prob': 0.2967532728552612, 'scale_prob': 0.29775959440009914, 'lr': 0.0024301306246846863, 'train_bs': 64, 'drop_rate': 0.2, 'epochs': 187, 'img_masking': 0.98, 'model': 'img_v0', 'dataset': 'img_80_mixup', 'freq_m': 73, 'time_m': 5, 'use_loss_wgt': True, 'pw_bad': 1.45, 'pw_com': 0.81, 'label_smooth': 0.5372506781294002, 'shift_prob': 0.23963042471387047, 'mixup_prob': 0.3342119014191915, 'zero_prob': 0.0, 'rotate_prob': 0.24702122182941125, 'replace_prob': 0.09816106067857577, 'deep_supervision': False, 'interpol_prob': 0.3559747456629349, 'normalize': True, 'tree_rot_prob': 0.5}


[34m[1mwandb[0m: Currently logged in as: [33mforrato[0m ([33mislr[0m). Use [1m`wandb login --relogin`[0m to force relogin




> SEEDING DONE
Size of train dataset: 82667
Size of valid dataset: 11810
Loaded pretrained weights for efficientnet-b0
Epoch 1/187 | Fold 0


Ep.1 Train :   0%|          | 0/1291 [00:00<?, ?it/s]

Ep.1 Eval :   0%|          | 0/47 [00:00<?, ?it/s]

Epoch 1 - avg_train_loss: 13.12667  avg_val_loss: 13.03254  time: 229s
Epoch 1 - Accuracy: 0.01270 | TopK3: 0.03446
||||||||| Best Score Updated 0.00000 -->> 0.01270 | Model Saved |||||||||
Best params: {'fold': 0, 'epoch': 1, 'accuracy': 0.012701100762066046, 'topk3': 0.0344623200677392}
|||| Best TOPK Score Updated 0.00000 -->> 0.03446 | Model Saved |||||
Best params: {'fold': 0, 'epoch': 1, 'accuracy': 0.012701100762066046, 'topk3': 0.0344623200677392}


ACCURACY SCORE: 0.012701
TOPK SCORE: 0.034462
BASE BETTER
Epoch 2/187 | Fold 0


Ep.2 Train :   0%|          | 0/1291 [00:00<?, ?it/s]