# Model Training and Test Predictions

### Goal
- Experiment with model architecture and hyperparameters (using a train/val split)
- Train the best model on all training data
- Generate predictions for test dataset

### Discussion
- Due to time/GPU limitations the F1-score might be limited. Many improvements could still be tested, such as ensemble models, data augmentations, Transformer backbone pre-training, SWA (stochastic weight averaging)...

### Notes


### Imports

In [1]:
from pathlib import Path

import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split

from src.data import NuweDataset
from src.model import focalnet_tiny_srf
from src.train import train_epoch, valid_epoch, generate_test_results
from src.loss import F1_Loss

### Data

In [2]:
# Folder holding train.csv / test.csv; the trailing slash matters because
# file names are appended to this string directly.
DATA_PATH = "data/raw/"

In [3]:
# Read both tables from DATA_PATH and report their (rows, columns) shapes.
train = pd.read_csv(f'{DATA_PATH}train.csv')
test = pd.read_csv(f'{DATA_PATH}test.csv')

print(f'train {train.shape} test {test.shape}')

train (1714, 5) test (635, 4)


In [4]:
# Run on the GPU when one is available, otherwise fall back to the CPU so the
# notebook still executes (slowly) on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# DataLoader settings shared by every loader in this notebook.
num_workers = 0   # 0 = load batches in the main process
batch_size = 16

In [5]:
# Dataset/loader over the complete training table, used for the final fit.
# NOTE: this must run before the train/val split cell, which rebinds `train`
# to the training portion only.
full_train_dataset = NuweDataset(data = train, directory = DATA_PATH)
full_train_dataloader = DataLoader(full_train_dataset,
                          batch_size  = batch_size,
                          shuffle = True,  # reshuffle every epoch; a fixed CSV order biases batches
                          num_workers = num_workers,
                          pin_memory = True)

# Test loader keeps the original row order (shuffle=False) so predictions can
# be matched back to the rows of `test`.
# `train = False` is passed to NuweDataset - presumably because the test table
# has no label column (it has one column fewer); confirm in src.data.
test_dataset = NuweDataset(data = test, directory = DATA_PATH, train = False)
test_dataloader = DataLoader(test_dataset,
                          batch_size = batch_size,
                          shuffle = False,
                          num_workers = num_workers,
                          pin_memory = True)

In [6]:
# Stratified 67/33 hold-out split with a fixed seed.
# NOTE(review): this rebinds `train` to the training portion, so the cell is
# not idempotent - re-running it without reloading the CSV splits the already
# reduced frame again.
train, val = train_test_split(
    train,
    test_size=0.33,
    random_state=42,
    stratify=train['label'],
)
print(f'{train.shape} {val.shape}')

(1148, 5) (566, 5)


In [7]:
# Datasets over the two portions produced by the train/val split.
train_dataset = NuweDataset(data = train, directory = DATA_PATH)
val_dataset = NuweDataset(data = val, directory = DATA_PATH)


# Training batches are reshuffled every epoch so the optimiser does not see
# the samples in the same fixed order each time.
train_dataloader = DataLoader(train_dataset,
                          batch_size  = batch_size,
                          shuffle = True,
                          num_workers = num_workers,
                          pin_memory = True)

# Validation order does not affect the metrics, so it stays deterministic.
val_dataloader = DataLoader(val_dataset,
                          batch_size = batch_size,
                          shuffle = False,
                          num_workers = num_workers,
                          pin_memory = True)

### Modeling

In [8]:
class NuweModel(nn.Module):
    """Classifier that fuses an image embedding with year and neighbor context.

    Each input is embedded separately (image -> 30, year -> 5, neighbors -> 5),
    the three vectors are concatenated into a 40-dimensional feature and an MLP
    head maps it to ``num_classes`` logits.

    Args:
        num_years: Size of the year embedding table; year indices passed to
            ``forward`` must lie in ``[0, num_years)``.
        num_classes: Number of logits produced by the head.
        pretrained: Forwarded to ``focalnet_tiny_srf`` (presumably controls
            loading of pretrained backbone weights - see ``src.model``).

    The defaults reproduce the original fixed architecture, so existing
    checkpoints saved from ``NuweModel()`` keep loading.
    """

    def __init__(self, num_years: int = 16, num_classes: int = 3, pretrained: bool = True):
        super().__init__()

        # Widths of the three per-input embeddings and of their concatenation.
        image_dim, year_dim, neighbors_dim = 30, 5, 5
        fused_dim = image_dim + year_dim + neighbors_dim  # 40

        # Backbone with its classification head removed; the projection below
        # expects it to emit 768 features per sample.
        self.image_backbone = focalnet_tiny_srf(pretrained=pretrained)
        self.image_backbone.head = nn.Identity()
        self.image_proj = nn.Linear(768, image_dim)

        self.year = nn.Embedding(num_years, year_dim)

        # Neighbor context arrives as 3 values per sample.
        self.neighbors_proj = nn.Linear(3, neighbors_dim)

        self.head = nn.Sequential(
            nn.Linear(fused_dim, fused_dim),
            nn.BatchNorm1d(fused_dim),
            nn.ReLU(),
            nn.Linear(fused_dim, 10),
            nn.BatchNorm1d(10),
            nn.ReLU(),
            nn.Linear(10, num_classes),
        )

    def forward(self, image: torch.Tensor, year: torch.Tensor, neighbors_ctx: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a batch.

        Args:
            image: Batch fed to the image backbone.
            year: Integer year indices, one per sample.
            neighbors_ctx: Tensor whose last dimension has size 3.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits.
        """
        x_img = self.image_proj(self.image_backbone(image))
        x_year = self.year(year)
        x_neighbors_ctx = self.neighbors_proj(neighbors_ctx)

        # Fuse along the feature axis, then classify.
        fused = torch.cat([x_img, x_year, x_neighbors_ctx], dim=1)
        return self.head(fused)

# Build the network, move it to the selected device, and show its layout.
model = NuweModel().to(device)
model

NuweModel(
  (image_backbone): FocalNet(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0): BasicLayer(
        dim=96, input_resolution=(56, 56), depth=2
        (blocks): ModuleList(
          (0): FocalNetBlock(
            dim=96, input_resolution=(56, 56), mlp_ratio=4.0
            (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
            (modulation): FocalModulation(
              dim=96
              (f): Linear(in_features=96, out_features=195, bias=True)
              (h): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
              (act): GELU(approximate='none')
              (proj): Linear(in_features=96, out_features=96, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (focal_layers): ModuleList(
                (0): Sequenti

### Training

In [9]:
# Loss: cross-entropy on the class logits.
# Alternative kept for experiments (currently disabled): criterion = F1_Loss()
criterion = nn.CrossEntropyLoss()

# AdamW over every model parameter.
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-6)

# The scheduler is still handed to train_epoch, but it only halves the learning
# rate after 15000 scheduler steps, so it is effectively inactive for this run.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15000, gamma=0.5)

epochs = 100

In [10]:
# Train on the training split and evaluate on the validation split once per
# epoch. Printed columns: f1, accuracy, a third metric printed unformatted
# (`*_f1_na`, presumably per-class F1), and mean loss.
with tqdm(total=epochs, leave=False) as pbar:
    for epoch_i in range(epochs):

        train_metrics = train_epoch(
            model, train_dataloader, device, criterion, optimizer, scheduler=scheduler
        )
        train_f1, train_acc, train_f1_na, avg_train_loss = train_metrics

        valid_metrics = valid_epoch(model, val_dataloader, device, criterion)
        valid_f1, valid_acc, valid_f1_na, avg_valid_loss = valid_metrics

        print(f'TRAIN Epoch: {epoch_i} {train_f1:.3f} {train_acc:.3f} {train_f1_na} {avg_train_loss:.3f}')
        print(f'VAL Epoch: {epoch_i} {valid_f1:.3f} {valid_acc:.3f} {valid_f1_na} {avg_valid_loss:.3f}')

        pbar.update(1)

  0%|          | 0/100 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Save the weights of the train/val experiment. torch.save does not create
# missing parent folders, so make sure `artifacts/` exists first.
Path('artifacts').mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), 'artifacts/model2.pt')

### Final train on all train data

In [11]:
# Start from a fresh model so nothing learned on the train/val split run is
# carried into the final fit on all training data.
model = NuweModel().to(device)

# Loss: cross-entropy on the class logits.
# Alternative kept for experiments (currently disabled): criterion = F1_Loss()
criterion = nn.CrossEntropyLoss()

# AdamW over every parameter of the new model.
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-6)

# The scheduler is still handed to train_epoch, but it only halves the learning
# rate after 15000 scheduler steps, so it is effectively inactive for this run.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15000, gamma=0.5)

epochs = 100

In [12]:
# Final fit on the complete training set; no validation data is left, so only
# training metrics are reported (f1, accuracy, `train_f1_na`, mean loss).
with tqdm(total=epochs, leave=False) as pbar:
    for epoch_i in range(epochs):

        train_metrics = train_epoch(
            model, full_train_dataloader, device, criterion, optimizer, scheduler=scheduler
        )
        train_f1, train_acc, train_f1_na, avg_train_loss = train_metrics

        print(f'TRAIN Epoch: {epoch_i} {train_f1:.3f} {train_acc:.3f} {train_f1_na} {avg_train_loss:.3f}')

        pbar.update(1)

  0%|          | 0/100 [00:00<?, ?it/s]

TRAIN Epoch: 0 0.316 0.440 [0.61182033 0.19897084 0.1369863 ] 1.063
TRAIN Epoch: 1 0.452 0.545 [0.69189714 0.38528897 0.27889447] 0.993
TRAIN Epoch: 2 0.517 0.593 [0.7235732  0.44680851 0.37926973] 0.951
TRAIN Epoch: 3 0.572 0.638 [0.75642965 0.50619469 0.45454545] 0.918
TRAIN Epoch: 4 0.613 0.669 [0.77484787 0.55555556 0.50873362] 0.896
TRAIN Epoch: 5 0.637 0.686 [0.77981651 0.58536585 0.54448017] 0.878
TRAIN Epoch: 6 0.652 0.701 [0.79532758 0.6015625  0.55966209] 0.860
TRAIN Epoch: 7 0.677 0.718 [0.79573171 0.6352459  0.60082305] 0.846
TRAIN Epoch: 8 0.695 0.734 [0.81256332 0.66252588 0.60968074] 0.832
TRAIN Epoch: 9 0.721 0.751 [0.81706064 0.69344609 0.65213082] 0.824
TRAIN Epoch: 10 0.734 0.765 [0.82829318 0.70212766 0.67130089] 0.807
TRAIN Epoch: 11 0.740 0.771 [0.83367769 0.70464135 0.68172888] 0.802
TRAIN Epoch: 12 0.754 0.782 [0.84188699 0.72765957 0.69387755] 0.790
TRAIN Epoch: 13 0.768 0.792 [0.8496124  0.75       0.70395371] 0.783
TRAIN Epoch: 14 0.776 0.797 [0.84731631 0.75

KeyboardInterrupt: 

In [13]:
# Save the weights of the model fitted on all training data. torch.save does
# not create missing parent folders, so make sure `artifacts/` exists first.
Path('artifacts').mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), 'artifacts/model3.pt')

### Generate Test Predictions


In [14]:
# Run the current `model` over the test loader and preview the first results.
predictions = generate_test_results(
    model,
    test_dataloader,
    device,
)
predictions.head()

69      2
469     1
6       0
351     2
1001    2
dtype: int64

In [15]:
# Set submission format
df = pd.DataFrame(predictions)
df.columns = ['target']
df.head()

Unnamed: 0,target
69,2
469,1
6,0
351,2
1001,2


In [16]:
# Save
df.to_json('predictions.json', orient='columns', index=True)