In [1]:
import numpy as np
import pandas as pd

from dataset import get_dataset
from trainer import Trainer
from inference import create_label, sum_label

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models

In [2]:
## hyperparameters
# Everything a reader is likely to tune lives in this cell.
lr = 0.00001          # Adam learning rate
batch_size = 64       # mini-batch size shared by the train/valid/test loaders
num_epoch = 30        # number of passes handed to Trainer.train
target = 'label'      # name of the target passed to get_dataset
seed = 42             # base seed for the random number generators below

# Seed NumPy and PyTorch so the weighted sampler draws and the freshly
# initialised classifier head are repeatable from run to run.
# (GPU kernels may still be nondeterministic; this is best-effort.)
np.random.seed(seed)
torch.manual_seed(seed)

In [3]:
## prepare dataset

# Alternative training split, kept for reference:
#df_train = pd.read_csv('df_all.csv')

# Read the three splits in one pass; the order is train, valid, test.
df_train, df_valid, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('df_train.csv',
                     'df_valid.csv',
                     '/opt/ml/input/data/eval/info.csv')
)

# get_dataset (project helper) returns one dataset object per split.
train_dataset, valid_dataset, test_dataset = get_dataset(
    df_train,
    df_valid,
    df_test,
    target=target,
)

In [4]:
# Build one sampling weight per training row: the inverse of its class frequency.
# `target` is rebound here to whatever target name the dataset object reports.
target = train_dataset.target

# Per-class row counts, ordered by label value (a Series indexed by label).
class_counts = df_train[target].value_counts().sort_index()
print('count :', class_counts.to_numpy())

# Inverse-frequency weight per class. Keeping the label index means the lookup
# below is by label value, not by position, so it stays correct even when the
# labels are not exactly 0..K-1 or a class is missing from the training split.
class_weight = 1. / class_counts
weight = class_weight.to_numpy()

# Vectorised per-row lookup; copy=True guarantees a writable float64 buffer
# for torch.from_numpy.
samples_weight = df_train[target].map(class_weight).to_numpy(dtype=np.float64, copy=True)
samples_weight = torch.from_numpy(samples_weight)

count : [2060 1175  670 2745 2510  965  412  235  134  549  502  193  412  235
  134  549  502  193]


In [5]:
from torch.utils.data.sampler import WeightedRandomSampler

# One weight per training row; each pass over the sampler draws half as many
# indices as there are rows.
num_draws = len(samples_weight) // 2
sampler = WeightedRandomSampler(
    weights=samples_weight.type('torch.DoubleTensor'),
    num_samples=num_draws,
)

In [6]:
## prepare model (resnet)
# NOTE(review): the following "efficient_net" cell rebinds `model`, `criterion`
# and `optimizer`, so this ResNet is only used if that cell is skipped.

# Use the GPU when one is present, otherwise fall back to CPU instead of
# crashing on the first .to('cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ImageNet-pretrained backbone with the final layer replaced so that it emits
# one logit per dataset class.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision in favour
# of `weights=`; left as-is because the installed version is not visible here.
model = models.resnet34(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(train_dataset.classes))
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [7]:
## prepare model (efficient_net)
# Rebinds `device`, `model`, `criterion` and `optimizer`; whatever an earlier
# cell assigned to those names is replaced.

from efficientnet_pytorch import EfficientNet

# Use the GPU when one is present, otherwise fall back to CPU instead of
# crashing on the first .to('cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Pretrained EfficientNet-B0 whose classifier is sized to the dataset's classes.
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=len(train_dataset.classes))
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

Loaded pretrained weights for efficientnet-b0


In [8]:
## prepare Dataloader

# Options shared by the loaders that only evaluate: fixed order, keep every sample.
eval_loader_kwargs = dict(batch_size=batch_size, shuffle=False, num_workers=2, drop_last=False)

# The training loader takes its indices from the weighted sampler and drops
# the final incomplete batch.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=2,
    drop_last=True,
    sampler=sampler,
)
valid_loader = DataLoader(valid_dataset, **eval_loader_kwargs)

dataloaders = {'train': train_loader, 'valid': valid_loader}
test_dataloader = DataLoader(test_dataset, **eval_loader_kwargs)

In [9]:
## train

# Directory handed to the project's Trainer; the inference cell loads its
# checkpoint from beneath this same path.
save_root = '/opt/ml/code/save'
trainer = Trainer(save_root)

In [12]:
# Run the full training loop with no learning-rate scheduler.
# NOTE(review): the recorded output of this cell ends with
# "NameError: name 'best_acc' is not defined" after "Training complete!".
# That name is not used in this notebook, so the error presumably comes from
# inside Trainer.train — confirm in trainer.py.
trainer.train(
    model,
    dataloaders,
    criterion,
    optimizer,
    device,
    num_epochs=num_epoch,
    scheduler=None,
    save_name='effnet_test',
    sub_dir=target,
)

Epoch 1/30
----------


100%|██████████| 110/110 [00:43<00:00,  2.55it/s]


train Loss: 0.4358 Acc: 0.8341 F1: 0.8338286175682995


100%|██████████| 74/74 [00:14<00:00,  5.24it/s]


valid Loss: 0.4903 Acc: 0.8197 F1: 0.7724251655415304

Epoch 2/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.59it/s]


train Loss: 0.4224 Acc: 0.8419 F1: 0.8411750426207241


100%|██████████| 74/74 [00:14<00:00,  5.26it/s]


valid Loss: 0.4947 Acc: 0.8180 F1: 0.770853895555288

Epoch 3/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.4180 Acc: 0.8456 F1: 0.8459032159735026


100%|██████████| 74/74 [00:14<00:00,  5.22it/s]


valid Loss: 0.4979 Acc: 0.8163 F1: 0.7739057708170196

Epoch 4/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.59it/s]


train Loss: 0.4124 Acc: 0.8449 F1: 0.8452159072180412


100%|██████████| 74/74 [00:14<00:00,  5.26it/s]


valid Loss: 0.4926 Acc: 0.8182 F1: 0.7718690578841324

Epoch 5/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.59it/s]


train Loss: 0.3976 Acc: 0.8497 F1: 0.8490989166664105


100%|██████████| 74/74 [00:14<00:00,  5.28it/s]


valid Loss: 0.5022 Acc: 0.8161 F1: 0.7719212738058459

Epoch 6/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.56it/s]


train Loss: 0.3997 Acc: 0.8538 F1: 0.8534969611238985


100%|██████████| 74/74 [00:14<00:00,  5.24it/s]


valid Loss: 0.5034 Acc: 0.8169 F1: 0.7731041513263539

Epoch 7/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.4003 Acc: 0.8527 F1: 0.8516917767068123


100%|██████████| 74/74 [00:14<00:00,  5.19it/s]


valid Loss: 0.4913 Acc: 0.8207 F1: 0.7669707883517787

Epoch 8/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.56it/s]


train Loss: 0.3958 Acc: 0.8537 F1: 0.853069887404081


100%|██████████| 74/74 [00:14<00:00,  5.25it/s]


valid Loss: 0.4924 Acc: 0.8216 F1: 0.7709017337406551

Epoch 9/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.59it/s]


train Loss: 0.3809 Acc: 0.8561 F1: 0.8552419674737138


100%|██████████| 74/74 [00:14<00:00,  5.25it/s]


valid Loss: 0.5022 Acc: 0.8188 F1: 0.7702054744689066

Epoch 10/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.3846 Acc: 0.8543 F1: 0.85364131857178


100%|██████████| 74/74 [00:14<00:00,  5.23it/s]


valid Loss: 0.5002 Acc: 0.8214 F1: 0.770698027386916

Epoch 11/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.61it/s]


train Loss: 0.3900 Acc: 0.8578 F1: 0.858058969022456


100%|██████████| 74/74 [00:14<00:00,  5.24it/s]


valid Loss: 0.5098 Acc: 0.8201 F1: 0.7742982950586732

Epoch 12/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.60it/s]


train Loss: 0.3850 Acc: 0.8548 F1: 0.8544514774371431


100%|██████████| 74/74 [00:14<00:00,  5.25it/s]


valid Loss: 0.5060 Acc: 0.8188 F1: 0.7679278650135057

Epoch 13/30
----------


100%|██████████| 110/110 [00:43<00:00,  2.54it/s]


train Loss: 0.3740 Acc: 0.8670 F1: 0.8672674461384128


100%|██████████| 74/74 [00:14<00:00,  5.25it/s]


valid Loss: 0.5161 Acc: 0.8171 F1: 0.768061021557033

Epoch 14/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.59it/s]


train Loss: 0.3702 Acc: 0.8611 F1: 0.8602233821796215


100%|██████████| 74/74 [00:14<00:00,  5.21it/s]


valid Loss: 0.5138 Acc: 0.8205 F1: 0.772389959323887

Epoch 15/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.58it/s]


train Loss: 0.3757 Acc: 0.8602 F1: 0.8597263658772532


100%|██████████| 74/74 [00:14<00:00,  5.23it/s]


valid Loss: 0.5090 Acc: 0.8210 F1: 0.7678788180329553

Epoch 16/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.58it/s]


train Loss: 0.3572 Acc: 0.8659 F1: 0.8660533608784221


100%|██████████| 74/74 [00:14<00:00,  5.25it/s]


valid Loss: 0.5128 Acc: 0.8207 F1: 0.7710473204453867

Epoch 17/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.3610 Acc: 0.8659 F1: 0.8652056909820781


100%|██████████| 74/74 [00:14<00:00,  5.24it/s]


valid Loss: 0.5166 Acc: 0.8190 F1: 0.7707650253757227

Epoch 18/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.3497 Acc: 0.8675 F1: 0.8661377105096344


100%|██████████| 74/74 [00:14<00:00,  5.26it/s]


valid Loss: 0.5145 Acc: 0.8216 F1: 0.7730658961754945

Epoch 19/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.60it/s]


train Loss: 0.3605 Acc: 0.8656 F1: 0.864548777242042


100%|██████████| 74/74 [00:14<00:00,  5.21it/s]


valid Loss: 0.5124 Acc: 0.8218 F1: 0.7685135505377279

Epoch 20/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.3372 Acc: 0.8750 F1: 0.8746488052845864


100%|██████████| 74/74 [00:14<00:00,  5.23it/s]


valid Loss: 0.5225 Acc: 0.8190 F1: 0.7704823762709768

Epoch 21/30
----------


100%|██████████| 110/110 [00:43<00:00,  2.54it/s]


train Loss: 0.3538 Acc: 0.8727 F1: 0.8732251111924532


100%|██████████| 74/74 [00:14<00:00,  5.25it/s]


valid Loss: 0.5250 Acc: 0.8203 F1: 0.7693932794924879

Epoch 22/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.61it/s]


train Loss: 0.3381 Acc: 0.8767 F1: 0.8767713211488378


100%|██████████| 74/74 [00:14<00:00,  5.24it/s]


valid Loss: 0.5298 Acc: 0.8199 F1: 0.7711278239040711

Epoch 23/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.3336 Acc: 0.8800 F1: 0.8801761691835563


100%|██████████| 74/74 [00:14<00:00,  5.25it/s]


valid Loss: 0.5226 Acc: 0.8239 F1: 0.7725116283140382

Epoch 24/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.61it/s]


train Loss: 0.3351 Acc: 0.8776 F1: 0.8781903118757669


100%|██████████| 74/74 [00:14<00:00,  5.24it/s]


valid Loss: 0.5264 Acc: 0.8214 F1: 0.7727101025800074

Epoch 25/30
----------


100%|██████████| 110/110 [00:43<00:00,  2.53it/s]


train Loss: 0.3249 Acc: 0.8818 F1: 0.8807959728640239


100%|██████████| 74/74 [00:14<00:00,  5.25it/s]


valid Loss: 0.5272 Acc: 0.8258 F1: 0.7751788159348503

Epoch 26/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.56it/s]


train Loss: 0.3340 Acc: 0.8770 F1: 0.8764444708043445


100%|██████████| 74/74 [00:14<00:00,  5.27it/s]


valid Loss: 0.5330 Acc: 0.8252 F1: 0.7723092624795432

Epoch 27/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.3067 Acc: 0.8895 F1: 0.88901464525243


100%|██████████| 74/74 [00:14<00:00,  5.22it/s]


valid Loss: 0.5417 Acc: 0.8184 F1: 0.7674335499129049

Epoch 28/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.57it/s]


train Loss: 0.3111 Acc: 0.8857 F1: 0.8855722647815895


100%|██████████| 74/74 [00:14<00:00,  5.27it/s]


valid Loss: 0.5358 Acc: 0.8229 F1: 0.7694593041154053

Epoch 29/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.60it/s]


train Loss: 0.3211 Acc: 0.8837 F1: 0.8834393998838623


100%|██████████| 74/74 [00:14<00:00,  5.27it/s]


valid Loss: 0.5415 Acc: 0.8229 F1: 0.7709474456889102

Epoch 30/30
----------


100%|██████████| 110/110 [00:42<00:00,  2.59it/s]


train Loss: 0.3065 Acc: 0.8915 F1: 0.89146416219421


100%|██████████| 74/74 [00:14<00:00,  5.23it/s]


valid Loss: 0.5396 Acc: 0.8239 F1: 0.7665696535123461

Training complete!


NameError: name 'best_acc' is not defined

In [11]:
## inference

# Load the saved weights onto whatever device the notebook is using, so a
# checkpoint written on a GPU still loads when `device` is different.
# NOTE(review): the file name says "resnet18" while the model cells in this
# notebook build resnet34 / efficientnet-b0; load_state_dict raises on
# mismatched keys, so confirm `model` has the matching architecture first.
state_dict = torch.load('/opt/ml/code/save/label/resnet18_58_008.pt', map_location=device)
model.load_state_dict(state_dict)
df_submit = create_label(model, test_dataloader, df_test.copy(), device)

# Alternative pipeline kept for reference: predict age, mask and gender with
# separate checkpoints and combine them with sum_label.
# model.load_state_dict(torch.load('/opt/ml/code/save/age/6e5_9110.pt'))
# df_submit = create_label(model, test_dataloader, df_test.copy(), device, target='age')

# model.load_state_dict(torch.load('/opt/ml/code/save/mask/6e5_9987.pt'))
# df_submit = create_label(model, test_dataloader, df_submit, device, target='mask')

# model.fc = nn.Linear(model.fc.in_features, 2).cuda()
# model.load_state_dict(torch.load('/opt/ml/code/save/gender/6e5_9851.pt'))
# df_submit = create_label(model, test_dataloader, df_submit, device, target='gender')

# df_submit = sum_label(df_submit)

# Write the predictions without the DataFrame index column.
df_submit.to_csv('test.csv', index=False)

100%|██████████| 197/197 [00:37<00:00,  5.32it/s]

inference ans complete!





In [None]:
from utils import imshow

# Pull a single batch from the training loader for a visual sanity check.
train_batches = iter(dataloaders['train'])
batch = next(train_batches)

# First element goes to the project's imshow helper, second element is printed.
batch_images, batch_targets = batch[0], batch[1]
imshow(batch_images)
print(batch_targets)

In [None]:
# Show one raw training batch as the cell output.
train_batches = iter(dataloaders['train'])
next(train_batches)

In [None]:
# plot_confusion_matrix was removed in scikit-learn 1.2 and is not used in
# this cell. Guard the import so the cell still runs on newer releases while
# the name stays available where the installed version provides it.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    plot_confusion_matrix = None
from sklearn.metrics import confusion_matrix

# Collect true and predicted class ids over the whole validation loader.
model.eval()
y_true = []
y_pred = []
# A single no_grad scope around the loop: no autograd state is needed here.
with torch.no_grad():
    for inputs, labels in dataloaders['valid']:
        outputs = model(inputs.to(device))
        # Index of the largest logit per row is the predicted class.
        preds = outputs.argmax(dim=1)
        # Labels are only turned into Python ints, so they are not moved to
        # the device.
        y_true.extend(labels.tolist())
        y_pred.extend(preds.tolist())

# Rows are true labels, columns are predictions.
mtx = confusion_matrix(y_true, y_pred)
print(mtx)


In [None]:
import matplotlib.pyplot as plt

# Draw `mtx` (rows = true label, columns = prediction) as an annotated heat map.

# Size the axes from the matrix itself rather than assuming 18 classes, so the
# annotation loop cannot index past the edge when `mtx` is smaller.
# NOTE(review): tick i is the i-th row/column of `mtx`; it equals the class id
# only if every class 0..K-1 occurs in the validation labels or predictions.
num_labels = mtx.shape[0]
label_ = list(range(num_labels))

# Colour scale limits; counts above cmap_vmax all share the darkest colour.
cmap_vmin, cmap_vmax = -20, 100

fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(mtx, cmap='Blues', vmax=cmap_vmax, vmin=cmap_vmin)

ax.set_xticks(np.arange(len(label_)))
ax.set_yticks(np.arange(len(label_)))
# ... and label them with the respective list entries
ax.set_xticklabels(label_)
ax.set_yticklabels(label_)

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

# Annotate every non-zero cell. Use white text on the dark half of the colour
# scale and black on the light half, so small counts stay readable.
text_threshold = (cmap_vmin + cmap_vmax) / 2
for i in range(num_labels):
    for j in range(num_labels):
        if mtx[i, j]:
            text_color = "w" if mtx[i, j] > text_threshold else "k"
            ax.text(j, i, mtx[i, j],
                    ha="center", va="center", color=text_color, size=10)
ax.set_xlabel('Prediction')
ax.set_ylabel('True Label')
ax.set_title("Confusion mtx")
fig.tight_layout()
plt.show()