In [1]:
import numpy as np
import pandas as pd

from dataset import get_dataset
from trainer import Trainer
from inference import create_label, sum_label

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models

In [None]:
## hyperparameters
# Run-level settings consumed by the cells below.
target = 'label'   # passed to get_dataset(..., target=target)
num_epoch = 30     # passed to trainer.train(..., num_epochs=num_epoch)
batch_size = 64    # used for every DataLoader
lr = 3e-5          # Adam learning rate

In [3]:
## prepare dataset

# Read the three metadata tables in one pass (train, valid, test — in that order).
# To train on the train-only split, replace 'df_all.csv' with 'df_train.csv'.
# NOTE(review): training reads 'df_all.csv' while validation reads 'df_valid.csv';
# if df_all.csv contains the validation rows, the validation metrics are not
# held-out — confirm.
df_train, df_valid, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'df_all.csv',
        'df_valid.csv',
        '/opt/ml/input/data/eval/info.csv',
    )
)

# get_dataset returns one dataset object per input frame, in the same order.
train_dataset, valid_dataset, test_dataset = get_dataset(
    df_train,
    df_valid,
    df_test,
    target=target,
)

In [4]:
# Build one sampling weight per training row: the inverse of how often that
# row's class occurs in df_train, so rare classes are drawn more often.
# NOTE: this rebinds `target` to train_dataset.target; later cells
# (e.g. sub_dir=target in the training call) see this value.
# presumably train_dataset.target is a column name of df_train — TODO confirm
target = train_dataset.target

# Class frequencies, ordered by label value.
class_counts = df_train[target].value_counts().sort_index()
print('count :', class_counts.to_numpy())

# Kept as a plain array in label order, as before.
weight = (1. / class_counts).to_numpy()

# Look the weight up by label VALUE (Series.map uses the index of
# class_counts) instead of using the label as a position into the sorted
# array; positional indexing is only correct when the labels are exactly
# 0..K-1 with no missing class.
samples_weight = df_train[target].map(1. / class_counts).to_numpy(dtype=np.float64, copy=True)
samples_weight = torch.from_numpy(samples_weight)

count : [2745 1570  895 3660 3345 1285  549  314  179  732  669  257  549  314
  179  732  669  257]


In [5]:
from torch.utils.data.sampler import WeightedRandomSampler

# Draw half as many samples per epoch as there are weighted rows, using the
# per-row weights (cast to float64) as sampling weights.
samples_per_epoch = len(samples_weight) // 2
sampler = WeightedRandomSampler(
    weights=samples_weight.double(),
    num_samples=samples_per_epoch,
)

In [6]:
## prepare model (resnet)
# Alternative backbone: this cell and the efficient_net cell assign the same
# names (device, model, criterion, optimizer); whichever runs last is the one
# that gets trained.

# Fall back to CPU when no GPU is visible instead of failing on .to('cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ImageNet-pretrained ResNet-34 with the final fully connected layer replaced
# by one sized to the number of dataset classes.
model = models.resnet34(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(train_dataset.classes))
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [6]:
## prepare model (efficient_net)
# Alternative backbone: this cell and the resnet cell assign the same names
# (device, model, criterion, optimizer); whichever runs last is the one that
# gets trained.

from efficientnet_pytorch import EfficientNet

# Fall back to CPU when no GPU is visible instead of failing on .to('cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Pretrained EfficientNet-B0 with a classifier sized to the dataset classes.
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=len(train_dataset.classes))
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

Loaded pretrained weights for efficientnet-b0


In [7]:
## prepare Dataloader

# Settings shared by all three loaders.
_loader_opts = dict(batch_size=batch_size, num_workers=3)

dataloaders = {
    # Training draws through the weighted sampler and drops the last partial batch.
    'train': DataLoader(train_dataset, sampler=sampler, drop_last=True, **_loader_opts),
    # Validation keeps dataset order and every sample.
    'valid': DataLoader(valid_dataset, shuffle=False, drop_last=False, **_loader_opts),
}

# Test loader: dataset order, no samples dropped.
test_dataloader = DataLoader(test_dataset, shuffle=False, drop_last=False, **_loader_opts)

In [8]:
## train

# Trainer is constructed with this directory string; presumably it is where
# checkpoints are written (the checkpoints loaded elsewhere in this notebook
# live under it) — confirm in trainer.py.
save_root = '/opt/ml/code/save'
trainer = Trainer(save_root)

In [None]:
# change lr manually (`optimizer` is the Adam instance; `optim` is the torch.optim module and has no param_groups):

# for g in optimizer.param_groups:
#     g['lr'] = 0.001

In [12]:
# Disabled resume step: uncomment to start from a saved checkpoint.
#model.load_state_dict(torch.load('/opt/ml/code/save/label/effnet_test018.pt'))

# `target` here is the value rebound from train_dataset.target in an earlier
# cell, and it is used as the sub-directory argument.
# NOTE(review): the recorded output of this cell ends with
# "NameError: name 'best_acc' is not defined" right after "Training complete!".
# That name is not defined anywhere in this notebook, so the error presumably
# originates inside Trainer.train — confirm and fix it there.
trainer.train(
    model,
    dataloaders,
    criterion,
    optimizer,
    device,
    num_epochs=num_epoch,
    scheduler=None,
    save_name='effnet_test',
    sub_dir=target,
)

Epoch 1/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.81it/s]


train Loss: 0.1771 Acc: 0.9374 F1: 0.9375016756797544


100%|██████████| 74/74 [00:09<00:00,  7.61it/s]


valid Loss: 0.0201 Acc: 0.9953 F1: 0.9960286941481069

Epoch 2/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.84it/s]


train Loss: 0.1656 Acc: 0.9412 F1: 0.9413592865880133


100%|██████████| 74/74 [00:09<00:00,  7.68it/s]


valid Loss: 0.0189 Acc: 0.9960 F1: 0.9962856179034192

Epoch 3/30
----------


100%|██████████| 147/147 [00:39<00:00,  3.74it/s]


train Loss: 0.1662 Acc: 0.9411 F1: 0.9410989647867074


100%|██████████| 74/74 [00:09<00:00,  7.67it/s]


valid Loss: 0.0189 Acc: 0.9966 F1: 0.9967373458800045

Epoch 4/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.82it/s]


train Loss: 0.1623 Acc: 0.9442 F1: 0.9442162826714972


100%|██████████| 74/74 [00:09<00:00,  7.66it/s]


valid Loss: 0.0212 Acc: 0.9951 F1: 0.9956419970254627

Epoch 5/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.79it/s]


train Loss: 0.1671 Acc: 0.9391 F1: 0.9389898263620895


100%|██████████| 74/74 [00:09<00:00,  7.57it/s]


valid Loss: 0.0177 Acc: 0.9960 F1: 0.9958063774507439

Epoch 6/30
----------


100%|██████████| 147/147 [00:39<00:00,  3.74it/s]


train Loss: 0.1632 Acc: 0.9399 F1: 0.939798809926105


100%|██████████| 74/74 [00:09<00:00,  7.61it/s]


valid Loss: 0.0198 Acc: 0.9945 F1: 0.9955746556248486

Epoch 7/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.82it/s]


train Loss: 0.1574 Acc: 0.9426 F1: 0.9426183504492206


100%|██████████| 74/74 [00:09<00:00,  7.66it/s]


valid Loss: 0.0158 Acc: 0.9968 F1: 0.9965806953679512

Epoch 8/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.83it/s]


train Loss: 0.1568 Acc: 0.9426 F1: 0.9423914913431299


100%|██████████| 74/74 [00:09<00:00,  7.59it/s]


valid Loss: 0.0172 Acc: 0.9968 F1: 0.996768974034466

Epoch 9/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.81it/s]


train Loss: 0.1654 Acc: 0.9404 F1: 0.9402585513478608


100%|██████████| 74/74 [00:09<00:00,  7.58it/s]


valid Loss: 0.0188 Acc: 0.9949 F1: 0.9958557001379786

Epoch 10/30
----------


100%|██████████| 147/147 [00:39<00:00,  3.74it/s]


train Loss: 0.1426 Acc: 0.9482 F1: 0.9480032862433352


100%|██████████| 74/74 [00:09<00:00,  7.60it/s]


valid Loss: 0.0134 Acc: 0.9975 F1: 0.9973957478371082

Epoch 11/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.80it/s]


train Loss: 0.1553 Acc: 0.9472 F1: 0.9472783810682525


100%|██████████| 74/74 [00:09<00:00,  7.65it/s]


valid Loss: 0.0143 Acc: 0.9970 F1: 0.9975384573720911

Epoch 12/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.79it/s]


train Loss: 0.1513 Acc: 0.9466 F1: 0.9464577348637506


100%|██████████| 74/74 [00:09<00:00,  7.62it/s]


valid Loss: 0.0131 Acc: 0.9970 F1: 0.9975456158705046

Epoch 13/30
----------


100%|██████████| 147/147 [00:39<00:00,  3.77it/s]


train Loss: 0.1478 Acc: 0.9479 F1: 0.9478772246211774


100%|██████████| 74/74 [00:09<00:00,  7.58it/s]


valid Loss: 0.0101 Acc: 0.9977 F1: 0.9979837564233487

Epoch 14/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.84it/s]


train Loss: 0.1509 Acc: 0.9457 F1: 0.945865075437547


100%|██████████| 74/74 [00:09<00:00,  7.67it/s]


valid Loss: 0.0121 Acc: 0.9979 F1: 0.998103825104232

Epoch 15/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.82it/s]


train Loss: 0.1388 Acc: 0.9513 F1: 0.9512386739177224


100%|██████████| 74/74 [00:09<00:00,  7.65it/s]


valid Loss: 0.0097 Acc: 0.9979 F1: 0.9980964385836776

Epoch 16/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.85it/s]


train Loss: 0.1568 Acc: 0.9430 F1: 0.94253162830043


100%|██████████| 74/74 [00:09<00:00,  7.57it/s]


valid Loss: 0.0105 Acc: 0.9977 F1: 0.9976421534400948

Epoch 17/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.83it/s]


train Loss: 0.1379 Acc: 0.9517 F1: 0.9515540623723461


100%|██████████| 74/74 [00:09<00:00,  7.65it/s]


valid Loss: 0.0108 Acc: 0.9979 F1: 0.9980945443355732

Epoch 18/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.78it/s]


train Loss: 0.1491 Acc: 0.9462 F1: 0.9461732777390418


100%|██████████| 74/74 [00:09<00:00,  7.59it/s]


valid Loss: 0.0112 Acc: 0.9972 F1: 0.9978409249833713

Epoch 19/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.80it/s]


train Loss: 0.1393 Acc: 0.9483 F1: 0.9479939332885805


100%|██████████| 74/74 [00:09<00:00,  7.61it/s]


valid Loss: 0.0092 Acc: 0.9983 F1: 0.9982783481728753

Epoch 20/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.83it/s]


train Loss: 0.1346 Acc: 0.9499 F1: 0.9499520850867715


100%|██████████| 74/74 [00:09<00:00,  7.61it/s]


valid Loss: 0.0092 Acc: 0.9983 F1: 0.9981110548687006

Epoch 21/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.86it/s]


train Loss: 0.1346 Acc: 0.9522 F1: 0.9519988269945464


100%|██████████| 74/74 [00:09<00:00,  7.58it/s]


valid Loss: 0.0090 Acc: 0.9985 F1: 0.9982932386060956

Epoch 22/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.79it/s]


train Loss: 0.1485 Acc: 0.9461 F1: 0.9462317336306683


100%|██████████| 74/74 [00:09<00:00,  7.62it/s]


valid Loss: 0.0112 Acc: 0.9981 F1: 0.9980572809053986

Epoch 23/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.80it/s]


train Loss: 0.1414 Acc: 0.9487 F1: 0.9485982468766425


100%|██████████| 74/74 [00:09<00:00,  7.61it/s]


valid Loss: 0.0075 Acc: 0.9985 F1: 0.9983419832558084

Epoch 24/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.79it/s]


train Loss: 0.1350 Acc: 0.9523 F1: 0.951889503413923


100%|██████████| 74/74 [00:09<00:00,  7.64it/s]


valid Loss: 0.0075 Acc: 0.9981 F1: 0.9982147595147102

Epoch 25/30
----------


100%|██████████| 147/147 [00:39<00:00,  3.76it/s]


train Loss: 0.1349 Acc: 0.9476 F1: 0.9476926881185371


100%|██████████| 74/74 [00:09<00:00,  7.63it/s]


valid Loss: 0.0084 Acc: 0.9981 F1: 0.9982221391124537

Epoch 26/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.84it/s]


train Loss: 0.1321 Acc: 0.9524 F1: 0.9523314422517899


100%|██████████| 74/74 [00:09<00:00,  7.67it/s]


valid Loss: 0.0077 Acc: 0.9987 F1: 0.9984619400973325

Epoch 27/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.86it/s]


train Loss: 0.1384 Acc: 0.9498 F1: 0.9495692997789682


100%|██████████| 74/74 [00:09<00:00,  7.60it/s]


valid Loss: 0.0071 Acc: 0.9989 F1: 0.9985329221769342

Epoch 28/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.79it/s]


train Loss: 0.1274 Acc: 0.9527 F1: 0.9526224274975862


100%|██████████| 74/74 [00:09<00:00,  7.63it/s]


valid Loss: 0.0080 Acc: 0.9983 F1: 0.9983337636179004

Epoch 29/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.81it/s]


train Loss: 0.1381 Acc: 0.9505 F1: 0.95028499536375


100%|██████████| 74/74 [00:09<00:00,  7.66it/s]


valid Loss: 0.0066 Acc: 0.9981 F1: 0.9983903428777116

Epoch 30/30
----------


100%|██████████| 147/147 [00:38<00:00,  3.80it/s]


train Loss: 0.1320 Acc: 0.9528 F1: 0.952827951213968


100%|██████████| 74/74 [00:09<00:00,  7.70it/s]


valid Loss: 0.0068 Acc: 0.9985 F1: 0.9985736080432789

Training complete!


NameError: name 'best_acc' is not defined

In [13]:
## inference

# Restore the chosen checkpoint into the current model, then label the test set.
state_dict = torch.load('/opt/ml/code/save/label/effnet_test028.pt')
model.load_state_dict(state_dict)
df_submit = create_label(model, test_dataloader, df_test.copy(), device)

# Disabled per-attribute variant, kept for reference: separate age / mask /
# gender checkpoints are applied one after another and merged with sum_label.
# NOTE(review): the `model.fc` line presumably targets the resnet model —
# confirm before re-enabling it with a different backbone.
#
# model.load_state_dict(torch.load('/opt/ml/code/save/age/6e5_9110.pt'))
# df_submit = create_label(model, test_dataloader, df_test.copy(), device, target='age')
#
# model.load_state_dict(torch.load('/opt/ml/code/save/mask/6e5_9987.pt'))
# df_submit = create_label(model, test_dataloader, df_submit, device, target='mask')
#
# model.fc = nn.Linear(model.fc.in_features, 2).cuda()
# model.load_state_dict(torch.load('/opt/ml/code/save/gender/6e5_9851.pt'))
# df_submit = create_label(model, test_dataloader, df_submit, device, target='gender')
#
# df_submit = sum_label(df_submit)

# Write the submission table without the index column.
df_submit.to_csv('submit.csv', index=False)

100%|██████████| 197/197 [00:24<00:00,  7.92it/s]

inference ans complete!





In [None]:
from utils import imshow

# Sanity check: take the first batch the training loader yields, show its
# first element with imshow and print its second element.
batch = next(iter(dataloaders['train']))
sample_inputs, sample_labels = batch[0], batch[1]
imshow(sample_inputs)
print(sample_labels)

In [None]:
# Debug aid: display the raw contents of one training batch as the cell output.
train_iter = iter(dataloaders['train'])
next(train_iter)

In [None]:
# Only confusion_matrix is needed here. The previously imported (and unused)
# plot_confusion_matrix was removed from scikit-learn in 1.2, so importing it
# makes this cell fail on current versions.
from sklearn.metrics import confusion_matrix

# Collect true and predicted class indices over the whole validation loader.
model.eval()
y_true = []
y_pred = []
with torch.no_grad():  # inference only: no gradients needed
    for inputs, labels in dataloaders['valid']:
        outputs = model(inputs.to(device))
        # Predicted class = index of the largest output along dim 1.
        _, preds = torch.max(outputs, 1)
        # Labels are only converted to a Python list, so they do not need to
        # be moved to the device first.
        y_true.extend(labels.tolist())
        y_pred.extend(preds.tolist())

# Rows are true labels, columns are predictions.
mtx = confusion_matrix(y_true, y_pred)
print(mtx)


In [None]:
import matplotlib.pyplot as plt

# Size the plot from the matrix itself rather than a hard-coded 18, so a
# smaller matrix no longer raises IndexError in the annotation loop below.
# NOTE: the tick labels are row/column positions of `mtx`; they equal the
# class ids only when every class 0..K-1 appears in the matrix.
num_classes = mtx.shape[0]
label_ = list(range(num_classes))

fig, ax = plt.subplots(figsize=(8,8))
# Fixed colour range: counts at or above 100 saturate; vmin below zero keeps
# small non-zero counts visibly shaded.
im = ax.imshow(mtx, cmap='Blues', vmax=100, vmin=-20)

ax.set_xticks(np.arange(len(label_)))
ax.set_yticks(np.arange(len(label_)))
# ... and label them with the respective list entries
ax.set_xticklabels(label_)
ax.set_yticklabels(label_)

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

# Annotate every non-zero cell with its count (x = column j, y = row i).
for i in range(num_classes):
    for j in range(num_classes):
        if mtx[i, j]:
            ax.text(j, i, mtx[i, j],
                    ha="center", va="center", color="w", size=10)
ax.set_xlabel('Prediction')
ax.set_ylabel('True Label')
ax.set_title("Confusion mtx")
fig.tight_layout()
plt.show()