In [4]:
import torch
import random
import pandas as pd
import numpy as np
import os

# 시드값 고정
seed = 50
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold
import timm

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [6]:
raw_data = pd.read_csv("./dataset/sign_mnist_train.csv")
test_data = pd.read_csv("./dataset/sign_mnist_test.csv")
# raw_data = pd.read_csv("./drive/MyDrive/Colab Notebooks/classification/data/sign_mnist_train.csv")

In [7]:
raw_data = raw_data[:2000]

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=50)

labels = raw_data.iloc[:, 0]
features = raw_data

val_data = pd.DataFrame()
train_data = pd.DataFrame()

for i, (_, label_index) in enumerate( kfold.split(features, labels)):
    pic_fold_data = features.iloc[label_index]

    if not i:
        val_data = pic_fold_data.reset_index(drop=True)
        
    else:
        train_data = pd.concat([train_data, pic_fold_data], ignore_index=True)

In [8]:
train_data.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,187,188,188,187,187,186,187,188,187,...,202,201,200,199,198,199,198,195,194,195
1,13,164,167,170,172,176,179,180,184,185,...,92,105,105,108,133,163,157,163,164,179
2,16,161,168,172,173,178,184,189,193,196,...,76,74,68,62,53,55,48,238,255,255
3,3,169,174,176,180,183,185,187,188,190,...,119,118,123,120,118,114,94,74,61,57
4,10,0,25,38,40,41,46,50,56,69,...,129,85,60,64,72,70,67,65,62,64


In [9]:
transforms_for_train = A.Compose([

                ])

transforms_for_test = A.Compose([

                ])

In [10]:
class Sign(Dataset):
    def __init__(self, data = None, is_train = False, transforms=None):
        self.label_data = data['label']
        self.pic_data = data.drop(labels = ["label"],axis = 1)
        self.pic_data = self.pic_data.values.reshape(-1,1, 28,28)
        self.pic_data = self.pic_data/255
        self.pic_data = torch.from_numpy(self.pic_data).float()
        self.pic_data = self.pic_data.expand(-1,3,-1,-1)
        self.is_train = is_train
        self.transforms = transforms

    def __len__(self):
        return len(self.label_data)

    def __getitem__(self, item):
        
        labels = self.label_data[item]
        images = self.pic_data[item]
        images = images.numpy()

        if self.is_train:
            images = self.transforms(image = images)['image']
        images = torch.from_numpy(images)
        # images = images/255

        return images, labels

In [11]:
train_dataset = Sign(data=train_data, is_train=True, transforms=transforms_for_train)
val_dataset = Sign(data=val_data, is_train=False)
test_dataset = Sign(data=test_data, is_train=False)

In [12]:
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=80,
    shuffle=True,
    num_workers=0,  # 로컬에서 작동한다면 4, 8 로 바꿔서 멀티프로세싱해도 됩니다
    drop_last=True,
)

val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=80,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=80,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

In [13]:
NUM_EPOCHS = 50
VAL_EVERY = 10
SAVE_DIR = './Save/'

In [14]:
def save_model(model, file_name='best_model_Adam.pt'):
    output_path = os.path.join(SAVE_DIR, file_name)
    if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)
    torch.save(model, output_path)

def validation(epoch, model, data_loader, criterion):
    print(f'Start validation #{epoch:2d}')
    with torch.no_grad():
        all_preds = []
        all_labels = []
        for step, (images, labels) in tqdm(enumerate(data_loader), total=len(data_loader)):
            images, labels = images.cuda(), labels.cuda()
            model = model.cuda()

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        f1 = f1_score(all_labels, all_preds, average='micro')
        print(f'F1 Score: {f1}')
    return f1

def train(model, data_loader, criterion, optimizer):
    print(f'Start training..')
    best_acc = 0

    for epoch in range(NUM_EPOCHS):
        model.train()
        for step, (images, labels) in enumerate(tqdm(data_loader)):
            images, labels = images.cuda(), labels.cuda()
            model = model.cuda()

            outputs = model(images)

            # loss를 계산합니다.
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # step 주기에 따라 loss를 출력합니다.
            if (step + 1) % 400 == 0:
                print(
                    f'Epoch [{epoch+1}/{NUM_EPOCHS}], '
                    f'Step [{step+1}/{len(train_loader)}], '
                    f'Loss: {round(loss.item(),4)}'
                )
        if (epoch + 1) % VAL_EVERY == 0:
            acc = validation(epoch + 1, model, val_loader, criterion)
            if best_acc < acc:
                print(f"Best performance at epoch: {epoch + 1}, {best_acc:.4f} -> {acc:.4f}")
                best_acc = acc
                save_model(model)

In [15]:
model = timm.create_model('resnet50', pretrained=True, num_classes=26)

criterion = nn.CrossEntropyLoss()

LR = 1e-3
optimizer = optim.AdamW(params=model.parameters(), lr=LR)

train(model, train_loader, criterion, optimizer)

Start training..


100%|██████████| 20/20 [00:05<00:00,  3.76it/s]
100%|██████████| 20/20 [00:03<00:00,  5.85it/s]
100%|██████████| 20/20 [00:03<00:00,  5.64it/s]
100%|██████████| 20/20 [00:04<00:00,  4.59it/s]
100%|██████████| 20/20 [00:07<00:00,  2.59it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]
100%|██████████| 20/20 [00:10<00:00,  1.94it/s]
100%|██████████| 20/20 [00:10<00:00,  1.97it/s]
100%|██████████| 20/20 [00:10<00:00,  1.97it/s]


Start validation #10


100%|██████████| 5/5 [00:00<00:00,  5.10it/s]


F1 Score: 0.925
Best performance at epoch: 10, 0.0000 -> 0.9250


100%|██████████| 20/20 [00:10<00:00,  1.90it/s]
100%|██████████| 20/20 [00:10<00:00,  1.88it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]
100%|██████████| 20/20 [00:10<00:00,  1.96it/s]
100%|██████████| 20/20 [00:10<00:00,  1.91it/s]
100%|██████████| 20/20 [00:10<00:00,  1.97it/s]
100%|██████████| 20/20 [00:10<00:00,  1.97it/s]
100%|██████████| 20/20 [00:10<00:00,  1.96it/s]
100%|██████████| 20/20 [00:10<00:00,  1.96it/s]
100%|██████████| 20/20 [00:10<00:00,  1.96it/s]


Start validation #20


100%|██████████| 5/5 [00:00<00:00,  5.39it/s]


F1 Score: 0.94
Best performance at epoch: 20, 0.9250 -> 0.9400


100%|██████████| 20/20 [00:10<00:00,  1.97it/s]
100%|██████████| 20/20 [00:10<00:00,  1.94it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]
100%|██████████| 20/20 [00:10<00:00,  1.98it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]
100%|██████████| 20/20 [00:10<00:00,  1.96it/s]
100%|██████████| 20/20 [00:10<00:00,  1.96it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]
100%|██████████| 20/20 [00:10<00:00,  1.96it/s]
100%|██████████| 20/20 [00:10<00:00,  1.91it/s]


Start validation #30


100%|██████████| 5/5 [00:00<00:00,  5.50it/s]


F1 Score: 0.96
Best performance at epoch: 30, 0.9400 -> 0.9600


100%|██████████| 20/20 [00:10<00:00,  1.97it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]
100%|██████████| 20/20 [00:10<00:00,  1.89it/s]
100%|██████████| 20/20 [00:10<00:00,  1.90it/s]
100%|██████████| 20/20 [00:10<00:00,  1.88it/s]
100%|██████████| 20/20 [00:10<00:00,  1.89it/s]
100%|██████████| 20/20 [00:10<00:00,  1.89it/s]
100%|██████████| 20/20 [00:10<00:00,  1.89it/s]
100%|██████████| 20/20 [00:10<00:00,  1.88it/s]
100%|██████████| 20/20 [00:10<00:00,  1.89it/s]


Start validation #40


100%|██████████| 5/5 [00:00<00:00,  5.26it/s]


F1 Score: 0.9525


100%|██████████| 20/20 [00:10<00:00,  1.90it/s]
100%|██████████| 20/20 [00:10<00:00,  1.88it/s]
100%|██████████| 20/20 [00:10<00:00,  1.88it/s]
100%|██████████| 20/20 [00:10<00:00,  1.89it/s]
100%|██████████| 20/20 [00:10<00:00,  1.88it/s]
100%|██████████| 20/20 [00:10<00:00,  1.88it/s]
100%|██████████| 20/20 [00:10<00:00,  1.90it/s]
100%|██████████| 20/20 [00:10<00:00,  1.88it/s]
100%|██████████| 20/20 [00:10<00:00,  1.89it/s]
100%|██████████| 20/20 [00:10<00:00,  1.93it/s]


Start validation #50


100%|██████████| 5/5 [00:00<00:00,  5.19it/s]


F1 Score: 0.9675
Best performance at epoch: 50, 0.9600 -> 0.9675


In [16]:
def test(model, test_loader, criterion):
    model_path = './Save/best_model_Adam.pt'
    model = torch.load(model_path)
    model.cuda()

    print(f'Start inference')
    model.eval()

    all_preds = []
    all_labels =[]

    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            images, labels = images.cuda(), labels.cuda()
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        count = sum(1 for pred, label in zip(preds, labels) if pred==label)

    acc = count/len(preds)
    f1 = f1_score(all_preds, all_labels, average='micro')
    return acc, f1


In [17]:
criterion = nn.CrossEntropyLoss()
acc, f1 = test(model, test_loader, criterion)

print(f'acc : {acc:.4f}')
print(f'f1 : {f1:.4f}')

Start inference


100%|██████████| 89/89 [00:15<00:00,  5.74it/s]


acc : 0.9500
f1 : 0.9265
