In [1]:
import torch # 파이토치
import random
import pandas as pd
import numpy as np
import os

# 시드값 고정
seed = 50
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import timm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
raw_data = pd.read_csv("./dataset/sign_mnist_train.csv")
test_data = pd.read_csv("./dataset/sign_mnist_test.csv")
# raw_data = pd.read_csv("./drive/MyDrive/Colab Notebooks/classification/data/sign_mnist_train.csv")

In [5]:
raw_data = raw_data[:2000]
train_data, val_data = train_test_split(raw_data,test_size=0.2, random_state=50)
train_data = train_data.reset_index(drop=True)
val_data = val_data.reset_index(drop=True)
print(len(train_data))
print(len(val_data))

1600
400


In [6]:
train_data.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,11,156,159,163,164,165,167,169,170,170,...,199,197,197,195,194,192,188,186,183,181
1,11,163,163,164,164,165,165,164,164,164,...,48,49,42,39,49,26,33,34,42,94
2,0,172,175,177,181,183,185,187,187,188,...,19,19,27,32,19,16,16,18,27,32
3,0,154,155,158,159,160,163,165,165,166,...,188,187,187,187,187,186,186,185,185,184
4,23,162,163,165,168,168,169,168,170,170,...,230,230,230,230,229,229,228,228,227,227


In [7]:
transforms_for_train = A.Compose([
                ])

transforms_for_test = A.Compose([
                ])

In [8]:
class Sign(Dataset):
    def __init__(self, data = None, is_train = False, transforms=None):
        self.label_data = data['label']
        self.pic_data = data.drop(labels = ["label"],axis = 1)
        self.pic_data = self.pic_data.values.reshape(-1,1, 28,28)
        self.pic_data = self.pic_data/255
        self.pic_data = torch.from_numpy(self.pic_data).float()
        self.pic_data = self.pic_data.expand(-1,3,-1,-1)
        self.is_train = is_train
        self.transforms = transforms

    def __len__(self):
        return len(self.label_data)

    def __getitem__(self, item):
        
        labels = self.label_data[item]
        images = self.pic_data[item]
        images = images.numpy()

        if self.is_train:
            images = self.transforms(image = images)['image']
        images = torch.from_numpy(images)

        return images, labels

In [9]:
train_dataset = Sign(data=train_data, is_train=True, transforms=transforms_for_train)
val_dataset = Sign(data=val_data, is_train=False)
test_dataset = Sign(data=test_data, is_train=False)

In [10]:
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=80,
    shuffle=True,
    num_workers=0,  # 로컬에서 작동한다면 4, 8 로 바꿔서 멀티프로세싱해도 됩니다
    drop_last=True,
)

val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=80,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=80,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

In [11]:
NUM_EPOCHS = 50
VAL_EVERY = 10
SAVE_DIR = './Save/'

In [12]:
def save_model(model, file_name='best_model_base.pt'):
    output_path = os.path.join(SAVE_DIR, file_name)
    if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)
    torch.save(model, output_path)

def validation(epoch, model, data_loader, criterion):
    print(f'Start validation #{epoch:2d}')
    model.eval()
    with torch.no_grad():
        all_preds = []
        all_labels = []
        for step, (images, labels) in tqdm(enumerate(data_loader), total=len(data_loader)):
            images, labels = images.cuda(), labels.cuda()
            model = model.cuda()

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        f1 = f1_score(all_labels, all_preds, average='micro')
        print(f'F1 Score: {f1}')
    return f1

def train(model, data_loader, criterion, optimizer):
    print(f'Start training..')
    best_acc = 0

    for epoch in range(NUM_EPOCHS):
        model.train()
        for step, (images, labels) in enumerate(tqdm(data_loader)):
            images, labels = images.cuda(), labels.cuda()
            model = model.cuda()

            outputs = model(images)

            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # step 주기에 따라 loss를 출력.
            if (step + 1) % 400 == 0:
                print(
                    f'Epoch [{epoch+1}/{NUM_EPOCHS}], '
                    f'Step [{step+1}/{len(train_loader)}], '
                    f'Loss: {round(loss.item(),4)}'
                )
        if (epoch + 1) % VAL_EVERY == 0:
            acc = validation(epoch + 1, model, val_loader, criterion)
            if best_acc < acc:
                print(f"Best performance at epoch: {epoch + 1}, {best_acc:.4f} -> {acc:.4f}")
                best_acc = acc
                save_model(model)

In [13]:
model = timm.create_model('resnet50', pretrained=True, num_classes=26)

criterion = nn.CrossEntropyLoss()

LR = 1e-3
optimizer = optim.SGD(params=model.parameters(), lr=LR)

train(model, train_loader, criterion, optimizer)

Start training..


100%|██████████| 20/20 [00:06<00:00,  3.04it/s]
100%|██████████| 20/20 [00:03<00:00,  5.57it/s]
100%|██████████| 20/20 [00:03<00:00,  5.66it/s]
100%|██████████| 20/20 [00:03<00:00,  5.48it/s]
100%|██████████| 20/20 [00:03<00:00,  5.65it/s]
100%|██████████| 20/20 [00:03<00:00,  5.68it/s]
100%|██████████| 20/20 [00:03<00:00,  5.68it/s]
100%|██████████| 20/20 [00:03<00:00,  5.78it/s]
100%|██████████| 20/20 [00:03<00:00,  5.79it/s]
100%|██████████| 20/20 [00:03<00:00,  5.78it/s]


Start validation #10


100%|██████████| 5/5 [00:00<00:00, 15.47it/s]


F1 Score: 0.06
Best performance at epoch: 10, 0.0000 -> 0.0600


100%|██████████| 20/20 [00:03<00:00,  6.04it/s]
100%|██████████| 20/20 [00:03<00:00,  5.93it/s]
100%|██████████| 20/20 [00:03<00:00,  5.78it/s]
100%|██████████| 20/20 [00:03<00:00,  5.79it/s]
100%|██████████| 20/20 [00:03<00:00,  5.71it/s]
100%|██████████| 20/20 [00:03<00:00,  5.96it/s]
100%|██████████| 20/20 [00:03<00:00,  5.95it/s]
100%|██████████| 20/20 [00:03<00:00,  5.97it/s]
100%|██████████| 20/20 [00:03<00:00,  6.01it/s]
100%|██████████| 20/20 [00:03<00:00,  5.85it/s]


Start validation #20


100%|██████████| 5/5 [00:00<00:00, 16.16it/s]


F1 Score: 0.07
Best performance at epoch: 20, 0.0600 -> 0.0700


100%|██████████| 20/20 [00:03<00:00,  5.82it/s]
100%|██████████| 20/20 [00:03<00:00,  5.84it/s]
100%|██████████| 20/20 [00:03<00:00,  5.70it/s]
100%|██████████| 20/20 [00:03<00:00,  5.67it/s]
100%|██████████| 20/20 [00:03<00:00,  5.25it/s]
100%|██████████| 20/20 [00:06<00:00,  3.04it/s]
100%|██████████| 20/20 [00:06<00:00,  2.92it/s]
100%|██████████| 20/20 [00:06<00:00,  2.94it/s]
100%|██████████| 20/20 [00:06<00:00,  2.92it/s]
100%|██████████| 20/20 [00:08<00:00,  2.42it/s]


Start validation #30


100%|██████████| 5/5 [00:00<00:00,  5.43it/s]


F1 Score: 0.12
Best performance at epoch: 30, 0.0700 -> 0.1200


100%|██████████| 20/20 [00:09<00:00,  2.13it/s]
100%|██████████| 20/20 [00:09<00:00,  2.04it/s]
100%|██████████| 20/20 [00:09<00:00,  2.08it/s]
100%|██████████| 20/20 [00:09<00:00,  2.01it/s]
100%|██████████| 20/20 [00:09<00:00,  2.14it/s]
100%|██████████| 20/20 [00:09<00:00,  2.08it/s]
100%|██████████| 20/20 [00:09<00:00,  2.18it/s]
100%|██████████| 20/20 [00:09<00:00,  2.03it/s]
100%|██████████| 20/20 [00:09<00:00,  2.11it/s]
100%|██████████| 20/20 [00:09<00:00,  2.03it/s]


Start validation #40


100%|██████████| 5/5 [00:00<00:00,  5.39it/s]


F1 Score: 0.1425
Best performance at epoch: 40, 0.1200 -> 0.1425


100%|██████████| 20/20 [00:10<00:00,  2.00it/s]
100%|██████████| 20/20 [00:09<00:00,  2.02it/s]
100%|██████████| 20/20 [00:10<00:00,  1.98it/s]
100%|██████████| 20/20 [00:10<00:00,  1.99it/s]
100%|██████████| 20/20 [00:09<00:00,  2.00it/s]
100%|██████████| 20/20 [00:09<00:00,  2.04it/s]
100%|██████████| 20/20 [00:10<00:00,  1.96it/s]
100%|██████████| 20/20 [00:09<00:00,  2.04it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]
100%|██████████| 20/20 [00:09<00:00,  2.06it/s]


Start validation #50


100%|██████████| 5/5 [00:00<00:00,  6.06it/s]


F1 Score: 0.1525
Best performance at epoch: 50, 0.1425 -> 0.1525


In [14]:
def test(model, test_loader, criterion):
    model_path = './Save/best_model_base.pt'
    model = torch.load(model_path)
    model.cuda()

    print(f'Start inference')
    model.eval()

    all_preds = []
    all_labels =[]

    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            images, labels = images.cuda(), labels.cuda()
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        count = sum(1 for pred, label in zip(preds, labels) if pred==label)

    acc = count/len(preds)
    f1 = f1_score(all_preds, all_labels, average='micro')
    return acc, f1


In [15]:
criterion = nn.CrossEntropyLoss()
acc, f1 = test(model, test_loader, criterion)

print(f'acc : {acc:.4f}')
print(f'f1 : {f1:.4f}')

Start inference


100%|██████████| 89/89 [00:15<00:00,  5.90it/s]


acc : 0.0750
f1 : 0.1022
