In [None]:
# data manipulation
import pandas as pd
import numpy as np

import random
import os

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

from maskdataset import MaskDataset
from labeling import MakeLabel



In [None]:
# CSV locations handed to MakeLabel under the key names used below.
path = dict(
    train_label='/opt/ml/input/data/train/train_with_labels_fix_wrong_data.csv',
    train_vanilla='/opt/ml/input/data/train/train.csv',
)

# Manually identified bad records, grouped by the attribute that is wrong.
# NOTE(review): the "mask" entry looks like (ids, wrong value, corrected value);
# confirm against labeling.MakeLabel before editing it.
wrong_data = dict(
    gender=['006359', '006360', '006361', '006362', '006363', '006364', '001498-1', '004432'],
    mask=(['000020', '004418', '005227'], 'incollect_mask', 'normal'),
)

# Build the labeller from the two mappings and run its labeling step.
train_label_fixed = MakeLabel(path=path, wrong_data=wrong_data)
train_label_fixed.labeling()


In [None]:
# Seed every random number generator used by the notebook so that
# Restart Kernel -> Run All gives repeatable results.
SEED = 2021
random.seed(SEED)
np.random.seed(SEED)
# PYTHONHASHSEED is read at interpreter start-up, so this only affects
# processes launched after this point, not the running kernel.
os.environ["PYTHONHASHSEED"] = str(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)  # type: ignore
torch.backends.cudnn.deterministic = True  # type: ignore
# Benchmark mode lets cuDNN auto-tune and pick a convolution algorithm at
# run time, which may differ between runs; it must be off for the
# deterministic flag above to give reproducible results.
torch.backends.cudnn.benchmark = False  # type: ignore

In [None]:
import math


def _init_linear(layer):
    """Initialise a Linear layer: Xavier-uniform weight, bias ~ U(-1/sqrt(fan_in), 1/sqrt(fan_in))."""
    nn.init.xavier_uniform_(layer.weight)
    # weight is (out_features, in_features), so size(1) is the fan-in.
    bound = 1. / math.sqrt(layer.weight.size(1))
    nn.init.uniform_(layer.bias, -bound, bound)


# Start from the pretrained ResNet18.
model = torchvision.models.resnet18(pretrained=True)

# Freeze every pretrained parameter. The classification head is replaced
# afterwards, so its freshly created parameters remain trainable.
for param in model.parameters():
    param.requires_grad = False

print("네트워크 입력 채널 개수: ", model.conv1.weight.shape[1])
# nn.Linear stores its weight as (out_features, in_features): shape[0] is the
# number of outputs of the original head, so the label says "output".
print("네트워크 출력 차원 개수: ", model.fc.weight.shape[0])
print(''.join(["*"*15, "변경중", "*"*30]))

# Replacement head: hidden layer of 256 units with ReLU and dropout,
# followed by an 18-way output layer.
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, 18),
)

# Initialise the two Linear layers of the head (indices 0 and 3), in that order.
_init_linear(model.fc[0])
_init_linear(model.fc[3])

print("네트워크 입력 채널 개수", model.conv1.weight.shape[1])
print("네트워크 출력 차원 개수", model.fc[3].weight.shape[0])

# model

In [None]:
from maskdataset import MaskDataset
from torchvision import transforms
from PIL import Image

# Preprocessing applied to every sample: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.2.
# (Resize / CenterCrop were tried earlier and are intentionally not applied.)
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(mean=(0.5,) * 3, std=(0.2,) * 3)
transform = transforms.Compose([_to_tensor, _normalize])

train_dataset = MaskDataset(transform=transform, train=True)

# Shuffled mini-batches of 25; the final incomplete batch is dropped.
batch_size = 25
dataloader_train = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [None]:
from collections import Counter

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Move the ResNet18 model onto the selected device.
model.to(device)

# Number of training samples per label, ordered by label value.
label_counts = Counter(train_dataset.y)
counts_by_class = [label_counts[label] for label in sorted(label_counts)]

# Weight each class by (largest class count / its own count), so the most
# frequent class gets weight 1 and rarer classes get proportionally more.
largest_count = max(counts_by_class, default=0)
class_weights = torch.FloatTensor(
    [largest_count / count for count in counts_by_class]
).to(device)
print(class_weights)

# Training hyper-parameters.
lr = 0.0001
num_epoch = 5

loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
import warnings
from sklearn.metrics import f1_score

# Silence library warnings for the duration of the run (the original cell
# does this globally as well).
warnings.filterwarnings("ignore")

# Training loop. Tracks the epoch with the best (training-set) macro F1.
best_test_f1_score = 0
best_test_loss = 9999
# Initialised up front so the final summary print cannot hit a NameError
# when no epoch ever improves on the initial F1 of 0.
best_test_acc = 0
model.train()

for epoch in range(num_epoch):
    running_loss = 0.0
    running_f1 = 0.0
    running_correct = 0
    seen_samples = 0
    seen_batches = 0

    for images, labels in tqdm(dataloader_train):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit.
        preds_cpu = logits.argmax(dim=-1).cpu()
        labels_cpu = labels.cpu()

        n_in_batch = images.size(0)
        # Macro F1 is computed per batch and averaged over batches below.
        running_f1 += f1_score(labels_cpu.numpy(), preds_cpu.numpy(), average="macro")
        # Vectorised count of correct predictions, kept as a plain int.
        running_correct += (preds_cpu == labels_cpu).sum().item()
        running_loss += loss.item() * n_in_batch
        seen_samples += n_in_batch
        seen_batches += 1

    if seen_batches == 0:
        raise RuntimeError("dataloader_train yielded no batches; cannot compute epoch metrics")

    # Divide by the samples actually processed: the loader may skip the tail
    # of the dataset (e.g. drop_last=True), so len(dataset) would under-report
    # both loss and accuracy.
    epoch_acc = running_correct / seen_samples
    epoch_f1score = running_f1 / seen_batches
    epoch_loss = running_loss / seen_samples
    print(f"현재 epoch-{epoch+1} -데이터 셋에서 평균 Loss : {epoch_loss:.5f}, 평균 f1-score : {epoch_f1score:.3f}, 평균 acc : {epoch_acc:.3f}")
    if best_test_f1_score < epoch_f1score:
        best_test_f1_score = epoch_f1score
        best_test_loss = epoch_loss
        best_test_acc = epoch_acc
        print("best f1 score updated")
print("학습 종료!")
print(f"최고 f1_score : {best_test_f1_score} 일 떄 loss : {best_test_loss}, acc : {best_test_acc}")

In [None]:
from maskdataset import MaskDataset
from pytz import timezone
import datetime as dt

# Evaluation data goes through the same transform as training. The loader
# keeps the dataset order (shuffle=False) so predictions line up with the
# rows of the submission file.
validation_dataset = MaskDataset(transform=transform, train=False)
dataloader_validation = DataLoader(dataset=validation_dataset,
                                   shuffle=False)

# Read the submission template once, before inference, so a missing file
# fails fast instead of after the whole prediction pass.
submission = pd.read_csv("/opt/ml/input/data/eval/submission.csv")

# Uses the model trained above (load a checkpoint here instead if needed).
model.eval()

# Predict a class index for every evaluation sample.
all_predictions = []
with torch.no_grad():  # no gradients needed for inference
    for images in tqdm(dataloader_validation):
        images = images.to(device)
        pred = model(images).argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())

submission['ans'] = all_predictions

# Save the submission under a Seoul-time timestamp so runs do not overwrite
# each other.
now = (dt.datetime.now().astimezone(timezone("Asia/Seoul")).strftime("%Y-%m-%d_%H%M%S"))
submission.to_csv(f"/opt/ml/input/data/eval/submission_{now}.csv", index=False)

print('test inference is done!')