In [1]:
import os
import sys
import tarfile
import platform
import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import torchvision
from torchvision import transforms, utils,models
from torch.utils.data import Dataset, DataLoader

import re
from glob import glob
from PIL import Image
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [2]:
current_os = platform.system()
print(f"Current OS: {current_os}")
print(f"CUDA: {torch.cuda.is_available()}")
print(f"Python Version: {platform.python_version()}")
print(f"torch Version: {torch.__version__}")
print(f"torchvision Version: {torchvision.__version__}")

Current OS: Linux
CUDA: True
Python Version: 3.8.5
torch Version: 1.7.1
torchvision Version: 0.8.2


In [3]:
train_path = './input/data/train/'
path_img = os.path.join(train_path,'images')


In [4]:
class TrainDataset(Dataset):
    def __init__(self, train_path, is_train = True, transform=None):
        super().__init__()
        
        self.train_data = pd.read_csv(os.path.join(train_path,'train.csv'))
        self.img_dir = os.path.join(train_path,'images')
        self.transform = transform
        self.is_train = is_train
        self.img_paths = []
        self.target_class = []
        
        def age_cls(age):
            age=int(age)
            if age<30: return 0
            elif age <60 : return 1
            else : return 0
        
        for img_p in self.train_data.path:
            images_path = os.path.join(self.img_dir,img_p)
            images_path = [os.path.join(images_path,img_name) for img_name in os.listdir(images_path) if img_name[0] != '.' ]
            t_class = img_p.split('_')
            for image in images_path:
                self.img_paths.append(image)
                self.target_class.append((image.split('/')[-1].split('.')[0][:3],t_class[1],age_cls(t_class[3])))
                
        self.target_dict = {}
        tgt = [(mask, gender, age) for mask in ['mas', 'inc', 'nor'] for gender in ['male', 'female'] for age in [0, 1, 2]]
        
        for value,key in enumerate(tgt):
            self.target_dict[key]=value
        
    def __len__(self):
        return len(self.img_paths)
        
    def __getitem__(self,idx):
        im = Image.open(self.img_paths[idx])
        if self.transform != None:
            im = self.transform(im)
            
        img_output=im     
        
        if self.is_train:
            target_output = self.target_dict[self.target_class[idx]]
            
            return img_output, target_output
        
        else : 
            return img_output

        

In [5]:
tran = transforms.Compose([transforms.CenterCrop(224),
                        transforms.ToTensor(),
                        transforms.Normalize((0.5),(0.5))
                       ])
dataset=TrainDataset(train_path, is_train = True, transform=tran)

In [None]:
# pip install ipywidgets

# jupyter nbextension enable --py widgetsnbextension

# Model Define

In [6]:
import torchvision.models as models

In [12]:
class imgDenseNet(nn.Module):
    def __init__(self):
        super(imgDenseNet, self).__init__()
        self.densenet = models.densenet161(pretrained=True)
        self.linear_layers = nn.Linear(1000, 18)
        
    def forward(self, x):
        x = self.densenet(x)
        return self.linear_layers(x)

imgDN = imgDenseNet().to(device)
for param in imgDN.parameters():
    param.requires_grad = False
    
for param in imgDN.linear_layers.parameters():
    param.requires_grad = True

True
True


In [10]:
list(imgDN.modules())

[imgDenseNet(
   (densenet): DenseNet(
     (features): Sequential(
       (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
       (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (relu0): ReLU(inplace=True)
       (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
       (denseblock1): _DenseBlock(
         (denselayer1): _DenseLayer(
           (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
           (relu1): ReLU(inplace=True)
           (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
           (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
           (relu2): ReLU(inplace=True)
           (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
         )
         (denselayer2): _DenseLayer(
           (norm1): BatchNorm2d(144,

In [14]:
loss = nn.CrossEntropyLoss()
optm = optim.Adam(imgDN.parameters(),lr=1e-4)
print("Done")

Done


In [15]:
def func_eval(model,data_iter,device):
    with torch.no_grad():
        n_total,n_correct = 0,0
        model.eval() # evaluate (affects DropOut and BN)
        for batch_in,batch_out in data_iter:
            y_trgt = batch_out.to(device)
            model_pred = model(batch_in.view(-1,3,224,224).to(device))
            _,y_pred = torch.max(model_pred.data,1)
            n_correct += (y_pred==y_trgt).sum().item()
            n_total += batch_in.size(0)
        val_accr = (n_correct/n_total)
        model.train() # back to train mode 
    return val_accr
print ("Done")

Done


In [None]:
EPOCHS,print_every = 10,1
train_iter = DataLoader(dataset, batch_size=32, shuffle = True, drop_last=True)

for epoch in range(EPOCHS):
    loss_val_sum = 0
    for batch_in,batch_out in train_iter:
        # Forward path
        y_pred = imgDN.forward(batch_in.view(-1,3,224,224).to(device))
        loss_out = loss(y_pred,batch_out.to(device))
        # Update
        optm.zero_grad()    # FILL IN HERE      # reset gradient 
        loss_out.backward()    # FILL IN HERE      # backpropagate
        optm.step()    # FILL IN HERE      # optimizer update
        loss_val_sum += loss_out
    loss_val_avg = loss_val_sum/len(train_iter)
    # Print
    if ((epoch%print_every)==0) or (epoch==(EPOCHS-1)):
        train_accr = func_eval(imgDN,train_iter,device)
#         test_accr = func_eval(imgDN,test_iter,device)
        print ("epoch:[%d] loss:[%.3f] train_accr:[%.3f] "% #test_accr:[%.3f].  /  ,test_accr
               (epoch,loss_val_avg,train_accr))
print ("Done")

epoch:[0] loss:[1.242] train_accr:[0.731] 


In [36]:
test_path = './input/data/eval/'

In [37]:
class TestDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.img_paths = image_dir
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)
    


In [39]:
# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_path, 'info.csv'))
image_dir = os.path.join(test_path, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
testset = TestDataset(image_paths, tran)

loader = DataLoader(
    testset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
# device = torch.device('cuda')

imgDN.eval()
device = torch.device("cuda:0")
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in loader:
    with torch.no_grad():
        images = images.to(device)
        pred = imgDN(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_path, 'submission.csv'), index=False)
print('test inference is done!')

test inference is done!


In [40]:
model_path = '/opt/ml/weights/'
torch.save(imgDN.state_dict(), model_path + 'model.pt')

In [None]:
from pytz import timezone
import datetime as dt
# 제출할 파일을 저장합니다.
now = (dt.datetime.now().astimezone(timezone("Asia/Seoul")).strftime("%Y-%m-%d_%H%M%S"))
submission.to_csv(f"/opt/ml/pytorch-template/project-hun/output/sub/sub_{now}.csv", index=False)
 