In [None]:
# Mount Google Drive at /content/drive (Colab-only API); later cells read the
# label CSV and the image archive from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

In [None]:
!mkdir 'dogs_train'
!unzip /content/drive/MyDrive/졸업논문/data/강아지/dogs_train.zip -d /content/dogs_train/

In [None]:
# Load the label table from Drive and report how many rows it contains.
labels_csv_path = './drive/MyDrive/졸업논문/data/labels.csv'
dataframe = pd.read_csv(labels_csv_path)
print('Training set: {}'.format(len(dataframe)))

Training set: 6898


In [None]:
# Number of rows per breed (value_counts sorts by descending count by default).
dataframe['breed'].value_counts()

scottish_deerhound    126
maltese_dog           117
shih-tzu              112
great_pyrenees        111
pomeranian            111
                     ... 
german_shepherd        69
tibetan_mastiff        69
giant_schnauzer        69
brabancon_griffon      67
eskimo_dog             66
Name: breed, Length: 80, dtype: int64

In [None]:
# Encode each breed name as an integer class id stored in the 'answer' column.
dataframe['answer'] = LabelEncoder().fit_transform(dataframe.breed)

# One row per distinct (answer, breed) pair, indexed by the integer id.
answer_data = (
    dataframe[['answer','breed']]
    .drop_duplicates()
    .set_index('answer')
)

# Lookup table: integer class id -> breed name.
to_breed = answer_data['breed'].to_dict()

In [None]:
train_dir = './dogs_train'

# Rewrite every bare image id as a file path of the form <train_dir>/<id>.jpg.
dataframe['id'] = dataframe['id'].map(lambda image_id: train_dir + '/' + image_id + '.jpg')

# Remove the textual breed column in place; the dataset class below reads
# columns by position, so the column layout matters. As the last expression
# of the cell, the removed Series is what gets displayed.
dataframe.pop('breed')

0                    boston_bull
1                          dingo
2                       pekinese
3                       bluetick
4                         borzoi
                  ...           
6893                      borzoi
6894              dandie_dinmont
6895                    airedale
6896          miniature_pinscher
6897    chesapeake_bay_retriever
Name: breed, Length: 6898, dtype: object

In [None]:
class setting(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Columns are read by position: column 0 is passed to ``Image.open`` as the
    image path and column 1 is returned unchanged as the label.
    """

    def __init__(self,dataframe,transform):
        # transform: callable applied to the loaded RGB image; a falsy value
        # (e.g. None) means the PIL image is returned untouched.
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        # One sample per DataFrame row.
        return len(self.dataframe)

    def __getitem__(self,index):
        image_path = self.dataframe.iloc[index,0]
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = self.dataframe.iloc[index,1]
        return image,label

In [None]:
# Per-channel normalisation constants.
# NOTE(review): presumably the ImageNet mean/std expected by the pretrained backbone - confirm.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: resize, centre-crop to 224, convert to
# a tensor, then normalise each channel.
transformer = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

In [None]:
def print_result(train_loss,val_loss,val_acc):
    """Print one summary line with the epoch's losses and validation accuracy.

    All three values are formatted with four decimal places.
    """
    template = 'train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}'
    summary = template.format(train_loss, val_loss, val_acc)
    print(summary)

In [None]:
# Training routine that drives the model toward higher accuracy

def best_training(model, loss_function, optimizer, num_epochs):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Uses the notebook-level ``train_loader``, ``val_loader`` and ``device``.
    After every epoch the mean validation loss is compared with the best one
    seen so far; on improvement ``model.state_dict()`` is written to
    './best_model.pth'.

    Args:
        model: module to train; called as ``model(x)`` on each batch.
        loss_function: callable ``loss_function(y_hat, y)`` returning a scalar loss.
        optimizer: optimizer stepped once per training batch.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        Validation accuracy (correct / total) of the epoch whose weights were
        saved, or 0 if no epoch ever improved on the initial sentinel.
    """
    # Start from +inf rather than an arbitrary constant so the first epoch
    # with a finite validation loss is always checkpointed.
    best_valid_loss = float('inf')
    best_valid_acc = 0

    for epoch in range(num_epochs):
        print('-'*20)
        print('best_training epoch {}/{}'.format(epoch+1, num_epochs))
        print('-'*20)

        # Training
        train_losses = []
        model.train()
        for x,y in train_loader:
            optimizer.zero_grad()
            x,y = x.to(device),y.to(device)
            y_hat = model(x)
            loss = loss_function(y_hat,y)
            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        # Validation
        val_losses = []
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for x,y in val_loader:
                x,y = x.to(device),y.to(device)
                y_hat = model(x)
                loss = loss_function(y_hat,y)
                val_losses.append(loss.item())
                # Predicted class = index of the largest logit in each row.
                _, predicted = torch.max(y_hat, 1)
                total += len(y)
                correct += (predicted == y).sum().item()

        # Epoch summaries, computed once and reused below.
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        # Guard against an empty validation loader instead of dividing by zero.
        val_acc = correct/total if total else 0.0

        # Save best_model
        if val_loss < best_valid_loss:
            torch.save(model.state_dict(), './best_model.pth')
            best_valid_loss = val_loss
            best_valid_acc = val_acc

        # print the result of epoch
        print_result(train_loss,val_loss,val_acc)

    print('Finish Training.')
    return best_valid_acc

In [None]:
# Training routine that drives the model toward lower accuracy

def worst_training(model, loss_function, optimizer, num_epochs):
    """Train ``model`` to push probability away from the true class.

    The logits are turned into ``log(1 - softmax(logits))`` before being given
    to ``loss_function``, so minimising the loss lowers the probability assigned
    to the target class. Uses the notebook-level ``train_loader``,
    ``val_loader`` and ``device``. On every improvement of the mean validation
    loss ``model.state_dict()`` is written to './worst_model.pth'.

    Training stops completely (no validation for the current epoch) as soon as
    any entry of ``1 - softmax(logits)`` in a training batch falls below 1e-10.
    If that happens during the first epoch, no checkpoint is written at all.

    Args:
        model: module to train; called as ``model(x)`` on each batch.
        loss_function: callable ``loss_function(log_values, y)`` returning a scalar loss.
        optimizer: optimizer stepped once per training batch.
        num_epochs: maximum number of passes over ``train_loader``.

    Returns:
        Validation accuracy (correct / total) of the epoch whose weights were
        saved, or 0 if no checkpoint was ever written.
    """
    # Start from +inf rather than an arbitrary constant so the first epoch
    # with a finite validation loss is always checkpointed.
    best_valid_loss = float('inf')
    best_valid_acc = 0
    softmax = torch.nn.Softmax(dim=1)
    stop_training = False

    for epoch in range(num_epochs):
        print('-'*20)
        # Message text (including its spelling) is kept unchanged so logs stay comparable.
        print('worst_trainig epoch {}/{}'.format(epoch+1, num_epochs))
        print('-'*20)

        # Training
        train_losses = []
        model.train()
        for x,y in train_loader:
            optimizer.zero_grad()
            x,y = x.to(device),y.to(device)
            y_hat = model(x)
            output = 1-softmax(y_hat)
            # Abort before taking the log of a (near-)zero value, which would
            # produce -inf. NOTE(review): presumably this also avoids NaN
            # gradients from the log of non-target entries - confirm.
            if torch.min(output)<1e-10:
                stop_training = True
                break
            output = torch.log(output)
            loss = loss_function(output,y)
            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        if stop_training:
            break

        # Validation
        val_losses = []
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for x,y in val_loader:
                x,y = x.to(device),y.to(device)
                y_hat = model(x)
                output = torch.log(1-softmax(y_hat))
                loss = loss_function(output,y)
                val_losses.append(loss.item())
                # Accuracy is still measured on the raw logits' argmax.
                _, predicted = torch.max(y_hat, 1)
                total += len(y)
                correct += (predicted == y).sum().item()

        # Epoch summaries, computed once and reused below.
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        # Guard against an empty validation loader instead of dividing by zero.
        val_acc = correct/total if total else 0.0

        # Save best_model
        if val_loss < best_valid_loss:
            torch.save(model.state_dict(), './worst_model.pth')
            best_valid_loss = val_loss
            best_valid_acc = val_acc

        # print the result of epoch
        print_result(train_loss,val_loss,val_acc)

    print('Finish Training.')
    return best_valid_acc

In [None]:
# Use the first GPU when CUDA is usable, otherwise fall back to the CPU.
# is_available must be *called*: the bare function object is always truthy,
# which would select 'cuda:0' even on a machine without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
training_samples = dataframe.shape[0]
test_size=0.05
batch_size = 64
# Fixed seed so the shuffle and the train/validation split are reproducible
# across kernel restarts.
seed = 42

# Shuffled copy of the label table (sampling all rows without replacement).
sample_dataframe = dataframe.sample(training_samples, random_state=seed)

# train_test_split accepts a single array; there is no need to pass the frame
# twice and discard half of the result.
x_train,x_val = train_test_split(sample_dataframe,test_size=test_size,random_state=seed)

train_set = setting(x_train, transform=transformer)
val_set = setting(x_val, transform=transformer)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
# Validation loss/accuracy are aggregated over the whole set, so the batch
# order is irrelevant and shuffling is unnecessary.
val_loader = DataLoader(val_set , batch_size=batch_size, shuffle=False)

# Report the split sizes (the recorded cell output shows this summary line).
print('Training set: {}, Validation set: {}'.format(len(train_set), len(val_set)))

Training set: 6553, Validation set: 345


In [None]:
class net(torch.nn.Module):
    """Classifier head stacked on top of a feature-producing base model.

    Forward pass: base_model -> ReLU -> Linear(base_out_features, 512) -> ReLU
    -> Linear(512, num_classes). The final layer's raw outputs are returned
    without any activation.
    """

    def __init__(self, base_model, base_out_features, num_classes):
        super().__init__()
        self.base_model = base_model
        # Hidden projection of the base model's output vector.
        self.linear1 = torch.nn.Linear(base_out_features, 512)
        # Final layer producing one score per class.
        self.output = torch.nn.Linear(512, num_classes)

    def forward(self,x):
        base_features = F.relu(self.base_model(x))
        hidden = F.relu(self.linear1(base_features))
        return self.output(hidden)

def _frozen_resnet50():
    """Return a pretrained ResNet-50 with every parameter frozen."""
    backbone = torchvision.models.resnet50(pretrained=True)
    for param in backbone.parameters():
        param.requires_grad=False
    return backbone

# Each model gets its OWN backbone instance. Sharing one module object between
# both models would let state that changes during train() mode (e.g. BatchNorm
# running statistics, which update even when the parameters are frozen) leak
# from one training run into the other.
res = _frozen_resnet50()

# model_best holds the result of best_training
# (80 output classes = number of distinct breeds in the label table)
model_best = net(base_model=res, base_out_features=res.fc.out_features, num_classes=80)
model_best = model_best.to(device)

# model_worst holds the result of worst_training
res_worst = _frozen_resnet50()
model_worst = net(base_model=res_worst, base_out_features=res_worst.fc.out_features, num_classes=80)
model_worst = model_worst.to(device)

In [None]:
# Number of epochs used for both training runs.
EPOCHS = 30
learning_rate = 0.0003

# Cross-entropy on raw logits for the normal run; NLL for the run that feeds
# already-logged values to the loss.
CE_loss = torch.nn.CrossEntropyLoss()
nll_loss = torch.nn.NLLLoss()

# Only parameters that still require gradients are handed to the optimizers.
optimizer1 = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model_best.parameters()),
    lr=learning_rate,
)
optimizer2 = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model_worst.parameters()),
    lr=learning_rate,
)

In [None]:
# Run the normal training with cross-entropy loss, print the validation
# accuracy of the checkpointed epoch, then move the checkpoint written by
# best_training into the model folder on Drive.
val_acc_best = best_training(model=model_best, loss_function=CE_loss, optimizer=optimizer1, num_epochs=EPOCHS)
print(val_acc_best)
!mv './best_model.pth' './drive/MyDrive/졸업논문/data/model/'

--------------------
best_training epoch 1/30
--------------------
train_loss: 1.9398, val_loss: 0.7930, val_acc: 0.7942
--------------------
best_training epoch 2/30
--------------------
train_loss: 0.5732, val_loss: 0.6499, val_acc: 0.8087
--------------------
best_training epoch 3/30
--------------------
train_loss: 0.4083, val_loss: 0.5261, val_acc: 0.8290
--------------------
best_training epoch 4/30
--------------------
train_loss: 0.3293, val_loss: 0.5187, val_acc: 0.8348
--------------------
best_training epoch 5/30
--------------------
train_loss: 0.2696, val_loss: 0.5060, val_acc: 0.8290
--------------------
best_training epoch 6/30
--------------------
train_loss: 0.2360, val_loss: 0.5040, val_acc: 0.8435
--------------------
best_training epoch 7/30
--------------------
train_loss: 0.1905, val_loss: 0.4842, val_acc: 0.8377
--------------------
best_training epoch 8/30
--------------------
train_loss: 0.1697, val_loss: 0.5187, val_acc: 0.8174
--------------------
best_traini

In [None]:
# Run the accuracy-lowering training with NLL loss, print the validation
# accuracy of the checkpointed epoch, then move the checkpoint written by
# worst_training into the model folder on Drive.
# NOTE: worst_training can stop before any checkpoint is written, in which
# case the mv below has nothing to move.
val_acc_worst = worst_training(model=model_worst, loss_function=nll_loss, optimizer=optimizer2, num_epochs=EPOCHS)
print(val_acc_worst)
!mv './worst_model.pth' './drive/MyDrive/졸업논문/data/model/'

--------------------
worst_trainig epoch 1/30
--------------------
train_loss: 0.0019, val_loss: 0.0000, val_acc: 0.0000
--------------------
worst_trainig epoch 2/30
--------------------
Finish Training.
0.0
