In [81]:
import argparse
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import torch
from efficientnet_pytorch import EfficientNet
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch import nn
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import Subset
from torchvision import transforms


In [70]:
class TrainDataset(Dataset):
    """Map-style dataset that pairs JPEG images on disk with labels from a DataFrame.

    Every row of ``data_frame`` must provide an ``id`` column (the image file
    stem; ``<id>.jpg`` is looked up inside ``root_dir``) and a ``class`` column
    (the label returned alongside the image).
    """

    def __init__(self, data_frame: pd.DataFrame, root_dir: str, transform=None):
        """Store the label table, the image directory and an optional transform.

        Args:
            data_frame: Table with ``id`` and ``class`` columns.
            root_dir: Directory that contains the ``<id>.jpg`` files.
            transform: Optional callable applied to the loaded RGB PIL image.
        """
        self.transform = transform
        self.root_dir = root_dir
        self.data_frame = data_frame

    def __len__(self):
        """Return the number of rows (samples) in the label table."""
        return self.data_frame.shape[0]

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Positional row index; a tensor index is converted with
                ``tolist()`` first.

        Returns:
            dict with keys ``'image'`` (the RGB image, transformed when a
            transform was given) and ``'class'`` (the row's ``class`` value).
        """
        position = idx.tolist() if torch.is_tensor(idx) else idx
        record = self.data_frame.iloc[position]

        file_name = str(record['id']) + '.jpg'
        picture = PIL.Image.open(os.path.join(self.root_dir, file_name)).convert('RGB')
        if self.transform:
            picture = self.transform(picture)

        return {'image': picture, 'class': record['class']}


In [8]:
# Preprocessing applied to every training/validation image:
#   1. resize to a fixed 224x224 input,
#   2. convert the PIL image to a tensor,
#   3. normalise each channel with the given mean/std
#      (these values match the commonly used ImageNet statistics).
train_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

In [87]:
def train(model, train_loader, optimizer, criterion, epoch, log_every=50):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable of batches, each a dict with an ``'image'``
            tensor and a ``'class'`` tensor of targets.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        epoch: Epoch identifier, used only in progress messages.
        log_every: Print the running mean loss every ``log_every`` batches;
            ``0``/``None`` disables progress output.

    Returns:
        float: Mean of the batch losses over the epoch (``0.0`` when the
        loader yields no batches).
    """
    model.train()

    # Send the batches to wherever the model lives instead of depending on a
    # notebook-level ``device`` global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    loss_sum = 0.0
    num_batches = 0

    for step, batch in enumerate(train_loader, start=1):
        images = batch['image'].to(target_device)
        targets = batch['class'].to(target_device)

        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        optimizer.step()

        # ``item()`` detaches the value so no autograd graph is kept alive.
        loss_sum += loss.item()
        num_batches = step

        if log_every and step % log_every == 0:
            print(f'{epoch} | {loss_sum / step}')

    torch.cuda.empty_cache()
    return loss_sum / num_batches if num_batches else 0.0

def validation(model, criterion, valid_loader):
    """Evaluate the model on a validation loader.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        valid_loader: Iterable of batches, each a dict with an ``'image'``
            tensor and a ``'class'`` tensor of targets.

    Returns:
        tuple: ``(val_loss, val_score)`` where ``val_loss`` is the mean
        per-batch loss and ``val_score`` is the micro-averaged F1 score of the
        arg-max predictions. ``(0.0, 0.0)`` is returned when the loader yields
        no batches.
    """
    model.eval()

    # Evaluate on the device that holds the model rather than a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    loss_sum = 0.0
    num_batches = 0
    # Kept separate from the per-batch tensors so the accumulated labels are
    # never overwritten inside the loop.
    labels = []
    predictions = []

    with torch.no_grad():
        for batch in valid_loader:
            images = batch['image'].to(target_device)
            targets = batch['class'].to(target_device)

            out = model(images)
            loss_sum += criterion(out, targets).item()
            num_batches += 1

            labels.extend(targets.cpu().tolist())
            predictions.extend(torch.argmax(out, dim=-1).cpu().tolist())

    torch.cuda.empty_cache()

    if num_batches == 0:
        return 0.0, 0.0

    val_loss = loss_sum / num_batches
    val_score = f1_score(labels, predictions, average='micro')
    return val_loss, val_score

In [88]:
def train_model(args, num_epochs=60, cv_checkpoint=True, fine_tune=False,
                weight_file_name='weight_best.pt', **train_kwargs):
    """Train for ``num_epochs`` epochs, checkpointing the best weights.

    Args:
        args: Namespace-like object. ``args.learning_rate`` sets the initial
            AdamW learning rate (0.001 when absent); ``args.multi_parallel``
            requests saving the wrapped ``model.module`` weights.
        num_epochs: Number of epochs to run.
        cv_checkpoint: When True the best epoch is the one with the highest
            validation score; otherwise the one with the lowest validation
            loss. The LR scheduler monitors the same quantity.
        fine_tune: Currently unused; kept for interface compatibility.
        weight_file_name: Path the best ``state_dict`` is written to.
        **train_kwargs: ``model``, ``train_loader``, ``valid_loader`` and
            ``criterion``. Any that are omitted are looked up as module-level
            globals, which is how this function originally obtained them.

    Returns:
        tuple: ``(train_result, lrs, score)`` where ``train_result`` holds the
        weight file name plus the best epoch and best score/loss, ``lrs`` is
        the learning rate recorded after each epoch and ``score`` the
        validation score of each epoch.

    Raises:
        TypeError: If a required training object is neither passed in
            ``train_kwargs`` nor available as a global.
    """
    def _resolve(name):
        # Prefer the explicitly passed object; fall back to a notebook global.
        if name in train_kwargs:
            return train_kwargs[name]
        if name in globals():
            return globals()[name]
        raise TypeError(f"train_model() missing required keyword argument: '{name}'")

    model = _resolve('model')
    train_loader = _resolve('train_loader')
    valid_loader = _resolve('valid_loader')
    criterion = _resolve('criterion')

    learning_rate = getattr(args, 'learning_rate', 0.001)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    # The scheduler must maximise the score but minimise the loss.
    scheduler = ReduceLROnPlateau(optimizer, 'max' if cv_checkpoint else 'min',
                                  patience=10, factor=0.1)

    train_result = {'weight_file_name': weight_file_name}
    best_score = 0.
    best_loss = float('inf')
    lrs = []
    score = []

    # Only unwrap when the model really is wrapped (e.g. by nn.DataParallel).
    unwrap = bool(getattr(args, 'multi_parallel', False)) and hasattr(model, 'module')

    for epoch in range(num_epochs):
        start_time = time.time()

        train_loss = train(model, train_loader, optimizer, criterion, epoch)
        val_loss, val_score = validation(model, criterion, valid_loader)
        score.append(val_score)

        if cv_checkpoint:
            improved = val_score > best_score
        else:
            improved = val_loss < best_loss

        if improved:
            train_result['best_epoch'] = epoch + 1
            if cv_checkpoint:
                best_score = val_score
                train_result['best_score'] = round(best_score, 5)
            else:
                best_loss = val_loss
                train_result['best_loss'] = round(best_loss, 5)

            state = (model.module if unwrap else model).state_dict()
            torch.save(state, weight_file_name)
            if cv_checkpoint:
                print("Score is higher than last model .....Saving Model.....")

        elapsed = time.time() - start_time
        now = time.localtime()
        current_lrs = [group['lr'] for group in optimizer.param_groups]
        print("Epoch {} - train_loss: {:.4f}  val_loss: {:.4f}  cv_score: {:.4f}  lr: {:.6f}  time: {:.0f}s nowtime: {},{}:{}.{}".format(
                epoch+1, train_loss, val_loss, val_score, current_lrs[0], elapsed,
                now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec))

        lrs.extend(current_lrs)

        # Step the scheduler on the same quantity used for checkpointing.
        scheduler.step(val_score if cv_checkpoint else val_loss)

    return train_result, lrs, score

In [48]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # the hash seed of the running process is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms per run, which can pick
    # different kernels between runs and so defeats reproducibility.
    torch.backends.cudnn.benchmark = False

In [96]:
# Run configuration. An empty argument list is parsed so that ``args`` is a
# plain namespace that works inside a notebook (no command line available).
parser = argparse.ArgumentParser()
args = parser.parse_args("")

# Label table and the directory holding the preprocessed training images.
data_dir = '../data/assignment/preprocessed_train/'
df = pd.read_csv(f'../data/assignment/write.csv')

# Fix all RNG seeds before any model or data-split objects are created.
SEED = 42
seed_everything(SEED)

# Hyper-parameters read by the training cells below.
vars(args).update(
    batch_size=128,
    learning_rate=0.01,
    multi_parallel=True,
    num_epochs=50,
)


In [97]:

result_arr = []

batch_size = args.batch_size
# NOTE(review): the last recorded run of this cell ended in a CUDA
# out-of-memory error; lower ``args.batch_size`` (or restart the kernel to
# release memory held by earlier runs) if that happens again.

# 80/20 train/validation split of the full dataset.
traindataset = TrainDataset(df, root_dir=data_dir, transform=train_transform)
n_train = int(len(traindataset) * 0.8)
train_set, val_set = torch.utils.data.random_split(
    traindataset, [n_train, len(traindataset) - n_train])

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

torch.cuda.empty_cache()

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = EfficientNet.from_pretrained('efficientnet-b3', num_classes=5)
model = model.to(device)
if args.multi_parallel:
    # The checkpoint code saves ``model.module`` when this flag is set, so the
    # model has to be wrapped for that attribute to exist.
    model = nn.DataParallel(model)

criterion = nn.CrossEntropyLoss(reduction='mean')
criterion = criterion.to(device)

train_kwargs = dict(
    train_loader=train_loader,
    valid_loader=valid_loader,
    model=model,
    criterion=criterion,
)

num_epochs = args.num_epochs
result, lrs, score = train_model(args, num_epochs=num_epochs, cv_checkpoint=True,
                                 fine_tune=False, **train_kwargs)
result_arr.append(result)
print(result)

# Learning-rate schedule (left) and validation F1 score (right) per epoch.
fig, (lr_ax, score_ax) = plt.subplots(1, 2, figsize=(18, 4))
epoch_ticks = list(range(0, num_epochs, 10))

lr_ax.plot(lrs, 'b')
lr_ax.set_xlabel('Epochs', fontsize=12, fontweight='bold')
lr_ax.set_ylabel('Learning rate', fontsize=14, fontweight='bold')
lr_ax.set_title('Learning rate schedule', fontsize=15, fontweight='bold')
# The scheduler scales the rate by a constant factor, so a log axis keeps
# every plateau visible and avoids hand-written (mislabelled) tick labels.
lr_ax.set_yscale('log')
lr_ax.set_xticks(epoch_ticks)

score_ax.plot(score, 'r')
score_ax.set_xlabel('Epochs', fontsize=12, fontweight='bold')
score_ax.set_ylabel('Valid score', fontsize=14, fontweight='bold')
score_ax.set_title('F1 Score', fontsize=15, fontweight='bold')
score_ax.set_xticks(epoch_ticks)

plt.show()
plt.close(fig)

print("train finish")

Loaded pretrained weights for efficientnet-b3


RuntimeError: CUDA out of memory. Tried to allocate 32.00 MiB (GPU 0; 31.72 GiB total capacity; 26.15 GiB already allocated; 26.62 MiB free; 26.22 GiB reserved in total by PyTorch)