In [1]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import Compose, ToTensor,  Resize, Normalize
from torch.utils.data import DataLoader, Dataset
import torchvision.models as models

from PIL import Image
from sklearn.metrics import accuracy_score, f1_score

import pandas as pd
from tqdm import tqdm

## Using ResNet for Image Classification

### Parameter counts
- ResNet50: 25.6M
- ResNet101: 44.5M

In [2]:
# --- Paths -------------------------------------------------------------
# Root folder holding the CSV metadata and the serialized datasets.
# (The constant's spelling is kept as-is because later cells reference it.)
DIRECTROY = 'data'
# Folder that receives model / optimizer checkpoints.
MODEL_PATH = 'models'

# --- Hyper-parameters --------------------------------------------------
IMG_SIZE = 224      # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 64
EPOCHS = 100
LR = 1e-4           # initial learning rate handed to the optimizer

# --- Compute device ----------------------------------------------------
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Metadata tables for the train and test splits.
df_train = pd.read_csv(f'{DIRECTROY}/reduced_train.csv')
df_test = pd.read_csv(f'{DIRECTROY}/reduced_test.csv')

# Distinct values of the 'class' column, in order of first appearance in the
# training table; their count sizes the classifier head.
classes = df_train['class'].unique().tolist()
num_classes = len(classes)

In [4]:
# Per-channel statistics used by the Normalize step below.
# NOTE(review): these values look like the CLIP preprocessing statistics
# rather than the usual ImageNet ones — confirm they are intended for the
# ImageNet-pretrained ResNet used later, and that they match whatever was
# used to build the serialized datasets.
NORMALIZE_MEAN = [0.48145466, 0.4578275, 0.40821073]
NORMALIZE_STD = [0.26862954, 0.26130258, 0.27577711]

# Preprocessing pipeline: resize to a fixed square, convert to a tensor,
# then normalize each channel.
image_transforms = Compose([
    Resize((IMG_SIZE, IMG_SIZE)),
    ToTensor(),
    Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

In [5]:
class CustomDataset(Dataset):
    """In-memory dataset of images, integer labels and (optionally) tokenized text.

    Every image listed in ``df['name']`` is loaded eagerly at construction
    time, transformed, cast to half precision and stacked into one tensor.

    Args:
        df: DataFrame with the columns ``'name'`` (image file name),
            ``'newid'`` (integer label) and, when a tokenizer is supplied,
            ``'label'`` (text passed to the tokenizer).
        transforms: Callable applied to each RGB PIL image. It must return a
            tensor that can be reshaped to ``(1, 3, IMG_SIZE, IMG_SIZE)``.
        directory: Sub-folder of ``DIRECTROY`` that contains the image files.
        tokenizer: Optional callable invoked as
            ``tokenizer(texts, padding=True, truncation=True, return_tensors="pt")``
            whose result is indexed with ``'input_ids'`` and
            ``'attention_mask'``. When ``None`` (the default), both are
            stored as empty ``(len(df), 0)`` tensors so ``__getitem__`` keeps
            returning a 4-tuple.

    Notes:
        * Earlier revisions set the tokenizer to ``None`` and then called it,
          and called an undefined ``self.resize_img``; both made ``__init__``
          fail. Image sizing is now left entirely to ``transforms``.
        * Attribute names are unchanged, so instances serialized with the
          earlier definition can still be unpickled against this class.
    """

    def __init__(self, df, transforms, directory, tokenizer=None):
        self.tokenizer = tokenizer
        self.df = df
        self.transforms = transforms
        self.directory = directory
        # Build the labels directly as int64 instead of round-tripping
        # through float32, which cannot represent large ids exactly.
        self.labels = torch.tensor(df['newid'].values, dtype=torch.long)

        names = df['name'].values
        if len(names) == 0:
            # torch.cat rejects an empty list; keep the expected layout.
            self.imgs = torch.empty((0, 3, IMG_SIZE, IMG_SIZE), dtype=torch.half)
        else:
            self.imgs = torch.cat([self._load_image(name) for name in tqdm(names)])

        if self.tokenizer is None:
            # No text branch: store width-0 placeholders that still batch.
            self.tokenized = None
            self.input_ids = torch.empty((len(df), 0), dtype=torch.long)
            self.attention_mask = torch.empty((len(df), 0), dtype=torch.long)
        else:
            self.tokenized = self.tokenizer(
                df['label'].tolist(),
                padding=True,
                truncation=True,
                return_tensors="pt",
            )
            self.input_ids = self.tokenized['input_ids']
            self.attention_mask = self.tokenized['attention_mask']

    def _load_image(self, name):
        """Load one image file and return it as a (1, 3, IMG_SIZE, IMG_SIZE) half tensor."""
        # The context manager closes the file handle once pixels are decoded.
        with Image.open(f'{DIRECTROY}/{self.directory}/{name}') as img:
            tensor = self.transforms(img.convert('RGB'))
        return tensor.half().reshape(1, 3, IMG_SIZE, IMG_SIZE)

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(img, label, input_ids, attention_mask)`` for row ``idx``."""
        img = self.imgs[idx]
        label = self.labels[idx]
        input_ids = self.input_ids[idx]
        attention_mask = self.attention_mask[idx]
        return img, label, input_ids, attention_mask

### Load the train and test datasets from disk

In [6]:
# Locations of the serialized dataset objects.
train_dataset_path = f'{DIRECTROY}/train_dataset/train_dataset_reduced_all.pth'
test_dataset_path = f'{DIRECTROY}/test_public_dataset/test_public_reduced_dataset_0.pth'

# NOTE(review): torch.load unpickles arbitrary Python objects, so only load
# files from a trusted source. Presumably these files hold CustomDataset
# instances, in which case the class must be defined before this cell runs —
# confirm.
train_dataset = torch.load(train_dataset_path)
test_dataset = torch.load(test_dataset_path)

# Shuffle only the training split; evaluation order stays fixed.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Train

In [8]:
# Pretrained ResNet-101 whose final fully-connected layer is replaced by a
# fresh linear head with one output per class.
model = models.resnet101(pretrained=True)
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)
model = model.to(device)

# Classification loss.
criterion = nn.CrossEntropyLoss()

# Adam over all parameters (backbone and new head alike); the scheduler
# scales the learning rate linearly from 1.0x to 0.1x of LR over EPOCHS steps.
optimizer = optim.Adam(model.parameters(), lr=LR)
scheduler = optim.lr_scheduler.LinearLR(
    optimizer,
    start_factor=1.0,
    end_factor=0.1,
    total_iters=EPOCHS,
)

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to C:\Users\Admin/.cache\torch\hub\checkpoints\resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:08<00:00, 20.2MB/s] 


In [9]:
# Train for EPOCHS epochs; after each epoch evaluate on the test loader and
# checkpoint the model and optimizer whenever accuracy improves.
max_accuracy = 0.0

# torch.save does not create missing folders; make sure both checkpoint
# locations exist before spending an epoch of compute.
os.makedirs(f'{MODEL_PATH}/optimizer', exist_ok=True)

for epoch in range(EPOCHS):
    # ---- Training ------------------------------------------------------
    model.train()
    train_loss = 0.0
    len_train = 0

    print('Training epoch:', epoch+1)
    # Each batch is (images, labels, input_ids, attention_mask); only the
    # first two are used by this image-only model.
    for inputs, labels, *_unused in tqdm(train_dataloader):
        optimizer.zero_grad()
        # Cast to float32 on whichever device was selected, so the loop
        # also runs on CPU-only machines.
        inputs = inputs.to(device, dtype=torch.float32)
        labels = labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # criterion returns the batch mean (default reduction); weight it by
        # the batch size so the epoch figure is a true per-sample mean.
        n_samples = labels.size(0)
        train_loss += loss.item() * n_samples
        len_train += n_samples

    scheduler.step()
    train_loss /= max(len_train, 1)
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss}')

    # ---- Evaluation ----------------------------------------------------
    model.eval()
    eval_loss = 0.0
    len_test = 0
    true_labels = []
    pred_labels = []

    print('Evaluating epoch:', epoch+1)
    with torch.no_grad():
        for inputs, labels, *_unused in tqdm(test_dataloader):
            inputs = inputs.to(device, dtype=torch.float32)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            n_samples = labels.size(0)
            eval_loss += loss.item() * n_samples
            len_test += n_samples

            # Predicted class = index of the largest logit per sample.
            pred_labels.extend(torch.argmax(outputs, 1).flatten().cpu().numpy())
            true_labels.extend(labels.flatten().cpu().numpy())

    eval_loss /= max(len_test, 1)
    # Compute accuracy once and reuse it for reporting and checkpointing.
    accuracy = accuracy_score(true_labels, pred_labels)

    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {eval_loss}')
    print(f'Accuracy: {accuracy}')
    print(f'F1 Score Weighted: {f1_score(true_labels, pred_labels, average="weighted")}')
    print(f'F1 Score Macro: {f1_score(true_labels, pred_labels, average="macro")}')

    # Keep a checkpoint only when this epoch beats the best accuracy so far.
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        torch.save(model.state_dict(), f'{MODEL_PATH}/resnet101_aug_model_{epoch+1}.pth')
        torch.save(optimizer.state_dict(), f'{MODEL_PATH}/optimizer/resnet101_aug_optimizer_{epoch+1}.pth')
            

 10%|▉         | 44/457 [00:10<01:40,  4.12it/s]