In [1]:
# !pip install --upgrade torch

from tqdm import tqdm, tqdm_notebook
import math
import random
import pickle
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import time
import os
from glob import glob
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm, tqdm_notebook

import torch.nn as nn
from PIL import Image
from multiprocessing.pool import ThreadPool
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

print("PyTorch Version: ", torch.__version__)
print("Torchvision Version: ", torchvision.__version__)

train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

PyTorch Version:  1.9.1
Torchvision Version:  0.10.1
CUDA is available!  Training on GPU ...


In [2]:
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [3]:
DATA_MODES = ['train', 'val', 'test']
RESCALE_SIZE = 256
DEVICE = torch.device("cuda")

In [22]:
class SimpsonsDataset(Dataset):
  def __init__(self, files, mode, augmentations = None):
    super().__init__()
    self.files = files
    self.mode = mode
    self.augmentations = augmentations

    if self.mode not in DATA_MODES:
      print(f'wrong mode: {self.mode}')
      raise NameError

    self.len_ = len(self.files)
    self.label_encoder = LabelEncoder()

    if self.mode != 'test':
      self.labels = [path.parent.name for path in self.files]
      self.label_encoder.fit(self.labels)

      with open('label_encoder.pkl', 'wb') as le_dump:
        pickle.dump(self.label_encoder, le_dump)

  def __len__(self):
    return self.len_

  def load_sample(self, file):
    image = Image.open(file)
    image.load()
    return image

  def __getitem__(self, index):
    transform = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),  
      transforms.ToTensor(),
      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])                                
    ])

    x = self.load_sample(self.files[index])
#     x = self._prepare_sample(x)
#     x = np.array(x / 255, dtype='float32')

    x = transform(x)
  
    if self.mode == 'test':
      return x
    else:
    
        
      label = self.labels[index]
      label_id = self.label_encoder.transform([label])
      y = label_id.item()
      return x, y

#   def _prepare_sample(self, image):
#     image = image.resize((RESCALE_SIZE, RESCALE_SIZE))
#     return np.array(image)

In [24]:
class CustomImageDataset(Dataset):
    def __init__(self, files, mode, augmentations = None):
        super().__init__()
        self.files = files
        self.mode = mode
        self.augmentations = augmentations
        self.len_ = len(self.files)
        self.label_encoder = LabelEncoder()
        self.labels = [path.parent.name for path in self.files]
        self.label_encoder.fit(self.labels)


    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label


In [25]:
train_dir = Path('/kaggle/input/the-simpsons-characters-dataset/simpsons_dataset/')
test_dir = Path('/kaggle/input/the-simpsons-characters-dataset/kaggle_simpson_testset/')
files_training = glob(os.path.join(train_dir,'simpsons_dataset', '*/*.jpg'))
num_images = len(files_training)
print('Number of images in Training file:', num_images)

Number of images in Training file: 20933


In [26]:
min_images = 1000
im_cnt = []
class_names = []
print('{:18s}'.format('class'), end='')
print('Count:')
print('-' * 24)
for folder in os.listdir(os.path.join(train_dir, 'simpsons_dataset')):
    folder_num = len(os.listdir(os.path.join(train_dir,'simpsons_dataset',folder)))
    im_cnt.append(folder_num)
    class_names.append(folder)
    print('{:20s}'.format(folder), end=' ')
    print(folder_num)
    if (folder_num < min_images):
        min_images = folder_num
        folder_name = folder
        
num_classes = len(class_names)
print("\nMinumum imgages per category:", min_images, 'Category:', folder)    
print('Average number of Images per Category: {:.0f}'.format(np.array(im_cnt).mean()))
print('Total number of classes: {}'.format(num_classes))

class             Count:
------------------------
rainier_wolfcastle   45
maggie_simpson       128
krusty_the_clown     1206
waylon_smithers      181
professor_john_frink 65
fat_tony             27
ralph_wiggum         89
otto_mann            32
martin_prince        71
barney_gumble        106
ned_flanders         1454
patty_bouvier        72
sideshow_mel         40
marge_simpson        1291
abraham_grampa_simpson 913
selma_bouvier        103
mayor_quimby         246
disco_stu            8
lionel_hutz          3
troy_mcclure         8
agnes_skinner        42
groundskeeper_willie 121
kent_brockman        498
charles_montgomery_burns 1193
carl_carlson         98
chief_wiggum         986
apu_nahasapeemapetilon 623
bart_simpson         1342
edna_krabappel       457
gil                  27
comic_book_guy       469
principal_skinner    1194
cletus_spuckler      47
milhouse_van_houten  1079
snake_jailbird       55
nelson_muntz         358
lisa_simpson         1354
lenny_leonard        310
moe

In [27]:
train_val_files = sorted(list(train_dir.rglob('*.jpg')))
test_files = sorted(list(test_dir.rglob('*.jpg')))

In [28]:


from sklearn.model_selection import train_test_split

train_val_labels = [path.parent.name for path in train_val_files]
train_files, val_files = train_test_split(train_val_files, test_size=0.3, \
                                          stratify=train_val_labels)
val_labels = [path.parent.name for path in val_files]
val_files, oos_files = train_test_split(val_files, test_size=0.4, stratify=val_labels)



In [29]:


val_dataset = SimpsonsDataset(val_files, mode='val')
train_dataset = SimpsonsDataset(train_files, mode='train')



In [30]:
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataset = SimpsonsDataset(test_files, mode="test")
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

In [12]:
# class Classifier(torch.nn.Module):
#     def __init__(self, num_classes):
#         super(Classifier, self).__init__()
#         self.encoder = torchvision.models.resnet18(pretrained=True)
#         self.linear_classifier = torch.nn.Linear(self.encoder.fc.out_features, num_classes)
        
#     def forward(self, sample):
#         final_feature_map = self.encoder(sample)
#         logits = self.linear_classifier(final_feature_map) # argmax(logits) = pred_class
#         return logits

In [13]:
import torch.nn.functional as F


class Classifier(torch.nn.Module):

    def __init__(self, num_classes: int):
        super().__init__()
        self.encoder = torchvision.models.mobilenet_v2(pretrained=False).features
        self.classifier = torch.nn.Linear(
            in_features=1280,  # For mobilenet_v2.
            out_features=num_classes,
        )

    def forward(self, x):
        feature_map = self.encoder(x)
        pooled_features = F.adaptive_avg_pool2d(feature_map, (1, 1))
        flattened_features = torch.flatten(pooled_features, 1)
        logits = self.classifier(flattened_features)
        return logits


classifier = Classifier(num_classes=10)
print(classifier(torch.ones((1, 3, 224, 224), dtype=torch.float32)))  # Test output of your neural network.

tensor([[-0.3362, -0.1077, -0.0213,  0.4108, -0.0395, -0.3927, -0.1759,  0.2118,
          0.0911, -0.3335]], grad_fn=<AddmmBackward>)


In [14]:
torchvision.models.resnet18().fc.out_features

1000

In [None]:
torchvision.models.mobilenet_v2().features

In [38]:
def fit_epoch(model, train_loader, criterion, optimizer):
    running_loss = 0.0
    running_corrects = 0
    processed_data = 0
  
    for inputs, labels in tqdm(train_loader):
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        preds = torch.argmax(outputs, 1)
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        processed_data += inputs.size(0)
              
    train_loss = running_loss / processed_data
    train_acc = running_corrects.cpu().numpy() / processed_data
    return train_loss, train_acc
  
def eval_epoch(model, val_loader, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    processed_size = 0

    for inputs, labels in tqdm(val_loader):
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            preds = torch.argmax(outputs, 1)

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        processed_size += inputs.size(0)
    val_loss = running_loss / processed_size
    val_acc = running_corrects.double() / processed_size
    return val_loss, val_acc
  
def train(train_files, val_files, model, epochs, batch_size):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    print('Train data loader \n')
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    print('val data loader \n')

    history = []
    log_template = "\nEpoch {ep:03d} train_loss: {t_loss:0.4f} \
    val_loss {v_loss:0.4f} train_acc {t_acc:0.4f} val_acc {v_acc:0.4f}"

    #with tqdm(desc="epoch", total=epochs) as pbar_outer:
        #Здесь можно добавить схему изменения learning rate
        
    opt = torch.optim.Adam(model.parameters(), lr = 1e-4)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        train_loss, train_acc = fit_epoch(model, train_loader, criterion, opt)
#             print("loss", train_loss)

        val_loss, val_acc = eval_epoch(model, val_loader, criterion)
        history.append((train_loss, train_acc, val_loss, val_acc))

        # pbar_outer.update(1)
        # tqdm.write(log_template.format(ep=epoch+1, t_loss=train_loss,\
        #                               v_loss=val_loss, t_acc=train_acc, v_acc=val_acc))
        print("train loss: " + str(train_loss) + " train acc: " + str(train_acc) + " val loss: " + str(val_loss) + " val acc: " + str(val_acc.item()))
            
    return history

In [39]:
def predict(model, test_loader):
    with torch.no_grad():
        logits = []
    
        for inputs in test_loader:
            inputs = inputs.to(DEVICE)
            model.eval()
            outputs = model(inputs).cpu()
            logits.append(outputs)
            
    probs = nn.functional.softmax(torch.cat(logits), dim=-1).numpy()
    return probs

In [40]:
n_classes = len(np.unique(train_val_labels))
model = Classifier(n_classes).to(DEVICE)

In [41]:
np.unique(train_val_labels)

array(['abraham_grampa_simpson', 'agnes_skinner',
       'apu_nahasapeemapetilon', 'barney_gumble', 'bart_simpson',
       'carl_carlson', 'charles_montgomery_burns', 'chief_wiggum',
       'cletus_spuckler', 'comic_book_guy', 'disco_stu', 'edna_krabappel',
       'fat_tony', 'gil', 'groundskeeper_willie', 'homer_simpson',
       'kent_brockman', 'krusty_the_clown', 'lenny_leonard',
       'lionel_hutz', 'lisa_simpson', 'maggie_simpson', 'marge_simpson',
       'martin_prince', 'mayor_quimby', 'milhouse_van_houten',
       'miss_hoover', 'moe_szyslak', 'ned_flanders', 'nelson_muntz',
       'otto_mann', 'patty_bouvier', 'principal_skinner',
       'professor_john_frink', 'rainier_wolfcastle', 'ralph_wiggum',
       'selma_bouvier', 'sideshow_bob', 'sideshow_mel', 'snake_jailbird',
       'troy_mcclure', 'waylon_smithers'], dtype='<U24')

In [None]:

history = train(train_dataset, val_dataset, model=model, epochs=15, batch_size=32)

In [None]:
torch.save(model.state_dict(), "saved_model.pth")

In [None]:
if("saved_model.pth" in os.listdir(".")):
    model.load_state_dict(torch.load("saved_model.pth"))

In [None]:
history = train(train_dataset, val_dataset, model=model, epochs=5, batch_size=32)

In [42]:
class Classifier2(torch.nn.Module):

    def __init__(self, num_classes: int):
        super().__init__()
        self.encoder = torchvision.models.mobilenet_v2(pretrained=True).features
        self.classifier = torch.nn.Linear(
            in_features=1280,  # For mobilenet_v2.
            out_features=num_classes,
        )

    def forward(self, x):
        feature_map = self.encoder(x)
        pooled_features = F.adaptive_avg_pool2d(feature_map, (1, 1))
        flattened_features = torch.flatten(pooled_features, 1)
        logits = self.classifier(flattened_features)
        return logits

In [43]:
model2 = Classifier2(n_classes).to(DEVICE)

In [44]:
history2 = train(train_dataset, val_dataset, model=model2, epochs=5, batch_size=32)

Train data loader 

val data loader 



100%|██████████| 916/916 [04:16<00:00,  3.57it/s]
100%|██████████| 236/236 [00:54<00:00,  4.35it/s]


train loss: 0.5483097536795084 train acc: 0.8801610591687709 val loss: 0.12986927495719647 val acc: 0.9704087048832272


100%|██████████| 916/916 [04:15<00:00,  3.58it/s]
100%|██████████| 236/236 [00:50<00:00,  4.64it/s]


train loss: 0.15973138793282698 train acc: 0.95929161263905 val loss: 0.1330784114914098 val acc: 0.9633757961783439


100%|██████████| 916/916 [04:17<00:00,  3.56it/s]
100%|██████████| 236/236 [00:50<00:00,  4.66it/s]


train loss: 0.06633412364960817 train acc: 0.9818467208080257 val loss: 0.07816050464552857 val acc: 0.980228237791932


100%|██████████| 916/916 [04:16<00:00,  3.57it/s]
100%|██████████| 236/236 [00:51<00:00,  4.62it/s]


train loss: 0.039036425001917556 train acc: 0.9887053845628881 val loss: 0.07861121501669786 val acc: 0.9792993630573248


100%|██████████| 916/916 [04:14<00:00,  3.60it/s]
100%|██████████| 236/236 [00:50<00:00,  4.65it/s]

train loss: 0.03148371529326985 train acc: 0.9910598512250052 val loss: 0.1716242297003817 val acc: 0.9554140127388535





In [45]:
torch.save(model.state_dict(), "saved_model_pretrained.pth")