In [1]:
import os
import pandas as pd
import torch
import torchvision
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
from torch.autograd import Variable
import os
from sklearn.model_selection import train_test_split
from PIL import Image
from tempfile import TemporaryDirectory
from collections import Counter
from skimage import io

import warnings
warnings.filterwarnings('ignore')



# В этом ноутбуке будет приведен пример обучения модели без аугментации и преобразований данных. 

In [2]:
os.listdir('/kaggle/input/dataset/train')

['neutral', 'sad', 'surprised', 'happy']

# Буден создат датасет с сбалансированными данными (~3000 на каждый класс). Пришлось избавиться от части данных, чтобы выровнить их. 
# В дальнейшем будем пытаться решить эту проблему агументацией

In [3]:
df = pd.DataFrame(columns=['image', 'label'])

for i, label in enumerate(os.listdir('/kaggle/input/dataset/train')):
    for img in os.listdir(f'/kaggle/input/dataset/train/{label}'):
        df.loc[len(df)] = [f'/kaggle/input/dataset/train/{label}/{img}', i]

In [4]:
df = df.sample(frac=1)

In [5]:
df['label'].value_counts()

label
3    7215
0    4965
1    4830
2    3171
Name: count, dtype: int64

In [6]:
rows_to_drop = df[df['label'] == 3].index[:4000]
df = df.drop(rows_to_drop)

rows_to_drop = df[df['label'] == 1].index[:1700]
df = df.drop(rows_to_drop)

rows_to_drop = df[df['label'] == 0].index[:1800]
df = df.drop(rows_to_drop)

# Размер данных по классам

In [7]:
df['label'].value_counts()

label
3    3215
2    3171
0    3165
1    3130
Name: count, dtype: int64

In [8]:
X_train, X_test, y_train, y_test = train_test_split(df['image'], df.label, test_size=0.33, random_state=42, stratify=df.label)

In [9]:
pd.concat([X_train, y_train], axis=1)

Unnamed: 0,image,label
14391,/kaggle/input/dataset/train/happy/im3683.png,3
1044,/kaggle/input/dataset/train/neutral/im1667.png,0
11979,/kaggle/input/dataset/train/surprised/im1230.png,2
14969,/kaggle/input/dataset/train/happy/im699.png,3
13796,/kaggle/input/dataset/train/happy/im1898.png,3
...,...,...
4432,/kaggle/input/dataset/train/neutral/im2691.png,0
19212,/kaggle/input/dataset/train/happy/im6689.png,3
16923,/kaggle/input/dataset/train/happy/im2255.png,3
7388,/kaggle/input/dataset/train/sad/im2181.png,1


In [20]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        #row = self.dataframe.iloc[index]
        image=io.imread(self.dataframe.iloc[index]['image'])
        
        rgb_image = np.zeros((48, 48, 3), dtype=np.uint8)

        # Копируем одноканальное изображение в каждый канал RGB
        rgb_image[:,:,0] = image  # Красный канал
        rgb_image[:,:,1] = image  # Зеленый канал
        rgb_image[:,:,2] = image  # Синий канал
        image = rgb_image
        
        if self.transform:
            image = self.transform(image)   
        y_label=torch.tensor(float(self.dataframe.iloc[index]['label']))
        return (image,y_label)

data_transforms = {
    'train': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(256),
        transforms.RandomHorizontalFlip(),
        transforms.Normalize([0.51145715, 0.51145715, 0.51145715], [0.250773, 0.250773, 0.250773])
        
    ]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(256),
        
    ]),
}

train_data = MyDataset(pd.concat([X_train, y_train], axis=1), data_transforms['train'])
test_data = MyDataset(pd.concat([X_test, y_test], axis=1), data_transforms['val'])

In [21]:
io.imread(df.iloc[1]['image'])

array([[166, 167, 167, ..., 168, 169, 168],
       [165, 167, 166, ..., 168, 167, 166],
       [167, 167, 165, ..., 167, 167, 167],
       ...,
       [157, 130,  90, ...,  17,  35,  80],
       [157, 148,  76, ...,  24,  40,  89],
       [159, 158, 114, ...,  29,  49, 111]], dtype=uint8)

In [12]:
image_datasets = {'train' : train_data, 'val' : test_data}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=64,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [30]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    labels = labels.type(torch.LongTensor)
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)

            print()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path))
    return model
    

In [31]:
def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()
    class_names = ['neutral', 'sad', 'surprised', 'happy']
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title(f'predicted: {class_names[preds[j]]}')
                plt.imshow(inputs.cpu().data[j].permute(1, 2, 0))

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

# В качестве модели, методом подбора, самые лучшие результаты показала архитектура EfficientNet

In [32]:
model_ft = models.efficientnet_b1(torchvision.models.EfficientNet_B1_Weights.IMAGENET1K_V2)

In [33]:
model_ft.classifier[-1] = nn.Linear(in_features=1280, out_features=4, bias=True)

In [34]:
model_ft = model_ft.to('cuda')

In [35]:
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [36]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20);

Epoch 0/19
----------
train Loss: 0.8252 Acc: 0.6581
val Loss: 0.6496 Acc: 0.7417

Epoch 1/19
----------
train Loss: 0.6027 Acc: 0.7648
val Loss: 0.5871 Acc: 0.7704

Epoch 2/19
----------
train Loss: 0.4891 Acc: 0.8028
val Loss: 0.6707 Acc: 0.7446

Epoch 3/19
----------
train Loss: 0.3986 Acc: 0.8452
val Loss: 0.6652 Acc: 0.7622

Epoch 4/19
----------
train Loss: 0.3393 Acc: 0.8696
val Loss: 0.6962 Acc: 0.7513

Epoch 5/19
----------
train Loss: 0.2874 Acc: 0.8889
val Loss: 0.6718 Acc: 0.7804

Epoch 6/19
----------
train Loss: 0.2224 Acc: 0.9144
val Loss: 0.6986 Acc: 0.7900

Epoch 7/19
----------
train Loss: 0.1110 Acc: 0.9628
val Loss: 0.6928 Acc: 0.8014

Epoch 8/19
----------
train Loss: 0.0699 Acc: 0.9770
val Loss: 0.7436 Acc: 0.8062

Epoch 9/19
----------
train Loss: 0.0578 Acc: 0.9803
val Loss: 0.7806 Acc: 0.8086

Epoch 10/19
----------
train Loss: 0.0430 Acc: 0.9861
val Loss: 0.8297 Acc: 0.8067

Epoch 11/19
----------
train Loss: 0.0374 Acc: 0.9886
val Loss: 0.8549 Acc: 0.8103

Ep

# Максимальная точность на валидационной выборке составила 80%. На тестовой может быть чуть поменьше. В других ноутбуках будет попробована аугментация и различные архитектуры