In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transfroms
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import os
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from tqdm.auto import tqdm

In [2]:
from zipfile import ZipFile

with ZipFile('/content/drive/MyDrive/art_classify.zip', 'r') as zipObj:
    zipObj.extractall('art_data')

#Data PreProcessing

In [3]:
df = pd.read_csv('/content/art_data/train.csv')
df.head(5)

Unnamed: 0,id,img_path,artist
0,0,./train/0000.jpg,Diego Velazquez
1,1,./train/0001.jpg,Vincent van Gogh
2,2,./train/0002.jpg,Claude Monet
3,3,./train/0003.jpg,Edgar Degas
4,4,./train/0004.jpg,Hieronymus Bosch


In [4]:
LabelEncoder = preprocessing.LabelEncoder()
df['artist'] = LabelEncoder.fit_transform(df['artist'].values)
df.head(5)

Unnamed: 0,id,img_path,artist
0,0,./train/0000.jpg,9
1,1,./train/0001.jpg,48
2,2,./train/0002.jpg,7
3,3,./train/0003.jpg,10
4,4,./train/0004.jpg,24


In [5]:
for i in range(0, len(df['img_path'].values)):

    path = df['img_path'][i]
    path = path[2:]

    df['img_path'][i] = os.path.join('/content/art_data', path)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['img_path'][i] = os.path.join('/content/art_data', path)


In [88]:
df.head(5)

Unnamed: 0,id,img_path,artist
0,0,/content/art_data/train/0000.jpg,9
1,1,/content/art_data/train/0001.jpg,48
2,2,/content/art_data/train/0002.jpg,7
3,3,/content/art_data/train/0003.jpg,10
4,4,/content/art_data/train/0004.jpg,24


In [6]:
CLASS_NUM = len(df['artist'].unique())
print(CLASS_NUM)

50


#Train Test Split

In [7]:
train_df, valid_df, _, _ = train_test_split(df, df['artist'].values, test_size = 0.2, random_state = 10)

In [8]:
train_df = train_df.sort_values(by = ['id'])
valid_df = valid_df.sort_values(by = ['id'])

In [41]:
train_df.head(5)

Unnamed: 0,id,img_path,artist
0,0,./train/0000.jpg,9
2,2,./train/0002.jpg,7
3,3,./train/0003.jpg,10
5,5,./train/0005.jpg,38
6,6,./train/0006.jpg,43


In [42]:
valid_df.head(5)

Unnamed: 0,id,img_path,artist
1,1,./train/0001.jpg,48
4,4,./train/0004.jpg,24
16,16,./train/0016.jpg,48
17,17,./train/0017.jpg,10
21,21,./train/0021.jpg,29


#Data Load

In [9]:
def getdata(data, inference = False):

    if inference:
        return data['img_path'].values
    return data['img_path'].values, data['artist'].values

In [10]:
train_path, train_labels = getdata(train_df)
valid_path, valid_labels = getdata(valid_df)

In [95]:
train_path[0]

'/content/art_data/train/0000.jpg'

In [11]:
class ArtistCustomDataset(Dataset):
    def __init__(self, data_dir, labels, transformer = None):
        self.data_dir = data_dir
        self.labels = labels
        self.transformer = transformer

    def __len__(self):
        return len(self.data_dir)

    def __getitem__(self, index):
        img_path = self.data_dir[index]
        image = cv.imread(img_path)
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        if self.transformer:
            image = self.transformer(image = image)['image']

        if self.labels is not None:
            label = self.labels[index]
            return image, label
        else:
            return image



In [12]:
train_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), max_pixel_value = 255.0, always_apply = False, p=1.0),
    ToTensorV2()
])

In [13]:
valid_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), max_pixel_value = 255.0, always_apply = False, p=1.0),
    ToTensorV2()
])

In [14]:
train_dataset = ArtistCustomDataset(train_path, train_labels, train_transform)
train_loader = DataLoader(train_dataset, batch_size = 12, shuffle = True, num_workers = 0)

valid_dataset = ArtistCustomDataset(valid_path, valid_labels, valid_transform)
valid_loader = DataLoader(valid_dataset, batch_size = 12, shuffle = False, num_workers = 0)

In [15]:
class DepthSeparableConv2d(nn.Module):
    def __init__(self, input_channels, output_channels, kernel_size, stride=1, padding = 1, bias = False):
        super().__init__()

        self.depthwise = nn.Sequential(
            nn.Conv2d(
                in_channels = input_channels,
                out_channels = input_channels,
                kernel_size=kernel_size,
                groups = input_channels, #keypoint for mobilenet. each input channel convolved in only their filters.
                stride = stride,
                padding = padding,
                bias = bias),
            nn.BatchNorm2d(input_channels),
            nn.ReLU(inplace = True)
        )

        self.pointwise = nn.Sequential(
            nn.Conv2d(
                in_channels = input_channels,
                out_channels = output_channels,
                kernel_size = 1),
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace = True)
        )

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)

        return x


In [16]:
class OriginalConv2d(nn.Module):
    def __init__(self, input_channels, output_channels, kernel_size, stride = 1, padding = 1, bias = False):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(
                in_channels = input_channels,
                out_channels = output_channels,
                kernel_size = kernel_size,
                stride = stride,
                padding = padding,
                bias = bias
            ),
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace = True)
        )

    def forward(self, x):
        x = self.conv(x)

        return x


In [17]:
class MobileNetV1(nn.Module):
    def __init__(self, width_multiplier, class_num = 100):
        super().__init__()

        alpha = width_multiplier
        self.layer1 = nn.Sequential(
            OriginalConv2d(input_channels=3,
                           output_channels = int(32*alpha),
                           kernel_size = 3,
                           stride = 2)
        )

        self.layer2 = nn.Sequential(
            DepthSeparableConv2d(input_channels = int(32*alpha),
                                 output_channels = int(64*alpha),
                                 kernel_size = 3,
                                 stride = 1)
        )

        self.layer3 = nn.Sequential(
            DepthSeparableConv2d(input_channels = int(64*alpha),
                                 output_channels = int(128*alpha),
                                 kernel_size = 3,
                                 stride = 2)
        )

        self.layer4 = nn.Sequential(
            DepthSeparableConv2d(input_channels = int(128*alpha),
                                 output_channels = int(128*alpha),
                                 kernel_size = 3,
                                 stride = 1),
            DepthSeparableConv2d(input_channels = int(128*alpha),
                                 output_channels = int(256*alpha),
                                 kernel_size = 3,
                                 stride = 2)
        )

        self.layer5 = nn.Sequential(
            DepthSeparableConv2d(input_channels = int(256*alpha),
                                 output_channels = 256*alpha,
                                 kernel_size = 3,
                                 stride = 1),
            DepthSeparableConv2d(input_channels = int(256*alpha),
                                 output_channels = int(512*alpha),
                                 kernel_size = 3,
                                 stride = 2)
        )

        self.layer6 = nn.Sequential(
            DepthSeparableConv2d(input_channels = int(512*alpha),
                                 output_channels = int(512*alpha),
                                 kernel_size = 1,
                                 stride = 1),
            DepthSeparableConv2d(input_channels = int(512*alpha),
                                 output_channels = int(512*alpha),
                                 kernel_size = 1,
                                 stride = 1),
            DepthSeparableConv2d(input_channels = int(512*alpha),
                                 output_channels = int(512*alpha),
                                 kernel_size = 1,
                                 stride = 1),
            DepthSeparableConv2d(input_channels = int(512*alpha),
                                 output_channels = int(512*alpha),
                                 kernel_size = 1,
                                 stride = 1),
            DepthSeparableConv2d(input_channels = int(512*alpha),
                                 output_channels = int(512*alpha),
                                 kernel_size = 1,
                                 stride = 1)
        )

        self.layer7 = nn.Sequential(
            DepthSeparableConv2d(input_channels = int(512*alpha),
                                 output_channels = int(1024*alpha),
                                 kernel_size = 3,
                                 stride = 1)
        )

        self.layer8 = nn.Sequential(
            DepthSeparableConv2d(input_channels = int(1024*alpha),
                                 output_channels = int(1024*alpha),
                                 kernel_size = 3,
                                 stride = 2)
        )

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(int(1024*alpha), class_num)
        self.softmax = nn.Softmax(1)


    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.layer8(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x



In [18]:
def train(model, epochs, optimizer, train_loader, test_loader, scheduler, device):
    model.to(device)

    criterion = nn.CrossEntropyLoss().to(device)

    best_score = 1000
    best_model = None

    for epoch in range(1, epochs+1):
        model.train()
        train_loss = []
        for img, label in tqdm(iter(train_loader)): #iter으로 Next를 했을 때 다음 데이터가 나옴
            img, label = img.float().to(device), label.to(device)

            optimizer.zero_grad()

            model_pred = model(img)

            loss = criterion(model_pred, label)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        tr_loss = np.mean(train_loss)
        val_loss= validation(model, criterion, test_loader, device)

        print(f'Epoch [{epoch}], Train Loss : [{tr_loss:.5f}] Val Loss : [{val_loss:.5f}]')

        if scheduler is not None:
            scheduler.step()

        if best_score > val_loss:
            best_model = model
            best_score = val_loss

    return best_model


In [19]:
def validation(model, criterion, test_loader, device):
    model.eval() #모든 레이어가 evaluation mode에 들어가게 한다. 즉, 학습에만 필요한 dropout, batch norm을 비활성화한다는 것.

    val_loss = []

    with torch.no_grad():
        for img, label in tqdm(iter(test_loader)):
            img, label = img.float().to(device), label.to(device)

            model_pred = model(img)

            loss = criterion(model_pred, label)

            val_loss.append(loss.item())

    return np.mean(val_loss)


In [20]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [23]:
model = MobileNetV1(1, class_num = CLASS_NUM)
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.01)
scheduler = None

infer_model = train(model, 10, optimizer, train_loader, valid_loader, scheduler, device)

  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [1], Train Loss : [3.54167] Val Loss : [3.57322]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [2], Train Loss : [3.31343] Val Loss : [3.29767]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [3], Train Loss : [3.26023] Val Loss : [3.28679]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [4], Train Loss : [3.20143] Val Loss : [3.26069]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [5], Train Loss : [3.16265] Val Loss : [3.13772]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [6], Train Loss : [3.09844] Val Loss : [3.17285]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [7], Train Loss : [3.06204] Val Loss : [3.06310]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [8], Train Loss : [3.02342] Val Loss : [2.99680]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [9], Train Loss : [2.97434] Val Loss : [2.96457]


  0%|          | 0/394 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Epoch [10], Train Loss : [2.92049] Val Loss : [2.91263]


In [32]:
torch.save(infer_model.state_dict(), '/content/drive/MyDrive/Pytorch_Models/MobileNetV1.pth')

In [25]:
test_df = pd.read_csv('/content/art_data/test.csv')
test_df.head(5)

Unnamed: 0,id,img_path
0,TEST_00000,./test/TEST_00000.jpg
1,TEST_00001,./test/TEST_00001.jpg
2,TEST_00002,./test/TEST_00002.jpg
3,TEST_00003,./test/TEST_00003.jpg
4,TEST_00004,./test/TEST_00004.jpg


In [26]:
for i in range(0, len(test_df['img_path'].values)):

    path = test_df['img_path'][i]
    path = path[2:]

    test_df['img_path'][i] = os.path.join('/content/art_data', path)

In [27]:
test_df.head(5)

Unnamed: 0,id,img_path
0,TEST_00000,/content/art_data/test/TEST_00000.jpg
1,TEST_00001,/content/art_data/test/TEST_00001.jpg
2,TEST_00002,/content/art_data/test/TEST_00002.jpg
3,TEST_00003,/content/art_data/test/TEST_00003.jpg
4,TEST_00004,/content/art_data/test/TEST_00004.jpg


In [29]:
test_img_paths = getdata(test_df, inference=True)

In [30]:
test_dataset = ArtistCustomDataset(test_img_paths, None, valid_transform)
test_loader = DataLoader(test_dataset, batch_size=12, shuffle=False, num_workers=0)

In [31]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()

    model_preds = []

    with torch.no_grad():
        for img in tqdm(iter(test_loader)):
            img = img.float().to(device)

            model_pred = model(img)
            model_preds += model_pred.argmax(1).detach().cpu().numpy().tolist()

    print('Done.')
    return model_preds

In [33]:
preds = inference(infer_model, test_loader, device)

  0%|          | 0/1056 [00:00<?, ?it/s]

Done.


In [34]:
preds = LabelEncoder.inverse_transform(preds)

In [35]:
submit = pd.read_csv('/content/art_data/sample_submission.csv')
submit['artist'] = preds

In [36]:
submit.head(5)

Unnamed: 0,id,artist
0,TEST_00000,Rembrandt
1,TEST_00001,Edgar Degas
2,TEST_00002,Peter Paul Rubens
3,TEST_00003,Albrecht Du rer
4,TEST_00004,Edgar Degas


In [37]:
submit.to_csv('/content/drive/MyDrive/Pytorch_Inference_Result/MobileNetV1.csv', index=False)