In [2]:
import torch
from torchvision import datasets, transforms
import torch.nn as nn
from PIL import Image
from sklearn.model_selection import train_test_split
import pandas as pd
import os
import torch.nn.functional as F
import numpy as np
from google.colab import drive

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
# !unzip /content/drive/MyDrive/art_dataset.zip -d /content/drive/MyDrive/cs342fp

Archive:  /content/drive/MyDrive/art_dataset.zip
replace /content/drive/MyDrive/cs342fp/kaggle/working/Realism/boris-kustodiev_man-with-accounts-v-a-kastalsky-1917.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [12]:
%cd /content/drive/MyDrive/cs342fp

/content/drive/MyDrive/cs342fp


In [14]:
%pwd

'/content/drive/MyDrive/cs342fp'

In [15]:
parent_dir = "kaggle/working/"

# Define valid_genres list
valid_genres = [
    "Art Nouveau Modern",
    "Baroque",
    "Color Field Painting",
    "Cubism",
    "Early Renaissance",
    "Expressionism",
    "High Renaissance",
    "Impressionism",
    "Mannerism Late Renaissance",
    "Minimalism",
    "Naive Art Primitivism",
    "Northern Renaissance",
    "Realism",
    "Rococo",
    "Romanticism",
    "Ukiyo e"
]

# Read the CSV file into a DataFrame
df = pd.read_csv(parent_dir + 'classes.csv')

# Drop rows with genre_count greater than 1
df = df[df['genre_count'] <= 1]

# Drop columns except 'filename', 'genre', 'subset'
df = df[['filename', 'genre', 'subset']]


# Drop rows with genres not in the valid_genres list
df = df[df['genre'].apply(lambda x: x[2:-2] in valid_genres)]
print(len(df['genre'].unique()))

# Reset index after dropping rows
df.reset_index(drop=True, inplace=True)

genre_counts = df['genre'].value_counts()
print(genre_counts)
print(min(genre_counts))

df = df.groupby('genre', group_keys=False).apply(lambda x: x.sample(min(len(x), min(genre_counts))))

# Reset index after dropping rows
df.reset_index(drop=True, inplace=True)

# Replace genre values with their indices in the valid_genres list
df['genre'] = df['genre'].apply(lambda x: valid_genres.index(x[1:-1].split(',')[0].strip()[1:-1]))

df = df[df['filename'].apply(lambda x: os.path.exists(parent_dir + x))]

genre_counts = df['genre'].value_counts()
print(genre_counts)
print(min(genre_counts))

16
genre
['Impressionism']                 12847
['Realism']                       10534
['Romanticism']                    6896
['Expressionism']                  6280
['Baroque']                        4202
['Art Nouveau Modern']             4155
['Northern Renaissance']           2550
['Naive Art Primitivism']          2299
['Rococo']                         2070
['Cubism']                         2002
['Color Field Painting']           1486
['Early Renaissance']              1387
['High Renaissance']               1339
['Mannerism Late Renaissance']     1275
['Minimalism']                     1250
['Ukiyo e']                        1159
Name: count, dtype: int64
1159
genre
3     670
9     450
12    146
5       6
Name: count, dtype: int64
6


In [16]:
print(df.head())

                                               filename  genre subset
3477  Cubism/gosta-adrian-nilsson_gratulations-nskni...      3   test
3479                Cubism/raoul-dufy_birdcage-1914.jpg      3   test
3481                         Cubism/juan-gris_clown.jpg      3  train
3482  Cubism/christian-schad_portrait-de-walter-sern...      3  train
3483                Cubism/andre-masson_prison-gray.jpg      3   test


In [17]:
class GatedAttention(nn.Module):
    def __init__(self, in_channels):
        super(GatedAttention, self).__init__()
        self.attention_gate = nn.Sequential(
            nn.Conv2d(in_channels, 1, kernel_size=1),  # Reduce channel dimension to 1
            nn.Sigmoid()  # Activation to get coefficients between 0 and 1
        )

    def forward(self, x):
        attention_weights = self.attention_gate(x)
        return x * attention_weights  # Element-wise multiplication

class CNNArtModel(nn.Module):
    def __init__(self, num_classes,input_size):
        super(CNNArtModel, self).__init__()

        final_size = (input_size // 4, input_size // 4)  # This accounts for two pooling layers with stride 2 each

        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2)
        self.maxpool_conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
        self.maxpool = nn.MaxPool2d(2)
        self.maxpool_conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        self.avgpool_conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
        self.avgpool = nn.AvgPool2d(2)
        self.avgpool_conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        # Attention Gates
        self.attention_gate_max = GatedAttention(64)
        self.attention_gate_avg = GatedAttention(64)

#       print("FC1 SHAPE: ", 64 * 2 * final_size[0] * final_size[1], 512)

        self.fc1 = nn.Linear(2097152, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = F.relu(self.conv1(x))

        maxpool_x = F.relu(self.maxpool_conv2(x))
        maxpool_x = self.maxpool(maxpool_x)
        maxpool_x = F.relu(self.maxpool_conv3(maxpool_x))
        maxpool_x = self.attention_gate_max(maxpool_x)

        avgpool_x = F.relu(self.avgpool_conv2(x))
        avgpool_x = self.avgpool(avgpool_x)
        avgpool_x = F.relu(self.avgpool_conv3(avgpool_x))
        avgpool_x = self.attention_gate_avg(avgpool_x)

        # Flatten the output for each path
        maxpool_x = torch.flatten(maxpool_x, 1)
        avgpool_x = torch.flatten(avgpool_x, 1)

        # Concatenate the flattened outputs from both paths
        combined_x = torch.cat((maxpool_x, avgpool_x), dim=1)

        # print(combined_x.shape)
        # print(self.fc1)

        # Pass through fully connected layers
        combined_x = F.relu(self.fc1(combined_x))
        combined_x = self.dropout(combined_x)
        combined_x = self.fc2(combined_x)

        return combined_x

# Example model creation
model = CNNArtModel(num_classes=16, input_size=256)
print(model)


CNNArtModel(
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (maxpool_conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (maxpool_conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (avgpool_conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (avgpool_conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (attention_gate_max): GatedAttention(
    (attention_gate): Sequential(
      (0): Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1))
      (1): Sigmoid()
    )
  )
  (attention_gate_avg): GatedAttention(
    (attention_gate): Sequential(
      (0): Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1))
      (1): Sigmoid()
    )
  )
  (fc1): Linear(in_features=2097152, out_features=512, bias=True)
  (fc2): Linear(in_features=

In [21]:
# Split the dataframe into train and test subsets
train_df = df[df['subset'] == 'train']
test_df = df[df['subset'] == 'test']


class SquarePad:
    def __call__(self, image):
        w, h = image.size
        max_wh = max([w, h])
        hp = int((max_wh - w) // 2)
        vp = int((max_wh - h) // 2)
        padding = (hp, hp, vp, vp)
        if hp * 2 + w < 256:
            padding = (hp, hp + 1, vp, vp)
        if vp * 2 + h < 256:
            padding = (hp, hp, vp, vp + 1)

        image_tensor = torch.tensor(np.array(image), dtype=torch.float).permute(2,0,1)
        padded_tensor = F.pad(image_tensor,padding, mode='replicate')

        return padded_tensor


def resize_larger_dimension(image, size):
    width, height = image.size
    aspect_ratio = width / height
    if width > height:
        new_width = size
        new_height = int(size / aspect_ratio)
    else:
        new_width = int(size * aspect_ratio)
        new_height = size

    return image.resize((new_width, new_height))

transform = transforms.Compose([
    transforms.Lambda(lambda x: resize_larger_dimension(x, 256)),  # Resize the larger dimension to 256 while preserving aspect ratio
    SquarePad(),
#     transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the image
])

# Define a custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        filename = parent_dir + self.dataframe.iloc[idx]['filename']
        image = Image.open(filename).convert('RGB')
        label = self.dataframe.iloc[idx]['genre']

        if self.transform:
            w, h = image.size
            image = self.transform(image)

        return image, label

# Create custom datasets for train and test
train_dataset = CustomDataset(train_df, transform=transform)
test_dataset = CustomDataset(test_df, transform=transform)

# Create train and test dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

In [22]:
# Move the model to GPU if available
device = "cpu" #torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = CNNArtModel(16, 256).to(device)

# Modify the train_one_epoch function to move inputs and labels to GPU
def train_one_epoch(model, train_loader, optimizer, criterion):
    total_loss = 0
    count = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        count += 1
    print('{:>12s} {:>7.5f}'.format('Train loss:', total_loss/count))

    # with torch.no_grad():
    #     total_loss = 0
    #     count = 0
    #     for inputs, labels in val_loader:
    #         inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU
    #         outputs = model(inputs)
    #         loss = criterion(outputs, labels)
    #         total_loss += loss.item()
    #         count += 1
    #     print('{:>12s} {:>7.5f}'.format('Val loss:', total_loss/count))
    print()

# Example of using the train_loader and val_loader in a training loop
nepoch = 5
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

for epoch in range(nepoch):
    train_one_epoch(model, train_loader, optimizer, criterion)

cpu


KeyboardInterrupt: 