In [60]:
import matplotlib.pyplot as plt # for plotting
import numpy as np # for transformation

import torch # PyTorch package
import torchvision # load datasets
import torchvision.transforms as transforms # transform data
import torch.nn as nn # basic building block for neural neteorks
import torch.nn.functional as F # import convolution functions like Relu
import torch.optim as optim # optimzer
import os
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, BatchNorm2d, Dropout
# for reading and displaying images
from torchvision.io import read_image
import torchvision.transforms.functional as fn
from torch.utils.data import random_split, Subset
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from sklearn.model_selection import train_test_split
from torchvision.transforms import Compose, ToTensor, Resize
from torchvision.datasets import ImageFolder
from torch.optim import Adam
from torch.autograd import Variable
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [20]:
pwd

'/Users/ayushsharma/Documents/Programming/Python/dtd_custom'

In [21]:
def train_val_dataset(dataset, val_split=0.25):
    train_idx, val_idx = train_test_split(list(range(len(dataset))), test_size=val_split)
    datasets = {}
    datasets['train'] = Subset(dataset, train_idx)
    datasets['val'] = Subset(dataset, val_idx)
    return datasets

dataset = ImageFolder('/Users/ayushsharma/Documents/Programming/Python/dtd_custom/images/', transform=Compose([Resize((224,224)),ToTensor()]))
print(len(dataset))
datasets = train_val_dataset(dataset)
print(len(datasets['train']))
print(len(datasets['val']))
# The original dataset is available in the Subset class
print(datasets['train'].dataset)

dataloaders = {x:DataLoader(datasets[x],32, shuffle=True, num_workers=4) for x in ['train','val']}
x,y = next(iter(dataloaders['train']))
print(x.shape, y.shape)

5540
4155
1385
Dataset ImageFolder
    Number of datapoints: 5540
    Root location: /Users/ayushsharma/Documents/Programming/Python/dtd_custom/images/
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )
torch.Size([32, 3, 224, 224]) torch.Size([32])


In [23]:
x

tensor([[[[0.6667, 0.6667, 0.6667,  ..., 0.7020, 0.6941, 0.6902],
          [0.6706, 0.6706, 0.6706,  ..., 0.7020, 0.6941, 0.6902],
          [0.6745, 0.6745, 0.6745,  ..., 0.7020, 0.6941, 0.6902],
          ...,
          [0.4000, 0.5373, 0.7294,  ..., 0.4275, 0.5137, 0.6078],
          [0.3490, 0.4588, 0.6745,  ..., 0.3922, 0.3804, 0.5765],
          [0.3529, 0.4235, 0.5922,  ..., 0.3804, 0.3725, 0.4902]],

         [[0.6667, 0.6667, 0.6667,  ..., 0.7137, 0.7176, 0.7216],
          [0.6706, 0.6706, 0.6706,  ..., 0.7137, 0.7176, 0.7216],
          [0.6745, 0.6745, 0.6745,  ..., 0.7098, 0.7137, 0.7176],
          ...,
          [0.5137, 0.6510, 0.8510,  ..., 0.5137, 0.6196, 0.7137],
          [0.4392, 0.5647, 0.7961,  ..., 0.4627, 0.4706, 0.6706],
          [0.4196, 0.5216, 0.7176,  ..., 0.4431, 0.4471, 0.5686]],

         [[0.6667, 0.6667, 0.6667,  ..., 0.6863, 0.6745, 0.6706],
          [0.6706, 0.6706, 0.6706,  ..., 0.6863, 0.6824, 0.6745],
          [0.6706, 0.6706, 0.6706,  ..., 0

In [29]:
y

tensor([25, 35, 24, 39, 25, 25, 33, 42, 14, 11, 14, 29, 32, 29,  3, 11, 20, 38,
         0, 10, 37, 10, 30, 27, 43, 37, 29, 44, 16, 39, 36, 13])

In [30]:
train_x = np.array(x)
train_y = np.array(y)

In [32]:
gen = torch.Generator().manual_seed(42)
train_x, val_x = random_split(train_x, [0.8,0.2], generator=gen)
train_y, val_y = random_split(train_y, [0.8,0.2], generator=gen)

In [35]:
val_x = np.array(val_x)
val_y = np.array(val_y) 

In [50]:
print(val_x)
print(val_y)

[[[[0.8235294  0.8509804  0.88235295 ... 0.8784314  0.93333334
    0.9843137 ]
   [0.8039216  0.8745098  0.89411765 ... 0.94509804 0.95686275
    0.972549  ]
   [0.6862745  0.8392157  0.8666667  ... 0.98039216 0.93333334
    0.8980392 ]
   ...
   [0.5254902  0.5058824  0.5176471  ... 0.78039217 0.8156863
    0.78431374]
   [0.56078434 0.53333336 0.52156866 ... 0.61960787 0.6627451
    0.68235296]
   [0.62352943 0.60784316 0.6        ... 0.6509804  0.59607846
    0.5882353 ]]

  [[0.85490197 0.8862745  0.92156863 ... 0.8627451  0.9254902
    0.9843137 ]
   [0.8352941  0.90588236 0.92941177 ... 0.93333334 0.9490196
    0.9607843 ]
   [0.7176471  0.87058824 0.9019608  ... 0.9764706  0.92941177
    0.89411765]
   ...
   [0.6156863  0.6039216  0.61960787 ... 0.8156863  0.84313726
    0.8117647 ]
   [0.64705884 0.6313726  0.627451   ... 0.6627451  0.7019608
    0.72156864]
   [0.7058824  0.7019608  0.70980394 ... 0.69803923 0.6392157
    0.6313726 ]]

  [[0.9411765  0.95686275 0.96862745 ...

In [38]:
# train_x = np.load("train_x.npy")
# train_y = np.load("train_y.npy")
# val_x = np.load("val_x.npy")
# val_y = np.load("val_y.npy")

In [58]:
train_x = np.array(train_x)
train_y = np.array(train_y)

In [59]:
# train_x = train_x.reshape(4155, 1, 128, 128)
train_x  = torch.from_numpy(train_x)

# converting the target into torch format
train_y = torch.from_numpy(train_y)

# converting validation images into torch format
# val_x = val_x.reshape(1355, 1, 128, 128)
val_x  = torch.from_numpy(val_x)

# converting the target into torch format
val_y = torch.from_numpy(val_y)

In [27]:
class Net(Module):   
    def __init__(self):
        super(Net, self).__init__()

        self.cnn_layers = Sequential(
            Conv2d(1, 64, 2),
            BatchNorm2d(64),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            Dropout(0.4),

            # Conv2d(32, 64, 3),
            # BatchNorm2d(64),
            # ReLU(inplace=True),
            # MaxPool2d(kernel_size=2, stride=2),
            # Dropout(0.35)
        )

        self.linear_layers = Sequential(
            Linear(64*63*63, 47)
        )

    # Defining the forward pass    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x
    

# defining the model
model = Net()
# defining the optimizer
optimizer = Adam(model.parameters(), lr=5e-5, weight_decay=5e-6)
# defining the loss function for multi class
criterion = CrossEntropyLoss()
# checking if GPU is available
if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()
    
print(model)

Net(
  (cnn_layers): Sequential(
    (0): Conv2d(1, 64, kernel_size=(2, 2), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.4, inplace=False)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=254016, out_features=47, bias=True)
  )
)


In [62]:
def accuracy_score(arr1, arr2):
    accuracy = np.sum(np.equal(arr1,arr2))/len(arr1)
    return accuracy

In [61]:
pwd

'/Users/ayushsharma/Documents/Programming/Python/dtd_custom'

In [63]:
# defining the number of epochs
print("Number of epochs to run(ideal 15 to 20): ")
n_epochs = 10
# empty list to store training losses
train_losses = []
# empty list to store validation losses
val_losses = []
# training the model
print("Training started")
best_accuracy = 0

for epoch in range(n_epochs):
    model.train()
    tr_loss = 0
    loss_train = 0
    loss_val = 0
    predictions = []

    for i in range(4155):
        # getting the training set
        x_train, y_train = Variable(train_x[i:i+1]), Variable(train_y[i:i+1]).type(torch.LongTensor)
        
        # converting the data into GPU format
        if torch.cuda.is_available():
            x_train = x_train.cuda()
            y_train = y_train.cuda()

        # clearing the Gradients of the model parameters
        optimizer.zero_grad()
        
        # prediction for training set
        output_train = model(x_train)

        if torch.cuda.is_available():
            output_train = output_train.cuda()

        # computing the training loss
        loss = criterion(output_train, y_train)
        loss_train += loss.item()
        loss.backward()
        optimizer.step()

        softmax = torch.exp(output_train).cpu()
        prob = list(softmax.detach().numpy())
        predictions.append(np.argmax(prob, axis=1).item())

        
    for i in range(1385):
        # # prediction for validation set
        x_val, y_val = Variable(val_x[i:i+1]), Variable(val_y[i:i+1]).type(torch.LongTensor)
        if torch.cuda.is_available():
            x_val = x_val.cuda()
            y_val = y_val.cuda()
        
        output_val = model(x_val)
        loss_val += criterion(output_val, y_val).item()
  
    # Appending for plotting graph
    val_losses.append(loss_val/1385)
    train_losses.append(loss_train/4155)

    writer.add_scalar("Loss/train", loss_train/4155, epoch)
    accuracy = accuracy_score(train_y.tolist(), predictions)
    print('Epoch : ',epoch+1, '\t', 'val loss :',loss_val/1385, '\t', 'train loss :',loss_train/4155, '\t',"Accuracy: ", accuracy)

    if accuracy > best_accuracy:
        best_accuracy  = accuracy
        torch.save(model.state_dict(), "/Users/ayushsharma/Documents/Programming/Python/dtd_custom")
    
writer.flush()
writer.close()
# np.save("train_losses.npy", train_losses)
# np.save("val_losses.npy", val_losses)

print("Training done!!")

Number of epochs to run(ideal 15 to 20): 
Training started


RuntimeError: Given groups=1, weight of size [64, 1, 2, 2], expected input[1, 3, 224, 224] to have 1 channels, but got 3 channels instead