In [9]:
# from google.colab import drive
# drive.mount('/content/drive')

In [10]:
# !unzip "/content/drive/My Drive/Pet_images.zip" -d "/content/drive/My Drive/PetImages/"


In [11]:


import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchsummary import summary
from tqdm import tqdm


In [12]:
transform = transforms.Compose([
            transforms.Resize((224, 224)),   # 227 * 227 -->alexnet   # 224 * 224 --> vgg, resnet
            transforms.ToTensor(),
            transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
])

In [13]:

from torchvision.datasets import ImageFolder


In [14]:
train_dataset = ImageFolder(r"C:\Users\acer\Downloads\PetImages\PetImages\train", transform)
val_dataset = ImageFolder(r"C:\Users\acer\Downloads\PetImages\PetImages\val", transform)
test_dataset = ImageFolder(r"C:\Users\acer\Downloads\PetImages\PetImages\test", transform)

In [15]:
train_dataloader = DataLoader(train_dataset,
                              batch_size=64 ,
                              shuffle = True,
                              num_workers=2,
                              pin_memory=True
)

val_dataloader = DataLoader(train_dataset,
                            batch_size=64 ,
                            shuffle = False,
                            num_workers=2,
                            pin_memory=True
)

test_dataloader = DataLoader(train_dataset,
                             batch_size=64 ,
                             shuffle = False,
                             num_workers=2,
                             pin_memory=True
)

In [16]:
for image, _ in train_dataloader:
      print(image.shape)
      break

torch.Size([64, 3, 224, 224])


AlexNet Model Architecture

In [17]:
class AlexNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.feature_extractor = nn.Sequential(
        nn.Conv2d(3, 96, kernel_size=11, stride=4),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(3, stride=2),
        nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(3, stride=2),
        nn.Conv2d(256, 384, kernel_size=3, stride=1, padding = 1),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 384, kernel_size=3, stride=1,padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=3, stride=1,padding=1),   # (384, 13, 13) -> (256, 13, 13)
        nn.ReLU(inplace=True),
        nn.MaxPool2d(3, stride=2),      # (256, 13, 13) -> (256, 6, 6)
        nn.Flatten()
    )

    self.fcn = nn.Sequential(
        nn.Linear(9216,4096 ),      # 256 * 6 * 6 = 9216
        nn.ReLU(inplace=True),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, 1)
    )

  def forward(self, x):
      feature_map = self.feature_extractor(x)
      output = self.fcn(feature_map)
      return output.shape
    



In [18]:
# for image, _ in train_dataloader:
#       image = image[0].unsqueeze(0)
#       print({alexnet(image)})
#       break

VGG16 Model Architecture

In [19]:
# class VGG16(nn.Module):
#     def __init__(self):
#         super(VGG16, self). __init__()

#         self.features = nn.Sequential(
#             nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding = 1),   #<---224 *224 *3
#             nn.ReLU(inplace = True),
#             nn.Conv2d(64, 64, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.MaxPool2d(kernel_size = 2, stride = 2) ,          #----> 112 * 112 * 64

#             nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.Conv2d(128, 128, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.MaxPool2d(kernel_size = 2, stride = 2) ,     #---> 56 * 56 * 128

#             nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.MaxPool2d(kernel_size = 2, stride = 2),      #---> 28 * 28 * 256

#             nn.Conv2d(256, 512, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.MaxPool2d(kernel_size = 2, stride = 2),      #---> 14 * 14 * 512


#             nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace = True),
#             nn.MaxPool2d(kernel_size = 2, stride = 2),      #--> 7 * 7 * 512

#             nn.Flatten()     
#         )

#         self.fcn = nn.Sequential(
#             nn.Linear(7 * 7 * 512, 4096),
#             nn.ReLU(inplace = True),
#             nn.Linear(4096, 4096),
#             nn.ReLU(inplace = True),
#             nn.Linear(4096, 1)
#         )
    
#     def forward(self, x):
#         feature_map = self.features(x)
#         output = self.fcn(feature_map)
#         return output.shape

# vgg16 = VGG16()

In [20]:
# for image, _ in train_dataloader:
#     image = image[0].unsqueeze(0)
#     print(vgg16(image))
#     break

In [21]:
# from torchinfo import summary

# summary(vgg16, input_size=(1, 3, 224, 224))


Training Pipe

In [22]:
model = AlexNet()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
criteria = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
epochs = 10


In [34]:
def train_one_step(model, optimizer, images, labels):
    #forward pass
    model.train()
    optimizer.zero_grad()
    #make sure images shape is right
    images = images.reshape(-1, 3, 224, 224)
    outputs = model(images)
    # print("shpae of outputs",outputs.shape)
    # print("shpae of labes is ",labels.shape)
    #make sure the shape of outputs and labels are same
    outputs = outputs.squeeze(1)
    loss = criteria(outputs, labels)

    #backward pass
    loss.backward()
    optimizer.step()

    return loss, outputs

def validate_one_step(model, criteria, images, labels):
    #forward pass
    model.eval()
    outputs = model(images)
    loss = criteria(outputs, labels)

    return loss,outputs


In [31]:
labels.shape

torch.Size([64])

In [25]:
from torch.nn.functional import sigmoid

In [35]:
train_loss, validation_loss = [], []
train_accuracy, validation_accuracy = [], []
for epoch in range(1,epochs +1):
    # Training
    epoch_train_loss = 0
    epoch_correct_prediction = 0
    for images, labels in tqdm(train_dataloader, desc=f'Training {epoch} of {epochs}'):
        images, labels = images.to(device), labels.to(device)
        loss,results = train_one_step(model,optimizer,images,labels)
        epoch_train_loss+= loss.item()
        results = sigmoid(results).round()
        print(results)
        break
    break
        

Training 1 of 10:   0%|          | 0/274 [00:16<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x6400 and 9216x4096)

## Training Curve

In [None]:
n_epoch = 10
model = AlexNet()
criteria = nnBCEWithLogitsLoss()