<a href="https://colab.research.google.com/github/hrishabhsaini18/basic_deep_learning/blob/main/Image_classifier_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

In [40]:
# Preprocessing applied to every CIFAR-10 image before it reaches the network:
#   1. ToTensor()  - turns the raw RGB image (values 0..255) into a float tensor in [0, 1]
#   2. Normalize() - computes (x - mean) / std per channel; with mean = std = 0.5
#                    for each of R, G, B this stretches the values to [-1, 1]
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [41]:
# CIFAR-10 train and test splits, stored under ./sample_data/ (downloaded when
# missing). Both splits go through the same `transform` pipeline.
train_data = torchvision.datasets.CIFAR10(
    root='./sample_data/',
    train=True,
    download=True,
    transform=transform,
)
test_data = torchvision.datasets.CIFAR10(
    root='./sample_data/',
    train=False,
    download=True,
    transform=transform,
)

In [42]:
# Mini-batch iterators over the two splits: 32 images per batch, 2 worker processes each.
# Only the training stream is shuffled (a fresh order every epoch helps SGD-style
# optimisation). The test stream keeps the dataset order: accuracy does not depend on
# the order, and a fixed order makes evaluation runs reproducible and comparable.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2)

In [43]:
# Look at the first training sample to check the tensor layout of one image.
image, label = train_data[0]
image.shape

torch.Size([3, 32, 32])

In [44]:
# Human-readable name for each output label number of the classifier:
# class_names[k] is the name of label k.
class_names = [
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [45]:
class CNNclassifier(nn.Module):
  """Small convolutional classifier for 3x32x32 images with 10 output classes.

  Shape flow for a single image (5x5 convolutions without padding, stride 1;
  2x2 max-pooling with stride 2):

    (3, 32, 32) --conv1--> (12, 28, 28) --pool--> (12, 14, 14)
                --conv2--> (24, 10, 10) --pool--> (24, 5, 5) --flatten--> 600

  The flattened features then pass through dense layers 600 -> 128 -> 64 -> 10.
  """

  def __init__(self):
    super().__init__()
    # Convolutional feature extractor; the same pooling layer is reused after both convs.
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5)
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5)
    # Dense head; 24*5*5 is the size of the flattened output of the second pooling step.
    self.fc1 = nn.Linear(in_features=24 * 5 * 5, out_features=128)
    self.fc2 = nn.Linear(in_features=128, out_features=64)
    self.out = nn.Linear(in_features=64, out_features=10)

  def forward(self, x):
    """Map a batch of images (batch, 3, 32, 32) to raw class scores (batch, 10).

    No softmax is applied here; the scores are returned as-is.
    """
    # conv -> ReLU -> max-pool, twice
    for conv in (self.conv1, self.conv2):
      x = self.pool(F.relu(conv(x)))

    # keep the batch dimension, collapse everything else
    x = x.flatten(start_dim=1)

    # two hidden dense layers with ReLU, then the linear output layer
    for dense in (self.fc1, self.fc2):
      x = F.relu(dense(x))
    return self.out(x)


In [46]:
# Initial training setup: a fresh network, cross-entropy loss, and SGD with momentum.
# NOTE: `model`, `criterion` and `optimizer` are assigned again in the later
# "Move your model to the selected device" cell, which is the setup the training loop uses.
model = CNNclassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.75)

In [47]:
# 1. Choose the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [49]:
# 2. Build a fresh network and place its parameters on the selected device
model = CNNclassifier()
model = model.to(device)

# Cross-entropy loss on the raw class scores, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [50]:
# 3. Training loop: for every epoch, run one optimisation step per mini-batch and
#    report the average training loss of that epoch.
epoch = 35  # number of full passes over the training set

for i in range(epoch):
  print(f'Training epoch: {i}...')

  # Put the network in training mode explicitly, so re-running this cell after an
  # evaluation step (which switches a module to eval mode) still trains correctly.
  model.train()

  running_loss = 0.0  # sum of per-sample losses seen in this epoch
  samples_seen = 0    # number of samples seen in this epoch

  for inputs, labels in train_loader:
    # Move the batch to the same device as the model
    inputs = inputs.to(device)
    labels = labels.to(device)

    # Gradients accumulate by default, so clear them before each step
    optimizer.zero_grad()

    outputs = model(inputs)
    loss = criterion(outputs, labels)

    loss.backward()
    optimizer.step()

    # `criterion` returns the mean loss over the batch; weight it by the batch size
    # so that a smaller final batch does not distort the epoch average.
    batch_size = labels.size(0)
    running_loss += loss.item() * batch_size
    samples_seen += batch_size

  # max(..., 1) only guards against an empty loader
  print(f'Loss: {running_loss/max(samples_seen, 1):.4f}')

Training epoch: 0...
Loss: 1.5292
Training epoch: 1...
Loss: 1.2309
Training epoch: 2...
Loss: 1.0874
Training epoch: 3...
Loss: 0.9787
Training epoch: 4...
Loss: 0.8980
Training epoch: 5...
Loss: 0.8332
Training epoch: 6...
Loss: 0.7821
Training epoch: 7...
Loss: 0.7354
Training epoch: 8...
Loss: 0.6935
Training epoch: 9...
Loss: 0.6540
Training epoch: 10...
Loss: 0.6175
Training epoch: 11...
Loss: 0.5876
Training epoch: 12...
Loss: 0.5538
Training epoch: 13...
Loss: 0.5263
Training epoch: 14...
Loss: 0.5008
Training epoch: 15...
Loss: 0.4759
Training epoch: 16...
Loss: 0.4519
Training epoch: 17...
Loss: 0.4321
Training epoch: 18...
Loss: 0.4136
Training epoch: 19...
Loss: 0.3908
Training epoch: 20...
Loss: 0.3739
Training epoch: 21...
Loss: 0.3575
Training epoch: 22...
Loss: 0.3440
Training epoch: 23...
Loss: 0.3327
Training epoch: 24...
Loss: 0.3191
Training epoch: 25...
Loss: 0.3050
Training epoch: 26...
Loss: 0.2910
Training epoch: 27...
Loss: 0.2820
Training epoch: 28...
Loss: 0.

In [51]:
# Write the trained parameters (the model's state_dict) to 'trained_net.pth'
torch.save(obj=model.state_dict(), f='trained_net.pth')

In [52]:
# Rebuild the architecture and restore the saved parameters into a second instance.
model_2 = CNNclassifier()
# map_location='cpu': the checkpoint may have been written from a model living on the
# GPU; remapping its tensors to the CPU lets this cell run on a CPU-only runtime too.
# model_2 itself is created on the CPU, so the loaded tensors match its parameters.
model_2.load_state_dict(torch.load('trained_net.pth', map_location='cpu'))

<All keys matched successfully>

In [53]:
# Top-1 accuracy of the reloaded network (model_2) on the test split.
correct = 0
total = 0

model_2.eval()

# Evaluate on whichever device model_2's parameters live on, so the inputs and the
# weights are always on the same device and model_2 itself is left where it is.
eval_device = next(model_2.parameters()).device

with torch.no_grad():  # no gradients are needed for evaluation
  for images, labels in test_loader:
    images = images.to(eval_device)
    labels = labels.to(eval_device)

    # Use model_2 (the network restored from disk), not the in-memory training
    # model, so this number actually validates the saved checkpoint.
    outputs = model_2(images)
    _, predicted = torch.max(outputs, 1)  # index of the highest score per image
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

accuracy = 100*(correct/total)
print(f'Accuracy:{accuracy}%')

Accuracy:64.35%


In [54]:
### Test images from internet
new_transform = transforms.Compose([
    transforms.Resize((32,32)), # since our images are not in 32*32, provide size as a tuple
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

In [55]:
from PIL import Image # Import Image from PIL

def load_image(image_path):
  """Load an image file and return it as a one-image batch for the network.

  Args:
    image_path: path of the image file to open.

  Returns:
    The image after `new_transform`, with an extra leading batch dimension
    added by `unsqueeze(0)`.
  """
  # The context manager closes the file handle once the pixels have been read.
  # convert('RGB') forces exactly three channels, so grayscale, CMYK or RGBA files
  # do not break the 3-channel Normalize step in `new_transform`.
  with Image.open(image_path) as raw_image:
    rgb_image = raw_image.convert('RGB')
  image = new_transform(rgb_image)
  image = image.unsqueeze(0) # we want to have it like a batch
  return image

# Files to classify, one path per line
image_path = [
    './sample_data/validation_dataset_CNN/image1.jpeg',
    './sample_data/validation_dataset_CNN/image2.jpeg',
    './sample_data/validation_dataset_CNN/image3.jpeg',
    './sample_data/validation_dataset_CNN/image4.jpeg',
    './sample_data/validation_dataset_CNN/image5.jpeg',
]
# One preprocessed single-image batch per file, in the same order as `image_path`
images = list(map(load_image, image_path))

In [56]:
# Classify each loaded image with the restored network and print the class name
model_2.eval()
with torch.no_grad():
  for image in images:
    output = model_2(image)
    # position of the largest score along the class dimension
    predicted = output.argmax(dim=1)
    print(f'Prediction:{class_names[predicted.item()]}')

Prediction:plane
Prediction:dog
Prediction:dog
Prediction:dog
Prediction:dog
