In [4]:
import torch
import numpy as np
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
from PIL import Image, ImageDraw
import scipy.misc

In [5]:
def process_image(image):
    """Load an image file and turn it into a normalized 3x256x256 float tensor.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a path or an open binary
            file object).

    Returns:
        torch.Tensor: float tensor of shape (3, 256, 256), scaled to [0, 1]
        and then normalized per channel with the mean/std values below.
        The tensor is placed on the GPU when CUDA is available and is left
        on the CPU otherwise.
    """
    # Open inside a context manager so the underlying file handle is released.
    with Image.open(image) as img:
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise break the channel transpose / 3-channel Normalize below.
        img_resized = img.convert('RGB').resize((256, 256))  # Resize to 256x256

    pixels = np.array(img_resized)             # Shape: (256, 256, 3)
    pixels = pixels.transpose(2, 0, 1) / 255.  # channels first, scaled to [0, 1]

    tensor = torch.FloatTensor(pixels)
    # Only move to the GPU when one exists instead of failing on CPU-only hosts.
    if torch.cuda.is_available():
        tensor = tensor.cuda()

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    return normalize(tensor)  # (3, 256, 256)

In [6]:
# Define model
class CNN(nn.Module):
    """Two-convolution classifier for 3-channel 256x256 images.

    Layout: conv(3->6, 5x5) -> ReLU -> maxpool(2) -> conv(6->12, 5x5) -> ReLU
    -> maxpool(2) -> linear(12*61*61 -> 120) -> ReLU -> linear(120 -> num_classes).

    Args:
        num_classes: Number of output logits. Defaults to 10.

    The first linear layer is sized for 256x256 inputs; any other spatial
    size makes ``forward`` raise a ``RuntimeError`` at ``fc1``.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # in_channels = 3, out_channels = 6, filter_size = 5, 0 padding, stride 1
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 12, 5)
        self.fc1 = nn.Linear(12 * 61 * 61, 120)  # input = flattened 12x61x61 feature map
        self.fc2 = nn.Linear(120, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for x of shape (batch, 3, 256, 256)."""
        # consider an image of size 256x256
        x = F.relu(self.conv1(x))                # output size = [ (256 - 5 + 2(0) ) / 1 ] + 1 --> 252x252
        x = self.pool(x)                         # output_size = 252 / 2 --> 126x126
        x = F.relu(self.conv2(x))                # output size = [ (126 - 5 + 2(0) ) / 1 ] + 1 --> 122x122
        x = self.pool(x)                         # output size = 122/2 --> 61x61
        # Flatten everything except the batch axis. A view(-1, 12*61*61) would
        # silently spill extra spatial features into the batch dimension for
        # wrongly sized inputs instead of failing.
        x = torch.flatten(x, 1)                  # (batch, 44652)
        x = F.relu(self.fc1(x))                  # (batch, 120)
        x = self.fc2(x)                          # (batch, num_classes)
        return x

In [None]:
# Initialize model; move it to the GPU only when CUDA is actually available
# so this cell also runs on CPU-only machines.
model = CNN()
if torch.cuda.is_available():
    model = model.cuda()
# Initialize optimizer (built after the device move, over the model's parameters)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Preprocess the sample file, then prepend a batch axis so the tensor
# has the (batch, channels, height, width) layout the model is called with.
image = torch.unsqueeze(process_image('test.jpg'), 0)

In [None]:
# Run the batch through the network and report the size of the result.
output = model(image)
print(output.size())

In [None]:
# Print model's state_dict: one line per entry with its name and tensor size.
print("Model's state_dict:")
# Build the state_dict once and walk its items, rather than rebuilding the
# whole dict on every iteration just to look up a single entry.
for param_name, param_value in model.state_dict().items():
    print(param_name, "\t", param_value.size())

In [None]:
# Save only the parameters (the state_dict) of the model.
torch.save(model.state_dict(), 'model.pth.tar')
# When loading in a fresh session, construct the architecture first
# (model = CNN()) and then load the weights into it.
# map_location='cpu' lets the file be read on machines without a GPU;
# load_state_dict then copies the values into the model's own parameters.
model.load_state_dict(torch.load('model.pth.tar', map_location='cpu'))  # load the weights
model.eval()     #set dropout and batch normalization layers to evaluation mode before inference (testing)

In [None]:
# Section marker for the cells that follow. It is a bare string expression, so
# it does nothing except serve as this cell's value.
"""Saving & Loading a General Checkpoint for Inference and/or Resuming Training"""

In [None]:
# Start from a fresh model; use the GPU only when CUDA is available.
model = CNN()
if torch.cuda.is_available():
    model = model.cuda()
# Rebuild the optimizer as well: an optimizer created for an earlier model
# instance keeps references to that instance's parameters and would never
# update this new model when training resumes.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Bundle the epoch counter, model weights, optimizer state and loss value
# (epoch and loss are literal values here) into one dict and write it to disk.
checkpoint = dict(
    epoch=1,
    model_state_dict=model.state_dict(),
    optimizer_state_dict=optimizer.state_dict(),
    loss=0.2,
)
torch.save(checkpoint, 'model.pth.tar')

In [None]:
# Read the checkpoint dict back from disk. map_location='cpu' makes the load
# work on hosts without a GPU; the load_state_dict calls that consume it copy
# the values onto the devices of the model / optimizer parameters.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
checkpoint = torch.load('model.pth.tar', map_location='cpu')

In [None]:
# Restore optimizer and model state from the checkpoint dict, then pull out
# the bookkeeping values stored next to them.
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.load_state_dict(checkpoint['model_state_dict'])
epoch, loss = checkpoint['epoch'], checkpoint['loss']


In [None]:
# Choose exactly one mode. Calling eval() and then train() back to back always
# leaves the model in training mode, so the choice is made explicit here.
#   True  -> resume training (train mode)
#   False -> testing / inference (same as model.eval())
RESUME_TRAINING = True
model.train(RESUME_TRAINING)