In [1]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torchvision.utils import save_image

In [2]:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by a skip connection.

    Args:
        in_channel: number of channels in the incoming feature map.
        out_channel: number of channels produced by both convolutions.
        stride: stride of the first convolution only; the second one always
            uses stride 1, so the spatial size changes at most once.
        downsample: optional module applied to the block input to build the
            skip branch. Needed whenever the input shape differs from the
            shape of the main branch output.
    """

    def __init__(self, in_channel, out_channel,stride=1, downsample=None):
        super().__init__()
        # Both convolutions share the same 3x3 / padding-1 geometry.
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(in_channel, out_channel, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self,x):
        """Return relu(main_branch(x) + skip(x))."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        # Without a downsample module the input itself is the skip branch.
        shortcut = self.downsample(x) if self.downsample else x
        main += shortcut
        return self.relu(main)

class ResNet(nn.Module):
    """Small ResNet: a 3x3 stem, six residual stages, average pooling and a linear head.

    Args:
        block: callable invoked as ``block(in_channel, out_channel, stride,
            downsample)`` that returns the module for one stage.
        num_classes: number of outputs of the final linear layer.

    The pooling layer uses an 8x8 window and the head expects 64 features, so
    with blocks that shrink the map by their stride a 32x32 input reaches the
    pool as 8x8.
    """

    def __init__(self, block, num_classes=10):
        super().__init__()
        # Channel count of the tensor entering the next stage; make_layer
        # advances it as stages are created.
        self.in_channel = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU()
        # (out_channel, stride) for block1 .. block6, registered in this order.
        stage_plan = ((16, 1), (16, 1), (32, 2), (32, 1), (64, 2), (64, 1))
        for position, (width, stride) in enumerate(stage_plan, start=1):
            setattr(self, "block{}".format(position), self.make_layer(block, width, stride))
        self.avg_pool = nn.AvgPool2d(8)  # averages over an 8x8 window
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channel, stride=1):
        """Create one stage and record its output width in ``self.in_channel``.

        A conv + batch-norm projection is supplied as ``downsample`` whenever
        the stage changes the spatial size (stride != 1) or the channel count.
        """
        shape_preserved = stride == 1 and self.in_channel == out_channel
        if shape_preserved:
            projection = None
        else:
            projection = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=3, stride=stride, padding=1),
                nn.BatchNorm2d(out_channel),
            )
        stage = block(self.in_channel, out_channel, stride, projection)
        self.in_channel = out_channel
        return stage

    def forward(self,x):
        """Run stem, block1..block6, pooling, flatten and the linear head."""
        out = self.relu(self.bn(self.conv1(x)))
        for position in range(1, 7):
            out = getattr(self, "block{}".format(position))(out)
        out = self.avg_pool(out)
        flat = out.view(out.size(0), -1)
        return self.fc(flat)
model = ResNet(ResidualBlock)
# Freezing weights: walk the top-level children in registration order and turn
# off gradient tracking for the parameters of the ones that should stay fixed.
# The first nine children (conv1, bn, relu, block1..block6) are frozen here;
# the remaining ones (avg_pool and fc) are left untouched.
# Try changing the cut-off yourself to freeze more or fewer layers.
for child_index, child in enumerate(model.children()):
    print("The child is -", child)
    print("The value of child number is ", child_index)
    if child_index < 9:
        for param in child.parameters():
            # The attribute must be spelled `requires_grad`; a misspelled name
            # is silently accepted as a new attribute and freezes nothing.
            param.requires_grad = False

# Replace the classification head with a fresh 5-way layer. A newly created
# layer has trainable parameters, so it is the only part that will be updated.
print(model.fc)
model.fc = nn.Linear(model.fc.in_features, 5)
print(model.fc)


The child is - Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
The value of child number is  0
The child is - BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
The value of child number is  1
The child is - ReLU()
The value of child number is  2
The child is - ResidualBlock(
  (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
The value of child number is  3
The child is - ResidualBlock(
  (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (b

In [33]:
# Download a pretrained ResNet-18 and adapt it for transfer learning.
# NOTE(review): recent torchvision releases prefer the `weights=` argument over
# `pretrained=True` - confirm against the installed version before switching.
resnet = torchvision.models.resnet18(pretrained=True)
print(resnet.fc)
# Freeze every pretrained parameter so that only layers added afterwards train.
for param in resnet.parameters():
    param.requires_grad = False

# The forward passes below are shape checks on random noise. Freezing
# parameters does not stop BatchNorm layers from updating their running
# statistics in training mode, so run the checks in eval mode (and without
# building a graph), then restore whatever mode the model was in.
was_training = resnet.training
resnet.eval()
with torch.no_grad():
    images = torch.randn(64,3,224,224)
    output = resnet(images)
    print(output.shape)

# Swap the 1000-way head for a new 100-way layer (trainable by default).
resnet.fc = nn.Linear(resnet.fc.in_features, 100)
# print(resnet.fc)
with torch.no_grad():
    images = torch.randn(64,3,224,224)
    output = resnet(images)
    print(output.shape)
resnet.train(was_training)



Linear(in_features=512, out_features=1000, bias=True)
torch.Size([64, 1000])
torch.Size([64, 100])


In [8]:
#It's good practice to first convert your data into a NumPy file and then load it. It makes computation faster, and writing the code for it is relatively easy.
#For fun, check the DataLoader of COCO for image captioning.
class CustomDatasetFromCSV(torch.utils.data.Dataset):
    """Image dataset backed by a CSV file, with the transform defined inside the class.

    Column 0 of each row is read as the label; the remaining columns are
    reshaped to a 28x28 uint8 image in ``__getitem__``.
    """

    def __init__(self, csv_path, height, width):
        """Load the CSV and cache the label column.

        Args:
            csv_path (string): path to csv file
            height (int): image height (stored only; the reshape in
                ``__getitem__`` is fixed at 28)
            width (int): image width (stored only; the reshape in
                ``__getitem__`` is fixed at 28)
        """
        self.data = pd.read_csv(csv_path)
        self.labels = np.asarray(self.data.iloc[:, 0])
        self.height = height
        self.width = width

    def transform(self):
        """Return the augmentation pipeline: pad by 4, random flip, random 32x32 crop, to tensor."""
        return (transforms.Compose([
            transforms.Pad(4),
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32),
            transforms.ToTensor()]))

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the row at ``index``."""
        single_image_label = self.labels[index]
        # Everything after the label column is pixel data: 784 values -> 28x28.
        img_as_np = np.asarray(self.data.iloc[index][1:]).reshape(28,28).astype('uint8')
        # Build the PIL image through torchvision, which the notebook already
        # imports, and force grayscale mode 'L'.
        img_as_img = transforms.ToPILImage()(img_as_np).convert('L')
        # transform() is a factory taking no arguments: call it to obtain the
        # pipeline, then apply the pipeline to the image.
        img_as_tensor = self.transform()(img_as_img)
        return (img_as_tensor, single_image_label)

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.data.index)
    
#another approach is to define the transforms below 
class CustomDatasetFromCSV(torch.utils.data.Dataset):
    """Image dataset backed by a CSV file, with the transform supplied by the caller.

    Column 0 of each row is read as the label; the remaining columns are
    reshaped to a 28x28 uint8 image in ``__getitem__``.
    """

    def __init__(self, csv_path, height, width, transforms=None):
        """Load the CSV, cache the label column and remember the transform.

        Args:
            csv_path (string): path to csv file
            height (int): image height (stored only; the reshape in
                ``__getitem__`` is fixed at 28)
            width (int): image width (stored only; the reshape in
                ``__getitem__`` is fixed at 28)
            transforms: optional callable applied to the PIL image, e.g. a
                ``Compose`` ending in ``ToTensor``
        """
        self.data = pd.read_csv(csv_path)
        self.labels = np.asarray(self.data.iloc[:, 0])
        self.height = height
        self.width = width
        # The parameter is named `transforms`, so that is the name to store.
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)``; the image is untransformed when no transform was given."""
        single_image_label = self.labels[index]
        # Everything after the label column is pixel data: 784 values -> 28x28.
        img_as_np = np.asarray(self.data.iloc[index][1:]).reshape(28,28).astype('uint8')
        # Build the PIL image through torchvision, which the notebook already
        # imports, and force grayscale mode 'L'.
        sample = transforms.ToPILImage()(img_as_np).convert('L')
        # Without a transform the PIL image is returned as-is, so the return
        # value is always defined.
        if self.transforms is not None:
            sample = self.transforms(sample)
        return (sample, single_image_label)

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.data.index)

#The above covers a normal classification problem. For other kinds of problems, such as object detection,
#you can make a csv file that holds the annotations and filenames (with paths) to parse and load the data.


# transformations = transforms.Compose([transforms.ToTensor()])
# custom_mnist_from_csv = CustomDatasetFromCSV('../data/mnist_in_csv.csv', 28, 28, transformations)