In [1]:
import os
import glob
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle
from PIL import Image


import xml.etree.ElementTree as ET

In [2]:
MAIN_DIR = '/kaggle/input/pascal-voc-2012/VOC2012'

In [3]:
def xml_to_csv(path = os.path.join(MAIN_DIR,'Annotations')):
    xml_list = []
    
    for xml_file in tqdm(glob.glob(os.path.join(path, '2007*.xml'))):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        
        for obj in root.findall('object'):
            bbx = obj.find('bndbox')
            xmin = int(bbx.find('xmin').text)
            ymin = int(bbx.find('ymin').text)
            xmax = int(bbx.find('xmax').text)
            ymax = int(bbx.find('ymax').text)
            label = obj.find('name').text

            # it would be better to use column name instead of index
            value = (root.find('filename').text,
                     int(root.find('size').find('depth').text), #0 , 2
                     int(root.find('size').find('width').text), #1 , 0
                     int(root.find('size').find('height').text), #2 , 1
                     label,
                     xmin,
                     ymin,
                     xmax,
                     ymax
                     )
            xml_list.append(value)
            
    column_name = ['filename', 'channels', 'width', 'height',
                   'class', 'xmin', 'ymin', 'xmax', 'ymax']
    
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    
    return xml_df

In [4]:
xml_df = xml_to_csv()

100%|██████████| 756/756 [00:10<00:00, 71.74it/s]


In [5]:
xml_df.to_csv('data_descriptor.csv', index = False)

In [6]:
xml_df

Unnamed: 0,filename,channels,width,height,class,xmin,ymin,xmax,ymax
0,2007_005144.jpg,3,332,500,person,1,12,331,500
1,2007_005989.jpg,3,500,375,motorbike,140,130,408,273
2,2007_005989.jpg,3,500,375,person,213,96,355,260
3,2007_002107.jpg,3,500,375,aeroplane,408,243,449,257
4,2007_000822.jpg,3,500,374,motorbike,98,165,230,346
...,...,...,...,...,...,...,...,...,...
1989,2007_001583.jpg,3,500,375,person,70,16,232,375
1990,2007_006400.jpg,3,500,335,train,433,80,500,256
1991,2007_006400.jpg,3,500,335,train,189,101,440,235
1992,2007_006400.jpg,3,500,335,train,40,125,199,203


In [7]:
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", 
           "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
           "train", "tvmonitor"]
num_classes = len(classes)

## Vanilla KD Training

In [8]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torch.nn.functional as F

In [9]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = os.path.join(MAIN_DIR,'JPEGImages',self.dataframe.iloc[idx]['filename'])
        label = self.dataframe.iloc[idx]['class']

        img = Image.open(img_path).convert('RGB')

        if self.transform:
            img = self.transform(img)

        return img, label

In [10]:
label_encoder = LabelEncoder().fit(xml_df['class'])
xml_df['class'] = label_encoder.transform(xml_df['class'])

In [11]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Assuming using ImageNet normalization
])

dataset = CustomDataset(xml_df, transform = transform)
dataloader = DataLoader(dataset, batch_size = 32, shuffle=True, num_workers=4)


In [12]:
mobilenet = models.mobilenet_v2(pretrained=True)
mobilenet.classifier[1] = torch.nn.Linear(mobilenet.classifier[1].in_features, num_classes)

class ModifiedMobileNetV2(nn.Module):
    def __init__(self, mobilenet):
        super(ModifiedMobileNetV2, self).__init__()
        self.mobilenet = mobilenet
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.mobilenet(x)
        x = self.softmax(x)
        return x

mobilenet = ModifiedMobileNetV2(mobilenet)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 57.1MB/s]


In [13]:
criterion = nn.CrossEntropyLoss()
kl_div_loss = nn.KLDivLoss()
optimizer = optim.SGD(mobilenet.parameters(), lr=0.005, momentum=0.9)

In [14]:
class ModifiedResnet(nn.Module):
    def __init__(self, resnet):
        super(ModifiedResnet, self).__init__()
        self.resnet = resnet
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.resnet(x)
        x = self.softmax(x)
        return x


In [15]:
teacher = pickle.load(open('/kaggle/input/deeplearning-data/teacher.pkl','rb'))

In [16]:
def train_model(student_model, teacher_model, criterion, optimizer, num_epochs = 10):
    for epoch in range(num_epochs):
        student_model.train()
        total_loss = 0.0

        for inputs, labels in dataloader:

            optimizer.zero_grad()
            with torch.no_grad():
                teacher_outputs = teacher_model(inputs)
                
            student_outputs = student_model(inputs)
            
            kd_loss = kl_div_loss(student_outputs, teacher_outputs)
            ce_loss = criterion(student_outputs, labels)
            
            loss = kd_loss + ce_loss

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(dataloader.dataset):.4f}")
        with open(f'mobilenet_KD{epoch+1}.pkl', 'wb') as file:
            pickle.dump(mobilenet, file)
train_model(mobilenet, teacher, criterion, optimizer, num_epochs = 20)

  self.pid = os.fork()
  self.pid = os.fork()


Epoch 1/20, Loss: 0.0869
Epoch 2/20, Loss: 0.0849
Epoch 3/20, Loss: 0.0850
Epoch 4/20, Loss: 0.0848
Epoch 5/20, Loss: 0.0843
Epoch 6/20, Loss: 0.0833
Epoch 7/20, Loss: 0.0830
Epoch 8/20, Loss: 0.0827
Epoch 9/20, Loss: 0.0819
Epoch 10/20, Loss: 0.0815
Epoch 11/20, Loss: 0.0811
Epoch 12/20, Loss: 0.0804
Epoch 13/20, Loss: 0.0795
Epoch 14/20, Loss: 0.0790
Epoch 15/20, Loss: 0.0784
Epoch 16/20, Loss: 0.0778
Epoch 17/20, Loss: 0.0774
Epoch 18/20, Loss: 0.0771
Epoch 19/20, Loss: 0.0770
Epoch 20/20, Loss: 0.0769


In [17]:
with open('mobilenet_KD.pkl', 'wb') as file:
    pickle.dump(mobilenet, file)