In [1]:
import numpy as np 
import pandas as pd # for reading csv file

#train and test images are .7z archives, we need to unpack them
!pip install py7zr

from py7zr import unpack_7zarchive
import shutil

#Before using, we need to register unpack format
shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)

#unpack train images in /kaggle/working or /kaggle/temp
shutil.unpack_archive('/kaggle/input/cifar-10/train.7z', '/kaggle/temp/')

Collecting py7zr
  Downloading py7zr-0.20.8-py3-none-any.whl.metadata (16 kB)
Collecting pycryptodomex>=3.16.0 (from py7zr)
  Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting pyzstd>=0.15.9 (from py7zr)
  Downloading pyzstd-0.15.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)
Collecting pyppmd<1.2.0,>=1.1.0 (from py7zr)
  Downloading pyppmd-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.7 kB)
Collecting pybcj<1.1.0,>=1.0.0 (from py7zr)
  Downloading pybcj-1.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Collecting multivolumefile>=0.2.3 (from py7zr)
  Downloading multivolumefile-0.2.3-py3-none-any.whl (17 kB)
Collecting inflate64<1.1.0,>=1.0.0 (from py7zr)
  Downloading inflate64-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Collecting brotli>=1.1.0 (from py7zr)
  Downloading Brotli-1.1.

In [2]:
import torch
if torch.cuda.is_available():
    device=torch.device(type="cuda", index=0)
else:
    device=torch.device(type="cpu", index=0)            

In [3]:
#first fetching the class names from trainLabels.csv

train_labels=pd.read_csv("/kaggle/input/cifar-10/trainLabels.csv", header='infer')

#unique labels
classes=train_labels['label'].unique()

#confirming
print(classes)

#classnames to classids
name2num={}
i=0
for name in classes:
    name2num[name]=i
    i=i+1
print(name2num)
num2name={}
for i in range(len(classes)):
    num2name[i]=classes[i]
print(num2name)

['frog' 'truck' 'deer' 'automobile' 'bird' 'horse' 'ship' 'cat' 'dog'
 'airplane']
{'frog': 0, 'truck': 1, 'deer': 2, 'automobile': 3, 'bird': 4, 'horse': 5, 'ship': 6, 'cat': 7, 'dog': 8, 'airplane': 9}
{0: 'frog', 1: 'truck', 2: 'deer', 3: 'automobile', 4: 'bird', 5: 'horse', 6: 'ship', 7: 'cat', 8: 'dog', 9: 'airplane'}


In [4]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
from torchvision.io import read_image
from torchvision.transforms import ToTensor, Normalize, Resize, Compose

class TrainDataset(Dataset):
    def __init__(self, imgpath, labelpath):
        super().__init__()
        self.imgpath=imgpath
        self.labelpath=labelpath
        self.labels=pd.read_csv(labelpath, header='infer')
        self.transform=Compose([Resize((224,224), antialias=True), Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])
        
    def __len__(self):
        return self.labels.shape[0]
    
    def __getitem__(self,idx):
        finalpath=os.path.join(self.imgpath,str(idx+1))+'.png'
        img=read_image(finalpath)/255
        img=self.transform(img)
        label=self.labels.iloc[idx,1]
        label=name2num[label]
        return img,label

traindataset=TrainDataset('/kaggle/temp/train','/kaggle/input/cifar-10/trainLabels.csv')        
        
batch_size=64    
traindataloader=DataLoader(dataset=traindataset, batch_size=batch_size)

In [2]:
import torch.nn as nn
from torchvision.models import resnet101, ResNet101_Weights
class Cifar10Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.pretrainednet=ResNet101(weights=ResNet101_Weights.DEFAULT)
        self.pretrainednet.classifier=nn.Sequential(
            nn.Linear(in_features=960, out_features=1280, 
                   bias=True),nn.Hardswish(), 
            nn.Dropout(p=0.2, inplace=True), 
            nn.Linear(in_features=1280, out_features=10, 
                      bias=True)
        )
        
    def forward(self,x):
        x=self.pretrainednet(x)
        return x

In [5]:
def train_one_epoch(dataloader, model,loss_fn, optimizer):
    model.train()
    track_loss=0
    num_correct=0
    num_param=0
    
    for i, (imgs, labels) in enumerate(dataloader):
        imgs=imgs.to(device)
        labels=labels.to(device)
        pred=model(imgs)
                    
        loss=loss_fn(pred,labels)
        track_loss+=loss.item()
        num_correct+=(torch.argmax(pred,dim=1)==labels).type(torch.float).sum().item()
        
        running_loss=round(track_loss/(i+(imgs.shape[0]/batch_size)),2)
        running_acc=round((num_correct/((i*batch_size+imgs.shape[0])))*100,2)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if i%100==0:
            print("Batch:", i+1, "/",len(dataloader), "Running Loss:",running_loss, "Running Accuracy:",running_acc)
            
    epoch_loss=running_loss
    epoch_acc=running_acc
    return epoch_loss, epoch_acc

In [8]:
model=Cifar10Net()
model=model.to(device)

for param in model.pretrainednet.features.parameters():
    param.requires_grad=False

loss_fn=nn.CrossEntropyLoss()
lr=0.001
#optimizer=torch.optim.SGD(params=model.parameters(), lr=lr)
optimizer=torch.optim.Adam(params=model.parameters(), lr=lr)
n_epochs=10

for i in range(n_epochs):
    print("Epoch No:",i+1)
    train_epoch_loss, train_epoch_acc=train_one_epoch(traindataloader,model,loss_fn,optimizer)
    print("Training:", "Epoch Loss:", train_epoch_loss, "Epoch Accuracy:", train_epoch_acc)
    print("--------------------------------------------------")

for param in model.pretrainednet.features.parameters():
    param.requires_grad=True

for i in range(n_epochs):
    print("Epoch No:",i+1)
    train_epoch_loss, train_epoch_acc=train_one_epoch(traindataloader,model,loss_fn,optimizer)
    print("Training:", "Epoch Loss:", train_epoch_loss, "Epoch Accuracy:", train_epoch_acc)
    print("--------------------------------------------------")

Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-5c1a4163.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-5c1a4163.pth
100%|██████████| 21.1M/21.1M [00:00<00:00, 67.1MB/s]


Epoch No: 1
Batch: 1 / 782 Running Loss: 2.3 Running Accuracy: 14.06
Batch: 101 / 782 Running Loss: 1.06 Running Accuracy: 63.81
Batch: 201 / 782 Running Loss: 0.92 Running Accuracy: 67.96
Batch: 301 / 782 Running Loss: 0.87 Running Accuracy: 69.68
Batch: 401 / 782 Running Loss: 0.84 Running Accuracy: 70.96
Batch: 501 / 782 Running Loss: 0.81 Running Accuracy: 71.55
Batch: 601 / 782 Running Loss: 0.8 Running Accuracy: 72.02
Batch: 701 / 782 Running Loss: 0.79 Running Accuracy: 72.45
Training: Epoch Loss: 0.78 Epoch Accuracy: 72.84
--------------------------------------------------
Epoch No: 2
Batch: 1 / 782 Running Loss: 0.62 Running Accuracy: 78.12
Batch: 101 / 782 Running Loss: 0.67 Running Accuracy: 76.5
Batch: 201 / 782 Running Loss: 0.64 Running Accuracy: 77.07
Batch: 301 / 782 Running Loss: 0.64 Running Accuracy: 77.35
Batch: 401 / 782 Running Loss: 0.64 Running Accuracy: 77.5
Batch: 501 / 782 Running Loss: 0.64 Running Accuracy: 77.55
Batch: 601 / 782 Running Loss: 0.64 Running 

In [6]:
import torch.nn as nn
from torchvision.models.alexnet import AlexNet,AlexNet_Weights

class Cifar10Net_using_AlexNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.pretrainednet=AlexNet()
        self.pretrainednet.classifier=nn.Sequential(
            nn.Linear(in_features=9216, out_features=4096, 
                   bias=True),
            nn.Hardswish(), 
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(in_features = 4096, out_features = 1280),
            nn.ReLU(),
            nn.Linear(in_features=1280, out_features=10, 
                      bias=True)
        )
        
    def forward(self,x):
        x=self.pretrainednet(x)
        return x

In [10]:
# import torch.nn as nn
# from torchvision.models import mobilenet_v3_large, MobileNet_V3_Large_Weights
# class Cifar10Net(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.pretrainednet=mobilenet_v3_large(weights=MobileNet_V3_Large_Weights.DEFAULT)
#         self.pretrainednet.classifier=nn.Sequential(
#             nn.Linear(in_features=960, out_features=1280, 
#                    bias=True),nn.Hardswish(), 
#             nn.Dropout(p=0.2, inplace=True), 
#             nn.Linear(in_features=1280, out_features=10, 
#                       bias=True)
#         )
        
#     def forward(self,x):
#         x=self.pretrainednet(x)
#         return x

In [8]:
model=Cifar10Net_using_AlexNet()
model=model.to(device)

for param in model.pretrainednet.features.parameters():
    param.requires_grad=False

loss_fn=nn.CrossEntropyLoss()
lr=0.001
#optimizer=torch.optim.SGD(params=model.parameters(), lr=lr)
optimizer=torch.optim.Adam(params=model.parameters(), lr=lr)
n_epochs=10
full_epochs = 15
for i in range(n_epochs):
    print("Epoch No:",i+1)
    train_epoch_loss, train_epoch_acc=train_one_epoch(traindataloader,model,loss_fn,optimizer)
    print("Training:", "Epoch Loss:", train_epoch_loss, "Epoch Accuracy:", train_epoch_acc)
    print("--------------------------------------------------")

for param in model.pretrainednet.features.parameters():
    param.requires_grad=True

for i in range(full_epochs):
    print("Epoch No:",i+1)
    train_epoch_loss, train_epoch_acc=train_one_epoch(traindataloader,model,loss_fn,optimizer)
    print("Training:", "Epoch Loss:", train_epoch_loss, "Epoch Accuracy:", train_epoch_acc)
    print("--------------------------------------------------")

Epoch No: 1
Batch: 1 / 782 Running Loss: 2.3 Running Accuracy: 3.12
Batch: 101 / 782 Running Loss: 2.0 Running Accuracy: 25.08
Batch: 201 / 782 Running Loss: 1.87 Running Accuracy: 30.26
Batch: 301 / 782 Running Loss: 1.8 Running Accuracy: 33.22
Batch: 401 / 782 Running Loss: 1.74 Running Accuracy: 35.23
Batch: 501 / 782 Running Loss: 1.7 Running Accuracy: 36.83
Batch: 601 / 782 Running Loss: 1.67 Running Accuracy: 38.13
Batch: 701 / 782 Running Loss: 1.64 Running Accuracy: 39.06
Training: Epoch Loss: 1.63 Epoch Accuracy: 39.83
--------------------------------------------------
Epoch No: 2
Batch: 1 / 782 Running Loss: 1.64 Running Accuracy: 43.75
Batch: 101 / 782 Running Loss: 1.44 Running Accuracy: 46.32
Batch: 201 / 782 Running Loss: 1.44 Running Accuracy: 47.15
Batch: 301 / 782 Running Loss: 1.43 Running Accuracy: 47.5
Batch: 401 / 782 Running Loss: 1.41 Running Accuracy: 47.95
Batch: 501 / 782 Running Loss: 1.4 Running Accuracy: 48.44
Batch: 601 / 782 Running Loss: 1.4 Running Accu

In [9]:
#unpacking test images, there are 3 lacs images. This will take some time
shutil.unpack_archive('/kaggle/input/cifar-10/test.7z', '/kaggle/temp/')

#unregister unpack format, we are done with it
shutil.unregister_unpack_format('7zip')#, ['.7z'], unpack_7zarchive)

In [10]:
class TestDataset(Dataset):
    def __init__(self, imgpath):
        super().__init__()
        self.imgpath=imgpath
        _,_,self.files=next(os.walk(self.imgpath))
        self.length=len(self.files)
        self.transform=Compose([Resize((224,224), antialias=True), Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])        
    
    def __len__(self):
        return self.length
    
    def __getitem__(self,idx):
        finalpath=os.path.join(self.imgpath,str(idx+1))+'.png'
        img=read_image(finalpath)/255.0
        img=self.transform(img)
        return img

testdataset=TestDataset('/kaggle/temp/test/')
testdataloader=DataLoader(dataset=testdataset, batch_size=batch_size)

In [11]:
def eval(dataloader, model,loss_fn, path):
    model.eval()
    data=pd.read_csv(path)
    with torch.no_grad():
        for i, imgs in enumerate(dataloader):
            finalbatchpred=np.zeros(imgs.shape[0],dtype='object')
            imgs=imgs.to(device)
            pred=model(imgs)
            
            pred=torch.argmax(pred,dim=1).type(torch.int).cpu()
            for j,p in enumerate(pred):
                finalbatchpred[j]=num2name[p.item()]
            data.iloc[i*batch_size:i*batch_size+batch_size ,1]=finalbatchpred
    
    data.to_csv('submission.csv', index=False)
    data.head()

In [12]:
eval(testdataloader, model,loss_fn, '/kaggle/input/cifar-10/sampleSubmission.csv')

In [13]:
data_gen = pd.read_csv('/kaggle/working/submission.csv')

In [14]:
print(data_gen)

            id       label
0            1        bird
1            2    airplane
2            3       truck
3            4        ship
4            5    airplane
...        ...         ...
299995  299996  automobile
299996  299997        bird
299997  299998        deer
299998  299999         dog
299999  300000  automobile

[300000 rows x 2 columns]


If you liked the notebook, please upvote it.