In [1]:
!mkdir -p ~/.content/competitiondata


In [2]:
from google.colab import files
files.upload()  # Use this to upload your kaggle.json file

# Make sure kaggle.json is in the location ~/.kaggle/kaggle.json
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c deep-learning-mini-project-spring-24-nyu
!unzip deep-learning-mini-project-spring-24-nyu.zip -d competitiondata

Saving kaggle.json to kaggle.json
Downloading deep-learning-mini-project-spring-24-nyu.zip to /content
 92% 173M/189M [00:02<00:00, 86.9MB/s]
100% 189M/189M [00:02<00:00, 85.8MB/s]
Archive:  deep-learning-mini-project-spring-24-nyu.zip
  inflating: competitiondata/cifar-10-python/cifar-10-batches-py/batches.meta  
  inflating: competitiondata/cifar-10-python/cifar-10-batches-py/data_batch_1  
  inflating: competitiondata/cifar-10-python/cifar-10-batches-py/data_batch_2  
  inflating: competitiondata/cifar-10-python/cifar-10-batches-py/data_batch_3  
  inflating: competitiondata/cifar-10-python/cifar-10-batches-py/data_batch_4  
  inflating: competitiondata/cifar-10-python/cifar-10-batches-py/data_batch_5  
  inflating: competitiondata/cifar-10-python/cifar-10-batches-py/readme.html  
  inflating: competitiondata/cifar-10-python/cifar-10-batches-py/test_batch  
  inflating: competitiondata/cifar_test_nolabels.pkl  


In [3]:
# imports
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast

In [4]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None, padding=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding, bias=False) # trying same padding
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.gelu = nn.GELU() # adding in a gelu layer for testing
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=padding, bias=False) # valid padding trying
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        identity = nn.AdaptiveAvgPool2d(out.size()[2])(identity)
        out += identity
        out = self.gelu(out) # trying out a gelu for one of the blocks

        return out

class ModifiedResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ModifiedResNet, self).__init__()
        self.in_channels = 16 # number of output channels
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.dropout = nn.Dropout(0.432); # dropout layer probability
        self.dropoutlow = nn.Dropout(0.287) # lower dropout prob
        self.bn1 = nn.BatchNorm2d(16)
        self.maxpool1 = nn.AdaptiveMaxPool2d(24)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 32, layers[0])
        self.layer2 = self._make_layer(block, 64, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 115, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 256, layers[3], stride=2)
        # Adjust the layer configuration to stay under 5 million parameters
        self.avgpool = nn.AdaptiveAvgPool2d((3, 3))
        self.fc = nn.Linear(256 * 3 * 3, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.layer1(x)
        x = self.dropoutlow(x)
        x = self.layer2(x)
        x = self.maxpool1(x)
        x = self.layer3(x)
        x = self.dropoutlow(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


model = ModifiedResNet(ResidualBlock, [3, 4, 11, 2]).to('cuda')

# Use torchsummary for a detailed summary and parameter count
from torchsummary import summary
summary(model, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             432
       BatchNorm2d-2           [-1, 16, 32, 32]              32
              ReLU-3           [-1, 16, 32, 32]               0
           Dropout-4           [-1, 16, 32, 32]               0
            Conv2d-5           [-1, 32, 32, 32]           4,608
       BatchNorm2d-6           [-1, 32, 32, 32]              64
              ReLU-7           [-1, 32, 32, 32]               0
            Conv2d-8           [-1, 32, 32, 32]           9,216
       BatchNorm2d-9           [-1, 32, 32, 32]              64
           Conv2d-10           [-1, 32, 32, 32]             512
      BatchNorm2d-11           [-1, 32, 32, 32]              64
             GELU-12           [-1, 32, 32, 32]               0
    ResidualBlock-13           [-1, 32, 32, 32]               0
           Conv2d-14           [-1, 32,

In [5]:
#data loading
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
#testdata = unpickle('/content/competitiondata/cifar_test_nolabels.pkl')
# Preprocess and load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Use the function to load the data

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

#testset = torchvision.datasets.CIFAR10(root='./data', train=False,
          #                             download=True, transform=None)
#testloader = DataLoader(testdata, batch_size=128, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43342713.04it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


In [31]:
#defining model, optimizer, regularization, hyperparameters

# hyperparameters
epochs = 200
lr = 1e-4
grad_accumulation = 3
model_save = 5
# use modified ResNet model
GPU = True

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.1)
scaler = GradScaler(enabled=GPU)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

checkpoint = torch.load('model.pth')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [32]:
# training model
import time

# train model
for epoch in range(epochs):
    start_time = time.time()
    model.train()
    running_loss = 0.0

    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].cuda(), data[1].cuda()

        # smap for speeding up training
        with autocast(enabled=GPU):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        # grad accumulation
        if (i + 1) % grad_accumulation == 0:
          scaler.step(optimizer)
          scaler.update()
          optimizer.zero_grad()

        running_loss += loss.item()
        if i % 200 == 0:    # print loss after 200 iters
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0

    scheduler.step()

    if epoch % model_save == 0:
        print("just saved model incase something goes wrong")
        model.cpu()  # Move model to CPU
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            'scheduler': scheduler.state_dict(),
            'scaler': scaler.state_dict(),
            }, "model.pth")
        model.cuda()  # Move model back to GPU if further training is needed


    end_time = (time.time() - start_time ) // 1
    print(f"finished epoch {epoch + 1} in {end_time} seconds" )

print('Finished Training')
print("just saved final model incase something goes wrong")
model.cpu()  # Move model to CPU
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            'scheduler': scheduler.state_dict(),
            'scaler': scaler.state_dict(),
            }, "model.pth")
model.cuda()  # Move model back to GPU if further training is needed

[1, 1] loss: 0.001
[1, 201] loss: 0.206
just saved model incase something goes wrong
finished epoch 1 in 47.0 seconds
[2, 1] loss: 0.001
[2, 201] loss: 0.208
finished epoch 2 in 47.0 seconds
[3, 1] loss: 0.001
[3, 201] loss: 0.205
finished epoch 3 in 46.0 seconds
[4, 1] loss: 0.001
[4, 201] loss: 0.209
finished epoch 4 in 46.0 seconds
[5, 1] loss: 0.001
[5, 201] loss: 0.208
finished epoch 5 in 46.0 seconds
[6, 1] loss: 0.001
[6, 201] loss: 0.208
just saved model incase something goes wrong
finished epoch 6 in 47.0 seconds
[7, 1] loss: 0.001
[7, 201] loss: 0.208
finished epoch 7 in 46.0 seconds
[8, 1] loss: 0.001
[8, 201] loss: 0.206
finished epoch 8 in 49.0 seconds
[9, 1] loss: 0.001
[9, 201] loss: 0.206
finished epoch 9 in 46.0 seconds
[10, 1] loss: 0.001
[10, 201] loss: 0.203
finished epoch 10 in 46.0 seconds
[11, 1] loss: 0.001
[11, 201] loss: 0.210


KeyboardInterrupt: 

In [20]:
# testing data code adopted from https://github.com/hzhao20/DLMiniproject/blob/main/GenerateCSV.py.
"""

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1Y7hJP7qXUYC-K7YD40kiqYj4Hz-TRluh
"""

import torch
import numpy as np
import pandas as pd
import pickle
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset

# device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# unpickle function
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

# load
test_data_dict = unpickle('/content/competitiondata/cifar_test_nolabels.pkl')
test_images = test_data_dict[b'data']
test_ids = test_data_dict[b'ids']

# transform
test_images = test_images.reshape(len(test_images), 3, 32, 32).transpose(0, 2, 3, 1)  # 从CHW转换为HWC

# preprocess
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # 根据模型训练时的配置调整
])

test_images = torch.stack([transform(img) for img in test_images])

# data loader
test_loader = DataLoader(TensorDataset(test_images, torch.tensor(test_ids)), batch_size=64, shuffle=False)

# load model
model = model.to(device) # prolly on cuda
model.eval()

# predict
predicted_labels = []
with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        predicted_labels.extend(preds.cpu().numpy())

# generate CSV
submission_df = pd.DataFrame({
    'ID': test_ids,
    'Labels': predicted_labels
})

predicted_labels = np.array(predicted_labels)

submission_df.to_csv('submission.csv', index=False)

Using device: cuda
