In [1]:
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition
    . arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, prob_dropout=0.3):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        # mod here
        self.dropout1 = nn.Dropout2d(p=prob_dropout)
        
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        

        # mod here
        self.dropout2 = nn.Dropout2d(p=prob_dropout)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet9():
    return ResNet(BasicBlock, [1, 1, 1, 1])
    
def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])


def test():
    net = ResNet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())

# test()



print("done")

done


In [2]:
from torchvision import transforms, datasets
import torch.utils.data
from torchvision.transforms import ToTensor
torch.manual_seed(1024)
# Define transformations
train_transform = transforms.Compose([
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), # Add color jitter
    # transforms.RandomHorizontalFlip(), #apply horizontal flipping
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(), # Randomly flip the images on the horizontal axis
    # transforms.RandomRotation(10), # Randomly rotate the images by +/- 10 degrees
    # transforms.RandomCrop(32, padding=4), # Apply random crops
    # transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    # transforms.RandomHorizontalFlip(0.5),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(), # Convert images to PyTorch tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), # Normalize images
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Apply transformations to datasets
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

batch_size = 16
# Create DataLoader
trainDataLoader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
testDataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# for batch in train_loader:
#     print(batch)
print("done")


Files already downloaded and verified
Files already downloaded and verified
done


In [3]:
import torch.optim.lr_scheduler as lr_scheduler
from torchsummary import summary
# from torch.optim.lr_scheduler import StepLR 



input_size = 28 # each input token is a row of a FashionMNIST image so 28 pixels
hidden_size=128 # hidden representation size
num_layers = 2  # two-layer LSTM
num_classes = 10

learning_rate= 0.01


model = ResNet9().cuda()
model.load_state_dict(torch.load('my_model_weights.pth'))
loss = torch.nn.CrossEntropyLoss() # Step 2: loss
# optimizer = torch.optim.SGD(model.parameters(), lr=.001)
# optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


# overfitting
# optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
# scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
# summary(model, (3, 32, 32))

print("done")

done


In [23]:
num_epochs=40
num_params = sum(p.numel() for p in model.parameters())
print("Number of parameters: ", num_params)

optimizer = torch.optim.SGD(model.parameters(), lr=0.0009, momentum=0.9, weight_decay=1e-5)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)  # T_max is typically set to the number of epochs


Number of parameters:  4903242


In [None]:
total_step = len(trainDataLoader)


train_loss_history = []
test_loss_history = []
matches = 0
total = 0
print("beginning")
for epoch in range(num_epochs):
    train_loss = 0.0
    test_loss = 0.0

    model.train()
    for i, data in enumerate(trainDataLoader):
  
        images, labels = data
        images = images.cuda()
        labels = labels.cuda()
        # forward
        predicted_output = model(images) # forward propagation
        fit = loss(predicted_output, labels)  # calculate our measure of goodness
    
        # backwards
        optimizer.zero_grad() # zero out any gradient values from the previous iteration
        fit.backward() # backpropagation
        optimizer.step() # update the weights of our trainable parameters
        train_loss += fit.item()
    train_loss = train_loss / len(trainDataLoader)
    train_loss_history += [train_loss]
    print("epoch " + str(epoch) + " train loss " + str(train_loss))

    scheduler.step()
print("done")

beginning
epoch 0 train loss 0.365176789367497
epoch 1 train loss 0.3562690950524807
epoch 2 train loss 0.3420518339973688
epoch 3 train loss 0.32744943346709016
epoch 4 train loss 0.3154638339853287
epoch 5 train loss 0.3090467218917608
epoch 6 train loss 0.3025328635804355
epoch 7 train loss 0.2962124489313364
epoch 8 train loss 0.2793005823466182
epoch 9 train loss 0.2698242404061556
epoch 10 train loss 0.2629868407458067
epoch 11 train loss 0.2528243648068607
epoch 12 train loss 0.24331311888337134
epoch 13 train loss 0.23593203740358354
epoch 14 train loss 0.22670731483772397
epoch 15 train loss 0.22336709954231979
epoch 16 train loss 0.2139384103424102
epoch 17 train loss 0.20325559547297656
epoch 18 train loss 0.19667344963386654
epoch 19 train loss 0.19106642444461583
epoch 20 train loss 0.1855848950113356
epoch 21 train loss 0.1787099402911961
epoch 22 train loss 0.16860882401488722
epoch 23 train loss 0.16119375531252472
epoch 24 train loss 0.15679664021141826
epoch 25 train 

In [4]:
model.eval()
with torch.no_grad():
    correct, total = 0, 0
    for images, labels in testDataLoader:
        images = images.cuda()
        labels = labels.cuda()
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Test accuracy: {100 * correct / total}%')

# ~10 epochs = 81.5%?
# ~13 = Test accuracy: 79.6%

Test accuracy: 89.43%


In [None]:
# LOAD PREDICTIONS
import pickle
import pandas as pd
import numpy as np
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

test_images_nl = unpickle('cifar_test_nolabels.pkl')[b'data']
test_images_nl2 = unpickle('cifar_test_nolabels.pkl')[b'data']
test_images_nl_id = unpickle('cifar_test_nolabels.pkl')[b'ids'].tolist()

test_images_nl = test_images_nl.reshape((-1, 3, 32, 32))
min_val = np.min(test_images_nl)
max_val = np.max(test_images_nl)
test_images_nl = (test_images_nl - min_val) / (max_val - min_val) #normalizing facepalm

test_images_nl2 = test_images_nl2.reshape((-1, 3, 32, 32)).astype(np.float32) / 255.0

# print(test_images_nl)
# print("rawr")
# print(test_images_nl2)

# plt.figure(figsize=(20, 4))
# for i in range(10):
#     plt.subplot(1, 10, i+1)
#     plt.imshow(test_images_nl[i])
#     plt.axis('off')
# plt.show()



test_images_tensor_nl = torch.tensor(test_images_nl, dtype=torch.float32)
testDataLoaderNL = torch.utils.data.DataLoader(test_images_tensor_nl, batch_size=1,shuffle=False)


predictions = []
model.eval()
for image in testDataLoaderNL:
    image = image.cuda()
    with torch.no_grad():
        output = model(image)
        predictions.append(output.argmax().item())
# for image in testDataLoaderNL:
#     image = image.cuda()
#     with torch.no_grad():
#         output = model(image)

#     # _, predicted_class = torch.max(output, 1)
#     # predictions.append(predicted_class.item())

#     # Preprocess the image if necessary
#     # Perform inference
#     # Here we assume 'resnet' is already loaded and 'image' is preprocessed
#     output = model(image)
#     # Perform post-processing if necessary
#     # Append the prediction to the predictions list
#     predictions.append(output.argmax().item())

print(len(predictions))
# # print(test_images_nl_id)


In [None]:
# CHECK ACC
def calculate_accuracy(predictions, actual):
    if len(predictions) != len(actual):
        raise ValueError("Length of predictions and actual arrays must be the same.")
    
    num_matches = sum(1 for pred, act in zip(predictions, actual) if pred == act)
    accuracy = (num_matches / len(predictions)) * 100
    return accuracy


actual = []
pattern = [8, 2, 9, 0, 4, 3, 6, 1, 7, 5]
for num in pattern:
    actual.extend([num] * 1000)
calculate_accuracy(predictions, actual)

In [26]:
torch.save(model.state_dict(), 'betterish_my_model_weights.pth')

In [8]:
lr_best = 32
def write_lr_best_to_file(lr_best):
    with open("lr_best.txt", "w") as file:
        file.write(str(lr_best))
write_lr_best_to_file(lr_best)



In [17]:
# Create a DataFrame with IDs and labels
df = pd.DataFrame({
    'ID': test_images_nl_id,
    'Labels': predictions
})

# Save the DataFrame to a CSV file
df.to_csv('predictions.csv', index=False)

print("done")

done


In [23]:
correct = 0
total = 0
model.eval()
for i, data in  enumerate(test_loader):
  images, labels = data
  images = images.cuda()
  labels = labels.cuda()
  with torch.no_grad():
    predicted_output = model(images)
    _, predicted = torch.max(predicted_output.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

accuracy = correct / total
print('Accuracy of the network on the test images: %d %%' % (100 * accuracy))

Accuracy of the network on the test images: 84 %


In [24]:
import pickle
import pandas as pd
import numpy as np
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

test_images_nl = unpickle('cifar_test_nolabels.pkl')[b'data']
test_images_nl_id = unpickle('cifar_test_nolabels.pkl')[b'ids'].tolist()

test_images_nl = test_images_nl.reshape((-1, 3, 32, 32)).astype(np.float32) / 255.0

test_images_tensor_nl = torch.tensor(test_images_nl, dtype=torch.float32)

testDataLoaderNL = torch.utils.data.DataLoader(test_images_tensor_nl, batch_size=1,shuffle=True)


predictions = []
for image in testDataLoaderNL:
    image = image.cuda()
    # Preprocess the image if necessary
    # Perform inference
    # Here we assume 'resnet' is already loaded and 'image' is preprocessed
    output = model(image)
    # Perform post-processing if necessary
    # Append the prediction to the predictions list
    predictions.append(output.argmax().item())

# print(len(predictions))
# print(test_images_nl_id)

# Create a DataFrame with IDs and labels
df = pd.DataFrame({
    'ID': test_images_nl_id,
    'Labels': predictions
})

# Save the DataFrame to a CSV file
df.to_csv('predictions.csv', index=False)

