In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torchvision import transforms 
from torch import nn
from torchvision.models import resnet50
from PIL import Image

# Construct class to store testing data, easy to load the index of images
class image_label:
    """Small record tying a test image to its identifying metadata.

    Attributes:
        img: the image object passed in (defaults to ``None``).
        number: identifier stored next to the image (defaults to ``None``).
        label: label slot for the image (defaults to ``None``).
    """

    def __init__(self, img=None, number=None, label=None):
        # The three values are stored verbatim: no copying, no validation.
        self.img, self.number, self.label = img, number, label
        
# Load testing data into the list 'test_img_set'.
# Files are looked up as testing/<index>.jpg for index 0..2987; an index whose
# file cannot be loaded is skipped, so the list may be shorter than the range.
test_img_set = []
error = 0  # NOTE: despite its name, this counts images that loaded successfully
for i in range(2988):
    # A forward slash resolves on both Windows and POSIX; the previous
    # "testing\\" prefix only worked on Windows.
    img_path = "testing/" + str(i) + ".jpg"
    try:
        img_PIL = Image.open(img_path)
        # Image.open is lazy and keeps the file open until the pixels are read.
        # Decode now so the handle is released instead of piling up thousands
        # of open files (at the cost of holding the decoded images in memory).
        img_PIL.load()
    except FileNotFoundError:
        # Presumably a gap in the file numbering: skip quietly, as before.
        continue
    except Exception as e:
        # Still best-effort (the image is skipped), but no longer silent, so
        # corrupt files or OS-level failures are visible in the cell output.
        print("Skipping " + img_path + ": " + repr(e))
        continue
    test_img_set.append(image_label(img_PIL, i))
    error = error + 1

# Training-time pipeline: random augmentation, fixed 224 crop, tensor conversion, normalisation
_train_steps = [
    transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Per-channel mean and std of 0.5 for a 3-channel tensor
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
]
transform_train = transforms.Compose(_train_steps)

# Test-time pipeline: deterministic resize and centre crop, no augmentation
_test_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Single-entry mean/std (0.5 each)
    transforms.Normalize([0.5,], [0.5,]),
]
transform_test = transforms.Compose(_test_steps)

# Build the training set from the "training" folder and wrap it in a shuffling loader
train_dataset = ImageFolder(root="training", transform=transform_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=60, shuffle=True)
# Class names exposed by the dataset; predicted indices are mapped back through this list
labelset = train_dataset.classes

# Take a pretrained ResNet50 and replace its final fully-connected layer with a new head
model = resnet50(pretrained=True)
num_classes = len(train_dataset.classes)
head_layers = [
    nn.Linear(2048, 256),          # project the 2048 backbone features down to 256
    nn.ReLU(),                     # non-linearity that zeroes negative activations
    nn.Dropout(0.4),               # randomly drop units to reduce over-fitting
    nn.Linear(256, num_classes),   # one output per training class
]
model.fc = nn.Sequential(*head_layers)

# Freeze the parameters of the first 8 child modules (1-based position below 9),
# printing each module as it is frozen; later children stay trainable
for count, child in enumerate(model.children(), start=1):
    if count >= 9:
        continue
    print(child)
    for weight in child.parameters():
        weight.requires_grad = False
            
# Pick the first CUDA device when torch reports one, otherwise fall back to the CPU,
# then move the model there (the trailing expression also displays the model)
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
model.to(device)

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
ReLU(inplace=True)
MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, t

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [3]:
# Sanity check of the loaded data, one value per line:
# loaded-image counter, test-set size, class names, training-set size
for value in (error, len(test_img_set), labelset, len(train_dataset)):
    print(value)

2985
2985
['Coast', 'Forest', 'Highway', 'Insidecity', 'Mountain', 'Office', 'OpenCountry', 'Street', 'Suburb', 'TallBuilding', 'bedroom', 'industrial', 'kitchen', 'livingroom', 'store']
1500


In [4]:
# Fine-tune the model for 40 epochs with the Adam optimizer and cross-entropy loss
from torch import optim
import time

epoch_num = 40
# Only parameters that still require gradients are handed to the optimizer
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)
loss_function = nn.CrossEntropyLoss()

for epoch in range(epoch_num):
    epoch_start = time.time()
    train_loss = 0.0  # sum of the per-batch losses over the epoch (not an average)
    train_acc = 0.0   # count of correct predictions; turned into a ratio after the epoch

    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # Index of the highest score along dim 1 is the predicted class
        predicted = torch.max(outputs.data, 1)[1]
        hits = predicted.eq(labels.data.view_as(predicted))
        train_acc += hits.type(torch.FloatTensor).sum().item()

    train_acc /= len(train_dataset)
    epoch_end = time.time()
    elapsed = epoch_end - epoch_start
    print("Epoch: {:02d}, Training Loss: {:.4f}, Accuracy: {:.2f}%, Time: {:.3f}s".format(epoch+1, train_loss, train_acc*100, elapsed))
        


Epoch: 01, Training Loss: 50.5631, Accuracy: 35.53%, Time: 9.358s
Epoch: 02, Training Loss: 27.9356, Accuracy: 68.73%, Time: 8.009s
Epoch: 03, Training Loss: 20.5092, Accuracy: 74.53%, Time: 8.301s
Epoch: 04, Training Loss: 17.8405, Accuracy: 76.20%, Time: 8.245s
Epoch: 05, Training Loss: 16.1333, Accuracy: 78.53%, Time: 8.396s
Epoch: 06, Training Loss: 13.9625, Accuracy: 81.93%, Time: 8.600s
Epoch: 07, Training Loss: 13.2180, Accuracy: 81.80%, Time: 8.747s
Epoch: 08, Training Loss: 11.4429, Accuracy: 84.67%, Time: 8.636s
Epoch: 09, Training Loss: 10.9860, Accuracy: 85.33%, Time: 8.818s
Epoch: 10, Training Loss: 12.0034, Accuracy: 83.47%, Time: 8.820s
Epoch: 11, Training Loss: 10.7275, Accuracy: 85.53%, Time: 8.794s
Epoch: 12, Training Loss: 9.9814, Accuracy: 86.07%, Time: 8.775s
Epoch: 13, Training Loss: 9.5279, Accuracy: 87.60%, Time: 8.935s
Epoch: 14, Training Loss: 9.3751, Accuracy: 87.53%, Time: 9.109s
Epoch: 15, Training Loss: 9.9228, Accuracy: 86.40%, Time: 9.174s
Epoch: 16, Tra

In [5]:
# Predict every test image with the model and create run3.txt,
# one "<number>.jpg <class name>" entry per line.
model.eval()  # evaluation mode is set once, not once per image
# 'w' instead of 'a': re-running the cell replaces the file rather than appending
# a duplicate set of predictions. torch.no_grad() skips autograd bookkeeping,
# which prediction does not need.
with open('run3.txt', 'w') as f, torch.no_grad():
    for i in test_img_set:
        img = transform_test(i.img).unsqueeze(0)  # add the batch dimension
        # The model takes 3-channel input. Only replicate when the image came
        # out single-channel; an unconditional repeat would turn a 3-channel
        # image into 9 channels and fail inside the network.
        if img.shape[1] == 1:
            img = img.repeat(1, 3, 1, 1)
        img = img.to(device)
        out = model(img)
        _, index = torch.max(out, 1)
        # .item() gives a plain int for the list lookup. The newline goes at the
        # end of each entry so the file no longer starts with a blank line.
        f.write(str(i.number) + '.jpg' + ' ' + labelset[index.item()] + '\n')
        