This notebook will preprocess all of the data

In [18]:
# imports

import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from PIL import Image, ImageFilter
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet34, ResNet34_Weights
import torchvision.transforms as transforms


In [19]:
# create label dict
label_dic = {'N': 0, 'O': 1, 'R': 2}
path = 'DATASET/'

# index 0-2 is train, 3-5 is test
data = []
paths = ["DATASET/TRAIN/N", "DATASET/TRAIN/O", "DATASET/TRAIN/R", "DATASET/TEST/N", "DATASET/TEST/O", "DATASET/TEST/R"]

for i, path in enumerate(paths):
    print(path)
    data.append(os.listdir(path))
    print(data[i][:5])
    print(len(data[i]))

# print(data[0][:10])

DATASET/TRAIN/N
['R_1.jpg', 'R_10.jpg', 'R_100.jpg', 'R_1000.jpg', 'R_1001.jpg']
2847
DATASET/TRAIN/O
['O_1.jpg', 'O_10.jpg', 'O_100.jpg', 'O_1000.jpg', 'O_10000.jpg']
12565
DATASET/TRAIN/R
['R_2848.jpg', 'R_2849.jpg', 'R_2850.jpg', 'R_2851.jpg', 'R_2852.jpg']
7152
DATASET/TEST/N
['00000000.jpg', '00000001.jpg', '00000002.jpg', '00000003.jpg', '00000004.jpg']
397
DATASET/TEST/O
['O_12568.jpg', 'O_12569.jpg', 'O_12570.jpg', 'O_12571.jpg', 'O_12572.jpg']
1401
DATASET/TEST/R
['R_10000.jpg', 'R_10001.jpg', 'R_10002.jpg', 'R_10003.jpg', 'R_10004.jpg']
1112


In [21]:
# preprocess each image into a matrix of 500 x 500  & run edge detection

def add_margin(pil_img, top, right, bottom, left, color):
    width, height = pil_img.size
    new_width = width + right + left
    new_height = height + top + bottom
    result = Image.new(pil_img.mode, (new_width, new_height), color)
    result.paste(pil_img, (left, top))
    return result

for i, category in enumerate(data):
    print(category[0])
    for j, image in enumerate(category):
        img = Image.open(paths[i] + "/" + image).convert('RGB')
        width, height = img.size
        img = add_margin(img, 0, 500 - width, 500 - height, 0, (255, 255, 255))
        # print(img.mode)
        # Calculating Edges using the passed laplican Kernel
        final = img.filter(ImageFilter.Kernel((3, 3), (-1, -1, -1, -1, 8,
                                                -1, -1, -1, -1), 1, 0))
        # final.convert(mode="RGB")
        # print("" + final.mode)
        # img.show()
        # final.show()     
        # if(j == 3): break

        category[j] = transforms.Compose([transforms.PILToTensor()])(final)

print("Done with image sizing")

R_1.jpg
O_1.jpg
R_2848.jpg
00000000.jpg
O_12568.jpg
R_10000.jpg
Done with image sizing


In [22]:
# sload data into a single list with labels
train_data = []
train_labels = []

test_data = []
test_labels = []

for i, images in enumerate(data):
    if(i <= 2): 
        train_data.extend(images)
        if(i == 0):
            train_labels.extend([label_dic['N']] * len(images))
            print(train_labels[-1])
        elif(i == 1):
            train_labels.extend([label_dic['O']] * len(images))
            print(train_labels[-1])
        elif(i == 2):
            train_labels.extend([label_dic['R']] * len(images))
            print(train_labels[-1])
    elif(i <= 5):
        test_data.extend(images)
        if(i == 3):
            test_labels.extend([label_dic['N']] * len(images))
            print(test_labels[-1])
        elif(i == 4):
            test_labels.extend([label_dic['O']] * len(images))
            print(test_labels[-1])
        elif(i == 5):
            test_labels.extend([label_dic['R']] * len(images))
            print(test_labels[-1])


# print(train_data)
# print(train_labels)



0
1
2
0
1
2


In [23]:
print(train_data[0].mode)

<built-in method mode of Tensor object at 0x0000025568AED360>


In [24]:
# convert the images to numpy vectors
X_train = torch.stack(train_data)
X_test = torch.stack(test_data)

# for i, image in enumerate(train_data):
#     train_data[i] = transforms.Compose([transforms.PILToTensor()])(image)

# for i, image in enumerate(test_data):
#     test_data[i] = transforms.Compose([transforms.PILToTensor()])(image)


# for i, image in enumerate(test_data):
#     test_data[i] = np.array(image) #.reshape((500**2, 1))

# print(np.array(train_data[0]).reshape((500**2, 1)))


In [25]:
print(X_train.shape)

torch.Size([22564, 3, 500, 500])


In [26]:
# hyperparameters
batch_size = 64
lr = 0.01
epoch = 2
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
print(device)


cuda


In [27]:
# dataloader
train_dataset = TensorDataset(X_train, torch.from_numpy(np.array(train_labels)))
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size = batch_size, drop_last=True)

print(train_dataloader)

<torch.utils.data.dataloader.DataLoader object at 0x00000250122CAA00>


In [28]:
images, labels = next(iter(train_dataloader))

print(images.shape)

torch.Size([64, 3, 500, 500])


In [29]:
# model creation (using pre-trained resnet34)
model = resnet34(weights = ResNet34_Weights.DEFAULT)
for param in model.parameters():
	param.requires_grad = False
modelOutputFeats = model.fc.in_features
model.fc = nn.Linear(modelOutputFeats, len(label_dic))

model.to(device)
print(model)
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=lr)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [1]:
# train model
model.train()
for e in range(epoch):
    # initialize the total training and validation loss
    totalTrainLoss = 0
    totalValLoss = 0
    # initialize the number of correct predictions in the training
    # and validation step
    trainCorrect = 0
    valCorrect = 0
    for (x, y) in train_dataloader:
        (x, y) = (x.to(device), y.to(device))

        # print("output", output.shape)
        print("x", x.shape)
        print("y", y.shape)

        opt.zero_grad()
        
        output = model(x.float())

        print("output", output.shape)

        loss = criterion(output, y)
        loss.backward()
        opt.step()

        # add the loss to the total training loss so far and calculate the number of correct parameters
        totalTrainLoss += loss
        trainCorrect += (output.argmax(1) == y).type(
            torch.float).sum().item()

        break



NameError: name 'model' is not defined

In [None]:
# test data on testing set