In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.io import read_image

from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

In [2]:
# Check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [3]:
imageLabels = {
    'bedroom': 1,
    'Coast': 2,
    'Forest': 3,
    'Highway': 4,
    'industrial': 5,
    'Insidecity': 6,
    'kitchen': 7,
    'livingroom': 8,
    'Mountain': 9,
    'Office': 10,
    'OpenCountry': 11,
    'store': 12,
    'Street': 13,
    'Suburb': 14,
    'TallBuilding': 15,
}

In [4]:
import cv2
import os
import numpy as np
import pandas as pd
import shutil
from torch.utils.data import DataLoader
from tqdm import tqdm

In [5]:
def rotatedImages(img):
    rot90 = np.rot90(img)
    rot180 = np.rot90(rot90)
    rot270 = np.rot90(rot180)
    return img, rot90, rot180, rot270

def flippedImages(img):
    hFlip = img[:, ::-1]
    vFlip = img[::-1, :]
    return img, hFlip, vFlip

def blurredImages(img):
    bImage = cv2.GaussianBlur(img, (5,5), 0)
    return img, bImage

In [6]:
dataFolder = './data'

In [7]:
largestHeight = 0
largestWidth = 0
if not os.path.exists(dataFolder):
    os.makedirs(dataFolder)

imageNum = 0
def getImageFileName(num):
    return 'image_{}.jpg'.format(num)

allImages = []
allLabels = []

for cat, label in imageLabels.items():
    root = './train'
    catFolder = os.path.join(root, cat)
    images = os.listdir(catFolder)
    for image in images:
        img = np.array(cv2.imread(os.path.join(catFolder, image), -1))
        augRotImages = rotatedImages(img)
        for rotImage in augRotImages:
            augFlipImages = flippedImages(rotImage)
            for augIm in augFlipImages:
                blurImages = blurredImages(augIm)
                for im in blurImages:
                    imageName = getImageFileName(imageNum)
                    imageFilePath = os.path.join(dataFolder, imageName)
                    imageNum +=1 
                    cv2.imwrite(imageFilePath, im)
                    allImages.append(imageName)
                    allLabels.append(label)

labels = pd.DataFrame({
    'imageFileName': allImages,
    'imageLabel': allLabels
})


In [8]:
# split the testing data
imageRem, imageTest, labelRem, labelTest = train_test_split(allImages, allLabels, test_size=0.2, stratify=allLabels)

In [9]:
# split the validation data
imageTrain, imageValid, labelTrain, labelValid = train_test_split(imageRem, labelRem, test_size=0.3, stratify=labelRem)

In [5]:
trainDestFolder = './data/train'
testDestFolder = './data/test'
validDestFolder = './data/valid'

In [11]:
# write the training data
if not os.path.exists(trainDestFolder):
    os.makedirs(trainDestFolder)
for trainName in imageTrain:
    # Get the full path of the source file
    source_file = os.path.join(dataFolder, trainName)
    # Get the full path of the destination file
    destination_file = os.path.join(trainDestFolder, trainName)
    # Move the file
    shutil.copy(source_file, destination_file)

In [12]:
# write the test data
if not os.path.exists(testDestFolder):
    os.makedirs(testDestFolder)
for testName in imageTest:
    # Get the full path of the source file
    source_file = os.path.join(dataFolder, testName)
    # Get the full path of the destination file
    destination_file = os.path.join(testDestFolder, testName)
    # Move the file
    shutil.copy(source_file, destination_file)

In [13]:
# write the validation data
if not os.path.exists(validDestFolder):
    os.makedirs(validDestFolder)
for testName in imageValid:
    # Get the full path of the source file
    source_file = os.path.join(dataFolder, testName)
    # Get the full path of the destination file
    destination_file = os.path.join(validDestFolder, testName)
    # Move the file
    shutil.copy(source_file, destination_file)

In [14]:
trainLabels = pd.DataFrame({
    'imageFileName': imageTrain,
    'imageLabel': labelTrain
})

testLabels = pd.DataFrame({
    'imageFileName': imageTest,
    'imageLabel': labelTest
})

validLabels = pd.DataFrame({
    'imageFileName': imageValid,
    'imageLabel': labelValid
})

trainLabels.to_csv(os.path.join(dataFolder, 'trainLabels.csv'), index=False)
testLabels.to_csv(os.path.join(dataFolder, 'testLabels.csv'), index=False)
validLabels.to_csv(os.path.join(dataFolder, 'validLabels.csv'), index=False)

In [7]:
class ImageDataset(Dataset):
    def __init__(self, labelsFile, imageDirectory, transform=None):
        self.imgLabels = pd.read_csv(labelsFile)
        self.imgDir = imageDirectory
        self.transform = transform

    def __len__(self):
        return len(self.imgLabels)

    def __getitem__(self, idx):
        imgPath = os.path.join(self.imgDir, self.imgLabels.iloc[idx, 0])
        image = read_image(imgPath)
        label = self.imgLabels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        return image, label

In [8]:
# Define transformations for your dataset
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(size = (320,320)),
    transforms.ToTensor()
])
trainingDataset = ImageDataset(labelsFile = os.path.join(dataFolder, 'trainLabels.csv'), imageDirectory = trainDestFolder, transform = transform)
validDataset = ImageDataset(labelsFile = os.path.join(dataFolder, 'validLabels.csv'), imageDirectory = validDestFolder, transform = transform)
testDataset = ImageDataset(labelsFile = os.path.join(dataFolder, 'testLabels.csv'), imageDirectory = testDestFolder, transform = transform)


In [22]:
trainDataloader = DataLoader(trainingDataset, batch_size=16, shuffle=True)
testDataloader = DataLoader(testDataset, batch_size=16, shuffle=False)
validDataloader = DataLoader(validDataset, batch_size=16, shuffle=False)

In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

# Define your CNN architecture
class SceneNet(nn.Module):
    def __init__(self, num_classes=15):
        super(SceneNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1) # image dim 320x320
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(3,2) # dim 159x159

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1) # dim 158x158
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(5,2) # dim 78x78

        # self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1) # dim 158x158
        # self.pool3 = nn.MaxPool2d(3,2) # dim 76x76

        self.fc1 = nn.Linear(64*78*78, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [33]:
# Initialize your CNN
model = SceneNet()

# Use GPU if available
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)




In [34]:
# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    model.train()
    trainProgress =  tqdm(enumerate(trainDataloader), total=len(trainDataloader))
    for i, data in trainProgress:
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels-1)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        trainProgress.set_description('Epoch [{}/{}], train loss: {:.3f}'.format(
            epoch+1,
            num_epochs,
            running_loss/(i+1)
        ))
    
    # # Evaluate on validation data
    # model.eval()  # Set the model to evaluation mode
    # val_loss = 0.0
    # correct = 0
    # total = 0
    
    # with torch.no_grad():
    #     validProgress = tqdm(enumerate(validDataloader), total=len(validDataloader))
    #     for i, data in validProgress:
    #         images, labels = data[0].to(device), data[1].to(device)
    #         outputs = model(images)
    #         loss = criterion(outputs, labels)
    #         val_loss += loss.item()
    #         _, predicted = torch.max(outputs, 1)
    #         total += labels.size(0)
    #         correct += (predicted == labels).sum().item()
    #         trainProgress.set_description('valid loss: {:.3f}, valid accuracy: {:.3f}'.format(
    #             running_loss/(i+1),
    #             correct/total
    #         ))

print('Finished Training')



Epoch [1/10], train loss: 2.587: 100%|██████████| 1260/1260 [05:39<00:00,  3.72it/s]
Epoch [2/10], train loss: 2.388: 100%|██████████| 1260/1260 [05:29<00:00,  3.82it/s]
Epoch [3/10], train loss: 2.243: 100%|██████████| 1260/1260 [05:30<00:00,  3.81it/s]
Epoch [4/10], train loss: 2.172:  16%|█▋        | 205/1260 [00:55<04:46,  3.68it/s]


KeyboardInterrupt: 

In [32]:
# Test the model
correct = 0
total = 0   
with torch.no_grad():
    for data in testDataloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))


Accuracy of the network on the test images: 1 %
