In [1]:
import os
import torch
import pandas as pd

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from skimage import io
from PIL import Image

class ImageDataset(Dataset):
    """Project Image dataset"""

    def __init__(self, csv_file, root_dir, transform):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = Image.open(img_path).convert('RGB')
        #image = io.imread(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 2]))

        if self.transform:
            image = self.transform(image)

        return (image, y_label)

In [3]:
dataset = ImageDataset(
    csv_file='../src/data/raw_labes.csv', 
    root_dir='../src/data/raw', 
    transform=transforms.ToTensor()
    )

In [4]:
dataset.transform

ToTensor()

In [5]:
train_csv_file='../dataset/set_dataset/train_labes.csv'
train_root_dir='../dataset/set_dataset/train/'

annotations = pd.read_csv(train_csv_file)

In [6]:
annotations

Unnamed: 0,filename,class,class_num
0,bolt_0.jpg,bolt,1
1,bolt_1.jpg,bolt,1
2,bolt_10.jpg,bolt,1
3,bolt_102.jpg,bolt,1
4,bolt_103.jpg,bolt,1
...,...,...,...
1514,pipe_444.jpg,pipe,5
1515,pipe_445.jpg,pipe,5
1516,pipe_446.jpg,pipe,5
1517,pipe_447.jpg,pipe,5


In [7]:
len(annotations)

1519

In [8]:
img_path = os.path.join(root_dir, annotations.iloc[0, 0])
img_path

NameError: name 'root_dir' is not defined

In [9]:
image = io.imread(img_path)
type(image)

NameError: name 'img_path' is not defined

In [11]:
y_label = torch.tensor(int(annotations.iloc[0, 2]))
y_label

tensor(1)

In [12]:
transform_data = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize([224,224]),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))
            ])

In [13]:
image = transform_data(image)
image.shape

NameError: name 'image' is not defined

In [14]:
image = Image.open(img_path)
image

NameError: name 'img_path' is not defined

In [64]:
train_path = '../dataset/set_dataset/train'
train_labels = '../dataset/set_dataset/train_labes.csv'
val_path = '../dataset/set_dataset/val'
val_labels = '../dataset/set_dataset/val_labes.csv'
test_path = '../dataset/set_dataset/test'
test_labels = '../dataset/set_dataset/test_labes.csv'


# Hyperparameters
num_classes = 5
learning_rate = 1e-3
batch_size = 6
num_epochs = 10

train_transform = transforms.Compose([
        #transforms.ToPILImage(),
        transforms.Resize([224,224]),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))
        ])

test_val_transform = transforms.Compose([
        transforms.Resize([224,224]),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))
        ])

train_dataset = ImageDataset(
    csv_file=train_labels, 
    root_dir=train_path, 
    transform=train_transform
    )
val_dataset = ImageDataset(
    csv_file=val_labels, 
    root_dir=val_path, 
    transform=test_val_transform
    ) 

test_dataset = ImageDataset(
    csv_file=test_labels, 
    root_dir=test_path, 
    transform=test_val_transform
    ) 

In [65]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [68]:
print(len(train_loader))
print(len(val_loader))
print(len(test_loader))


1524
79
64


In [72]:
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))
print(len(train_dataset) + len(val_dataset) + len(test_dataset))

1519
474
379
2372


In [2]:
import torch
import torch.nn as nn
import numpy as np
from torchsummary import summary
import torch.nn.functional as F

In [3]:
def calc_input_dims():
        batch_data = torch.zeros((1, 3, 224, 224))
        batch_data = conv1(batch_data)
        batch_data = pool1(batch_data)
        batch_data = conv2(batch_data)
        batch_data = conv3(batch_data)
        batch_data = pool2(batch_data)
        batch_data = conv4(batch_data)
        batch_data = pool3(batch_data)

        return int(np.prod(batch_data.size()))

In [4]:
num_classes = 5

conv1 = nn.Conv2d(3, 64, 5)
pool1 = nn.MaxPool2d(2, 2)
conv2 = nn.Conv2d(64, 128, 5)
conv3 = nn.Conv2d(128, 64, 5)
pool2 = nn.MaxPool2d(2, 2)
conv4 = nn.Conv2d(64, 64, 5)
pool3 = nn.MaxPool2d(2, 2)

input_dims = calc_input_dims()

fc1 = nn.Linear(input_dims, num_classes)


In [5]:
input_dims

33856

In [6]:
fc1

Linear(in_features=33856, out_features=5, bias=True)

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Net(nn.Module):

    def __init__(self, num_classes=5):
        super(Net, self).__init__()
        self.num_classes = num_classes
        self.conv1 = nn.Conv2d(3, 64, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 128, 5)
        self.conv3 = nn.Conv2d(128, 64, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(64, 64, 5)
        self.pool3 = nn.MaxPool2d(2, 2)

        #input_dims = self.calc_input_dims()

        self.fc1 = nn.Linear(224, self.num_classes)

    # Function to calculate the input dimension to Linear layer
    # TODO: Implement calculation depending on the network structure
#     def calc_input_dims(self):
#         batch_data = torch.zeros((1, 3, 224, 224))
#         batch_data = self.conv1(batch_data)
#         batch_data = self.pool1(batch_data)
#         batch_data = self.conv2(batch_data)
#         batch_data = self.conv3(batch_data)
#         batch_data = self.pool2(batch_data)
#         batch_data = self.conv4(batch_data)
#         batch_data = self.pool3(batch_data)

#         return int(np.prod(batch_data.size()))

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = F.relu(x)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool2(x)

        x = self.conv4(x)
        x = F.relu(x)
        x = self.pool3(x)
        print(x.shape)

        x = self.fc1(x)

        return x

In [9]:
net = Net().to(device)
print(net)
#summary(net, (3, 224, 224))

Net(
  (conv1): Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(128, 64, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=33856, out_features=5, bias=True)
)


In [10]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 53 * 53, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        #print(x.shape)
        x = x.view(-1, 16 * 53 * 53)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net().to(device)
print(net)
summary(net, (3, 224, 224))

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=44944, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=5, bias=True)
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 6, 220, 220]             456
         MaxPool2d-2          [-1, 6, 110, 110]               0
            Conv2d-3         [-1, 16, 106, 106]           2,416
         MaxPool2d-4           [-1, 16, 53, 53]               0
            Linear-5                  [-1, 120]       5,393,400
            Linear-6                   [-1, 84]          10,164
            Linear-7                    [-1, 5]             425
Total params: 5,406,861
Trai

In [11]:
# Define Data paths
train_path = '../dataset/set_dataset/train'
train_labels = '../dataset/set_dataset/train_labes.csv'
val_path = '../dataset/set_dataset/val'
val_labels = '../dataset/set_dataset/val_labes.csv'
test_path = '../dataset/set_dataset/test'
test_labels = '../dataset/set_dataset/test_labes.csv'

In [12]:
mean_val = [0.5, 0.5, 0.5]
std_val = [0.5, 0.5, 0.5]

In [13]:
train_transforms = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean_val, std_val),
])

test_val_transforms = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean_val, std_val),
])

In [14]:
train_data = ImageDataset(train_labels, train_path, train_transforms)
val_data = ImageDataset(val_labels, val_path, test_val_transforms)
test_data = ImageDataset(test_labels, test_path, test_val_transforms)

In [15]:
train_loader = DataLoader(train_data, batch_size=6, shuffle=True, num_workers=4)
valloader = DataLoader(val_data, batch_size=6, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size=6, shuffle=True, num_workers=4)


In [16]:
train_size = len(train_data)
val_size = len(val_data)
test_size = len(test_data)

In [17]:
print(f"Train data size: {train_size}\nValidation data size: {val_size}\n\
Test data size: {test_size}\nTotal data size: {train_size + val_size + test_size}")


Train data size: 1519
Validation data size: 474
Test data size: 379
Total data size: 2372


In [18]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [20]:
epochs = 2

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
            
print('Finish Training')

RuntimeError: CUDA error: device-side assert triggered