In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
import torch.optim as optim
import imageio
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn import preprocessing
from typing import List
from IPython.display import clear_output

In [2]:
import sys
sys.path.insert(0, '/home/caitao/Project/dl-localization')
from input_output import Default

# 1 The CNN Model

In [3]:
class Net1(nn.Module):
    '''The output dimension of the full connnection layer is 100 x 100 = 10000
       Assuming the input image is 1 x 100 x 100
    '''
    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 5)
        self.conv2 = nn.Conv2d(8, 32, 5)
        self.fc    = nn.Linear(15488, 10000)  # too many labels

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc(x))
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [4]:
model1 = Net1()
print(model1)

Net1(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(8, 32, kernel_size=(5, 5), stride=(1, 1))
  (fc): Linear(in_features=15488, out_features=10000, bias=True)
)


# 2 Data

In [5]:
class SensorInputDataset(Dataset):
    '''Sensor reading input dataset'''
    def __init__(self, root_dir: str, transform=None):
        '''
        Args:
            root_dir:  directory with all the images
            labels:    labels of images
            transform: optional transform to be applied on a sample
        '''
        self.root_dir = root_dir
        self.transform = transform
        self.length = len(os.listdir(self.root_dir))

    def __len__(self):
        return self.length * Default.sample_per_label

    def __getitem__(self, idx):
        folder   = self.oneDint2twoDstr(int(idx/Default.sample_per_label))
        imgname  = str(idx%Default.sample_per_label) + '.png'
        img_path = os.path.join(self.root_dir, folder, imgname)
        image = imageio.imread(img_path)
        if self.transform:
            image = self.transform(image)
        label = self.twoDstr2oneDint(folder)
        sample = {'image':image, 'label':label}
        return sample

    def oneDint2twoDstr(self, oneDint):
        '''convert a one dimension integer index to a two dimension string index'''
        x = oneDint // Default.grid_length
        y = oneDint % Default.grid_length
        return f'({x}, {y})'
    
    def twoDstr2oneDint(self, twoDstr):
        '''convert a two dimension string to a one dimension integet index for the labels'''
        twoDstr = twoDstr[1:-1]
        x, y = twoDstr.split(',')
        x, y = int(x), int(y)
        return x*Default.grid_length + y

In [22]:
root_dir = './data/matrix-1'
sensor_input_dataset = SensorInputDataset(root_dir = root_dir, transform = T.ToTensor())
sensor_input_dataloader = DataLoader(sensor_input_dataset, batch_size=4, shuffle=True, num_workers=4)

# 3 Training

In [14]:
device    = torch.device('cuda')
model     = model1.to(device)
optimizer = optim.SGD(model1.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()  # criterion is the loss function

In [21]:
num_epochs = 5
train_losses = []
valid_accs = []
best_acc = 0
PATH = '/best_model_{}.pt'
print_every = 100

for epoch in range(num_epochs):
    print(f'epoch = {epoch}')
    for t, sample in enumerate(sensor_input_dataloader):
        model.train()
        X = sample['image'].to(device)
        y = sample['label'].to(device)
        pred = model(X)
        loss = criterion(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if t % print_every == 0:
            print(f't = {t}, loss = {loss.item()}')

epoch = 0
t = 0, loss = 9.190359115600586
t = 100, loss = 9.191838264465332
t = 200, loss = 9.197484970092773
t = 300, loss = 9.197670936584473
t = 400, loss = 9.216283798217773
t = 500, loss = 9.206048965454102
t = 600, loss = 9.197667121887207
t = 700, loss = 9.222134590148926
t = 800, loss = 9.207505226135254
t = 900, loss = 9.207732200622559
t = 1000, loss = 9.205695152282715
t = 1100, loss = 9.213281631469727
t = 1200, loss = 9.216551780700684
t = 1300, loss = 9.214351654052734
t = 1400, loss = 9.217650413513184
t = 1500, loss = 9.222225189208984
t = 1600, loss = 9.217854499816895
t = 1700, loss = 9.184883117675781
t = 1800, loss = 9.221495628356934
t = 1900, loss = 9.221102714538574
t = 2000, loss = 9.210466384887695
t = 2100, loss = 9.213089942932129
t = 2200, loss = 9.192530632019043
t = 2300, loss = 9.210480690002441
t = 2400, loss = 9.208732604980469
t = 2500, loss = 9.2089204788208
t = 2600, loss = 9.220396995544434
t = 2700, loss = 9.220514297485352
t = 2800, loss = 9.21267

In [None]:
device = torch.device('cuda:0')

In [None]:
device

In [None]:
labels = ['cat', 'dog', 'mouse', 'elephant', 'pandas']
le = preprocessing.LabelEncoder()
targets = le.fit_transform(labels)
print(targets)