# Add libraries

In [2]:
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import models

In [4]:
abs_path = '/content/drive/MyDrive/Datasets/CMPE468/sign-dataset/'

# Analyze Dataset

In [5]:
all_in_one = pd.read_csv(os.path.join(abs_path, 'annotations.csv'))

# # Uncomment below code if you want to check dataset images.
# seen_classes = []
# for index, row in all_in_one.iterrows():
#   i_class = row.category
#   if i_class not in seen_classes:
#     print(i_class)
#     seen_classes.append(i_class)
#     image = Image.open(os.path.join(abs_path, 'images', row.file_name))
#     plt.imshow(image)
#     plt.show()

In [27]:
# Selected class 3 -> 40 Speed Sign - 318
# Selected class 16 -> No Car Sign - 218
# Selected class 17 -> No Horn Sign - 192
# Selected class 30 -> Bike Sign - 184
# Selected class 54 -> No Park Sign - 500
# Selected class 55 -> No Entrance Sign - 220

# Dataset Distribution 140 Train 20 Test 20 Validation due to minimum class that has selected.

selected_classes = [3, 16, 17, 30, 54, 55]
all_in_one_selected = all_in_one[all_in_one.category.isin(selected_classes)]
all_in_one_selected.drop_duplicates(subset=['file_name'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_in_one_selected.drop_duplicates(subset=['file_name'], inplace=True)


# Training Test and Validation Dataframes

## Create OneHotLabel

In [7]:
def change_to_onehot(label):
  if label == selected_classes[0]:
    return '[1, 0, 0, 0, 0, 0]'
  elif label == selected_classes[1]:
    return '[0, 1, 0, 0, 0, 0]'
  elif label == selected_classes[2]:
    return '[0, 0, 1, 0, 0, 0]'
  elif label == selected_classes[3]:
    return '[0, 0, 0, 1, 0, 0]'
  elif label == selected_classes[4]:
    return '[0, 0, 0, 0, 1, 0]'
  elif label == selected_classes[5]:
    return '[0, 0, 0, 0, 0, 1]'

## Train - Test - Val Dataset Creation


In [8]:
train_list = []
test_list = []
val_list = []

train_limit = 140
test_limit = 20
val_limit = 20

for selected_class in selected_classes:
  inner_df = all_in_one_selected[all_in_one_selected.category == selected_class]

  selected_train = 0
  selected_test = 0
  selected_val = 0

  for i_index, i_row in inner_df.iterrows():
    if selected_train < train_limit:
      selected_train += 1
      train_list.append(i_row)
    elif selected_test < test_limit:
      selected_test += 1
      test_list.append(i_row)
    elif selected_val < val_limit:
      selected_val += 1
      val_list.append(i_row)

train_df = pd.DataFrame(train_list)
test_df = pd.DataFrame(test_list)
val_df = pd.DataFrame(val_list)

In [9]:
 # To check all classes distributed equally.
for selected_class in selected_classes:
  assert len(train_df[train_df.category == selected_class]) == train_limit
  assert len(test_df[test_df.category == selected_class]) == test_limit
  assert len(val_df[val_df.category == selected_class]) == val_limit

### Train

In [10]:
train = train_df.reset_index()

In [11]:
train['OneHotLabel'] = train.apply(lambda x: change_to_onehot(x.category), axis=1)

In [12]:
train.groupby('OneHotLabel').agg('count')

Unnamed: 0_level_0,index,file_name,width,height,x1,y1,x2,y2,category
OneHotLabel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"[0, 0, 0, 0, 0, 1]",140,140,140,140,140,140,140,140,140
"[0, 0, 0, 0, 1, 0]",140,140,140,140,140,140,140,140,140
"[0, 0, 0, 1, 0, 0]",140,140,140,140,140,140,140,140,140
"[0, 0, 1, 0, 0, 0]",140,140,140,140,140,140,140,140,140
"[0, 1, 0, 0, 0, 0]",140,140,140,140,140,140,140,140,140
"[1, 0, 0, 0, 0, 0]",140,140,140,140,140,140,140,140,140


### Test

In [13]:
test = test_df.reset_index()

In [14]:
test['OneHotLabel'] = test.apply(lambda x: change_to_onehot(x.category), axis=1)

In [15]:
test.groupby('OneHotLabel').agg('count')

Unnamed: 0_level_0,index,file_name,width,height,x1,y1,x2,y2,category
OneHotLabel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"[0, 0, 0, 0, 0, 1]",20,20,20,20,20,20,20,20,20
"[0, 0, 0, 0, 1, 0]",20,20,20,20,20,20,20,20,20
"[0, 0, 0, 1, 0, 0]",20,20,20,20,20,20,20,20,20
"[0, 0, 1, 0, 0, 0]",20,20,20,20,20,20,20,20,20
"[0, 1, 0, 0, 0, 0]",20,20,20,20,20,20,20,20,20
"[1, 0, 0, 0, 0, 0]",20,20,20,20,20,20,20,20,20


### Validation

In [16]:
val = val_df.reset_index()

In [17]:
val['OneHotLabel'] = val.apply(lambda x: change_to_onehot(x.category), axis=1)

In [18]:
val.groupby('OneHotLabel').agg('count')

Unnamed: 0_level_0,index,file_name,width,height,x1,y1,x2,y2,category
OneHotLabel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"[0, 0, 0, 0, 0, 1]",20,20,20,20,20,20,20,20,20
"[0, 0, 0, 0, 1, 0]",20,20,20,20,20,20,20,20,20
"[0, 0, 0, 1, 0, 0]",20,20,20,20,20,20,20,20,20
"[0, 0, 1, 0, 0, 0]",20,20,20,20,20,20,20,20,20
"[0, 1, 0, 0, 0, 0]",20,20,20,20,20,20,20,20,20
"[1, 0, 0, 0, 0, 0]",20,20,20,20,20,20,20,20,20


### Additional Tests

In [19]:
# To check all file_name's unique in all datasets

for _, row_train in train.iterrows():
    file_name = row_train.file_name

    matches_test = test[test.file_name == file_name]
    matches_val = val[val.file_name == file_name]

    if len(matches_test) > 0 or len(matches_val) > 0:
        print(matches_val, file_name)
        assert False

for _, row_test in test.iterrows():
    file_name = row_test.file_name

    matches_train = train[train.file_name == file_name]
    matches_val = val[val.file_name == file_name]

    if len(matches_train) > 0 or len(matches_val) > 0:
        assert False

for _, row_val in val.iterrows():
    file_name = row_val.file_name

    matches_train = train[train.file_name == file_name]
    matches_test = test[test.file_name == file_name]

    if len(matches_train) > 0 or len(matches_test) > 0:
        assert False

# Create dataset class

In [20]:
class ImageDataset(Dataset):

    def __init__(self, metadata, transform=None):
        """Init function should not do any heavy lifting, but
            must initialize how many items are available in this data set.
        """
        self.metadata = metadata
        self.transform = transform

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        """ Here we have to return the item requested by `idx`
            The PyTorch DataLoader class will use this method to make an iterable for
            our training or validation loop.
        """
        image_path = self.metadata.loc[idx, 'file_name']
        image_label = self.metadata.loc[idx, 'OneHotLabel']
        image = Image.open(os.path.join(abs_path, 'images', image_path))

        image = self.transform(image)

        return image, image_label

# Transforms

In [21]:
input_size = 224  # 224x224x3

data_transforms = {
    'train': [
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ],
    'tv': [
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ],
}

# Apply Transforms - Prepare Datasets

In [22]:
# Define transformations (if any)
transform_train = transforms.Compose(data_transforms['train'])
transform_tv = transforms.Compose(data_transforms['tv'])

# Batch size
bs = 128

# Instantiate the dataset
train_dataset = ImageDataset(metadata=train, transform=transform_train)

# Use DataLoader to access individual samples
train_data_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)

# Instantiate the dataset
test_dataset = ImageDataset(metadata=test, transform=transform_tv)

# Use DataLoader to access individual samples
test_data_loader = DataLoader(test_dataset, batch_size=bs, shuffle=True)

# Instantiate the dataset
val_dataset = ImageDataset(metadata=val, transform=transform_tv)

# Use DataLoader to access individual samples
val_data_loader = DataLoader(val_dataset, batch_size=bs, shuffle=True)

# Training

In [23]:
# define hyperparameters
learning_rate = 0.0001
# batch_size = 32
old_loss = 100

# To select a GPU
device = torch.device('cuda')

# To clear CUDA memory
torch.cuda.empty_cache()

# Initialize the EfficientNet model
efficientnet_b0 = models.efficientnet_b0(weights='IMAGENET1K_V1')
efficientnet_b0.classifier[1] = torch.nn.Linear(in_features=1280, out_features=6, bias=True)
model = efficientnet_b0.to(device)

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Epoch size
num_epochs = 100

# Softmax
softmax = torch.nn.Softmax(dim=1)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 131MB/s] 


In [24]:
for epoch in range(num_epochs):
    print(epoch+1)
    train_loss = []
    dice_train = []
    model.train()

    for batch_index_train, batch_train in enumerate(train_data_loader):
      image, image_label = batch_train
      image_label = [eval(inner_list_str) for inner_list_str in list(image_label)]
      image_label = torch.tensor(image_label, dtype=torch.float32).to(device)

      image = image.to(device)
      outputs = model(image)
      outputs = softmax(outputs)

      loss = criterion(outputs, image_label)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    with torch.no_grad():
      model.eval()
      for batch_index_val, batch_val in enumerate(val_data_loader):
        image, image_label = batch_val
        image_label = [eval(inner_list_str) for inner_list_str in list(image_label)]
        image_label = torch.tensor(image_label, dtype=torch.float32).to(device)

        image = image.to(device)
        outputs = model(image)
        outputs = softmax(outputs)
        loss = criterion(outputs, image_label)

      if loss < old_loss:
        old_loss = loss
        torch.save(model.state_dict(), '/content/drive/MyDrive/Datasets/CMPE468/other.pth')
        print(loss.item())

1
1.703062653541565
2
1.52327561378479
3
1.3755018711090088
4
1.2575597763061523
5
1.1742371320724487
6
1.1259666681289673
7
1.104768991470337
8
1.0960617065429688
9
1.0899680852890015
10
1.087155818939209
11
1.0844557285308838
12
13
1.0833834409713745
14
1.0795073509216309
15
1.078987956047058
16
1.0763672590255737
17
1.0746166706085205
18
1.0716323852539062
19
1.0702189207077026
20
1.0700472593307495
21
1.068251132965088
22
23
24
1.068011999130249
25
1.0677061080932617
26
27
1.064185619354248
28
1.0637335777282715
29
1.0627853870391846
30
1.062265157699585
31
32
33
34
35
36
1.0618627071380615
37
1.0608160495758057
38
1.059914469718933
39
1.0585721731185913
40
41
1.0581159591674805
42
1.0570509433746338
43
44
45
46
47
1.056520700454712
48
1.0565118789672852
49
1.0562564134597778
50
51
52
53
54
1.0560498237609863
55
1.055875539779663
56
1.0555131435394287
57
58
59
60
1.0550408363342285
61
1.0540883541107178
62
63
1.0535709857940674
64
1.0530550479888916
65
1.0528521537780762
66
1.05263

# Testing

In [33]:
# To clear CUDA memory
torch.cuda.empty_cache()

test_efficient = models.efficientnet_b0(weights=None)
test_efficient.classifier[1] = torch.nn.Linear(in_features=1280, out_features=6, bias=True)
test_model = test_efficient.to(device)
test_model.load_state_dict(torch.load('/content/drive/MyDrive/Datasets/CMPE468/traffic_sign.pth'))

<All keys matched successfully>

In [34]:
test_model.eval()

total = len(test)
correctly_predicted = 0

with torch.no_grad():
  for batch_index_test, batch_test in enumerate(test_data_loader):
      image, image_label = batch_test
      image_label = [eval(inner_list_str) for inner_list_str in list(image_label)]
      image_label = torch.tensor(image_label, dtype=torch.float32).to(device)

      image = image.to(device)
      outputs = test_model(image)
      outputs = softmax(outputs)
      loss = criterion(outputs, image_label)

      index_predicted = torch.argmax(outputs, dim=1, keepdim=True).to('cpu').numpy()

      # print(index_predicted, image_label)

      for index, label in enumerate(image_label):
        label = label.to('cpu').numpy()
        if label[index_predicted[index]] == 1:
          correctly_predicted += 1

      # print(index_predicted, image_label)

print((correctly_predicted / total) * 100, loss)

100.0 tensor(1.0472, device='cuda:0')
