In [2]:
import torch 
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from pathlib import Path
import csv
import numpy as np
import cv2
from tqdm import tqdm

from edge_detection import edgeDetection
from interest_point_detection import display_interest_points_matrix
from detect_laser import *
from torchinfo import summary
from torchmetrics.classification import BinaryAccuracy

import matplotlib.pyplot as plt

In [3]:
# Prepare data 

data_path = Path("./data")
csv_list = list(data_path.glob("*.csv"))
file_map = dict()
test_map = dict()
i = 0
l = 0
for csv_file in csv_list:   
    with open(csv_file, newline='') as csvfile:

        reader = csv.reader(csvfile, delimiter=',')

        header = next(reader)

        # First 2 of every file will be for testing
        for k in range(2):
            row = next(reader)
            if len(row) > 0:
                test_map[l] = dict()
                test_map[l]["name"] = row[0]
                
                for j in range(1, 3):
                    test_map[l][header[j]] = row[j]

                l+=1
        
        for row in reader:
        
            if len(row) > 0:
                file_map[i] = dict()
                file_map[i]["name"] = row[0]
                
                for j in range(1, 3):
                    file_map[i][header[j]] = row[j]

                i+=1

In [4]:
data_map = dict()
test_data_map = dict()
index = 0
index_test = 0

In [5]:
# Getting images with laser inside them for training
for i in tqdm(file_map):
    #img = cv2.imread(file_map[i]["name"])

    val = 20
    count=0

    for k in range(5,val):
        for j in range(5,val):
            laser_np = (int(file_map[i]["laser.x"]), int(file_map[i]["laser.y"]))

            #temp = img[laser_np[1]-val+k:laser_np[1]+k,laser_np[0]-val+j:laser_np[0]+j]
            # exit(1)
            count+=1

            # Add to data_map
            # 0 is no laser, 1 is yes laser
            data_map[index] = dict()
            data_map[index]["file_name"] = file_map[i]["name"]
            data_map[index]["image"] = (laser_np[1]-val+k, laser_np[0]-val+j)
            data_map[index]["label"] = 1
            index += 1

            # cv2.namedWindow("burger")
            # cv2.imshow("burger", temp)
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
print(count)

100%|███████████████████████████████████████████████████████████████████████████████████| 326/326 [00:00<00:00, 1431.95it/s]

225





In [6]:
# Getting images without any laser for training
for i in tqdm(file_map):
  #  img = cv2.imread(file_map[i]["name"])
    val=20
    laser_np = (int(file_map[i]["laser.x"]), int(file_map[i]["laser.y"]))
    left = laser_np[0] - 20*2 + 5
    right = laser_np[0] + 35
    top = laser_np[1]-20*2+5
    bottom = laser_np[1]+35

    count = 0
    
    for k in range(50,3000-val-50,85):
        for j in range(50,4000-val-50,85):

            if j >= left and j < right and k >= top and k < bottom:
                continue
            # cv2.rectangle(img, (j,k), (j+val, k+val), color=(0,0,255), thickness=2)

            data_map[index] = dict()
            data_map[index]["file_name"] = file_map[i]["name"]
            data_map[index]["image"] = (k, j)
            data_map[index]["label"] = 0
            index += 1
            count+=1
            # temp = img[k:k+val, j:j+val]
print(count)

100%|█████████████████████████████████████████████████████████████████████████████████████| 326/326 [00:03<00:00, 88.87it/s]

1564





In [7]:
# Getting images with laser inside them for training
for i in tqdm(test_map):
    #img = cv2.imread(file_map[i]["name"])

    val = 20
    count=0

    for k in range(5,val):
        for j in range(5,val):
            laser_np = (int(test_map[i]["laser.x"]), int(test_map[i]["laser.y"]))

            #temp = img[laser_np[1]-val+k:laser_np[1]+k,laser_np[0]-val+j:laser_np[0]+j]
            # exit(1)
            count+=1

            # Add to data_map
            # 0 is no laser, 1 is yes laser
            test_data_map[index] = dict()
            test_data_map[index]["file_name"] = test_map[i]["name"]
            test_data_map[index]["image"] = (laser_np[1]-val+k, laser_np[0]-val+j)
            test_data_map[index]["label"] = 1
            index_test += 1

            # cv2.namedWindow("burger")
            # cv2.imshow("burger", temp)
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
print(count)

100%|█████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 1604.89it/s]

225





In [8]:
# Getting images without any laser for testing
for i in tqdm(test_map):
  #  img = cv2.imread(test_map[i]["name"])
    val=20
    laser_np = (int(test_map[i]["laser.x"]), int(test_map[i]["laser.y"]))
    left = laser_np[0] - 20*2 + 5
    right = laser_np[0] + 35
    top = laser_np[1]-20*2+5
    bottom = laser_np[1]+35
    
    for k in range(50,3000-val-50,50):
        for j in range(50,4000-val-50,50):

            if j >= left and j < right and k >= top and k < bottom:
                continue
            # cv2.rectangle(img, (j,k), (j+val, k+val), color=(0,0,255), thickness=2)

            test_data_map[index_test] = dict()
            test_data_map[index_test]["file_name"] = test_map[i]["name"]
            test_data_map[index_test]["image"] = (k, j)
            test_data_map[index_test]["label"] = 0
            index_test += 1
            # temp = img[k:k+val, j:j+val]

100%|███████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 74.60it/s]


In [9]:
data = np.empty((len(data_map), 1200))
test = np.empty((len(test_data_map), 1200))

In [13]:
for idx in tqdm(data_map):
    file_name = data_map[idx]["file_name"]
    k,j = data_map[idx]["image"]
    label = data_map[idx]["label"]
    
    img = cv2.imread(file_name)
    img_clone = img.copy()
    cropped_img = img_clone[k:k+20, j:j+20]
    output_img = np.reshape(cropped_img, (3, 20, 20))

    data[idx,:] = output_img.flatten()

  0%|                                                                               | 28/583010 [00:20<119:16:13,  1.36it/s]


KeyboardInterrupt: 

In [1]:
class LaserDetectionDataset(Dataset):
    """Laser Detection dataset."""

    def __init__(self, data_map, data):
        """
        Arguments:
            data_map: map to training data.
            data: training data
        """
        self.data_map = data_map
        self.data = data

    def __len__(self):

        return len(self.data_map)

    def __getitem__(self, idx):
   
        # file_name = self.data_map[idx]["file_name"]
        # k,j = self.data_map[idx]["image"]
        label = self.data_map[idx]["label"]

        # print("Beginning to read image")
        # img = cv2.imread(file_name)
        # print("Finishing reading image")
        # img_clone = img.copy()
        # cropped_img = img_clone[k:k+20, j:j+20]
        # output_img = np.reshape(cropped_img, (3, 20, 20))
        # cv2.namedWindow("burger")
        # cv2.imshow("burger", output_img)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()
        output_img = np.reshape(self.data[idx,:], (3, 20, 20))

        return output_img.astype(np.float32), np.array([label]).astype(np.float32)

NameError: name 'Dataset' is not defined

In [9]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3), stride=1, padding=1)
        self.act1 = nn.ReLU()
        self.drop1 = nn.Dropout(0.3)
        self.pool1 = nn.AvgPool2d(kernel_size=(2, 2))
        
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(3200, 10)
        self.act2 = nn.ReLU()

        self.linear2 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = self.act1(self.conv1(x))
        x = self.pool1(x)
        x = self.drop1(x)
        
        x = self.flatten(x)
        x = self.act2(self.linear1(x))

        x = self.linear2(x)
        
        return x

In [19]:
model = Net()
batch_size = 1
summary(model, input_size=(500, 3, 20, 20), device="cuda")

Layer (type:depth-idx)                   Output Shape              Param #
Net                                      [500, 1]                  --
├─Conv2d: 1-1                            [500, 32, 20, 20]         896
├─ReLU: 1-2                              [500, 32, 20, 20]         --
├─AvgPool2d: 1-3                         [500, 32, 10, 10]         --
├─Dropout: 1-4                           [500, 32, 10, 10]         --
├─Flatten: 1-5                           [500, 3200]               --
├─Linear: 1-6                            [500, 10]                 32,010
├─ReLU: 1-7                              [500, 10]                 --
├─Linear: 1-8                            [500, 1]                  11
Total params: 32,917
Trainable params: 32,917
Non-trainable params: 0
Total mult-adds (M): 195.21
Input size (MB): 2.40
Forward/backward pass size (MB): 51.24
Params size (MB): 0.13
Estimated Total Size (MB): 53.78

In [30]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)

laser_dataset = LaserDetectionDataset(data_map = data_map, data=data)
laser_dataloader = DataLoader(laser_dataset, batch_size=500, shuffle=True)

test_dataset = LaserDetectionDataset(data_map = test_data_map, data=test)
test_dataloader = DataLoader(test_dataset, batch_size=500, shuffle=True)

metric = BinaryAccuracy().to('cuda')
test_metric = BinaryAccuracy().to('cuda')

# load_path = Path("model29.pth")
# model.load_state_dict(torch.load(load_path.as_posix()))

In [31]:
n_epochs = 1

for epoch in range(0, n_epochs):

    count = 1
    acc = 0

    for inputs, labels in tqdm(laser_dataloader):

        inputs = inputs.to('cuda')
        labels = labels.to('cuda')
        
        y_pred = model(inputs)

        metric.update(y_pred, labels)

        
        acc += metric.item()
        count += 1
        
        loss = loss_fn(y_pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    test_acc = 0
    test_count = 1
    
    for test_inputs, test_labels in tqdm(test_dataloader):
        
        test_inputs = test_inputs.to('cuda')
        test_labels = test_labels.to('cuda')
        test_pred = model(test_inputs)
        
        metric.update(y_pred, labels)
        
        test_acc += metric.item()
        test_count += 1
        
    # print(f"Epoch {epoch}: training accuracy: {metric.compute()}")
    # print(f"Epoch {epoch}: test accuracy: {test_metric.compute()}")

    if ((epoch+1) % 5 == 0):
        torch.save(model.state_dict(), f"model_binary{epoch}.pth")

  0%|                                                                                                              | 0/1167 [00:00<?, ?it/s]

Beginning to read image
Finishing reading image
Beginning to read image
Finishing reading image
Beginning to read image
Finishing reading image
Beginning to read image
Finishing reading image
Beginning to read image
Finishing reading image
Beginning to read image
Finishing reading image
Beginning to read image
Finishing reading image
Beginning to read image
Finishing reading image
Beginning to read image


  0%|                                                                                                              | 0/1167 [00:15<?, ?it/s]

KeyboardInterrupt



In [9]:
# laser_dataset = LaserDetectionDataset(data_map = data_map)
# laser_dataloader = DataLoader(laser_dataset, shuffle=False)

# for i, (img, label) in tqdm(enumerate(laser_dataloader)):

#     display = np.reshape(img.numpy().astype(np.uint8), (20,20,3))
#     print(display.shape)
#     cv2.namedWindow("burger")
#     cv2.imshow("burger", display)
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()
    
#     if i == 20:
#         cv2.destroyAllWindows()
#         break

In [8]:
print(len(data_map))

583010
