In [1]:
import torch.nn as nn
import torch
import tqdm.notebook as tqdm
from IPython import display
import matplotlib.pyplot as plt
import pickle as pkl
from sklearn.covariance import EllipticEnvelope
from utils.dataset import load_compiled_data, ObjectDetectorDataset
from utils.head import input_reg, auxiliary_head, mlp_dual, LSTM_DE
from torch.utils.data import Dataset, DataLoader
import random

# Single CUDA device on which the models below are placed; later cells pass the
# same `device` to the data loaders.
device = torch.device('cuda:0')

# Two per-camera input regressors plus the auxiliary head.
reg_camera_1 = input_reg().to(device)
reg_camera_2 = input_reg().to(device)
aux_head = auxiliary_head().to(device)

# map_location remaps the stored tensors onto `device`, so the checkpoints load
# even if they were saved from a different GPU index than the one used here.
reg_camera_1.load_state_dict(torch.load('models/head/reg_camera_1.pt', map_location=device))
reg_camera_2.load_state_dict(torch.load('models/head/reg_camera_2.pt', map_location=device))
aux_head.load_state_dict(torch.load('models/head/aux_head.pt', map_location=device))

# The loaded heads are only used for inference in this notebook, so switch
# them to eval mode. The last expression is kept so the cell still displays
# the auxiliary head's layer summary.
reg_camera_1.eval()
reg_camera_2.eval()
aux_head.eval()

aircraft-detection custom utils


auxiliary_head(
  (fc_1): Linear(in_features=256, out_features=128, bias=True)
  (fc_2): Linear(in_features=128, out_features=1, bias=True)
  (relu): ReLU()
)

In [4]:
def _corner_boxes_to_centroid(dets, device):
    """Convert 4-column corner boxes to (X_centroid, Y_centroid, X_width, Y_width) on `device`.

    Columns 0/2 are treated as the two X coordinates and columns 1/3 as the two
    Y coordinates of each box.
    """
    dets = dets.to(device)
    first_x, first_y, second_x, second_y = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    return torch.stack(
        ((first_x + second_x) / 2, (first_y + second_y) / 2, second_x - first_x, second_y - first_y),
        dim=1,
    )


def load_compiled_data_custom(ts=[1], ws=[1], rs=[1], offset = [[0,0,0,0],[0,0,0,0]], divider = [[1,1,1,1],[1,1,1,1]], device = torch.device('cpu')):
    """Load paired two-camera detections and return normalised box features and targets.

    For every (t, w, r) combination the file ``output/object_detector/t{t}w{w}r{r}.pkl``
    is read; it must unpickle to ``(box, scores, gt_distance)`` where ``box`` has
    8 columns (camera 1 box in columns 0-3, camera 2 box in columns 4-7),
    ``scores`` has one column per camera and ``gt_distance`` is a NumPy array
    whose column 0 is the target.

    Only rows where BOTH cameras have a detection (no box coordinate equal to 0)
    are kept, so the two cameras stay row-aligned. No filter on the ground-truth
    value is applied.

    Args:
        ts, ws, rs: iterables of identifiers used to build the file names.
            (The list defaults are never mutated.)
        offset: per-camera 4-element offsets subtracted from the centroid features.
        divider: per-camera 4-element divisors applied after the offset.
        device: device of the returned tensors.

    Returns:
        ``(camera_1_x, camera_1_y, camera_2_x, camera_2_y)``. Each ``*_x`` is an
        (N, 4) tensor of normalised (X_centroid, Y_centroid, X_width, Y_width);
        each ``*_y`` is an (N, 2) tensor of (gt_distance[:, 0] / 10, score).

    Side effects:
        Prints the per-column mean and std of both feature tensors.
    """
    camera_1_dets = []
    camera_2_dets = []
    camera_1_ys = []
    camera_2_ys = []

    offset = torch.tensor(offset, device=device)
    divider = torch.tensor(divider, device=device)

    for t in ts:
        for w in ws:
            for r in rs:
                # NOTE(security): pickle.load can execute arbitrary code; only
                # point this at files produced by this project's own pipeline.
                with open(f"output/object_detector/t{t}w{w}r{r}.pkl", 'rb') as f:
                    box, scores, gt_distance = pkl.load(f)
                gt_distance = torch.from_numpy(gt_distance)

                # Boxes AND scores are cut to the ground-truth length. The
                # scores used to be left untruncated, which made torch.cat fail
                # whenever the detector output had more rows than the ground truth.
                n_rows = len(gt_distance)
                gt_scaled = gt_distance[:, 0].unsqueeze(1) / 10
                camera_1_det = torch.cat((box[:n_rows, :4].float(), gt_scaled, scores[:n_rows, 0].unsqueeze(1)), dim=1)
                camera_2_det = torch.cat((box[:n_rows, 4:].float(), gt_scaled, scores[:n_rows, 1].unsqueeze(1)), dim=1)

                # Remove missing detections: a row survives only if every box
                # coordinate is non-zero for both cameras.
                both_detected = torch.logical_and(
                    torch.all(camera_1_det[:, :4] != 0, dim=1),
                    torch.all(camera_2_det[:, :4] != 0, dim=1),
                )
                camera_1_det = camera_1_det[both_detected].float()
                camera_2_det = camera_2_det[both_detected].float()

                camera_1_dets.append(camera_1_det[:, :4])
                camera_1_ys.append(camera_1_det[:, -2:])
                camera_2_dets.append(camera_2_det[:, :4])
                camera_2_ys.append(camera_2_det[:, -2:])

    camera_1_y = torch.cat(camera_1_ys).to(device)
    camera_2_y = torch.cat(camera_2_ys).to(device)

    # Convert to (X_centroid, Y_centroid, X_width, Y_width), then normalise
    # each camera with its own offset/divider row.
    camera_1_x = _corner_boxes_to_centroid(torch.cat(camera_1_dets), device)
    camera_2_x = _corner_boxes_to_centroid(torch.cat(camera_2_dets), device)
    camera_1_x = (camera_1_x - offset[0]) / divider[0]
    camera_2_x = (camera_2_x - offset[1]) / divider[1]

    print(f"""Data Statistic:
          Camera 1:
          Mean: {torch.mean(camera_1_x, 0).tolist()}
          Std : {torch.std(camera_1_x, 0).tolist()}
          
          Camera 2:
          Mean: {torch.mean(camera_2_x, 0).tolist()}
          Std : {torch.std(camera_2_x, 0).tolist()}
          """)
    return camera_1_x, camera_1_y, camera_2_x, camera_2_y

In [5]:
# Data Loading & Removing Outlier
norm_offset = [[20.7227, 271.4375,   9.0000,   5.5000], [5.3496, 248.5000,  10.1094,   4.5000]]
norm_divider = [[1894.2773,  798.0625,  174.2500,   67.5000], [1908.6504,  396.5000,   99.8906,   42.5000]]

# ---- Paired (2-camera) set --------------------------------------------------
# load_compiled_data_custom keeps only frames in which BOTH cameras detected the
# target, so camera 1 and camera 2 rows stay aligned.
train_camera_1_x, train_camera_1_y, train_camera_2_x, train_camera_2_y = load_compiled_data_custom(ts=[1,2,5],ws=[1],rs=[1,2], offset = norm_offset, divider = norm_divider, device = device)
test_camera_1_x, test_camera_1_y, test_camera_2_x, test_camera_2_y = load_compiled_data_custom(ts=[1,2,5],ws=[1],rs=[3], offset = norm_offset, divider = norm_divider, device = device)

# Outlier cut on column 1 (the normalised Y centroid). A frame is kept only if
# it passes the cut for both cameras, preserving the row alignment.
ff_train = torch.logical_and((train_camera_1_x[:, 1] < 0.75), (train_camera_2_x[:, 1] < 1.5))
ff_test = torch.logical_and((test_camera_1_x[:, 1] < 0.75), (test_camera_2_x[:, 1] < 1.5))
train_camera_1_x = train_camera_1_x[ff_train]
train_camera_1_y = train_camera_1_y[ff_train]
train_camera_2_x = train_camera_2_x[ff_train]
train_camera_2_y = train_camera_2_y[ff_train]
test_camera_1_x = test_camera_1_x[ff_test]
test_camera_1_y = test_camera_1_y[ff_test]
test_camera_2_x = test_camera_2_x[ff_test]
test_camera_2_y = test_camera_2_y[ff_test]

# The per-camera regressors are frozen feature extractors here, so run them
# without autograd instead of building a graph over the whole dataset and
# detaching afterwards. Stacking on dim 1 gives one 2-step sequence per frame
# (camera 1 first, camera 2 second).
with torch.no_grad():
    train_2cam_x = torch.stack((reg_camera_1(train_camera_1_x), reg_camera_2(train_camera_2_x)), dim=1)
    test_2cam_x = torch.stack((reg_camera_1(test_camera_1_x), reg_camera_2(test_camera_2_x)), dim=1)
train_2cam_y = train_camera_1_y
test_2cam_y = test_camera_1_y

# ---- Pooled (1-camera) set --------------------------------------------------
# NOTE(review): load_compiled_data comes from utils.dataset; it is assumed to
# return the same four tensors with the same column layout -- confirm.
train_camera_1_x, train_camera_1_y, train_camera_2_x, train_camera_2_y = load_compiled_data(ts=[1,2,5],ws=[1],rs=[1,2], offset = norm_offset, divider = norm_divider, device = device)
test_camera_1_x, test_camera_1_y, test_camera_2_x, test_camera_2_y = load_compiled_data(ts=[1,2,5],ws=[1],rs=[3], offset = norm_offset, divider = norm_divider, device = device)

# Here each camera is filtered independently: the two cameras are pooled into
# one set of length-1 sequences below, so no row alignment is required.
ff_train_1 = train_camera_1_x[:, 1] < 0.75
ff_train_2 = train_camera_2_x[:, 1] < 1.5
ff_test_1 = test_camera_1_x[:, 1] < 0.75
ff_test_2 = test_camera_2_x[:, 1] < 1.5
train_camera_1_x = train_camera_1_x[ff_train_1]
train_camera_1_y = train_camera_1_y[ff_train_1]
train_camera_2_x = train_camera_2_x[ff_train_2]
train_camera_2_y = train_camera_2_y[ff_train_2]
test_camera_1_x = test_camera_1_x[ff_test_1]
test_camera_1_y = test_camera_1_y[ff_test_1]
test_camera_2_x = test_camera_2_x[ff_test_2]
test_camera_2_y = test_camera_2_y[ff_test_2]

# Camera 1 rows first, then camera 2 rows; unsqueeze(1) adds the length-1
# sequence axis. Targets are concatenated in the same order.
with torch.no_grad():
    train_1cam_x = torch.cat((reg_camera_1(train_camera_1_x), reg_camera_2(train_camera_2_x))).unsqueeze(1)
    test_1cam_x = torch.cat((reg_camera_1(test_camera_1_x), reg_camera_2(test_camera_2_x))).unsqueeze(1)
train_1cam_y = torch.cat((train_camera_1_y, train_camera_2_y))
test_1cam_y = torch.cat((test_camera_1_y, test_camera_2_y))

Data Statistic:
          Camera 1:
          Mean: [0.23115402460098267, 0.13664153218269348, 0.3269054591655731, 0.23455950617790222]
          Std : [0.21913321316242218, 0.10741851478815079, 0.20001117885112762, 0.20704865455627441]
          
          Camera 2:
          Mean: [0.28849586844444275, 0.3268456757068634, 0.3711751103401184, 0.2691517174243927]
          Std : [0.26450350880622864, 0.19246098399162292, 0.23262810707092285, 0.2345651388168335]
          
Data Statistic:
          Camera 1:
          Mean: [0.24771815538406372, 0.10636551678180695, 0.3369009792804718, 0.24522686004638672]
          Std : [0.23109997808933258, 0.09466056525707245, 0.20359617471694946, 0.21714016795158386]
          
          Camera 2:
          Mean: [0.30855682492256165, 0.2939678430557251, 0.3898959457874298, 0.29688432812690735]
          Std : [0.2778509855270386, 0.2029714286327362, 0.24246175587177277, 0.25780919194221497]
          
Data Statistic:
          Camera 1:
          

In [6]:
# Fresh LSTM_DE model. It is placed on the notebook-wide `device` (rather than
# whatever the current CUDA device happens to be) so it always sits on the same
# device as the cached feature tensors.
sequential_de = LSTM_DE().to(device)
sequential_de.train()

LSTM_DE(
  (LSTM): LSTM(256, 256, num_layers=2, batch_first=True)
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=1, bias=True)
    (3): ReLU()
  )
)

In [128]:
# Optimisation setup for the shallow run: mean-squared-error objective and
# SGD with momentum 0.9 over all of sequential_de's parameters.
criterion = nn.MSELoss()
learning_rate = 1e-4
optimizer = torch.optim.SGD(
    params=sequential_de.parameters(),
    lr=learning_rate,
    momentum=0.9,
)

In [88]:
# Shallow 1 Camera Training
sequential_de.train()
losses = []
tlosses = []
epochs = tqdm.tqdm(iterable=range(700000), leave=True)
for epoch in epochs:
    optimizer.zero_grad()
    # One full-batch step over the pooled length-1 sequences (camera 1 and
    # camera 2 samples concatenated). The loss is scaled by 1000.
    loss = criterion(sequential_de(train_1cam_x).squeeze(), train_1cam_y[:, 0]) * 1000
    loss.backward()
    optimizer.step()

    if epoch % 500 == 0:
        sequential_de.eval()
        # Evaluation needs no autograd graph; no_grad avoids building one over
        # the whole test set every time.
        with torch.no_grad():
            ypred = sequential_de(test_1cam_x).squeeze()
            target = test_1cam_y[:, 0]
            tloss = criterion(ypred, target) * 1000
            # MAPE is the error relative to the GROUND TRUTH. It used to divide
            # by the prediction, which also breaks when a prediction is 0.
            # Assumes the targets are non-zero.
            test_mape = ((abs(ypred - target) / abs(target)).mean() * 100).item()

        display.clear_output(wait=True)
        display.display(epochs.container)
        print("==========================\nEpoch {}".format(epoch))
        print(f'Train Loss   : {loss.item()}')
        print(f'Test Loss    : {tloss.item()}')
        print(f'Test MAPE    : {test_mape:.2f}%')
        print("==========================\n")
        losses.append([loss.item()])
        tlosses.append([tloss.item()])
        sequential_de.train()

  0%|          | 996/700000 [00:21<4:11:38, 46.30it/s]

Epoch 1000
Train Loss   : 0.007975622080266476
Test Loss    : 0.004990849178284407
Test MAPE    : 0.24%



KeyboardInterrupt: 

In [126]:
# Write the current sequential_de weights to the shallow-run checkpoint file.
torch.save(obj=sequential_de.state_dict(), f='models/head/sequential_de.pt')

In [84]:
# Restore the shallow-run checkpoint. map_location remaps the stored tensors
# onto `device`, so the file loads even if it was saved from another GPU index.
sequential_de.load_state_dict(torch.load('models/head/sequential_de.pt', map_location=device))

<All keys matched successfully>

# Dropout Training

In [11]:
# Start the camera-dropout experiment from a freshly initialised LSTM_DE,
# placed on the notebook-wide `device` so it matches the cached features.
sequential_de = LSTM_DE().to(device)
sequential_de.train()

# Per-evaluation histories filled by the training loop (it appends the test
# MAPE of the 2-camera and 1-camera evaluations respectively).
tlosses_deep = []
tlosses_shallow = []

In [13]:
# Optimisation setup for the dropout run: MSE objective and plain SGD
# (no momentum) over sequential_de's parameters.
criterion = nn.MSELoss()
learning_rate = 1e-4
optimizer = torch.optim.SGD(
    params=sequential_de.parameters(),
    lr=learning_rate,
)

In [14]:
# Wrap the paired 2-camera features/targets in a dataset and serve them as
# reshuffled minibatches of 32.
od_dataset = ObjectDetectorDataset(train_2cam_x, train_2cam_y)
od_dataloader = DataLoader(
    dataset=od_dataset,
    batch_size=32,
    shuffle=True,
)

torch.Size([26676, 2, 256]) torch.Size([26676, 2])


In [15]:
# Deep 2 Camera Training
sequential_de.train()
epochs = tqdm.tqdm(iterable=range(8000), leave=True)
for epoch in epochs:
    for train_minibatch_x, train_minibatch_y in od_dataloader:
        optimizer.zero_grad()

        # Random camera dropout, drawn once per minibatch. randint is inclusive
        # on both ends, so: 0 -> keep only the first sequence step (camera 1),
        # 1 -> keep only the second step (camera 2), 2 or 3 -> keep both
        # (i.e. the full 2-camera sequence is used half of the time).
        drop_choice = random.randint(0,3)
        if drop_choice == 0:
            train_minibatch_x = train_minibatch_x[:, 0:1, :]
        elif drop_choice == 1:
            train_minibatch_x = train_minibatch_x[:, 1:, :]

        # Supervise the output at the last sequence step only.
        ypred = sequential_de(train_minibatch_x)[:, -1].squeeze()
        loss = criterion(ypred, train_minibatch_y[:, 0]) * 10
        loss.backward()
        optimizer.step()

    # Evaluate after every epoch (raise the modulus to evaluate less often).
    if epoch % 1 == 0:
        sequential_de.eval()
        # Evaluation needs no autograd graph; no_grad avoids building one over
        # both test sets every epoch.
        with torch.no_grad():
            ypred_shallow = sequential_de(test_1cam_x).squeeze()
            tloss_shallow = criterion(ypred_shallow, test_1cam_y[:, 0]) * 10
            ypred_deep = sequential_de(test_2cam_x).squeeze()[:, 1]
            tloss_deep = criterion(ypred_deep, test_2cam_y[:, 0]) * 10

            # MAPE is the error relative to the GROUND TRUTH. It used to divide
            # by the prediction, which also breaks when a prediction is 0.
            # Assumes the targets are non-zero.
            test_deep_mape = ((abs(ypred_deep - test_2cam_y[:, 0]) / abs(test_2cam_y[:, 0])).mean() * 100).item()
            test_shallow_mape = ((abs(ypred_shallow - test_1cam_y[:, 0]) / abs(test_1cam_y[:, 0])).mean() * 100).item()

        display.clear_output(wait=True)
        display.display(epochs.container)
        print("==========================\nEpoch {}".format(epoch))
        # `loss` is the loss of the LAST minibatch of the epoch, not an epoch average.
        print(f'Train Loss           : {loss.item()}\n')

        print(f'Test Shallow Loss    : {tloss_shallow.item()}')
        print(f'Test Deep Loss       : {tloss_deep.item()}\n')

        print(f'Test Shallow MAPE    : {test_shallow_mape:.2f}%')
        print(f'Test Deep MAPE       : {test_deep_mape:.2f}%')

        print("==========================\n")

        # Despite their names, these histories store the test MAPE values.
        tlosses_deep.append(test_deep_mape)
        tlosses_shallow.append(test_shallow_mape)
        sequential_de.train()

  0%|          | 2/8000 [00:04<3:13:53,  1.45s/it]

Epoch 2
Train Loss           : 8.695456926943734e-05

Test Shallow Loss    : 6.014377868268639e-05
Test Deep Loss       : 3.481760359136388e-05

Test Shallow MAPE    : 0.29%
Test Deep MAPE       : 0.22%



KeyboardInterrupt: 

In [27]:
# Write the current sequential_de weights to the dropout-run checkpoint file.
torch.save(obj=sequential_de.state_dict(), f='models/head/sequential_de_random_training.pt')

In [12]:
# Restore the dropout-run checkpoint. map_location remaps the stored tensors
# onto `device`, so the file loads even if it was saved from another GPU index.
sequential_de.load_state_dict(torch.load('models/head/sequential_de_random_training.pt', map_location=device))

<All keys matched successfully>

# Old OD

In [30]:
# Per-camera normalisation constants: row 0 is camera 1, row 1 is camera 2.
norm_offset = [
    [20.7227, 271.4375, 9.0000, 5.5000],
    [5.3496, 248.5000, 10.1094, 4.5000],
]
norm_divider = [
    [1894.2773, 798.0625, 174.2500, 67.5000],
    [1908.6504, 396.5000, 99.8906, 42.5000],
]

# Load the paired two-camera splits (train: rs 1 and 2, test: rs 3).
train_camera_1_x, train_camera_1_y, train_camera_2_x, train_camera_2_y = load_compiled_data_custom(
    ts=[1, 2, 5], ws=[1], rs=[1, 2], offset=norm_offset, divider=norm_divider, device=device
)
test_camera_1_x, test_camera_1_y, test_camera_2_x, test_camera_2_y = load_compiled_data_custom(
    ts=[1, 2, 5], ws=[1], rs=[3], offset=norm_offset, divider=norm_divider, device=device
)

# Outlier masks on column 1 of each camera's features; a row must pass the
# cut for both cameras to be kept.
ff_train = (train_camera_1_x[:, 1] < 0.75) & (train_camera_2_x[:, 1] < 1.5)
ff_test = (test_camera_1_x[:, 1] < 0.75) & (test_camera_2_x[:, 1] < 1.5)

# Apply each mask to all four tensors of its split in one pass.
train_camera_1_x, train_camera_1_y, train_camera_2_x, train_camera_2_y = (
    split[ff_train]
    for split in (train_camera_1_x, train_camera_1_y, train_camera_2_x, train_camera_2_y)
)
test_camera_1_x, test_camera_1_y, test_camera_2_x, test_camera_2_y = (
    split[ff_test]
    for split in (test_camera_1_x, test_camera_1_y, test_camera_2_x, test_camera_2_y)
)

# Model input: both cameras' four box features side by side along dim 1;
# the targets are taken from camera 1's rows.
train_2cam_x = torch.cat([train_camera_1_x, train_camera_2_x], dim=1).detach()
test_2cam_x = torch.cat([test_camera_1_x, test_camera_2_x], dim=1).detach()
train_2cam_y = train_camera_1_y
test_2cam_y = test_camera_1_y

Data Statistic:
          Camera 1:
          Mean: [0.23115402460098267, 0.13664153218269348, 0.3269054591655731, 0.23455950617790222]
          Std : [0.21913321316242218, 0.10741851478815079, 0.20001117885112762, 0.20704865455627441]
          
          Camera 2:
          Mean: [0.28849586844444275, 0.3268456757068634, 0.3711751103401184, 0.2691517174243927]
          Std : [0.26450350880622864, 0.19246098399162292, 0.23262810707092285, 0.2345651388168335]
          
Data Statistic:
          Camera 1:
          Mean: [0.24771815538406372, 0.10636551678180695, 0.3369009792804718, 0.24522686004638672]
          Std : [0.23109997808933258, 0.09466056525707245, 0.20359617471694946, 0.21714016795158386]
          
          Camera 2:
          Mean: [0.30855682492256165, 0.2939678430557251, 0.3898959457874298, 0.29688432812690735]
          Std : [0.2778509855270386, 0.2029714286327362, 0.24246175587177277, 0.25780919194221497]
          


In [40]:
# mlp_dual model for the concatenated box-feature input, placed on the
# notebook-wide `device` so it matches the tensors returned by the loader.
model = mlp_dual().to(device)

In [46]:
# Optimisation setup for the mlp_dual run: MSE objective and plain SGD
# (no momentum) over `model`'s parameters.
criterion = nn.MSELoss()
learning_rate = 1e-4
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
)

In [51]:
# old od training
# The network optimised here is `model`, so train/eval mode is toggled on
# `model`. The cell used to toggle `sequential_de`, a copy-paste slip that left
# `model` in training mode during evaluation.
model.train()
losses = []
tlosses = []
epochs = tqdm.tqdm(iterable=range(700000), leave=True)
for epoch in epochs:
    optimizer.zero_grad()
    # One full-batch step on the concatenated two-camera box features.
    # The loss is scaled by 100.
    ypred = model(train_2cam_x).squeeze()
    loss = criterion(ypred, train_2cam_y[:, 0]) * 100
    loss.backward()
    optimizer.step()

    if epoch % 500 == 0:
        model.eval()
        # Evaluation needs no autograd graph; no_grad avoids building one over
        # the whole test set every time.
        with torch.no_grad():
            ypred = model(test_2cam_x).squeeze()
            target = test_2cam_y[:, 0]
            tloss = criterion(ypred, target) * 100
            # MAPE is the error relative to the GROUND TRUTH. It used to divide
            # by the prediction, which also breaks when a prediction is 0.
            # Assumes the targets are non-zero.
            test_mape = ((abs(ypred - target) / abs(target)).mean() * 100).item()

        display.clear_output(wait=True)
        #display.display(epochs.container)
        print("==========================\nEpoch {}".format(epoch))
        print(f'Train Deep Loss      : {loss.item()}\n')
        print(f'Test Deep Loss       : {tloss.item()}\n')
        print(f'Test Deep MAPE       : {test_mape:.2f}%')

        print("==========================\n")
        # History logging is currently disabled for this run.
        #losses.append([loss.item()])
        #tlosses.append([tloss.item()])
        model.train()

Epoch 1000
Train Deep Loss      : 0.0008542968425899744

Test Deep Loss       : 0.0006653121090494096

Test Deep MAPE       : 0.28%



KeyboardInterrupt: 