In [None]:
!rm -r sample_data

!pip install wandb

!git clone https://github.com/benjamin32561/Cloud-Wise-ML.git

In [None]:
# Switch the working directory to the cloned repository and pull the latest
# commits, so a checkout left over from an earlier session is brought up to date.
%cd /content/Cloud-Wise-ML
!git pull

In [None]:
# Mount Google Drive at /content/drive (Colab only; prompts for authorization).
# The dataset is later copied from /content/drive/MyDrive/ML/.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import sys
import os
# Put the AutoEncoder training folder on the module search path
# (presumably where common_constants / common_functions live - confirm).
sys.path += [os.path.abspath('/content/Cloud-Wise-ML/training/AutoEncoder')]

In [None]:
import torch
import wandb
import pandas as pd
import numpy as np
import common_constants as cc
import common_functions as cf
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader
from copy import deepcopy
from random import randint

In [None]:
class LinearAutoEncoder(torch.nn.Module):
	"""Fully connected autoencoder over flattened samples.

	The encoder is a stack of Linear+ReLU layers that narrows the flattened
	input down to the last width in ``hidden_dims``; the decoder mirrors the
	same widths back up to ``input_dim`` and ends with a plain Linear layer
	(no activation on the reconstruction).

	Args:
		input_dim: number of values per sample after flattening.
			The default, 677*3, matches the original hard-coded size.
		hidden_dims: encoder layer widths, outermost first; the last entry is
			the bottleneck size. The default reproduces the original
			1024 -> 512 -> 256 layout, including its state_dict key names.

	Raises:
		ValueError: if ``hidden_dims`` is empty.
	"""

	def __init__(self, input_dim=677*3, hidden_dims=(1024, 512, 256)):
		super().__init__()

		hidden_dims = tuple(hidden_dims)
		if not hidden_dims:
			raise ValueError("hidden_dims must contain at least one layer width")

		# input -> hidden[0] -> ... -> bottleneck, each Linear followed by ReLU.
		widths = (input_dim,) + hidden_dims
		encoder_layers = []
		for n_in, n_out in zip(widths[:-1], widths[1:]):
			encoder_layers.append(torch.nn.Linear(n_in, n_out))
			encoder_layers.append(torch.nn.ReLU())
		self.encoder = torch.nn.Sequential(*encoder_layers)

		# Same widths in reverse order: bottleneck -> ... -> input.
		mirrored = widths[::-1]
		decoder_layers = []
		for n_in, n_out in zip(mirrored[:-1], mirrored[1:]):
			decoder_layers.append(torch.nn.Linear(n_in, n_out))
			decoder_layers.append(torch.nn.ReLU())
		# The reconstruction layer is linear, so drop the trailing activation.
		decoder_layers.pop()
		self.decoder = torch.nn.Sequential(*decoder_layers)

	def forward(self, x):
		"""Return the reconstruction of ``x`` as a (batch, input_dim) tensor.

		Every dimension after the first is flattened before encoding, so ``x``
		may be passed either already flat or in its original per-sample shape.
		"""
		x = torch.flatten(x, start_dim=1)
		x = self.encoder(x)
		x = self.decoder(x)
		return x

In [None]:
# ---- Run configuration ----

# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Weights & Biases project and run labels.
WANDB_PROJECT_NAME, RUN_NAME = "AutoEncoder", "1"

# The training loop iterates over range(START_FROM, EPOCHS).
START_FROM, EPOCHS = 0, 500

# How many random test records are plotted after training.
SHOW_N_TESTS = 5

# DataLoader batch sizes for the train and test splits.
TRAIN_BATCH_SIZE, TEST_BATCH_SIZE = 1, 4

In [None]:
!cp -r /content/drive/MyDrive/ML/accident_or_not_dataset /content/accident_or_not_dataset

In [None]:
# Run the repository's data-preparation scripts from their own folder.
%cd /content/Cloud-Wise-ML/data_analysis

# Builds /content/record_class.txt from the folder->class mapping file
# (presumably one line per record path with its class - confirm in save_xlsx_paths.py).
!python save_xlsx_paths.py --folder_class_dict_file /content/Cloud-Wise-ML/training/AutoEncoder/folder_class_dict.txt --txt_file_location /content/record_class.txt

# Presumably splits that list into the train/test files read later through
# cc.TRAIN_TXT_PATH and cc.TEST_TXT_PATH - confirm in split_txt.py.
!python split_txt.py --txt_file_location /content/record_class.txt

In [None]:
# Prepare the root folder for saved models
# (presumably CreatePath creates the directory when it is missing - confirm in common_functions).
cf.CreatePath(cc.MODELS_PATH)

In [None]:
# Training split: reshuffled every epoch.
train_dataset = cf.AEAcceleratorDataset(cc.TRAIN_TXT_PATH)
train_dataloader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)

# Evaluation split: fixed order. The validation loss is a mean of per-batch
# means, so shuffling would change which samples land in a ragged final batch
# and make the metric vary between passes even for identical weights.
test_dataset = cf.AEAcceleratorDataset(cc.TEST_TXT_PATH)
test_dataloader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False)

In [None]:
# Start the Weights & Biases run that receives the per-epoch losses.
# Authenticate through the interactive prompt, `wandb login`, or the
# WANDB_API_KEY environment variable - never store the key in the notebook.
wandb.init(project=WANDB_PROJECT_NAME,name=RUN_NAME)

In [None]:
# Output layout: <MODELS_PATH>/LinearModel/<RUN_NAME>/models
sub_project_path = os.path.join(cc.MODELS_PATH, 'LinearModel')
sub_model_path = os.path.join(sub_project_path, RUN_NAME)
models_path = os.path.join(sub_model_path, 'models')

# Hand each level to CreatePath, parents first.
for run_dir in (sub_project_path, sub_model_path, models_path):
    cf.CreatePath(run_dir)

# Reconstruction objective, the network on the selected device, and Adam with
# its default hyper-parameters (SGD is imported as the alternative).
loss_func = torch.nn.MSELoss()
model = LinearAutoEncoder()
model = model.to(DEVICE)
optimizer = Adam(params=model.parameters())

In [None]:
# Train for epochs START_FROM..EPOCHS-1, validate after each epoch, keep a copy
# of the model with the lowest validation loss, and save both that copy and the
# final model.

# Some epochs produce a very bad model whose loss spikes. For the W&B charts
# only, any loss above LOSS_PLOT_CEILING is logged as LOSS_PLOT_REPLACEMENT so
# the spikes do not flatten the curves. Model selection uses the real values.
LOSS_PLOT_CEILING = 0.1
LOSS_PLOT_REPLACEMENT = 0.05

best_val = float('inf')
best_model = None
for epoch in range(START_FROM, EPOCHS):
    # ---- train ----
    model.train()
    n_batches = len(train_dataloader)
    train_loss_sum = 0.0
    train_batches_seen = 0
    for idx, data in enumerate(train_dataloader):
        optimizer.zero_grad()
        data = data.to(torch.float32).to(DEVICE)
        # The model flattens its input itself; the target must match that shape.
        flattened_data = torch.flatten(data, start_dim=1)

        prediction = model(data)

        loss = loss_func(prediction, flattened_data)
        loss.backward()
        optimizer.step()

        # Running sum instead of re-averaging a growing list on every batch.
        train_loss_sum += float(loss)
        train_batches_seen += 1
        print("", end='\rEpoch: {}/{} | Batch: {}/{} | loss: {}'.format(epoch, EPOCHS, idx, n_batches, train_loss_sum / train_batches_seen))
    epoch_final_loss = train_loss_sum / train_batches_seen if train_batches_seen else float('nan')

    # ---- evaluate ----
    model.eval()
    val_loss_sum = 0.0
    val_batches_seen = 0
    # No gradients are needed for validation; skipping them saves memory and time.
    with torch.no_grad():
        for data in test_dataloader:
            data = data.to(torch.float32).to(DEVICE)
            flattened_data = torch.flatten(data, start_dim=1)

            prediction = model(data)

            val_loss_sum += float(loss_func(prediction, flattened_data))
            val_batches_seen += 1
    validation_final_loss = val_loss_sum / val_batches_seen if val_batches_seen else float('nan')
    print('\rEpoch: {}/{} | train_loss: {} | val_loss: {}\n'.format(epoch, EPOCHS, epoch_final_loss, validation_final_loss))

    # Select on the real validation loss. Replacing outliers before this
    # comparison would let a diverged epoch be recorded as the best model.
    if validation_final_loss < best_val:
        best_val = validation_final_loss
        best_model = deepcopy(model)

    # `sync` is no longer passed: it is a deprecated argument of wandb.log.
    wandb.log({
        "epoch_loss": LOSS_PLOT_REPLACEMENT if epoch_final_loss > LOSS_PLOT_CEILING else epoch_final_loss,
        "epoch_validation_loss": LOSS_PLOT_REPLACEMENT if validation_final_loss > LOSS_PLOT_CEILING else validation_final_loss
    }, step=epoch)

# If no epoch produced a usable validation loss, fall back to the current model
# so that best.pt always contains a loadable module.
if best_model is None:
    best_model = model

last_path = os.path.join(models_path, 'last.pt')
torch.save(model, last_path)
best_path = os.path.join(models_path, 'best.pt')
torch.save(best_model, best_path)

print("min validation loss: {}".format(best_val))

In [None]:
# Visual check: reconstruct SHOW_N_TESTS random test records with the best
# model and save ground-truth / prediction plots side by side in the run folder.

# best.pt holds a fully pickled module, so it must be loaded with
# weights_only=False (newer PyTorch defaults to True and rejects such files;
# the keyword needs torch >= 1.13).
# SECURITY: unpickling can execute arbitrary code - only load files produced by
# this notebook.
model = torch.load(os.path.join(models_path, 'best.pt'), map_location=DEVICE, weights_only=False)
model.eval()
n_test_sample = len(test_dataset)

for show_i in range(SHOW_N_TESTS):
    idx = randint(0, n_test_sample-1)

    # Called as a plain method because of the extra positional flag
    # (presumably "return the record as a DataFrame" - confirm in AEAcceleratorDataset).
    df = test_dataset.__getitem__(idx, True)

    # Every column except the last one is fed to the model.
    features = df.to_numpy()[:, :-1]
    model_input = torch.flatten(torch.tensor(features).unsqueeze(0), start_dim=1).to(torch.float32).to(DEVICE)

    with torch.no_grad():
        prediction = model(model_input)

    # Back to the per-record (rows, channels) layout of the input.
    prediction = prediction.view(features.shape).cpu().numpy()

    # Reconstructed channels replace the originals; the column that was not
    # fed to the model is copied over unchanged.
    n_channels = features.shape[1]
    new_df_data = {col: prediction[:, i] for i, col in enumerate(df.columns[:n_channels])}
    if len(df.columns) > n_channels:
        passthrough_col = df.columns[n_channels]
        new_df_data[passthrough_col] = df[passthrough_col]

    new_df = pd.DataFrame(new_df_data)

    gt_path = os.path.join(sub_model_path, "GT_{}.jpg".format(show_i))
    cf.PlotRecordData(df, False, False, False, gt_path, False)
    pred_path = os.path.join(sub_model_path, "Prediction_{}.jpg".format(show_i))
    cf.PlotRecordData(new_df, False, False, False, pred_path, False)