In [None]:
!rm -r sample_data

!pip install wandb
!pip install torchmetrics

!git clone https://github.com/benjamin32561/Cloud-Wise-ML.git

In [None]:
from google.colab import drive
# Mount Google Drive under /content/drive. Later cells read the dataset
# (the `cp` cell) and the pretrained backbone (BACKBONE_LOCATION) from this mount.
drive.mount('/content/drive')

In [None]:
import sys
import os
# Put the cloned repo's Classifier training folder on the import path.
# Presumably this is where `common_constants` / `common_functions`
# (imported in the next cell) live -- verify against the repository layout.
sys.path.append(os.path.abspath('/content/Cloud-Wise-ML/training/Classifier'))

In [None]:
import torch
import wandb
import pandas as pd
import numpy as np
import common_constants as cc
import common_functions as cf
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader
from copy import deepcopy
from random import randint
from torchmetrics.classification import BinaryPrecision, BinaryRecall, BinaryAccuracy

In [None]:
class LinearAutoEncoder(torch.nn.Module):
	"""Skeleton auto-encoder with an (initially empty) encoder and decoder.

	Both sub-networks are empty ``Sequential`` containers, so a freshly
	constructed instance simply returns its flattened input.

	NOTE(review): this class is presumably kept so that ``torch.load`` can
	resolve the class of a pickled auto-encoder whose ``encoder`` is reused as
	the classifier backbone -- confirm against the auto-encoder training code.
	"""

	def __init__(self):
		super().__init__()
		# Placeholders: no layers are declared here.
		self.encoder = torch.nn.Sequential()
		self.decoder = torch.nn.Sequential()

	def forward(self, x):
		"""Flatten everything after the batch axis, then encode and decode."""
		flat = torch.flatten(x, start_dim=1)
		return self.decoder(self.encoder(flat))

In [None]:
class LinearClassifier(torch.nn.Module):
	"""Binary classifier: a linear backbone followed by a small sigmoid head.

	Args:
		input_layer_size: number of features per sample after flattening.
			Only used when the backbone is built from scratch.
		backbone_location: optional path to a model saved with ``torch.save``
			whose ``encoder`` attribute is reused as the backbone. When ``None``
			a fresh two-layer backbone (input -> 1024 -> 512) is created.

	Raises:
		ValueError: if no layer of the backbone exposes ``out_features``, so
			the input width of the classification head cannot be determined.
	"""

	def __init__(self,input_layer_size,backbone_location=None):
		super().__init__()

		if backbone_location is None:
			self.backbone = torch.nn.Sequential(
				torch.nn.Linear(input_layer_size, 1024),
				torch.nn.ReLU(),
				torch.nn.Linear(1024, 512),
				torch.nn.ReLU(),
			)
		else:
			# SECURITY: torch.load unpickles arbitrary objects -- only load
			# checkpoints from a trusted source.
			# map_location="cpu" lets a checkpoint saved on a GPU be loaded on
			# a CPU-only host; callers move the whole model with .to(device).
			self.backbone = torch.load(backbone_location, map_location="cpu").encoder

		self.classification_head = torch.nn.Sequential(
			torch.nn.Linear(self.GetLastLayerOutput(), 128),
			torch.nn.ReLU(),
			torch.nn.Linear(128, 1),
			torch.nn.Sigmoid()
		)

	def BackboneMode(self,train=False):
		"""Put only the backbone in train (``True``) or eval (``False``) mode.

		This toggles the module mode only; it does not change ``requires_grad``,
		so the backbone parameters are not frozen by this call.
		"""
		self.backbone.train(train)

	def forward(self, x):
		"""Flatten everything after the batch axis and return the sigmoid output."""
		x = torch.flatten(x, start_dim=1)
		x = self.backbone(x)
		x = self.classification_head(x)
		return x

	def GetLastLayerOutput(self):
		"""Return the output width of the backbone.

		Scans the backbone's modules from last to first and returns the
		``out_features`` of the first one that has it. This works whether or not
		the backbone ends with an activation, and for nested containers.
		"""
		for layer in reversed(list(self.backbone.modules())):
			out_features = getattr(layer, "out_features", None)
			if out_features is not None:
				return out_features
		raise ValueError("backbone has no layer exposing `out_features`")

In [None]:
# ---- Run configuration: everything a reader might want to tune ----

# Use the first CUDA device when available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Weights & Biases project / run name; RUN_NAME also names the output folder
# created under the LinearModel directory.
WANDB_PROJECT_NAME = "Classifier"
RUN_NAME = "2_3"

# Flattened input width passed to LinearClassifier. It is only used when the
# backbone is built from scratch (i.e. when BACKBONE_LOCATION is None).
INPUT_LAYER_SIZE = 2031

# The training loop iterates over range(START_FROM, EPOCHS).
EPOCHS = 20
START_FROM = 0

# Number of randomly drawn test samples plotted in the inspection cell.
SHOW_N_TESTS = 10

# Batch sizes for the train / test DataLoaders.
TRAIN_BATCH_SIZE = 1
TEST_BATCH_SIZE = 4

# Path of a saved model whose `.encoder` is reused as the classifier backbone.
BACKBONE_LOCATION = "/content/drive/MyDrive/ML/training/AutoEncoder/LinearModel/2/models/best.pt" # set to None to build a fresh backbone

In [None]:
!cp -r /content/drive/MyDrive/ML/accident_or_not_dataset /content/accident_or_not_dataset

In [None]:
# NOTE: %cd changes the kernel's working directory for all following cells.
%cd /content/Cloud-Wise-ML/data_analysis

# Presumably writes the list of record paths with their class labels to
# /content/record_class.txt, driven by folder_class_dict.txt -- confirm in
# save_xlsx_paths.py.
!python save_xlsx_paths.py --folder_class_dict_file /content/Cloud-Wise-ML/training/Classifier/folder_class_dict.txt --txt_file_location /content/record_class.txt

# Presumably splits that list into the train / test files read through
# cc.TRAIN_TXT_PATH and cc.TEST_TXT_PATH -- confirm in split_txt.py.
!python split_txt.py --txt_file_location /content/record_class.txt

In [None]:
# Ensure the root output directory for models exists (CreatePath is defined in
# common_functions; presumably it creates the directory when missing).
cf.CreatePath(cc.MODELS_PATH)

In [None]:
# Start the Weights & Biases run. Authenticate with `wandb login` or the
# WANDB_API_KEY environment variable -- never paste a key into the notebook.
# SECURITY: a 40-character token that looks like an API key used to be stored
# in a comment on this line. It has been removed here and should be revoked.
wandb.init(project=WANDB_PROJECT_NAME,name=RUN_NAME)

In [None]:
# Datasets are built from the train / test record lists.
train_dataset = cf.ClassifierAcceleratorDataset(cc.TRAIN_TXT_PATH)
test_dataset = cf.ClassifierAcceleratorDataset(cc.TEST_TXT_PATH)

# Both loaders shuffle; they differ only in dataset and batch size.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Output layout: <MODELS_PATH>/LinearModel/<RUN_NAME>/models
sub_project_path = os.path.join(cc.MODELS_PATH, 'LinearModel')
sub_model_path = os.path.join(sub_project_path, RUN_NAME)
models_path = os.path.join(sub_model_path, 'models')

# Create the directories from the outermost to the innermost one.
cf.CreatePath(sub_project_path)
cf.CreatePath(sub_model_path)
cf.CreatePath(models_path)

# Classifier (optionally on top of a pretrained backbone), moved to DEVICE.
model = LinearClassifier(INPUT_LAYER_SIZE, BACKBONE_LOCATION)
model = model.to(DEVICE)

# Binary cross-entropy on the sigmoid output; Adam with default settings over
# all model parameters (backbone included).
loss_func = torch.nn.BCELoss()
optimizer = Adam(model.parameters()) #SGD(model.parameters())

In [None]:
# Train for EPOCHS epochs, evaluate on the test split after every epoch and
# keep a copy of the model with the best mean(accuracy, precision, recall).
best_val = -1
best_model = None
precision = BinaryPrecision().to(DEVICE)
recall = BinaryRecall().to(DEVICE)
accuracy = BinaryAccuracy().to(DEVICE)
n_batches = len(train_dataloader)

for epoch in range(START_FROM, EPOCHS):
    # ---- training ----
    model.train()
    # Switch the backbone back to eval mode. This changes the module mode only:
    # the backbone parameters are still handed to the optimizer and updated.
    model.BackboneMode()
    epoch_data = []
    running_loss = 0.0
    for idx, data in enumerate(train_dataloader):
        optimizer.zero_grad()
        x, y = data
        x = x.to(torch.float32).to(DEVICE)
        y = y.to(torch.float32).to(DEVICE)

        prediction = model(x)
        loss = loss_func(prediction, y)
        loss.backward()
        optimizer.step()

        batch_loss = float(loss)
        epoch_data.append(batch_loss)
        # Running mean for the progress line; avoids re-averaging the whole
        # loss history on every batch.
        running_loss += batch_loss
        print("",end='\rEpoch: {}/{} | Batch: {}/{} | loss: {}'.format(epoch,EPOCHS,idx,n_batches,running_loss/(idx+1)))
    epoch_loss = np.mean(epoch_data)

    # ---- evaluation ----
    model.eval()
    # Drop the gradients of the last training step so that the deepcopy below
    # does not have to carry them along.
    optimizer.zero_grad()
    val_data = []
    label_chunks = []
    prediction_chunks = []
    # No autograd graph is needed while evaluating.
    with torch.no_grad():
        for x, y in test_dataloader:
            x = x.to(torch.float32).to(DEVICE)
            y = y.to(torch.float32).to(DEVICE)

            prediction = model(x)

            # Column 0 holds the label / the sigmoid output; rounding the
            # output yields the predicted class.
            label_chunks.append(y[:, 0])
            prediction_chunks.append(torch.round(prediction)[:, 0])

            val_data.append(float(loss_func(prediction, y)))

    class_labels = torch.cat(label_chunks)
    class_predictions = torch.cat(prediction_chunks)

    val_loss = np.mean(val_data)
    val_pre = float(precision(class_predictions, class_labels))  # tp/(tp+fp)
    val_rec = float(recall(class_predictions, class_labels))     # tp/(tp+fn)
    val_acc = float(accuracy(class_predictions, class_labels))   # correct/total

    print('\rEpoch: {}/{} | train_loss: {} | val_loss: {} | val_acc: {} | val_per: {} | val_rec: {}\n'.format(epoch,EPOCHS,epoch_loss,val_loss,val_acc,val_pre,val_rec))

    # Keep the best model so far; ties go to the most recent epoch.
    val_met = np.mean([val_acc, val_pre, val_rec])
    if best_val <= val_met:
        best_val = val_met
        best_model = deepcopy(model)

    # Log only when a W&B run is active (i.e. the wandb.init cell was run).
    if wandb.run is not None:
        wandb.log({
            "epoch_loss": epoch_loss,
            "epoch_val_loss": val_loss,
            "epoch_val_acc": val_acc,
            "epoch_val_per": val_pre,
            "epoch_val_rec": val_rec,
            "epoch_val_metrics_avg": val_met,
        }, step=epoch)

# Persist the final and the best model; the next cell reloads best.pt.
last_path = os.path.join(models_path, 'last.pt')
best_path = os.path.join(models_path, 'best.pt')
torch.save(model, last_path)
if best_model is not None:  # None only when the epoch range was empty
    torch.save(best_model, best_path)

print("max validation metrics avg: {}".format(best_val))

In [None]:
# Reload the best checkpoint from models_path (best.pt must exist there).
# map_location keeps this working when the checkpoint was saved on another
# device than the current DEVICE; .eval() puts the model in inference mode.
# SECURITY: torch.load unpickles arbitrary objects -- load trusted files only.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects fully pickled modules -- confirm against the installed version.
model = torch.load(os.path.join(models_path,'best.pt'), map_location=DEVICE).eval()

In [None]:
# Plot SHOW_N_TESTS randomly drawn test records. Each file name carries the
# ground-truth class (GT) and the class predicted by the model (PRED).
n_test_sample = len(test_dataset)

model.eval()
for show_i in range(SHOW_N_TESTS):
    idx = randint(0, n_test_sample - 1)

    # The extra positional flag makes the dataset return a DataFrame
    # (see ClassifierAcceleratorDataset.__getitem__ in common_functions).
    df, class_label = test_dataset.__getitem__(idx, True)

    # All columns but the last one are fed to the model; a batch axis is added.
    model_input = torch.tensor(df.to_numpy()[:, :-1]).unsqueeze(0).to(torch.float32).to(DEVICE)

    # Pure inference: no autograd graph needed.
    with torch.no_grad():
        prediction = model(model_input)
    predicted_class = round(float(prediction))

    # Fix: the original passed the prediction into the GT slot and the label
    # into the PRED slot of the file name.
    gt_path = os.path.join(sub_model_path, "{}_GT_{}_PRED_{}.jpg".format(show_i, int(class_label), predicted_class))
    cf.PlotRecordData(df, False, False, False, gt_path, False)

Saving model as ONNX

In [None]:
# Target file of the exported model.
onnx_path = os.path.join(models_path, 'model.onnx')

# Take the first test sample only for its shape, prepend a batch axis and
# draw a random tensor of that shape to trace the model with.
x, _ = test_dataset[0]
x = np.expand_dims(x, 0)
dummy_input = torch.randn(x.shape)
dummy_input = dummy_input.to(DEVICE)

In [None]:
# Trace in inference mode.
model.eval()

# Write the model, including its trained weights, to `onnx_path`.
# Axis 0 of both the input and the output is declared dynamic, so the exported
# graph accepts any batch size.
torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    export_params=True,
    opset_version=10,
    do_constant_folding=True,
    input_names=['modelInput'],
    output_names=['modelOutput'],
    dynamic_axes={
        'modelInput': {0: 'batch_size'},
        'modelOutput': {0: 'batch_size'},
    },
)