In [None]:
import os
import time
import kthread
import threading
import env_vars
import csv
import random
import ast
import helpers
import torch as T
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset 
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn import preprocessing
import copy
from tqdm import tqdm


In [4]:
configs = None

# Load the parameter metadata produced by the parser. Each data row becomes one
# entry in `configs`, keyed by the value in the row's first column.
with open(os.path.join(env_vars.PROJ_HOME, 'src/VSM/parser/output_copter.csv')) as csv_file:
    configs = {}
    file_data = csv.reader(csv_file, delimiter=',')
    headers = next(file_data)  # the first CSV row holds the column names
    for row in file_data:
        row_dict = dict(zip(headers, row))

        configs[row[0]] = dict(
            name=row_dict['Name'],
            description=row_dict['Description'],
            # blank cell -> no range; otherwise the space-separated tokens (kept as strings)
            range=row_dict['Range'].split(' ') if row_dict['Range'] != '' else [],
            # blank cell -> None; otherwise the increment as a float
            increment=float(row_dict['Increment']) if row_dict['Increment'] != '' else None,
            # '|'-separated entries; empty fragments are dropped, so a blank cell yields []
            value=[v for v in row_dict['Value'].split('|') if v != ''],
        )

# Result directories of the random runs, one per (scenario, parameter count) pair.
rt1_5p_dir = '/home/anon/Documents/dronefuzzingresearch/results/5_params/A.RTL1/rand/'
rt1_10p_dir = '/home/anon/Documents/dronefuzzingresearch/results/10_params/A.RTL1/rand/'
rt1_15p_dir = '/home/anon/Documents/dronefuzzingresearch/results/15_params/A.RTL1/rand/'
rt1_20p_dir = '/home/anon/Documents/dronefuzzingresearch/results/20_params/A.RTL1/rand/'
rt2_5p_dir = '/home/anon/Documents/dronefuzzingresearch/results/5_params/A.RTL2/rand/'
rt2_10p_dir = '/home/anon/Documents/dronefuzzingresearch/results/10_params/A.RTL2/rand/'
rt2_15p_dir = '/home/anon/Documents/dronefuzzingresearch/results/15_params/A.RTL2/rand/'
rt2_20p_dir = '/home/anon/Documents/dronefuzzingresearch/results/20_params/A.RTL2/rand/'

Helper Functions

In [5]:
def normalize_log(field, value):
    """Min-max scale a logged value with the bounds in ``env_vars.state_ranges``.

    Args:
        field: Name of the log column the value belongs to.
        value: Raw value of that column.

    Returns:
        ``(value - min) / (max - min)`` when ``field`` has an entry in
        ``env_vars.state_ranges``; otherwise ``value`` unchanged.
    """
    # Fields without configured bounds (e.g. columns that are already 0/1) pass through.
    if field not in env_vars.state_ranges:
        return value

    bounds = env_vars.state_ranges[field]
    low = float(bounds['min'])
    high = float(bounds['max'])
    return (value - low) / (high - low)
    
def normalize_param(param, value):
    """Min-max scale a parameter value using its range from ``configs``.

    Args:
        param: Key into the module-level ``configs`` dict.
        value: Raw parameter value.

    Returns:
        ``(value - min) / (max - min)`` where min and max are the first two
        entries of ``configs[param]['range']`` converted to float.
    """
    bounds = configs[param]['range']
    low = float(bounds[0])
    high = float(bounds[1])
    return (value - low) / (high - low)

def one_hot_column(row, target_mode):
    """Return 1 when ``row['MODE']`` equals ``target_mode``, else 0."""
    return 1 if row['MODE'] == target_mode else 0

def extract_logs(dir, target_scenario):
    """Write a model-ready copy of the streamed log of every iteration under ``dir``.

    For each sub-directory ``<iteration>`` of ``dir`` this reads
    ``streamed_logs_<iteration>.csv``, keeps the scenario's state columns,
    replaces the ``MODE`` column with one 0/1 column per mode
    (``MODE_<mode>``), orders the columns case-insensitively by name and writes
    the result to ``sorted_streamed_logs_<iteration>.csv`` in the same directory.

    Args:
        dir: Directory holding one sub-directory per iteration. A trailing
            path separator is optional.
        target_scenario: Key into ``env_vars.simulation_vars``; its
            ``'state_cols'`` and ``'modes_vec'`` entries select the columns and
            the modes to encode.

    Returns:
        None. The function only writes CSV files.
    """
    scenario_vars = env_vars.simulation_vars[target_scenario]

    # os.path.join instead of string concatenation, so `dir` works with or
    # without a trailing separator.
    sorted_rand_iterations = sorted(
        d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))
    )

    for iteration in sorted_rand_iterations:
        iteration_dir = os.path.join(dir, iteration)
        raw_log_path = os.path.join(iteration_dir, 'streamed_logs_' + iteration + '.csv')
        sorted_log_path = os.path.join(iteration_dir, 'sorted_streamed_logs_' + iteration + '.csv')

        log_df = pd.read_csv(raw_log_path, index_col=0)
        # .copy() gives an independent frame, so adding the MODE_* columns below
        # does not write into a slice of the frame that was just read.
        log_df = log_df[scenario_vars['state_cols'] + ['MODE']].copy()

        # One-hot encode the mode (one column per mode). The vectorised
        # comparison gives the same 0/1 integers as a row-wise apply.
        for mode in scenario_vars['modes_vec']:
            log_df["MODE_" + mode] = (log_df['MODE'] == mode).astype(int)
        log_df = log_df.drop(columns=['MODE'])

        log_df[sorted(log_df.columns, key=lambda x: x.lower())].to_csv(sorted_log_path)

        # The parameter records are deliberately not rewritten here: per the
        # original note they are already sorted by top_k.


Datasets

In [6]:
# Pre-process the streamed logs of one result set. Only the 20-parameter
# A.RTL2 directory is active; the commented-out calls are the other result
# directories defined in the config cell.
# extract_logs(rt1_5p_dir, 'A.RTL1')
# extract_logs(rt1_10p_dir, 'A.RTL1')
# extract_logs(rt1_15p_dir, 'A.RTL1')
# extract_logs(rt1_20p_dir, 'A.RTL1')
# extract_logs(rt2_5p_dir, 'A.RTL2')
# extract_logs(rt2_10p_dir, 'A.RTL2')
extract_logs(rt2_20p_dir, 'A.RTL2')
# extract_logs(rt2_15p_dir, 'A.RTL2')


Mine Extracted Logs for Training Data

In [7]:
def mine_datapoints(dir, scenario):
    """Build normalized training samples for every iteration directory under ``dir``.

    For each iteration three CSV files are read: ``violation_scores_*``,
    ``params_records_*`` and ``sorted_streamed_logs_*``. Every timestamp in the
    parameter records is then paired with:

    * a violation score: the maximum of the ``scenario`` column over up to 15
      violation rows, starting at the first row whose timestamp is later than
      the parameter timestamp (0 when there is no such row);
    * ``this_state``: the first log row whose timestamp is later than the
      parameter timestamp, and ``next_state``: the log row right after it.

    Both files are scanned with cursors that only move forward, so the
    parameter timestamps are expected to be in ascending order. No sample is
    produced for a parameter set whose matching log row is one of the first
    five rows, or is the last row (there is no next state for it).

    Args:
        dir: Directory holding one sub-directory per iteration. A trailing
            path separator is optional.
        scenario: Name of the violation-score column to read.

    Returns:
        dict mapping iteration name to a list of samples. Each sample is a dict
        with keys ``'param_set_time'``, ``'params_set'``, ``'this_state'``,
        ``'next_state'`` (lists of normalized values) and ``'violation'``.

    Note:
        Errors raised while pairing rows (for example a malformed timestamp)
        are reported with ``print`` and end the processing of that iteration
        only; samples collected so far are kept. Missing CSV files still raise.
    """
    time_format = '%H:%M:%S.%f'
    violation_window = 15   # number of violation rows inspected after a parameter change
    min_log_rows = 5        # matches within the first rows of a log are skipped

    iteration_state_params = {}
    # os.path.join instead of string concatenation, so `dir` works with or
    # without a trailing separator.
    sorted_rand_iterations = sorted(
        d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))
    )

    for iteration in sorted_rand_iterations:
        iteration_dir = os.path.join(dir, iteration)

        vio_df = pd.read_csv(os.path.join(iteration_dir, 'violation_scores_' + iteration + '.csv'), index_col=0)
        params_df = pd.read_csv(os.path.join(iteration_dir, 'params_records_' + iteration + '.csv'), index_col=0)
        log_df = pd.read_csv(os.path.join(iteration_dir, 'sorted_streamed_logs_' + iteration + '.csv'), index_col='time_stamp')

        iteration_state_params[iteration] = []

        vio_count = len(vio_df.index)
        log_count = len(log_df.index)

        # Forward-only cursors shared by all parameter timestamps of this iteration.
        log_pos = 0
        vio_pos = 0

        try:
            for param_ts in params_df.index:
                # Parsed once per parameter set instead of once per comparison.
                param_time = datetime.strptime(param_ts, time_format)

                # Advance to the first violation row after the parameter change.
                vio_score = 0
                while vio_pos < vio_count:
                    if datetime.strptime(vio_df.index[vio_pos], time_format) > param_time:
                        window = vio_df.iloc[vio_pos: vio_pos + violation_window]
                        vio_score = max(window[scenario].tolist())
                        break
                    vio_pos += 1

                # Advance to the first log row after the parameter change.
                while log_pos < log_count:
                    if datetime.strptime(log_df.index[log_pos], time_format) > param_time:
                        # Skip matches at the very start of the log, and the last
                        # row, which has no successor to use as the next state.
                        if log_pos < min_log_rows or log_pos + 1 >= log_count:
                            break

                        params_set = params_df.loc[param_ts].to_dict()
                        this_state = log_df.iloc[log_pos].to_dict()
                        next_state = log_df.iloc[log_pos + 1].to_dict()

                        iteration_state_params[iteration].append({
                            'param_set_time': param_ts,
                            'params_set': [normalize_param(p, v) for p, v in params_set.items()],
                            'this_state': [normalize_log(l, v) for l, v in this_state.items()],
                            'next_state': [normalize_log(l, v) for l, v in next_state.items()],
                            'violation': vio_score
                        })
                        break
                    log_pos += 1

        except Exception as e:
            # Best effort: keep what was mined so far and move on, but say
            # which iteration was cut short.
            print('mine_datapoints: stopped iteration', iteration, 'early:', repr(e))

    return iteration_state_params

In [8]:
# Mine training samples from the pre-processed logs. Only the 20-parameter
# A.RTL2 result set is active; it relies on the sorted_streamed_logs_* files
# written by extract_logs for the same directory.
# rt1_5p_datapoints = mine_datapoints(rt1_5p_dir, 'A.RTL1')
# rt1_10p_datapoints = mine_datapoints(rt1_10p_dir, 'A.RTL1')
# rt1_15p_datapoints = mine_datapoints(rt1_15p_dir, 'A.RTL1')
# rt1_20p_datapoints = mine_datapoints(rt1_20p_dir, 'A.RTL1')
# rt2_5p_datapoints = mine_datapoints(rt2_5p_dir, 'A.RTL2')
# rt2_10p_datapoints = mine_datapoints(rt2_10p_dir, 'A.RTL2')
rt2_20p_datapoints = mine_datapoints(rt2_20p_dir, 'A.RTL2')
# rt2_15p_datapoints = mine_datapoints(rt2_15p_dir, 'A.RTL2')

Train Surrogate Model for Violation Score Prediction

In [9]:
# Fully-connected (MLP) regressor: four linear layers, each followed by a sigmoid.
class Model(nn.Module):
    """Feed-forward regression network with an attached Adam optimizer.

    Layer sizes are ``input_dims -> 64 -> 64 -> 32 -> output_dims``. A sigmoid
    follows every layer, including the last one, so every output lies in (0, 1).

    Args:
        lr: Learning rate of the Adam optimizer stored in ``self.optimizer``.
        input_dims: Number of input features.
        output_dims: Number of predicted values.

    Attributes:
        optimizer: Adam optimizer over all parameters of the network.
        device: ``cuda:0`` when CUDA is available, otherwise ``cpu``; the module
            is moved to this device on construction.
    """

    def __init__(self, lr, input_dims, output_dims) -> None:
        super().__init__()
        self.lr = lr
        self.input_dims = input_dims
        self.output_dims = output_dims

        self.fc1 = nn.Linear(self.input_dims, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, self.output_dims)

        self.optimizer = optim.Adam(self.parameters(), lr = self.lr)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')

        self.to(self.device)

        # Custom normal initialisation for the three hidden layers; fc4 keeps
        # the nn.Linear default. The call order is kept, so a given random seed
        # yields the same initial weights as before.
        self.fc1.weight.data.normal_(0, 1e-1)
        self.fc2.weight.data.normal_(0, 1e-1)
        self.fc3.weight.data.normal_(0, 1e-2)
        self.fc1.bias.data.normal_(0, 1e-1)
        self.fc2.bias.data.normal_(0, 1e-1)
        self.fc3.bias.data.normal_(0, 1e-2)

    def forward(self, state):
        """Return the network output for ``state`` (last dimension ``input_dims``)."""
        # T.sigmoid is the non-deprecated spelling of nn.functional.sigmoid.
        hidden = T.sigmoid(self.fc1(state))
        hidden = T.sigmoid(self.fc2(hidden))
        hidden = T.sigmoid(self.fc3(hidden))
        return T.sigmoid(self.fc4(hidden))

    def save_checkpoint(self, chkpt_path):
        """Write the model's ``state_dict`` to ``chkpt_path``."""
        T.save(self.state_dict(), chkpt_path)

    def load_checkpoint(self, chkpt_path):
        """Load a ``state_dict`` saved by :meth:`save_checkpoint`.

        ``map_location`` remaps the stored tensors to this model's device, so a
        checkpoint written on a GPU machine also loads on a CPU-only one.
        """
        self.load_state_dict(T.load(chkpt_path, map_location=self.device))

In [10]:
class StateDataset(Dataset):
    """Torch dataset over the samples produced by ``mine_datapoints``.

    Each item is ``(x, y)`` where ``x`` is the sample's ``'this_state'`` values
    followed by its ``'params_set'`` values, and ``y`` is its ``'violation'``
    score as a scalar (0-dim) float tensor.
    """

    def __init__(self, iterations):
        """
        Args:
            iterations (dict): Maps an iteration name to a list of sample
                dicts. The samples of all iterations are pooled, in the dict's
                iteration order.
        """
        pooled = []
        for samples in iterations.values():
            pooled += samples

        self.data = np.array(pooled)

    def __len__(self):
        """Number of pooled samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair of sample ``idx``."""
        sample = self.data[idx]
        state_part = T.tensor(sample['this_state'], dtype=T.float)
        param_part = T.tensor(sample['params_set'], dtype=T.float)

        # The feature vector is the state first, then the parameters.
        features = T.cat((state_part, param_part), dim=0)
        # The alternative target, sample['next_state'], is currently unused.
        target = T.tensor(sample['violation'], dtype=T.float)
        return features, target


In [11]:
# Wrap the mined samples in a torch Dataset. Only the 20-parameter A.RTL2
# samples are active, since that is the only result set mined in the cell above.
# rtl1_5p_dataset = StateDataset(rt1_5p_datapoints)
# rtl1_10p_dataset = StateDataset(rt1_10p_datapoints)
# rtl1_15p_dataset = StateDataset(rt1_15p_datapoints)
# rtl1_20p_dataset = StateDataset(rt1_20p_datapoints)
# rtl2_5p_dataset = StateDataset(rt2_5p_datapoints)
# rtl2_10p_dataset = StateDataset(rt2_10p_datapoints)
# rtl2_15p_dataset = StateDataset(rt2_15p_datapoints)
rtl2_20p_dataset = StateDataset(rt2_20p_datapoints)


### Surrogate Model

In [12]:
def _mean_validation_loss(model, loss_fn, loader):
    """Average ``loss_fn`` over the batches of ``loader`` without tracking gradients.

    Returns ``nan`` when the loader yields no batch (empty validation split).
    """
    batch_losses = []
    with T.no_grad():
        for inputs, targets in loader:
            preds = model(inputs.to(model.device))
            # Give the targets the prediction's shape, so the loss is not broadcast.
            targets = targets.to(model.device).reshape(preds.shape)
            batch_losses.append(loss_fn(preds, targets).item())
    if not batch_losses:
        return float('nan')
    return sum(batch_losses) / len(batch_losses)


def surrogate_model(dataset, scenario, params):
    """Train the violation-score surrogate on ``dataset`` and save its checkpoint.

    The dataset is split 90/10 at random into training and validation parts. A
    ``Model`` with a single output is trained for 50 epochs with MSE loss and
    batches of 32. Every 50 training batches the mean training loss since the
    previous evaluation and the mean validation loss are recorded; both curves
    are plotted at the end. The trained weights are written to
    ``env_vars.MODELS + scenario + '_' + str(params) + 'params'``.

    Args:
        dataset: Dataset yielding ``(features, target)`` pairs, e.g. a
            ``StateDataset``.
        scenario: Key into ``env_vars.simulation_vars``; its state columns and
            modes determine how many state features the model expects.
        params: Number of parameter features per sample; added to the state
            feature count to size the model input.

    Returns:
        None.
    """
    n_epochs = 50
    batch_size = 32
    eval_every = 50  # training batches between two loss recordings

    n_train = round(len(dataset) * .90)
    train_ds, test_ds = T.utils.data.random_split(dataset, (n_train, len(dataset) - n_train))
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    # No shuffling for validation: fixed batches keep successive values comparable.
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    scenario_vars = env_vars.simulation_vars[scenario]
    model = Model(
        0.0005,
        len(scenario_vars['state_cols']) + len(scenario_vars['modes_vec']) + params,
        1)
    mse = nn.MSELoss()

    step = 0
    loss_history = []
    recent_train_losses = []
    val_loss_history = []

    print("Training", scenario, params, 'params')
    for epoch in tqdm(range(n_epochs)):
        for inputs, targets in train_dl:
            # The model lives on model.device, so the batch must be moved there too.
            inputs = inputs.to(model.device)

            model.optimizer.zero_grad()
            preds = model(inputs)
            # Predictions are (batch, 1) but the dataset yields scalar targets,
            # i.e. a (batch,) tensor. Without the reshape MSELoss broadcasts the
            # two to (batch, batch) and optimises the wrong quantity.
            targets = targets.to(model.device).reshape(preds.shape)
            loss = mse(preds, targets)
            loss.backward()
            model.optimizer.step()

            recent_train_losses.append(loss.item())
            if step % eval_every == 0:
                loss_history.append(sum(recent_train_losses) / len(recent_train_losses))
                recent_train_losses = []
                val_loss_history.append(_mean_validation_loss(model, mse, test_dl))
            step += 1

    plt.figure()
    plt.plot(range(len(loss_history)), loss_history, label='Training Loss')
    plt.plot(range(len(val_loss_history)), val_loss_history, label='Validation Loss')
    plt.xlabel('Evaluation (every ' + str(eval_every) + ' batches)')
    plt.ylabel('MSE')
    plt.legend()
    plt.show()
    plt.clf()

    model.save_checkpoint(env_vars.MODELS + scenario + '_' + str(params) + 'params')

In [None]:
# Train and checkpoint the surrogate for the 20-parameter A.RTL2 dataset.
# The last argument is added to the state feature count to size the model
# input, so it has to match the number of parameters in each sample.
# surrogate_model(rtl1_5p_dataset, 'A.RTL1', 5)
# surrogate_model(rtl1_10p_dataset, 'A.RTL1', 10)
# surrogate_model(rtl1_15p_dataset, 'A.RTL1', 15)
# surrogate_model(rtl1_20p_dataset, 'A.RTL1', 20)
# surrogate_model(rtl2_5p_dataset, 'A.RTL2', 5)
# surrogate_model(rtl2_10p_dataset, 'A.RTL2', 10)
# surrogate_model(rtl2_15p_dataset, 'A.RTL2', 15)
surrogate_model(rtl2_20p_dataset, 'A.RTL2', 20)