In [119]:
import tensorflow as tf
import os
import pandas as pd
import strym
from strym import strymread
import matplotlib.pyplot as plt
from pylab import rcParams
import strym.DBC_Read_Tools as dbc
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from collections import Counter
from tqdm import tqdm

In [120]:
def load_dataframes(csv_paths=None):
    '''Loads one dataframe per CAN-message CSV file.

    Each file is opened with `strymread` and its `.dataframe` attribute is
    collected.

    Args:
        csv_paths: optional iterable of CSV file paths. When omitted, the
            single 2021-01-28 recording used throughout this notebook is
            loaded.

    Returns:
        A list of dataframes, one per path, in the order given.
    '''
    if csv_paths is None:
        csv_paths = ["raw_data/libpanda/2021_01_28/2021-01-28-19-15-41_2T3Y1RFV8KC014025_CAN_Messages.csv"]
    return [strymread(path).dataframe for path in csv_paths]

In [121]:
def convert_messages(df):
    '''Converts the messages from hexadecimal string values to integers.

    The column at position 3 is parsed with base 16. The input dataframe is
    left untouched: the notebook passes shallow copies/slices around, and
    writing into them would silently alter the source data as well.

    Args:
        df: dataframe whose column at position 3 holds hexadecimal strings.
            Column labels are assumed to be unique.

    Returns:
        A copy of `df` with that column replaced by the parsed integers.

    Raises:
        ValueError: if a value is not a valid hexadecimal string.
    '''
    converted = df.copy()
    message_column = converted.columns[3]
    # Parse everything in one pass and replace the column wholesale instead
    # of writing cell by cell, which is slow and keeps the old string dtype.
    converted[message_column] = [int(value, 16) for value in converted.iloc[:, 3]]
    return converted

In [122]:
def convert_coords(df):
    '''Converts the GPS coordinates from absolute latitude and longitude to delta latitude and longitude.

    The columns at positions 5 and 6 are replaced by the difference between
    each row and the previous one; the first row gets a delta of 0. The
    input dataframe is not modified.

    Args:
        df: dataframe whose columns at positions 5 and 6 hold numeric
            coordinates (latitude and longitude per the original docstring).
            Column labels are assumed to be unique.

    Returns:
        A copy of `df` with those two columns replaced by row-to-row deltas.
        An empty dataframe is returned as an (empty) copy.
    '''
    result = df.copy()
    for position in (5, 6):
        values = result.iloc[:, position].to_numpy(dtype=float)
        deltas = np.zeros(len(values), dtype=float)
        # The first row has no predecessor, so its delta stays 0.
        if len(values) > 1:
            deltas[1:] = np.diff(values)
        result[result.columns[position]] = deltas
    return result

In [144]:
def filter_IDs(df, irrelevant_ids=(1568, 1570)):
    '''Filters out the IDs that are not relevant to the vehicle's location.

    Rows are selected with a boolean mask rather than dropped by index
    label: dropping by label removes every row that shares the label, so
    rows with a duplicated index value would be lost along with the
    irrelevant one.

    Args:
        df: dataframe with a "MessageID" column.
        irrelevant_ids: message IDs to remove. Defaults to 1568 and 1570.

    Returns:
        A new dataframe containing only the rows whose "MessageID" is not in
        `irrelevant_ids`. `df` itself is not modified.

    Raises:
        KeyError: if `df` has no "MessageID" column.
    '''
    is_irrelevant = df["MessageID"].isin(list(irrelevant_ids))
    return df[~is_irrelevant]

In [124]:
def to_histogram(interval, df, message_ids):
    '''Converts the dataframe to a histogram representation.

    For every row, counts how often each message ID occurred in the time
    window that ends at that row and reaches `interval` back in time (both
    ends inclusive). One count column is added per distinct message ID,
    labelled with the ID itself.

    Args:
        interval: length of the look-back window.
            NOTE(review): interpreted as seconds - confirm the intended unit.
        df: dataframe with a "MessageID" column and a time-based index
            (e.g. a DatetimeIndex), which the rolling window requires.
        message_ids: iterable of message IDs to count; duplicates are
            ignored.

    Returns:
        A copy of `df` (sorted by index if it was not already) with one
        integer count column per distinct message ID. `df` is not modified.
    '''
    window = pd.Timedelta(seconds=interval)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    distinct_ids = list(dict.fromkeys(message_ids))

    histogram = df.copy()
    # Time-offset rolling windows need a monotonic index.
    if not histogram.index.is_monotonic_increasing:
        histogram = histogram.sort_index(kind="stable")

    observed_ids = histogram["MessageID"]
    for message_id in distinct_ids:
        occurrences = (observed_ids == message_id).astype(float)
        counts = occurrences.rolling(window, closed="both").sum()
        histogram[message_id] = counts.astype(int)

    return histogram

In [125]:
def to_np(df):
    '''Returns the contents of the dataframe as a numpy array.'''
    return df.to_numpy()

In [126]:
def window_data(data, sequence_length=5):
    '''Segments the data into chunks by sliding a window over the array.

    A window of `sequence_length` consecutive rows is taken at every start
    position from 0 up to (but not including) `rows - sequence_length`, and
    each window is flattened into a single row.

    NOTE(review): this yields `rows - sequence_length` windows, so the last
    possible full window (the one ending on the final row) is not emitted.
    That matches the original behaviour; confirm whether it is intended.

    Args:
        data: array-like whose first axis is the sample axis.
        sequence_length: number of consecutive rows per window (default 5).

    Returns:
        A 2-D array of shape (rows - sequence_length, sequence_length * F),
        where F is the number of values per row. When there are not enough
        rows for a single window, an empty array of shape
        (0, sequence_length * F) is returned instead of raising.
    '''
    data = np.asarray(data)
    window_count = data.shape[0] - sequence_length
    if window_count <= 0:
        values_per_row = int(np.prod(data.shape[1:]))
        return np.empty((0, sequence_length * values_per_row), dtype=data.dtype)

    windows = np.stack([data[start:start + sequence_length] for start in range(window_count)])
    # Flatten each window so every output row holds one whole sequence.
    return windows.reshape(window_count, -1)

In [127]:
def split_data(data):
    '''Splits the data into training and testing data and labels.

    Features and labels are taken from columns of `data`, then a 10-fold
    KFold split is applied and the per-fold pieces are concatenated.

    NOTE(review): the original selected `data[:][:4, 8:]` and `data[:][5:7]`,
    which slice rows (the leading `[:]` is a no-op) and give X and Y
    different lengths. This version reads them as column selections:
    features are columns 0-3 plus columns 8 onward, labels are columns 5-6.
    Confirm these indices against the real column layout.

    NOTE(review): because every fold is concatenated, the returned test
    arrays cover every sample once and the train arrays repeat samples
    across folds, so train and test overlap. This is preserved from the
    original; confirm it is what downstream code expects.

    Args:
        data: 2-D numpy array with at least 10 rows (KFold needs at least as
            many samples as splits).

    Returns:
        Tuple (Xtrain, Ytrain, Xtest, Ytest) of numpy arrays.
    '''
    X = np.concatenate([data[:, :4], data[:, 8:]], axis=1)  # columns for data
    Y = data[:, 5:7]  # columns for labels

    kf = KFold(n_splits=10)

    Xtrain, Xtest, Ytrain, Ytest = [], [], [], []

    for train_index, test_index in kf.split(X):
        print("TRAIN:", train_index, "TEST:", test_index)
        Xtrain.append(X[train_index])
        Xtest.append(X[test_index])
        # The original indexed an undefined lowercase `y` here (NameError).
        Ytrain.append(Y[train_index])
        Ytest.append(Y[test_index])

    Xtrain = np.concatenate(Xtrain)
    Xtest = np.concatenate(Xtest)
    Ytrain = np.concatenate(Ytrain)
    Ytest = np.concatenate(Ytest)

    return Xtrain, Ytrain, Xtest, Ytest

In [128]:
# Load the recorded CAN data; load_dataframes() returns a list of dataframes.
dataframes = load_dataframes()

[2021_07_27_14_28_33] (root) INFO: Vehicle model infered is toyota-rav4-2019


In [145]:
# Accumulator read by a later cell (np.concatenate(datalist)).
# NOTE(review): nothing in this cell appends to it, so it is still empty afterwards.
datalist = []
# Only the first `debug_num_samples` rows of each dataframe are processed below.
debug_num_samples = 50

# Keep the column at position 3 from whichever dataframe has the most rows.
# NOTE(review): the name suggests message IDs, but filter_IDs reads the IDs from
# the "MessageID" column by label - confirm that position 3 is the right column.
messageIDs = []
for i, df in enumerate(dataframes):
    m = df.iloc[:, 3]
    if len(messageIDs) < len(m):
        messageIDs = m

for i, dataframe in enumerate(dataframes):
    print('starting dataframe', i+1)

    # `df` is the untouched reference slice; `df_copy` is the working frame.
    # NOTE(review): copy(deep=False) is a shallow copy - presumably it still shares
    # data with `df`, so in-place edits could show up in both; verify.
    df = dataframe.iloc[:debug_num_samples]
    df_copy = df.copy(deep=False)
    

    # Report whether the filtering step actually removed any rows.
    df_copy = filter_IDs(df_copy).copy(deep=False)
    if not df.equals(df_copy):
        print('filtered ids')
    else:
        print('Dataframe is not being changed.')
    
    
    # The remaining pipeline stages are disabled.
    # NOTE(review): they compare against `og_df`, which is not defined in this cell.
#     df_copy = convert_messages(df_copy)
#     if not og_df.equals(df_copy):
#         print('converted messages')
#     else:
#         print('Dataframe is not being changed.')


#     df_copy = convert_coords(df_copy)
#     if not og_df.equals(df_copy):
#         print('converted coordinates')
#     else:
#         print('Dataframe is not being changed.')


#     df = to_histogram(3, df, messageIDs)
#     if not og_df.equals(df.copy):
#         print('converted to histogram')
#     else:
#         print('Dataframe is not being changed.)


starting dataframe 1
Dataframe is not being changed.


In [None]:
# Show the shape of every collected array, join them along the first axis
# and save the combined array to 'all_data.npy'.
# NOTE(review): np.concatenate raises ValueError for an empty list, so `datalist`
# must have been filled before this cell is run.
print([d.shape for d in datalist])
all_data = np.concatenate(datalist)
np.save('all_data.npy', all_data)

In [None]:
# Derive the train/test feature and label arrays from the combined array.
Xtrain, Ytrain, Xtest, Ytest = split_data(all_data)

In [None]:
# NOTE(review): `messageIDs` starts as a plain list (no .keys()) and is otherwise a
# column taken with df.iloc[:, 3]; on a pandas Series, .keys() returns the index
# labels rather than the values - confirm this prints what is intended.
print(list(messageIDs.keys()))