In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
from ipywidgets import widgets
from sklearn import preprocessing
from tensorflow import keras 
from tensorflow.keras import layers, regularizers,Model, utils
%matplotlib inline 
import matplotlib.pyplot as plt
import sklearn
from tensorflow.keras.callbacks import TensorBoard
from datetime import time
import plotly.express as px
from sklearn.model_selection import train_test_split
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import plotly.express as px
from scipy.ndimage.filters import gaussian_filter
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from livelossplot import PlotLossesKeras

from os import listdir
import os
#thanks to https://stackoverflow.com/users/218681/bernhard-kausler
def find_csv_filenames( path_to_dir, suffix=".csv" ):
    """Return the names of the entries in ``path_to_dir`` that end with ``suffix``.

    Parameters
    ----------
    path_to_dir : str or path-like
        Directory to scan (not recursive).
    suffix : str
        Required filename ending; the comparison is case-sensitive.

    Returns
    -------
    list of str
        Bare entry names (not joined with ``path_to_dir``), sorted
        alphabetically. ``listdir`` makes no ordering guarantee, so sorting
        keeps the drive order identical from run to run.
    """
    return sorted( filename for filename in listdir(path_to_dir) if filename.endswith( suffix ) )

In [None]:
# Preprocessing hyper-parameters shared by the cells below.
smoothingSigma = 2        # sigma handed to the Gaussian smoothing filter
sequenceLength = 16       # number of consecutive samples in one input window
subsamplingPeriod = 32    # keep every 32nd row of each recorded drive

# Every CSV file found in the current working directory is treated as one drive.
workingDirectory = os.getcwd()
drivePaths = find_csv_filenames(workingDirectory)

In [None]:
# Columns that are removed to obtain the model inputs.
locationColumns = ["Time", "Longitude", "Latitude"]

drivesWithLocation, drivesWithoutLocation = [], []
for drivePath in drivePaths:
    # Read the drive and keep only every `subsamplingPeriod`-th row; the deep
    # copy detaches the subsampled frame from the full-size one.
    drive = pd.read_csv(drivePath).iloc[::subsamplingPeriod].copy(deep=True)
    drivesWithLocation.append(drive)

    # Same rows, but without the time stamp and GPS columns.
    driveWithoutLocation = drive.drop(columns=locationColumns)
    drivesWithoutLocation.append(driveWithoutLocation)

In [None]:
# Standardize every drive on its own: a fresh StandardScaler is fitted per
# drive, so the statistics of one drive never influence another.
normalizedDrives = []
for drive in drivesWithoutLocation:
    standard_scaler = preprocessing.StandardScaler().fit(drive)
    data_normalized = pd.DataFrame(standard_scaler.transform(drive))
    # Store the plain array; later cells slice it with NumPy indexing.
    normalizedDrives.append(data_normalized.values)

In [None]:
# Concatenate all normalized drives row-wise and report the combined shape.
roughData = np.concatenate(normalizedDrives, axis=0)
print(roughData.shape)

In [None]:
# Smooth every feature of every drive with a Gaussian filter and plot the
# smoothed signal on top of the normalized original for visual inspection.
# After the loop `roughData` / `smoothData` refer to the LAST drive only.
for k in range(len(drivePaths)):
    # Announce which file the following figures belong to.
    print(drivePaths[k])
    roughData = normalizedDrives[k]
    # Pre-filled with -1 so a column that was never written would stand out.
    smoothData = -1*np.ones(shape=roughData.shape)
    features = range(roughData.shape[1])
    for curFeature in features:
        print("smoothing %s"%curFeature)

        # Each feature column is filtered independently along the time axis.
        smoothData[:,curFeature] = gaussian_filter(roughData[:,curFeature], sigma=smoothingSigma)

        # Plot the original feature and the smoothed feature.
        scatterData = pd.DataFrame({
            "index": range(roughData.shape[0]),
            "smoothData": smoothData[:,curFeature],
            "originalData": roughData[:,curFeature],
        })

        fig1 = go.Figure()
        fig1.add_trace(go.Scatter(x=scatterData.index, y=scatterData.smoothData, name="gaussian smoothed data"))
        fig1.add_trace(go.Scatter(x=scatterData.index, y=scatterData.originalData, name="original data"))

        fig1.show()

In [None]:
# Signal names; presumably the columns left after dropping Time/Longitude/
# Latitude. NOTE(review): this list is not referenced by the visible cells.
features = ['Speed',
            'LatAcceleration',
            'LongAcceleration',
            'SteerTorque',
            'SteerRate',
            'SteerAngle',
            'FLWheelSpeed',
            'FRWheelSpeed',
            'RRWheelSpeed',
            'RLWheelSpeed']
windowedDrives = []

# Split every drive into overlapping windows of `sequenceLength` consecutive
# smoothed samples (stride 1). One array of windows is stored per drive.
for drive in normalizedDrives:
    # Smooth THIS drive column by column. Relying on a leftover global
    # `smoothData` would window only the last drive that was smoothed.
    smoothedDrive = np.empty(drive.shape)
    for curFeature in range(drive.shape[1]):
        smoothedDrive[:,curFeature] = gaussian_filter(drive[:,curFeature], sigma=smoothingSigma)

    # Start from a fresh list for each drive: the previous drive's windows
    # were converted to an ndarray, which has no `.append`.
    stackedData = [smoothedDrive[i:i+sequenceLength,:]
                   for i in range(smoothedDrive.shape[0]-sequenceLength)]
    stackedData = np.array(stackedData)
    windowedDrives.append(stackedData)

In [None]:
# Pair every window with its label: the change in (longitude, latitude)
# between the row where the window starts and the row `sequenceLength` later.
dataset = {"samples":[], "labels":[]}
for k, drive in enumerate(windowedDrives):
    # Position columns of the same (subsampled) drive the windows came from.
    located = drivesWithLocation[k]
    longitudes = located["Longitude"].to_numpy()
    latitudes = located["Latitude"].to_numpy()

    # Drop the trailing sequenceLength + 2 windows of each drive.
    # NOTE(review): this looks more conservative than the label lookup
    # needs -- confirm whether the extra trimming is intentional.
    usableWindows = drive[:-(sequenceLength + 2)]
    for start, window in enumerate(usableWindows):
        end = start + sequenceLength
        dataset["samples"].append(window)
        dataset["labels"].append([longitudes[end] - longitudes[start],
                                  latitudes[end] - latitudes[start]])

data = np.array(dataset["samples"])
labels = np.array(dataset["labels"])

print(data.shape)
print(labels.shape)

In [None]:
# Keep an untouched copy of the unscaled label deltas.
rawLabels = np.array(labels, copy=True)

# Rescale each label column with a MinMaxScaler (default range [0, 1]).
# The scaler is fitted on ALL labels, i.e. before the train/test split.
scaler = preprocessing.MinMaxScaler()
scaler.fit(rawLabels)
labels = scaler.transform(rawLabels)



In [None]:
import tfMI

# Number of leading samples used for the exploratory analysis.
numberOfEDASamples = 100

# Label columns are [longitude delta, latitude delta], so column 0 is the
# longitude target and column 1 the latitude target.
edaSubset = data[:numberOfEDASamples, :, :]
edaLongLabels = labels[:numberOfEDASamples, 0]
edaLatLabels = labels[:numberOfEDASamples, 1]

# Flatten every (time step, feature) window into one row vector so that
# mutual information between windows and latitude/longitude can be estimated.
windowCount, stepsPerWindow, featureCount = edaSubset.shape
flattenedEDAData = edaSubset.reshape((windowCount, stepsPerWindow * featureCount))
print(flattenedEDAData.shape)
print(edaLatLabels.shape)

In [None]:
# Run tfMI.callMIGPU on the flattened windows against each label column.
# Labels are passed as column vectors of shape (n, 1).
# NOTE(review): presumably returns a mutual-information estimate; tfMI is
# not visible here -- confirm the meaning of `alpha`.
miAlpha = 1.01
latInfo = tfMI.callMIGPU(flattenedEDAData, edaLatLabels.reshape(-1, 1), alpha=miAlpha)
longInfo = tfMI.callMIGPU(flattenedEDAData, edaLongLabels.reshape(-1, 1), alpha=miAlpha)

for estimate in (latInfo, longInfo):
    print(estimate)

# TODO: iterate over features and calculate each feature's MI with labels

In [None]:
# Split windows and labels in half; shuffle=False keeps the sample order.
splitOptions = {"test_size": 0.5, "shuffle": False}

# Column 0 of `labels` is the longitude target, column 1 the latitude target.
longTrainInputs, longTestInputs, longTrainLabels, longTestLabels = train_test_split(
    data, labels[:, 0], **splitOptions)
latTrainInputs, latValInputs, latTrainLabels, latValLabels = train_test_split(
    data, labels[:, 1], **splitOptions)

In [None]:
# Sanity check: shape of the longitude training inputs. The model cell reads
# axis 2 of this array as the number of channels.
print(longTrainInputs.shape)

In [None]:
#define our activation
def clipping_relu(x, alpha=1.1):
    """ELU activation whose output is clipped to the interval [-1.0, alpha].

    Despite the name, the non-linearity applied is ``tf.nn.elu``, not ReLU.

    x:     input tensor.
    alpha: upper clipping bound (default 1.1).
    Returns the clipped tensor.
    """
    activated = tf.nn.elu(x)
    lowerBound = tf.constant(-1.0)
    upperBound = tf.constant(alpha)
    return tf.clip_by_value(activated, lowerBound, upperBound)

#specify input dimensionality
numberOfSamples = longTrainInputs.shape[0]
numberOfChannels = longTrainInputs.shape[2]
outputDimension = 1
dropoutRate = 0.33  # currently unused: no Dropout layer is part of the model
#construct our neural network
hiddenLayerSizes = [32, 64, 128]
# Every convolution uses the same kernel width: ceil(sequenceLength / 3).
kernelSize = int(np.ceil(sequenceLength/3))

#set up our input layer: one window of `sequenceLength` steps x channels
inputLayer = layers.Input(shape=(sequenceLength,numberOfChannels))

#set up our hidden layers: a stack of same-padded 1-D convolutions, so the
#time axis keeps its length and only the channel count changes
curLayer = 0
previousLayer = inputLayer
for curLayerSize in hiddenLayerSizes:
    previousLayer = layers.Conv1D(curLayerSize, kernelSize,
                                  activation=clipping_relu,
                                  padding='same',
                                  kernel_regularizer=regularizers.l2(0.0001),
                                  name=str(curLayer)
                                 )(previousLayer)
    curLayer+=1

# Collapse the (time step, channel) axes before the dense head. Dense only
# acts on the last axis, so without this the network would emit one value per
# time step, shape (batch, sequenceLength, 1), while the training labels hold
# a single scalar per window.
previousLayer = layers.Flatten()(previousLayer)
previousLayer = layers.Dense(2048, kernel_regularizer=regularizers.l2(0.0001),
                             activation=clipping_relu)(previousLayer)
# Sigmoid output: the labels were min-max scaled before the split.
outputLayer = layers.Dense(outputDimension, activation='sigmoid')(previousLayer)

#compile our model
ourModel = Model(inputs=inputLayer, outputs=[outputLayer], name='longitude_cnn')
ourModel.compile(loss='mean_squared_error', metrics=[tf.keras.metrics.RootMeanSquaredError()], optimizer='adam')
ourModel.summary()

In [None]:
# Number of passes over the training data.
trainingEpochs = 64

# Fit the model on the longitude targets; 15% of the training inputs are
# held out for validation and the loss curves are plotted live.
ourModel.fit(
    x=longTrainInputs,
    y=longTrainLabels,
    batch_size=32,
    epochs=trainingEpochs,
    shuffle=True,
    validation_split=0.15,
    callbacks=[PlotLossesKeras()],
)

In [None]:
# Evaluate on the held-out half: predict, then plot predictions against truth.
testPreds = ourModel.predict(longTestInputs)
print(testPreds.shape)

# Average over axis 1 and flatten to one predicted value per test window.
predictedLongs = testPreds.mean(axis=1).ravel()

print(predictedLongs.shape)
print(longTestLabels.shape)

scatterData = pd.DataFrame({
    "index": range(len(predictedLongs)),
    "longPreds": predictedLongs,
    "longTruth": longTestLabels,
})
# Not the last expression of the cell, so this summary is computed but not shown.
scatterData.describe()

# One trace per column, both drawn against the frame's row index.
fig1 = go.Figure()
for columnName, traceName in (("longPreds", "predicted longitude"),
                              ("longTruth", "true longitude")):
    fig1.add_trace(go.Scatter(x=scatterData.index, y=scatterData[columnName], name=traceName))

fig1.show()
