In [89]:
import pandas as pd
import strym
from strym import strymread
from strym import strymmap
import matplotlib.pyplot as plt
from pylab import rcParams
import strym.DBC_Read_Tools as dbc
import numpy as np
from pathlib import Path
import ipywidgets as widgets
import datetime
import time

In [90]:
# Input files for one recorded drive. Both logs share the same directory and the
# same timestamp/VIN prefix; only the trailing "<kind>_Messages.csv" part differs.
CAN_path = (
    '../data/publishable-circles/2T3MWRFVXLW056972/libpanda/2021_03_09/'
    '2021-03-09-00-50-07_2T3MWRFVXLW056972_CAN_Messages.csv'
)
GPS_path = (
    '../data/publishable-circles/2T3MWRFVXLW056972/libpanda/2021_03_09/'
    '2021-03-09-00-50-07_2T3MWRFVXLW056972_GPS_Messages.csv'
)

In [208]:
def interpolate(time, preTime, postTime, preValue, postValue):
    """Estimate a value at ``time`` from two time-stamped samples.

    The result is the inverse-distance weighted average of ``preValue`` and
    ``postValue``: each sample is weighted by the reciprocal of its time
    distance to ``time``. For a ``time`` lying between ``preTime`` and
    ``postTime`` this is ordinary linear interpolation.

    Parameters
    ----------
    time : number
        Instant at which the value is wanted. (The name shadows the ``time``
        module inside this function only.)
    preTime, postTime : number
        Timestamps of the two known samples.
    preValue, postValue : number
        Values recorded at ``preTime`` and ``postTime``.

    Returns
    -------
    number
        ``preValue`` when ``time == preTime``, ``postValue`` when
        ``time == postTime``, otherwise the weighted average of both.
    """
    distToPre = abs(time - preTime)
    distToPost = abs(time - postTime)
    span = distToPre + distToPost

    # All three timestamps coincide: both samples describe the same instant,
    # so there is nothing to blend (and dividing by ``span`` would fail).
    if span == 0:
        return preValue

    # (v1/d1 + v2/d2) / (1/d1 + 1/d2) multiplied through by d1*d2. This form
    # needs no reciprocal, so a zero distance correctly selects the sample
    # that sits exactly at ``time`` instead of discarding it.
    return (preValue * distToPost + postValue * distToPre) / span

In [218]:
def mostRelevantRecord(GPSrecords, referenceTime, CANrecords):
    """Estimate the (latitude, longitude) at ``referenceTime`` from GPS samples.

    Parameters
    ----------
    GPSrecords : pandas.DataFrame
        GPS samples with ``'Systime'``, ``'Lat'`` and ``'Long'`` columns.
        Rows must be sorted by ascending ``'Systime'``; the lookup below is a
        binary search and gives wrong neighbours on unsorted data.
    referenceTime : float
        Timestamp of the CAN message, on the same clock as ``'Systime'``.
    CANrecords : object
        Unused. Kept so existing callers do not need to change.

    Returns
    -------
    tuple
        ``(latitude, longitude)``:
        * the matching GPS sample when ``referenceTime`` equals a GPS timestamp;
        * the first / last GPS sample when ``referenceTime`` lies before / after
          the recorded GPS range (the position is held, not extrapolated);
        * otherwise the time-weighted blend of the two neighbouring samples,
          computed by ``interpolate``.

    Raises
    ------
    ValueError
        If ``GPSrecords`` has no rows.
    """
    # Work on positional arrays: label-based ``series[0]`` / ``series[-1]``
    # depends on the DataFrame's index and is not "first" / "last" in general.
    gpsTimes = GPSrecords['Systime'].to_numpy()
    gpsLat = GPSrecords['Lat'].to_numpy()
    gpsLong = GPSrecords['Long'].to_numpy()

    sampleCount = len(gpsTimes)
    if sampleCount == 0:
        raise ValueError("GPSrecords contains no samples")

    # Position of the first GPS sample whose time is >= referenceTime.
    post = int(np.searchsorted(gpsTimes, referenceTime, side="left"))

    if post == 0:
        # At or before the first GPS fix: hold the first known position.
        return gpsLat[0], gpsLong[0]
    if post == sampleCount:
        # After the last GPS fix: hold the last known position.
        return gpsLat[-1], gpsLong[-1]
    if gpsTimes[post] == referenceTime:
        # Exact timestamp match: no interpolation needed.
        return gpsLat[post], gpsLong[post]

    # referenceTime lies strictly between two neighbouring GPS samples.
    pre = post - 1
    preTime = gpsTimes[pre]
    postTime = gpsTimes[post]
    avgLat = interpolate(referenceTime, preTime, postTime, gpsLat[pre], gpsLat[post])
    avgLong = interpolate(referenceTime, preTime, postTime, gpsLong[pre], gpsLong[post])
    return avgLat, avgLong


In [183]:
# CAN messages are recorded far more often than GPS coordinates, so each message
# later gets a coordinate pair estimated from the GPS samples before and after it.
r = strymread(csvfile=CAN_path)
strymGPS = strymmap(csvfile=GPS_path)
# Order the GPS samples by their system timestamp.
GPSRecords = strymGPS.dataframe.sort_values(by="Systime")

[2021_07_12_14_35_44] (root) INFO: Vehicle model infered is toyota-rav4-2020
[2021_07_12_14_36_24] (root) INFO: Reading GPS file ../data/publishable-circles/2T3MWRFVXLW056972/libpanda/2021_03_09/2021-03-09-00-50-07_2T3MWRFVXLW056972_GPS_Messages.csv
GPS signal first acquired at 2021-03-08 19:50:19:300000


In [None]:
# Check that the GPS and CAN recordings overlap in time before linking them.
# min()/max() are used instead of [0]/[-1]: those are label lookups on the
# Series index and do not reliably return the first/last row.
earliestGPS = float(GPSRecords['Systime'].min())
latestGPS = float(GPSRecords['Systime'].max())
earliestCAN = float(r.dataframe['Time'].min())
latestCAN = float(r.dataframe['Time'].max())

# Two time ranges are disjoint only when one ends before the other starts.
# (Testing whether a GPS endpoint falls inside the CAN range is not enough: it
# misses the case where the GPS recording fully contains the CAN recording.)
if earliestGPS > latestCAN or latestGPS < earliestCAN:
    print("These files cannot be linked as there is no overlap")

columns = {"Time":[], "Longitude":[], "Latitude":[]}

# Estimate one coordinate pair per CAN message, in CAN row order.
for i, message in enumerate(r.dataframe['Time']):
    avgLat, avgLong = mostRelevantRecord(GPSRecords, float(message), r.dataframe)
    columns['Latitude'].append(avgLat)
    columns['Longitude'].append(avgLong)
    if (i%10000) == 0:
        print('Processed', i, 'samples.')
r.dataframe['Latitude'] = columns['Latitude']
r.dataframe['Longitude'] = columns['Longitude']


Processed 0 samples.
Processed 10000 samples.
Processed 20000 samples.
Processed 30000 samples.
Processed 40000 samples.
Processed 50000 samples.
Processed 60000 samples.
Processed 70000 samples.
Processed 80000 samples.
Processed 90000 samples.
Processed 100000 samples.


In [114]:
def updateTimer():
    """Write the time elapsed since ``startTime`` into ``timeElapsed.value``.

    Reads the module-level ``startTime`` (a ``time.time()`` timestamp) and
    assigns a string of the form ``"Time elapsed: H:MM:SS..."`` to the
    ``value`` attribute of the module-level ``timeElapsed`` object.
    NOTE(review): ``timeElapsed`` is presumably an ipywidgets label created in
    another cell - confirm it exists before this is called.
    """
    currentTime = time.time()
    elapsed = datetime.timedelta(seconds=(currentTime - startTime))
    # A timedelta cannot be concatenated to a str directly (TypeError), so it
    # is converted explicitly.
    timeElapsed.value = "Time elapsed: " + str(elapsed)

In [103]:
# Batch step: create a dataByLocation_*.csv file for every CAN log under
# ./raw_data that does not already have one under ./outputs.
drives_can = [str(PathObj) for PathObj in Path("./raw_data").rglob("*_CAN_Messages.csv")]


driveCounter = 0
driveTotal = len(drives_can)
startTime = time.time()
currentTime = time.time()

# Path.name yields the file name on every platform; splitting on "/" would
# return the whole path on Windows.
existingFiles = [PathObj.name for PathObj in Path("./outputs").rglob("dataByLocation_*.csv")]

# NOTE(review): drives_gps is defined outside this cell and is paired with
# drives_can purely by position - confirm both lists are built in the same
# order and have the same length.
for i, data_path in enumerate(drives_can):
    # Output name is built from the first two "_"-separated parts of the CAN file name.
    filename = "dataByLocation_" + "_".join(Path(data_path).name.split("_")[:2]) + ".csv"
    if filename not in existingFiles:
        try:
            createDataByLocationFile(data_path, drives_gps[i])
            driveCounter += 1
        except Exception as err:
            # Best effort: one bad drive must not stop the batch, but the
            # failure is reported instead of being silently discarded.
            # ``Exception`` (not a bare except) lets a kernel interrupt through.
            print("Failed to process", data_path, "-", repr(err))
    else:
        print("drive previously processed")
    finishedDriveCounter.value = "Drives decoded: " + str(driveCounter)