In [8]:
import numpy as np
import pandas as pd
from ipywidgets import widgets
from sklearn import preprocessing
import sklearn
from datetime import time
from sklearn.model_selection import train_test_split
from pathlib import Path
import pickle
from scipy.ndimage.filters import gaussian_filter
import plotly.graph_objects as go
from collections import Counter
import math
import os

In [9]:
def createTestDrive(path):
    """Turn one drive CSV into a windowed test dataset and pickle it to disk.

    The CSV at ``path`` is only processed when it has more than 1200 rows and
    is not the explicitly excluded drive file. The feature columns (everything
    except ``Time``, ``Longitude`` and ``Latitude``) are scaled with the
    pickled input scaler, cut into consecutive non-overlapping windows of 10
    rows, and paired with a label holding the change in ``Longitude`` and
    ``Latitude`` between the first row of the window and the first row after
    it. The labels are then scaled with the pickled label scaler.

    The result, ``{"samples": (nWindows, 10, nFeatures), "labels": (nWindows, 2)}``,
    is written to ``otherLargeFiles/TestDrives/test-drive-<N>.pkl`` where
    ``<N>`` is one more than the highest number already present (0 if none).
    The output directory is created when it does not exist.

    Args:
        path: Location of the drive CSV (a string or any object whose ``str()``
            is the file path).

    Returns:
        None. The dataset is only written to disk.

    Raises:
        FileNotFoundError: If the CSV or one of the scaler pickles is missing.
        KeyError: If the CSV lacks the ``Time``, ``Longitude`` or ``Latitude``
            columns.
    """
    minimumRows = 1200
    excludedDrive = "dataByLocation_2021-06-03-19-38-35_2T3Y1RFV8KC014025.csv"
    subsamplingPeriod = 1
    sequenceLength = 10

    drivePath = str(path)
    drive = pd.read_csv(drivePath)
    meetsSpecifications = len(drive) > minimumRows and excludedDrive not in drivePath
    # Kept in the historical "N drives" wording; N is always 0 or 1 here.
    print("Found", int(meetsSpecifications), "drives that meet specifications")
    if not meetsSpecifications:
        return

    driveWithLocation = drive.iloc[::subsamplingPeriod]
    rawFeatures = driveWithLocation.drop(columns=["Time", "Longitude", "Latitude"]).values

    inputScaler = _loadPickle('otherLargeFiles/scalerUsedForTrainingInputs.pkl')
    normalizedFeatures = pd.DataFrame(inputScaler.transform(rawFeatures))
    # The first feature column is deliberately restored to its unscaled values.
    # NOTE(review): presumably this matches how the training inputs were built - confirm.
    normalizedFeatures[0] = rawFeatures[:, 0]
    featureValues = normalizedFeatures.to_numpy()

    # Every window needs one row *after* it for the label, so a drive whose
    # length is an exact multiple of sequenceLength yields one window fewer.
    nWindows = (len(featureValues) - 1) // sequenceLength
    print("on drive", 0, " Has", nWindows, "windows worth of samples")

    windowStarts = np.arange(nWindows) * sequenceLength
    samples = (
        featureValues[: nWindows * sequenceLength]
        .reshape(nWindows, sequenceLength, featureValues.shape[1])
        .astype(float)
    )

    # Label = position of the row following the window minus position of the
    # window's first row, as (delta longitude, delta latitude).
    coordinates = driveWithLocation[["Longitude", "Latitude"]].to_numpy(dtype=float)
    deltas = coordinates[windowStarts + sequenceLength] - coordinates[windowStarts]

    labelScaler = _loadPickle('otherLargeFiles/scalerUsedForTrainingLabels.pkl')
    dataset = {"samples": samples, "labels": labelScaler.transform(deltas)}

    outputDirectory = Path("./otherLargeFiles/TestDrives")
    outputDirectory.mkdir(parents=True, exist_ok=True)
    # Parse the number from the file name only, so dots in directory names
    # cannot break the numbering.
    nextTestNumber = max(
        (
            int(existing.name.split(".")[0].split("-")[-1])
            for existing in outputDirectory.rglob("test-drive-*.pkl")
        ),
        default=-1,
    ) + 1

    # 'wb' rather than 'ab': a dataset file must contain exactly one pickle.
    with open(f'otherLargeFiles/TestDrives/test-drive-{nextTestNumber}.pkl', 'wb') as datasetFile:
        pickle.dump(dataset, datasetFile)


def _loadPickle(picklePath):
    """Load and return the object stored in the pickle file at ``picklePath``.

    SECURITY: ``pickle.load`` can execute arbitrary code while loading, so
    only point this at files produced by this project.
    """
    with open(picklePath, 'rb') as pickleFile:
        return pickle.load(pickleFile)

In [10]:
# All per-drive CSVs matching the pattern, searched recursively from the
# current working directory.
drivePaths = [str(path) for path in Path(".").rglob("outputs/withNewFeatures/dataByLocation*.csv")]

# Drives already consumed by the training dataset.
# NOTE(review): presumably a collection of path strings in the same format as
# drivePaths - confirm against the code that writes drives-used.pkl.
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
with open('otherLargeFiles/drives-used.pkl', 'rb') as drivesInDatasetFile:
    drivesInDataset = pickle.load(drivesInDatasetFile)

# Keep only the drives the training dataset has not seen, in discovery order.
drivesNotInDataset = [drivePath for drivePath in drivePaths if drivePath not in drivesInDataset]

In [11]:
# Build and persist one test dataset for each drive left out of the training set.
for unusedDrivePath in drivesNotInDataset:
    createTestDrive(unusedDrivePath)

Found 0 drives that meet specifications
Found 0 drives that meet specifications
Found 1 drives that meet specifications
on drive 0  Has 2929 windows worth of samples
Found 0 drives that meet specifications
Found 0 drives that meet specifications
Found 0 drives that meet specifications
Found 1 drives that meet specifications
on drive 0  Has 1736 windows worth of samples
Found 1 drives that meet specifications
on drive 0  Has 481 windows worth of samples
Found 1 drives that meet specifications
on drive 0  Has 1822 windows worth of samples
Found 0 drives that meet specifications
Found 0 drives that meet specifications
Found 1 drives that meet specifications
on drive 0  Has 1588 windows worth of samples
Found 1 drives that meet specifications
on drive 0  Has 6313 windows worth of samples
Found 1 drives that meet specifications
on drive 0  Has 2012 windows worth of samples
Found 1 drives that meet specifications
on drive 0  Has 423 windows worth of samples
Found 1 drives that meet specifica