In [1]:
import os
import pandas as pd
import numpy as np
import math
import geomodel.processors as gp
import time

# Distance thresholds. Neither is referenced in the cells visible in this notebook.
# NOTE(review): units are presumably meters - confirm against geomodel.processors.
_MINIMUM_DISTANCE = 40000  # 40KM
_POINT_RADIUS = 50/3  # 50 ft approx

# Column names handed to gp.process_files_list for every run CSV.
_columns = [
    'row', 'time_stamp', 'event', 'event_type', 'latitude', 'longitude',
    'altitude', 'heart_rate', 'speed', 'distance', 'last_event', 'filename',
]

# Directory containing the per-run CSV files. The default is the original
# author's local directory; set the GARMIN_DATA_PATH environment variable to
# run the notebook on another machine without editing this cell.
_DATA_PATH = os.environ.get(
    "GARMIN_DATA_PATH",
    "/Users/btb/Documents/Garmin fit Files/data",
)
_DATA_FILE = "locations.csv"
# Directory the generated .npy arrays are written to.
_DATA_OUT_PATH = "data"

data_dict = {}
# A handful of run file names. NOTE(review): presumably kept for spot checks -
# not referenced in the cells visible in this notebook.
test_keys = [
    "2020-05-01-16-58-23.csv",
    "2018-09-30-10-21-14.csv",
    "2020-03-25-17-23-37.csv",
    "2020-05-09-10-11-16.csv",
]



## Read the close points - we determine which runs come "close" to the points of interest so we don't iterate over unnecessary points


In [2]:
# The CSV is read with explicit column names (number, lat, lon).
points = pd.read_csv("data/points.csv", names=["number", "lat", "lon"])

# Coordinate columns as plain NumPy arrays, in the form passed to gp.process_files_list.
close_lats, close_lons = (points[axis].to_numpy() for axis in ("lat", "lon"))

## Get all the files

In [3]:
# Full paths of every CSV in the data directory.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the processing order identical from one execution to the next.
files = [
    "{}/{}".format(_DATA_PATH, name)
    for name in sorted(os.listdir(_DATA_PATH))
    if name.endswith(".csv")
]

## Process the files and store them in a dictionary keyed by file name

In [4]:
# Slow step: the recorded execution of this cell took roughly 779 s.
start = time.time()
runs_dictionary = gp.process_files_list(files, _columns, close_lats, close_lons)
elapsed = time.time() - start

print("Finished in: {0:.3f} (s)".format(elapsed))

Finished in: 778.931 (s)


In [10]:
import pickle

# Persist the result of the slow processing step so it can be reloaded
# without re-processing every file.
with open("data/runs.pkl", "wb") as pickle_file:
    pickle.dump(runs_dictionary, pickle_file)

In [12]:
# Reload the processed runs from the pickle written by this notebook.
# NOTE: pickle.load can execute arbitrary code - only load files you created.
with open("data/runs.pkl", "rb") as pickle_file:
    runs = pickle.load(pickle_file)

## Using a pre-determined lookback "window", generate each run's features and labels

In [14]:
start = time.time()
for key, run in runs.items():
    # Only runs that carry a "point_sequence" entry get labels and features.
    if "point_sequence" not in run:
        continue
    labels, features = gp.generate_label_features(run["point_sequence"], window=3)
    run["labels"] = labels
    run["features"] = features
elapsed = time.time() - start
print("Finished in: {0:.3f} (s)".format(elapsed))

Finished in: 0.029 (s)


In [18]:
# Write the runs back to the pickle, now including the "labels" and
# "features" entries added to each run.
with open("data/runs.pkl", "wb") as pickle_file:
    pickle.dump(runs, pickle_file)

In [17]:
# Preview the labels/features of one run. The run is chosen explicitly (the
# last run that actually has labels) instead of relying on a loop variable
# left over from another cell, which fails when that run has no "labels".
sample_key = next(k for k in reversed(list(runs)) if "labels" in runs[k])
runs[sample_key]["labels"], runs[sample_key]["features"]

(array([16, 14, 12, 11, 14, 16]),
 array([[79,  0,  3],
        [ 0,  3, 16],
        [ 3, 16, 14],
        [16, 14, 12],
        [14, 12, 11],
        [12, 11, 14]]))

## Generate a comprehensive list of features and labels for PyTorch processing

In [62]:
start = time.time()
labels_list, features_list = [], []
for key, run in runs.items():
    # Skip runs that have no labels at all, or an empty label array.
    if "labels" not in run or len(run["labels"]) == 0:
        continue
    labels_list.append(run["labels"])
    features_list.append(run["features"])
print("Finished in: {0:.3f} (s)".format(time.time() - start))
        


Finished in: 0.002 (s)


In [64]:
# Stack the per-run arrays along the first axis into one labels array and
# one features array.
labels = np.concatenate(labels_list, axis=0)
features = np.concatenate(features_list, axis=0)

In [65]:
# Write both arrays as .npy files into the output directory.
labels_path = "{}/labels.npy".format(_DATA_OUT_PATH)
features_path = "{}/features.npy".format(_DATA_OUT_PATH)
np.save(labels_path, labels)
np.save(features_path, features)

In [68]:
# Sanity check: the two arrays should have the same number of rows.
tuple(map(len, (labels, features)))

(5742, 5742)