In [1]:
import os
import pandas as pd
import numpy as np
import math
import geomodel.processors as gp
import time
import pickle

_MINIMUM_DISTANCE = 40000 # 40KM
_POINT_RADIUS = 50/3 # 50 ft approx

_columns = ['row','time_stamp','event','event_type','latitude','longitude','altitude','heart_rate','speed','distance','last_event','filename']

_DATA_PATH = "/Users/btb/Documents/Garmin fit Files/data"
_DATA_FILE = "locations.csv"
_DATA_OUT_PATH = "data"

data_dict = {}
test_keys = ["2020-05-01-16-58-23.csv","2018-09-30-10-21-14.csv","2020-03-25-17-23-37.csv","2020-05-09-10-11-16.csv"]



## Read the close points - we will determine which runs are even "close" to the points of interest so we don't interate over unnecessary points


In [2]:
points = pd.read_csv("data/points.csv",names=["number","lat","lon"])
close_lats = points["lat"].to_numpy()
close_lons = points["lon"].to_numpy()

## Get all the files

In [3]:
files = ["{}/{}".format(_DATA_PATH,f) for f in os.listdir(_DATA_PATH) if f.endswith(".csv")]

## Process the files and stick them into a dictionary by name of file

In [4]:
t = time.time()

runs_dictionary = gp.process_files_list(files,_columns,close_lats,close_lons)

print("Finished in: {0:.3f} (s)".format(time.time()-t))

Finished in: 631.480 (s)


In [5]:
with open("data/runs.pkl","wb") as f:
    pickle.dump(runs_dictionary,f)

In [7]:
runs_dictionary['2016-01-06-18-45-01.csv'].keys()

dict_keys(['file_name', 'data', 'latitude', 'longitude', 'mean_location', 'close_points', 'points_visited', 'point_sequence'])

In [5]:
with open("data/runs.pkl","rb") as f:
    runs_dictionary = pickle.load(f)

## Using a pre-determined lookback "window" generate each run's features and labels

In [8]:
runs = {}
t = time.time()
for key in runs_dictionary.keys():
    if "point_sequence" in runs_dictionary[key].keys():
        labels, features = gp.generate_label_features(runs_dictionary[key]["point_sequence"],window=3)
        if len(labels) > 0:
            runs[key] = runs_dictionary[key]
            runs[key]["labels"] = labels
            runs[key]["features"] = features
print("Finished in: {0:.3f} (s)".format(time.time()-t))

Finished in: 0.088 (s)


In [23]:
with open("data/runs.pkl","wb") as f:
    pickle.dump(runs,f)

In [27]:
l = [k for k in runs.keys()]

In [25]:
for key in runs.keys():
    np.save("data/model_data/{}.npy".format(key),np.hstack([runs[key]["labels"].reshape(-1,1), runs[key]["features"]]))

In [31]:
files_list = pd.DataFrame({"Files": l})
files_list.to_csv("data/runs.csv")

'2016-01-06-18-45-01.csv'

In [50]:
for i,f in files_list.iloc[0:3].iterrows():
    print(f["Files"])

2016-01-06-18-45-01.csv
2016-01-08-18-16-15.csv
2016-01-12-18-20-16.csv


In [51]:
with open("data/model_data/{}.npy".format(key),'rb') as f:
    XY1 = np.load(f)

with open("data/model_data/{}.npy".format("2016-01-12-18-20-16.csv"),'rb') as f:
    XY2 = np.load(f)

In [84]:
X = np.array([]).reshape(-1,1)
Y = np.array([]).reshape(-1,1)

In [86]:
X = np.append((X, XY1[:,1:]),axis=0)
Y = np.append(Y, XY1[:,0])

X = np.append((X, XY2[:,1:]),axis=0)
Y = np.append(Y, XY2[:,0:])

TypeError: _append_dispatcher() missing 1 required positional argument: 'values'

In [83]:
X, Y.shape

(array([79.,  0.,  3.,  0.,  3., 16.,  3., 16., 14., 16., 14., 12., 14.,
        12., 11., 12., 11., 14., 38., 39., 40., 39., 40., 77., 40., 77.,
        43., 77., 43., 42., 43., 42., 43., 42., 43., 39., 43., 39., 38.,
        39., 38., 37., 38., 37., 70., 37., 70., 26., 70., 26., 27., 26.,
        27., 78., 27., 78., 64., 78., 64., 68., 64., 68., 69., 68., 69.,
        26., 69., 26., 70., 26., 70., 37., 70., 37., 36.]),
 (82,))

In [18]:
runs[key]["labels"].reshape(-1,1), runs[key]["features"]

(array([[16],
        [14],
        [12],
        [11],
        [14],
        [16]]),
 array([[79,  0,  3],
        [ 0,  3, 16],
        [ 3, 16, 14],
        [16, 14, 12],
        [14, 12, 11],
        [12, 11, 14]]))

In [1]:
import pickle
with open("data/runs.pkl", "rb") as f:
    runs = pickle.load(f)



In [41]:
X = np.empty(shape=(0,3),dtype=int)
Y = np.empty(shape=(0),dtype=int)

In [42]:
for key in runs.keys():
    if runs[key]["features"].shape[0] == runs[key]["labels"].shape[0]:
        X = np.concatenate((X,runs[key]["features"]))
        Y = np.concatenate((Y,runs[key]["labels"]))
    else:
        print(key)

In [43]:
runs[key]["features"].shape[0] == runs[key]["labels"].shape[0]

True

In [45]:
np.save("data/data.npy",np.hstack((Y.reshape(-1,1),X)))

In [46]:
XY = np.load("data/data.npy")

In [51]:
XY[2:4,1:]

array([[41, 77, 43],
       [77, 43, 42]])

In [35]:
Y.reshape(-1,1).shape

(5743, 1)

In [14]:
import numpy as np
np.vstack([X1,X2[0,:]])

array([[37, 38, 41],
       [38, 41, 77],
       [41, 77, 43],
       [77, 43, 42],
       [43, 42, 43],
       [42, 43, 77],
       [43, 77, 41],
       [77, 41, 38],
       [41, 38, 37],
       [38, 37, 70],
       [37, 70, 26],
       [70, 26, 27],
       [26, 27, 78],
       [27, 78, 64],
       [78, 64, 68],
       [64, 68, 69],
       [26, 70, 37]])

## Generate a comprehenisve list of features and labels for PyTorch processing

In [62]:
t = time.time()
labels_list = []
features_list = []
for key in runs.keys():
    if "labels" in runs[key].keys():
        if len(runs[key]["labels"]) > 0:
            labels_list.append(runs[key]["labels"])
            features_list.append(runs[key]["features"])
print("Finished in: {0:.3f} (s)".format(time.time()-t))
        


Finished in: 0.002 (s)


In [64]:
labels = np.concatenate(labels_list)
features = np.concatenate(features_list)

In [65]:
np.save("{}/labels.npy".format(_DATA_OUT_PATH),labels)
np.save("{}/features.npy".format(_DATA_OUT_PATH),features)

In [68]:
len(labels), len(features)

(5742, 5742)