# GiroE GARMIN Dataset Decoding
Converts the decoded Garmin JSON dumps to CSV, loads them into the pandas DataFrame `garmin_df`, and pickles it to `garmin_df.pk`.

## 1. Decode files
Convert each JSON file to CSV and write a `-stat.json` summary next to it.

In [None]:
import pandas as pd
import os, json
from datetime import timedelta

# Root directory that is walked recursively (os.walk) to find the Garmin files.
DATADIR = "GarminRawData"

In [None]:
def parse(fName):
    """Convert one decoded Garmin JSON dump to CSV and write a stats summary.

    Reads ``<fName>.json``, keeps the ``data_message`` frames named
    ``record`` and pivots each frame's ``fields`` list (``name``/``value``
    pairs) into one row. The rows are written to ``<fName>.csv``. A summary
    holding the file name, first/last timestamp and duration in seconds is
    written to ``<fName>-stat.json``; for sessions longer than 1800 seconds
    the fields of the first ``file_id`` message, when one exists, are merged
    into that summary.

    Args:
        fName: Path of the JSON file, with or without the ``.json`` suffix.

    Returns:
        None. Input problems are reported on stdout instead of being raised:
        "No points" on ``ValueError`` (for example when there is no
        ``record`` frame to concatenate) and "Empty file" on ``KeyError``
        (an expected column is missing).
    """
    # Accept the path with or without its extension.
    if fName.endswith(".json"):
        fName = fName[:-5]

    try:
        df = pd.read_json(f"{fName}.json")
        # Bracket access so that a missing column raises KeyError (handled
        # below) rather than an unhandled AttributeError.
        is_data = df["frame_type"] == "data_message"
        record_df = df.loc[is_data & (df["name"] == "record")]
        # Each record frame becomes a single-row DataFrame whose columns are
        # the field names.
        values_df = pd.concat(
            [
                pd.DataFrame.from_records(fields)[["name", "value"]]
                .set_index("name")
                .transpose()
                for fields in record_df["fields"]
            ]
        )
        if values_df.shape[0] == 0:
            return

        # The CSV is written before the timestamp column is converted.
        values_df.to_csv(f"{fName}.csv")

        values_df["timestamp"] = pd.to_datetime(values_df["timestamp"])
        t_start = values_df["timestamp"].min()
        t_stop = values_df["timestamp"].max()
        stats = {
            "data_filename": fName,
            "t_start": t_start.isoformat(),
            "t_stop": t_stop.isoformat(),
            "duration_sec": (t_stop - t_start).total_seconds(),
        }

        if stats["duration_sec"] > 1800:
            file_id_fields = df.loc[is_data & (df["name"] == "file_id"), "fields"]
            # A dump without a file_id message must not abort the run with an
            # IndexError; the summary is then written without those fields.
            if len(file_id_fields) > 0:
                stats.update(
                    pd.DataFrame.from_records(file_id_fields.iloc[0])[
                        ["name", "value"]
                    ]
                    .set_index("name")
                    .to_dict()["value"]
                )

        with open(f"{fName}-stat.json", "w") as f:
            json.dump(stats, f, indent=2)
    except ValueError:
        print(f"No points in {fName}")
    except KeyError:
        print(f"Empty file {fName}")

In [None]:
# Walk DATADIR recursively and decode every JSON dump that is found.
for dirName, _subdirs, fileNames in os.walk(DATADIR):
    if not os.path.isdir( dirName ):
        continue
    for fName in fileNames:
        if not fName.endswith(".json"):
            continue
        json_path = f"{dirName}/{fName}"
        print( f"PARSING {json_path}" )
        parse( json_path )
    

## 2. Parse and load all CSV files

In [None]:
import pandas as pd
import os, json
from datetime import timedelta

# Root directory that is walked recursively (os.walk) to find the Garmin files.
DATADIR = "GarminRawData"

In [None]:
def load_csv( fName, device ):
    """Load one session CSV and tag it with device and session information.

    The ``timestamp`` column is parsed to datetimes. When the span between
    the earliest and latest timestamp exceeds 1800 seconds, the frame gets
    three extra columns: ``device`` (the given label), ``session`` (the
    earliest timestamp) and ``day`` (day of month of that timestamp).
    Otherwise an empty DataFrame is returned.

    Args:
        fName: Path of the CSV file to read.
        device: Label stored in the ``device`` column.

    Returns:
        The tagged DataFrame, or an empty DataFrame for short sessions.
    """
    frame = pd.read_csv(fName)
    frame["timestamp"] = pd.to_datetime(frame.timestamp)

    session_start = frame["timestamp"].min()
    elapsed_sec = (frame["timestamp"].max() - session_start).total_seconds()

    # Written as "not >" so that a NaN duration (no timestamps) is rejected too.
    if not elapsed_sec > 1800:
        return pd.DataFrame()

    frame["device"] = device
    frame["session"] = session_start
    frame["day"] = session_start.day
    return frame

In [None]:
# Load every session CSV under DATADIR and stack them into one DataFrame.
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop re-copies all previously loaded rows on every iteration.
session_frames = []

for dirName, _subdirs, fileNames in os.walk(DATADIR):
    # The device label is the name of the directory that holds the CSV.
    device = os.path.basename(dirName)
    for fName in fileNames:
        if not fName.endswith(".csv"):
            continue
        csv_path = f"{dirName}/{fName}"
        print( f"LOADING {csv_path}" )
        session_df = load_csv( csv_path, device )
        # load_csv returns an empty frame for the sessions it rejects;
        # leave those out of the concatenation.
        if not session_df.empty:
            session_frames.append(session_df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
garmin_df = pd.concat(session_frames) if session_frames else pd.DataFrame()
    

## 3. EXPORT to "garmin_df.pk"

In [None]:
import pickle

# Serialise the combined DataFrame to disk with the standard pickle module.
pickle_path = "garmin_df.pk"
with open(pickle_path, mode="wb") as pickle_file:
    pickle.dump(garmin_df, pickle_file)