# Description
This notebook generates a CSV from the raw JSON route data. You can choose to generate either a (large) CSV in which each line is a GPS data point with attached metadata, or a small one in which each line contains only a route's metadata. It requires the user data in order to add information on the organisation each participant is part of. 


In [None]:
import numpy as np
import json
import csv
import pandas as pd       

# Lookup of user id -> demographic group; populated by the cell that reads Users.json.
users = {}
# Path to the raw JSON export of walks that the CSV-generating cells read.
walksfile = '../Event_Walks.json'

In [None]:
# Read the user export and record each user's demographic group under the
# user's id; users that carry no 'demographicGroup' entry are stored as
# 'Not Found'.
with open("../Users.json", "r", encoding='utf8') as read_userfiles:
    userlist = json.load(read_userfiles)

for user in userlist['result']['users']:
    users[user['id']] = user.get('demographicGroup', 'Not Found')

# Run the code in the cell below to generate a csv where each row contains a gps point and relevant metadata

In [None]:
# Build a table with one row per GPS point, each row tagged with the metadata
# of the walk it belongs to, and write it to 'route_output.csv' (';'-separated).
with open(walksfile, "r", encoding='utf8') as read_file:
    data = json.load(read_file)

# Collect the per-walk frames and concatenate them once at the end.
# DataFrame.append was removed in pandas 2.0, and it re-copied the whole
# accumulated frame on every iteration.
walk_frames = []
for walk in data['result']['walks']:
    print(walk['createdBy'])
    # Creators missing from the lookup (or an empty lookup) become 'Not Found'.
    organisation = users.get(walk['createdBy'], 'Not Found')
    print(organisation)

    latlngDF = pd.DataFrame(walk['locationData'])
    if latlngDF.empty:
        # A walk without GPS points contributes no rows; skipping it also
        # avoids a KeyError on the missing 'latitude'/'longitude' columns.
        continue

    # NOTE(review): "roudeId" looks like a typo for "routeId"; it is kept
    # because renaming the column would change the output schema.
    latlngDF["roudeId"] = walk["id"]
    latlngDF["createdBy"] = walk["createdBy"]
    latlngDF["organisation"] = organisation
    # Dividing by 60000 presumably converts milliseconds to minutes -- confirm
    # against the export format.
    latlngDF["duration (minutes)"] = walk["duration"]/60000
    latlngDF["distance (km)"] = walk["distance"]
    latlngDF['start_time'] = walk['startedAt']
    latlngDF['end_time'] = walk['endedAt']

    latlngDF["latitude"] = pd.to_numeric(latlngDF["latitude"], downcast="float")
    latlngDF["longitude"] = pd.to_numeric(latlngDF["longitude"], downcast="float")
    walk_frames.append(latlngDF)

# pd.concat raises on an empty list, so fall back to an empty frame.
if walk_frames:
    routeDF = pd.concat(walk_frames, ignore_index=True)
else:
    routeDF = pd.DataFrame()

# index must be the boolean False: the string 'false' is truthy, so pandas
# wrote the row index as an extra leading column.
routeDF.to_csv('route_output.csv', index=False, sep=';')

# Run the code in the cell below to generate a CSV where each row contains only the metadata of one unique route (much smaller)

In [None]:
# Build a table with one row per walk holding only the walk's metadata (no GPS
# points) and write it to 'route_output_routesOnly.csv' (';'-separated).
with open(walksfile, "r", encoding='utf8') as read_file:
    data = json.load(read_file)

# Gather plain dicts and build the frame in one go. DataFrame.append was
# removed in pandas 2.0, and it re-copied the whole frame on every iteration.
route_rows = []
for walk in data['result']['walks']:
    if users:
        # User data is loaded: creators missing from it become 'Not Found'.
        organisation = users.get(walk['createdBy'], 'Not Found')
    else:
        # No user data was loaded at all.
        organisation = 'unknown'

    route_rows.append({
        # NOTE(review): "roudeId" looks like a typo for "routeId"; it is kept
        # because renaming the column would change the output schema.
        "roudeId": walk["id"],
        "createdBy": walk["createdBy"],
        "organisation": organisation,
        # Dividing by 60000 presumably converts milliseconds to minutes --
        # confirm against the export format.
        "duration (minutes)": walk["duration"]/60000,
        "distance (km)": walk["distance"],
        'start_time': walk['startedAt'],
        'end_time': walk['endedAt'],
    })

routeDF = pd.DataFrame(route_rows)

# index must be the boolean False: the string 'false' is truthy, so pandas
# wrote the row index as an extra leading column.
routeDF.to_csv('route_output_routesOnly.csv', index=False, sep=';')