In [21]:
import ast
import json

from tqdm import tqdm

In [18]:
import json

input_file = "archive/endomondoHR_proper.json"
output_file = "archive/endomondoHR_fixed.json"

# Rewrite the single-quoted, one-record-per-line input as JSON Lines.
#
# A blanket "'" -> '"' replacement corrupts any string value that itself
# contains an apostrophe or a double quote, and leaves Python-only tokens
# (None / True / False) untouched, so the result is not guaranteed to be valid
# JSON. Parsing each line as a Python literal and re-serialising it with
# json.dumps handles quoting and those tokens correctly.
with open(input_file, "r", encoding="utf-8") as infile, open(output_file, "w", encoding="utf-8") as outfile:
    for line in infile:
        try:
            # ensure_ascii=False keeps non-ASCII text as-is (the output is UTF-8).
            corrected_line = json.dumps(ast.literal_eval(line.strip()), ensure_ascii=False) + "\n"
        except (ValueError, TypeError, SyntaxError):
            # Not a parseable/serialisable Python literal (e.g. a blank line):
            # fall back to the plain quote swap so such lines are passed
            # through exactly as before.
            corrected_line = line.replace("'", '"')
        outfile.write(corrected_line)


In [22]:
import json

input_file = "archive/endomondoHR_fixed.json"

# Read the JSON Lines file: every non-empty line is one JSON object.
data = []
with open(input_file, "r", encoding="utf-8") as f:
    for line in tqdm(f):
        # A blank (or trailing empty) line is not a JSON document and would
        # make json.loads raise, so skip whitespace-only lines.
        if not line.strip():
            continue
        data.append(json.loads(line))  # Load each line separately

print(f"Loaded {len(data)} JSON objects")


167783it [00:40, 4177.04it/s]

Loaded 167783 JSON objects





In [24]:
# Show the field names present on the first loaded record.
print(data[0].keys())

dict_keys(['longitude', 'altitude', 'latitude', 'sport', 'id', 'heart_rate', 'gender', 'timestamp', 'url', 'userId', 'speed'])


In [26]:
import pandas as pd
# Convert to DataFrame for easier analysis (one row per loaded JSON object)
df = pd.DataFrame(data)

# 1. Count occurrences of each sport
sport_counts = df['sport'].value_counts()
print("Sport Counts:\n", sport_counts)

# 2. Check if longitude, altitude, latitude, timestamp, and heart_rate are the same length for each entry
def check_vector_lengths(row, keys=('longitude', 'altitude', 'latitude', 'timestamp', 'heart_rate')):
    """
    Report whether the selected sequence fields of one record all have the same length.

    Args:
        row: Mapping-like record (e.g. a dict or a DataFrame row) indexable by
            field name, where each selected field supports ``len()``.
        keys: Field names to compare. Defaults to the five per-sample series
            checked originally, so ``df.apply(check_vector_lengths, axis=1)``
            behaves as before.

    Returns:
        bool: True if every selected field has the same length, False
        otherwise (including when ``keys`` is empty).
    """
    # A set collapses equal lengths, so exactly one element means they all agree.
    distinct_lengths = {len(row[key]) for key in keys}
    return len(distinct_lengths) == 1

# Run the length check on every row (axis=1 passes each row to the function)
# and store the boolean result in a new column.
df['consistent_lengths'] = df.apply(check_vector_lengths, axis=1)

# Count number of inconsistent entries
# (value_counts tallies every distinct flag value, so despite its name this
# holds the count of consistent rows as well as inconsistent ones)
inconsistent_count = df['consistent_lengths'].value_counts()
print("\nConsistency Check on Vector Lengths:\n", inconsistent_count)

Sport Counts:
 sport
bike                       71915
run                        70591
mountain bike              10722
bike (transport)            7757
indoor cycling              1725
walk                        1289
orienteering                 867
cross-country skiing         789
core stability training      448
fitness walking              292
skate                        271
roller skiing                238
hiking                       237
kayaking                      92
circuit training              89
weight training               74
rowing                        71
gymnastics                    66
soccer                        51
downhill skiing               48
treadmill running             28
snowshoeing                   16
swimming                      14
golf                          12
elliptical                    10
horseback riding              10
badminton                     10
basketball                     8
tennis                         8
aerobics              

In [28]:
import folium

def plot_route(latitude, longitude, map_filename="route_map.html"):
    """
    Plots a route using latitude and longitude sequences on an OpenStreetMap-based interactive map.

    The route is drawn as a blue polyline with a green "Start" marker on the
    first point and a red "End" marker on the last point. The map is centered
    on the first point.

    Args:
        latitude (sequence): Latitude coordinates (list, tuple, NumPy array, pandas Series, ...).
        longitude (sequence): Longitude coordinates, same length as ``latitude``.
        map_filename (str | None): Output filename for the HTML map. Pass
            ``None`` to skip writing the file and only return the map.

    Returns:
        folium.Map: The generated interactive map.

    Raises:
        ValueError: If either input is ``None`` or empty, or the two inputs
            differ in length.
    """
    # Explicit None/len() checks instead of truthiness: ``not array`` raises an
    # unrelated "ambiguous truth value" error for NumPy arrays and pandas Series.
    if (
        latitude is None
        or longitude is None
        or len(latitude) == 0
        or len(latitude) != len(longitude)
    ):
        raise ValueError("Latitude and longitude lists must be non-empty and of the same length.")

    # Pair the coordinates once and take the endpoints from the pairs, so the
    # inputs are only iterated (never indexed by position or label).
    route = list(zip(latitude, longitude))
    start_location, end_location = route[0], route[-1]

    # Create a Folium map centered at the first location
    route_map = folium.Map(location=start_location, zoom_start=14, tiles="OpenStreetMap")

    # Add route to the map
    folium.PolyLine(route, color="blue", weight=5, opacity=0.7).add_to(route_map)

    # Add start and end markers
    folium.Marker(start_location, popup="Start", icon=folium.Icon(color="green")).add_to(route_map)
    folium.Marker(end_location, popup="End", icon=folium.Icon(color="red")).add_to(route_map)

    # Save the map unless the caller opted out with map_filename=None
    if map_filename is not None:
        route_map.save(map_filename)
        print(f"Map saved as {map_filename}. Open it in a browser to view.")

    return route_map


In [30]:
# Keep only the rows whose sport label is exactly 'run', then preview them.
run_routes = df.loc[df['sport'] == 'run']
run_routes.head()

Unnamed: 0,longitude,altitude,latitude,sport,id,heart_rate,gender,timestamp,url,userId,speed,consistent_lengths
29,"[6.8854929, 6.8853678, 6.8851621, 6.8848205, 6...","[-173.8, -151.2, -161.6, -165.4, -168.6, -172....","[52.2226809, 52.222727, 52.2228258, 52.2228606...",run,321063199,"[80, 81, 94, 100, 102, 112, 108, 114, 110, 109...",male,"[1397079203, 1397079210, 1397079218, 139707922...",https://www.endomondo.com/users/4969375/workou...,4969375,,True
30,"[6.9144073, 6.9142929, 6.9141539, 6.9140268, 6...","[57.8, 57.6, 57.0, 56.4, 55.8, 55.2, 54.4, 53....","[52.2111711, 52.2112631, 52.2114064, 52.211608...",run,303565793,"[60, 62, 92, 92, 132, 150, 150, 159, 159, 161,...",male,"[1393908533, 1393908541, 1393908549, 139390855...",https://www.endomondo.com/users/4969375/workou...,4969375,,True
31,"[6.9141348, 6.9145702, 6.9151684, 6.9158377, 6...","[22.8, 26.4, 30.8, 35.6, 43.0, 48.4, 49.8, 49....","[52.2110297, 52.2106325, 52.2102453, 52.209833...",run,302666522,"[77, 93, 107, 121, 118, 120, 120, 124, 124, 12...",male,"[1393687929, 1393687948, 1393687967, 139368798...",https://www.endomondo.com/users/4969375/workou...,4969375,,True
32,"[6.8678543, 6.8678634, 6.8675429, 6.8672183, 6...","[35.4, 35.2, 34.6, 34.2, 35.0, 35.2, 34.8, 34....","[52.1936673, 52.1934354, 52.1931993, 52.192873...",run,296982347,"[75, 101, 116, 120, 124, 126, 127, 129, 126, 1...",male,"[1392480163, 1392480176, 1392480189, 139248020...",https://www.endomondo.com/users/4969375/workou...,4969375,,True
33,"[6.9143328, 6.9146396, 6.9148949, 6.9151568, 6...","[63.0, 65.2, 66.0, 66.2, 65.8, 65.8, 67.0, 67....","[52.2112195, 52.2110264, 52.2108135, 52.210601...",run,295890426,"[58, 83, 112, 115, 117, 116, 141, 121, 120, 11...",male,"[1392180426, 1392180436, 1392180446, 139218045...",https://www.endomondo.com/users/4969375/workou...,4969375,,True


In [34]:
# Number of rows (workouts) in the running subset.
run_routes.shape[0]

70591

In [36]:
# Number of distinct userId values among the running workouts
# (dropna=False counts a missing value as its own entry, like len(unique())).
run_routes['userId'].nunique(dropna=False)

807

In [33]:

# Example coordinates: taken from the row of df whose index label is 12000.
# NOTE: this reads from df (all sports), not from the run_routes subset.
latitude = df.loc[12000,'latitude']  # coordinate sequence stored in that row
longitude = df.loc[12000,'longitude'] 

# Generate the route map (also writes route_map.html, the default filename)
map_object = plot_route(latitude, longitude)

# Display the map (Jupyter Notebook only): a bare last expression is rendered inline
map_object


Map saved as route_map.html. Open it in a browser to view.
