In [79]:
import os
import re
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET

import pandas as pd
import psycopg2
from geopy.distance import geodesic

In [80]:
# Path of the OSM XML file that the last cell of this notebook opens for writing.
osm_file = '../osm/strava.osm'

In [81]:
# Connection settings are read from the standard libpq environment variables
# when they are set, so real credentials never have to be committed with the
# notebook. The fallbacks are the original local-development values, which
# keeps the notebook runnable unchanged on a machine without those variables.
conn = psycopg2.connect(
    host=os.environ.get("PGHOST", "localhost"),
    database=os.environ.get("PGDATABASE", "gpx"),
    user=os.environ.get("PGUSER", "root"),
    password=os.environ.get("PGPASSWORD", "root"),
)

In [86]:
try:
    # Select the GPX points that fall inside the Singapore bounding box, snap
    # them to a 10-unit grid in EPSG:3857 (Web Mercator, metres) and return the
    # snapped positions as WGS84 lat/lon, ordered by timestamp.
    query = """
        WITH sg_run AS (
            SELECT
                ST_SimplifyPreserveTopology(ST_Transform(location::geometry, 3857), 10) AS location,
                timestamp,
                filename
            FROM 
                gpx_points
            WHERE ST_Within( -- within Singapore
                location::geometry,
                ST_MakeEnvelope(
                    103.59, -- min lon
                    1.13,   -- min lat
                    104.04, -- max lon
                    1.47,   -- max lat
                    4326
                )
            )
            ORDER BY timestamp
        )


        SELECT DISTINCT
          filename,
          timestamp,
          ST_Y(ST_Transform(ST_SnapToGrid(ST_Transform(location::geometry, 3857), 10, 10), 4326)) AS lat,
          ST_X(ST_Transform(ST_SnapToGrid(ST_Transform(location::geometry, 3857), 10, 10), 4326)) AS lon
        FROM
          sg_run a
        ORDER BY timestamp
        
    """

    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()

    # Column names mirror the SELECT list of the outer query, in order.
    df = pd.DataFrame(rows, columns=[
        "filename",
        "timestamp",
        "lat",
        "lon",
    ])

except Exception:
    # Reset the aborted transaction so the connection stays usable, then
    # re-raise: swallowing the error would let the following cells run against
    # a missing or stale `df` left over from an earlier execution.
    conn.rollback()
    raise

In [87]:
# Show the query result as the cell output.
df

Unnamed: 0,filename,timestamp,lat,lon
0,11398719881.gpx,2024-05-12 22:36:55,1.309181,103.895912
1,11398719881.gpx,2024-05-12 22:36:56,1.309181,103.895912
2,11398719881.gpx,2024-05-12 22:36:57,1.309181,103.895912
3,11398719881.gpx,2024-05-12 22:36:58,1.309270,103.895912
4,11398719881.gpx,2024-05-12 22:36:59,1.309270,103.895912
...,...,...,...,...
2976,11398719881.gpx,2024-05-12 23:26:31,1.309719,103.895553
2977,11398719881.gpx,2024-05-12 23:26:32,1.309719,103.895553
2978,11398719881.gpx,2024-05-12 23:26:33,1.309719,103.895553
2979,11398719881.gpx,2024-05-12 23:26:34,1.309719,103.895642


In [105]:
def calculate_distances(df):
    """Return the cumulative geodesic distance, in metres, along the rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``lat`` and ``lon`` columns. Rows are walked in their
        current order, regardless of what the index labels are.

    Returns
    -------
    list
        One running total per row. The first entry is ``0``; an empty frame
        yields an empty list, so the result can always be assigned back to
        ``df`` as a column.
    """
    if len(df) == 0:
        return []

    # Work on positional arrays: label-based ``df.loc[i]`` raises (or picks the
    # wrong rows) as soon as the frame no longer has a default 0..n-1 index,
    # e.g. after filtering or sorting.
    lats = df['lat'].to_numpy()
    lons = df['lon'].to_numpy()

    distances = [0]
    cumulative_distance = 0
    for i in range(1, len(df)):
        point1 = (lats[i - 1], lons[i - 1])
        point2 = (lats[i], lons[i])
        cumulative_distance += geodesic(point1, point2).meters
        distances.append(cumulative_distance)
    return distances

In [106]:
# Store the running distance (metres, from calculate_distances) as a new
# 'distance' column; this modifies df in place.
df['distance'] = calculate_distances(df)

In [107]:
# node_id: 1-based group number of each distinct (lat, lon) pair.
# way_id: which whole multiple of 1000 the cumulative distance has reached.
df['node_id'] = df.groupby(['lat', 'lon']).ngroup().add(1)
df['way_id'] = df['distance'].floordiv(1000).astype(int)

In [108]:
# Show the frame again, now including the distance, node_id and way_id columns.
df

Unnamed: 0,filename,timestamp,lat,lon,distance,node_id,way_id
0,11398719881.gpx,2024-05-12 22:36:55,1.309181,103.895912,0.000000,723,0
1,11398719881.gpx,2024-05-12 22:36:56,1.309181,103.895912,0.000000,723,0
2,11398719881.gpx,2024-05-12 22:36:57,1.309181,103.895912,0.000000,723,0
3,11398719881.gpx,2024-05-12 22:36:58,1.309270,103.895912,9.930515,737,0
4,11398719881.gpx,2024-05-12 22:36:59,1.309270,103.895912,9.930515,737,0
...,...,...,...,...,...,...,...
2985,11398719881.gpx,2024-05-12 23:03:38,1.299122,103.873723,12608.588457,26,12
2986,11398719881.gpx,2024-05-12 23:09:23,1.300559,103.876957,13002.008606,123,13
2987,11398719881.gpx,2024-05-12 23:14:14,1.304600,103.881090,13643.248079,331,13
2988,11398719881.gpx,2024-05-12 23:18:28,1.308103,103.886480,14357.256546,558,14


In [110]:
def _collapse_repeats(node_ids):
    """Return ``node_ids`` as a list with consecutive duplicates removed."""
    collapsed = []
    for node_id in node_ids:
        if not collapsed or collapsed[-1] != node_id:
            collapsed.append(node_id)
    return collapsed


# Root <osm> element with the header attributes of an OSM 0.6 document.
osm = ET.Element("osm")
osm.set("version", "0.6")
osm.set("generator", "CGImap 0.9.2 (596732 spike-08.openstreetmap.org)")
osm.set("copyright", "OpenStreetMap and contributors")
osm.set("attribution", "http://www.openstreetmap.org/copyright")
osm.set("license", "http://opendatacommons.org/licenses/odbl/1-0/")

# One <node> element per distinct node id, emitted in ascending id order.
nodes_df = (
    df
    .drop_duplicates(subset=['node_id', 'lat', 'lon'])
    .sort_values(by='node_id')
)

for node_id, lat, lon in zip(nodes_df['node_id'], nodes_df['lat'], nodes_df['lon']):
    ET.SubElement(
        osm,
        "node",
        id=str(node_id),
        lat=str(lat),
        lon=str(lon),
        visible="true",
        version="1",
    )

# Tags attached to every generated way.
tags = [
    {"k": "highway", "v": "footway"},
    {"k": "footway", "v": "sidewalk"},
]

# Build each way from `df` in row order (the same order the cumulative distance
# was accumulated in). Building from `nodes_df` would list the nodes sorted by
# node id and drop nodes already seen in an earlier way, so the way would not
# follow the recorded path.
ways_df = df.groupby('way_id')['node_id'].agg(list).reset_index()
ways_df['node_id'] = ways_df['node_id'].map(_collapse_repeats)

# Connector ways join the end of one way to the start of the next, once in each
# direction. Their ids start AFTER the largest real way id so that no two <way>
# elements share an id.
way_connections = []
way_connection_id = df['way_id'].max() + 1

for prev_nodes, cur_nodes in zip(ways_df['node_id'], ways_df['node_id'].iloc[1:]):
    tail, head = prev_nodes[-1], cur_nodes[0]
    if tail == head:
        # The two ways already share this node; a connector would be the
        # degenerate way [a, a].
        continue
    for node_pair in ([tail, head], [head, tail]):
        way_connections.append({
            'way_id': way_connection_id,
            'node_id': node_pair,
        })
        way_connection_id += 1

way_connections_df = pd.DataFrame(way_connections)
if way_connections:
    # Skip the concat when there is nothing to add; concatenating an empty,
    # column-less frame is unnecessary.
    ways_df = pd.concat([ways_df, way_connections_df], ignore_index=True)

for way_id, node_ids in zip(ways_df['way_id'], ways_df['node_id']):
    way = ET.SubElement(
        osm,
        "way",
        id=str(way_id),
        visible="true",
        version="1",
    )

    for node_id in node_ids:
        ET.SubElement(way, "nd", ref=str(node_id))

    for tag_data in tags:
        ET.SubElement(way, "tag", k=tag_data["k"], v=tag_data["v"])

# Serialise once, after the whole tree is built. Doing this inside the loop
# re-rendered the document for every way and left `xml_str` undefined when
# there were no ways at all.
xml_str = minidom.parseString(ET.tostring(osm)).toprettyxml(indent="  ")

# Explicit encoding so the output does not depend on the platform default.
with open(osm_file, "w", encoding="utf-8") as f:
    f.write(xml_str)