# Road Snapping using OSRM

In [2]:
import requests
import pandas as pd
from datetime import datetime
from scripts.MapVisualization import MapVisualization

In [12]:
person161_kalman = pd.read_csv("data/kalman_filtered/kalman_person161_2007-04-27.csv")

In [4]:
# def format_df(df):
#     df['cst_datetime'] = pd.to_datetime(df['cst_datetime'])
#     df['unix_time'] = (df['cst_datetime'].astype(int) / 1e9).astype(int) # convert to Unix for OSRM api
    
#     # Create the request URL for OSRM Match service
#     coordinates = ';'.join([f"{long},{lat}" for long, lat in zip(df['long'], df['lat'])])
#     timestamps = ';'.join(df['unix_time'].astype(str))

#     print(f'coord:\n{coordinates}')

#     request_url = f"http://router.project-osrm.org/match/v1/driving/{coordinates}?steps=true&geometries=geojson&annotations=true&overview=full&timestamps={timestamps}"

#     # Make the request
#     response = requests.get(request_url)
#     if response.status_code == 200:
#         match_data = response.json()
#         print("Matched data:", match_data)
#     else:
#         print("Failed to retrieve data:", response.status_code)

In [5]:
import json

def send_osrm_request(coordinates, timestamps):
    """Create the request URL and make a GET request to the OSRM Match service."""
    coord_string = ';'.join([f"{long},{lat}" for long, lat in coordinates])
    time_string = ';'.join(map(str, timestamps))

    print(f'coords_string:\n{coord_string}\n\ntimestamps_string:\n{time_string}')

    request_url = f"http://router.project-osrm.org/match/v1/walking/{coord_string}?steps=true&geometries=geojson&annotations=true&overview=full&timestamps={time_string}"

    response = requests.get(request_url)
    return response

def parse_osrm_response(response):
    """Extract data from OSRM response and return as a list of dictionaries."""
    data = []
    for match in response.get('matchings', []):
        for leg in match['legs']:
            for step in leg['steps']:
                point = {
                    'latitude': step['maneuver']['location'][1],
                    'longitude': step['maneuver']['location'][0],
                    'confidence': match['confidence'],
                    'name': step['name'],
                    'distance': step['distance'],
                    'duration': step['duration']
                }
                data.append(point)
    return data

def send_batch_req(df, batch_size=100):
    """Process data in batches, sending each batch to the OSRM Match service."""
    df['cst_datetime'] = pd.to_datetime(df['cst_datetime'])
    df['unix_time'] = (df['cst_datetime'].astype(int) / 1e9).astype(int)  # convert to Unix for OSRM API
    all_responses = []

    for start in range(0, len(df), batch_size):
        end = start + batch_size
        batch_df = df.iloc[start:end]
        coordinates = zip(batch_df['long'], batch_df['lat'])
        timestamps = batch_df['unix_time'].values

        response = send_osrm_request(coordinates, timestamps)
        if response.status_code == 200:
            match_data = response.json()
            
            # Write match_data to a JSON file
            with open(f'match_data_batch_{start//batch_size + 1}.json', 'w') as f:
                json.dump(match_data, f)

            parsed_data = parse_osrm_response(match_data)
            all_responses.extend(parsed_data)
            print(f"Batch {start//batch_size + 1} matched successfully.")
        else:
            print(f"Failed to retrieve data for batch {start//batch_size + 1}: {response.status_code}")

    response_df = pd.DataFrame(all_responses)
    return response_df

In [6]:
# def snap_to_road(gps_df):
#     """
#     Send the data to the OSRM Match service and return the matched data.
#     @param:
#         - gps_df: DataFrame containing 'lat', 'long', and 'cst_datetime' columns
#     @return:
#         - json response from the OSRM Match service
#     """

#     # Format time to Unix for request URL
#     gps_df['cst_datetime'] = pd.to_datetime(gps_df['cst_datetime'])
#     gps_df['unix_time'] = (gps_df['cst_datetime'].astype(int) / 1e9).astype(int)  # convert to Unix for OSRM API
#     unix_values = gps_df['unix_time'].values
#     time_string = ';'.join(map(str, unix_values))

#     # Format long/lat values for request URL
#     # long_values = gps_df['long'].values
#     # lat_values = gps_df['lat'].values
#     zipped_coords = zip(gps_df['long'], gps_df['lat'])
#     coord_string = ';'.join([f"{long},{lat}" for long, lat in zipped_coords])
    
#     print(f'coord_string:\n{coord_string}\n\ntime_string:\n{time_string}')

#     # Send the request to the OSRM Match service
#     request_url = f"http://router.project-osrm.org/match/v1/driving/{coord_string}?steps=true&geometries=geojson&annotations=true&overview=full&timestamps={time_string}"
#     response = requests.get(request_url)

#     match_data = None
#     if response.status_code == 200:
#         match_data = response.json()
#         # print("Matched data:", match_data)
#     else:
#         print("Failed to retrieve data:", response.status_code)
        
#     return match_data

# person161_roadsnap_json = snap_to_road(person161_kalman)


In [7]:
p161_osrm_responses = send_batch_req(person161_kalman)

coords_string:
116.31575,39.9261166666667;116.315966666667,39.9260666666667;116.3162,39.926;116.316416666667,39.926;116.316666666667,39.9262;116.316783333333,39.9263833333333;116.317116666667,39.9263833333333;116.317333333333,39.9264333333333;116.317516666667,39.9264;116.3177,39.9263166666667;116.317983333333,39.9261833333333;116.318216666667,39.92615;116.318416666667,39.92615;116.3186,39.9261166666667;116.31945,39.9261333333333;116.319766666667,39.9267333333333;116.319816666667,39.9269;116.31975,39.9270666666667;116.3197,39.92725;116.319933333333,39.9272833333333;116.320333333333,39.92765;116.320366666667,39.9278333333333;116.3203,39.9282833333333;116.320366666667,39.9284666666667;116.3203,39.9287666666667;116.320366666667,39.9295333333333;116.320316666667,39.92975;116.3203,39.9299166666667;116.3206,39.93;116.32095,39.9298833333333;116.3212,39.92985;116.321383333333,39.9297833333333;116.321416666667,39.93;116.321583333333,39.9301333333333;116.321333333333,39.93075;116.321233333333,39.

In [8]:
p161_osrm_responses.to_csv('data/osrm/person161_osrm.csv', index=False)
p161_osrm_responses

Unnamed: 0,latitude,longitude,confidence,name,distance,duration
0,39.926732,116.319843,1.208043,首都体育馆南路,18.7,3.1
1,39.926900,116.319842,1.208043,首都体育馆南路,0.0,0.0
2,39.926900,116.319842,1.208043,首都体育馆南路,18.7,1.0
3,39.927068,116.319839,1.208043,首都体育馆南路,0.0,0.0
4,39.927068,116.319839,1.208043,首都体育馆南路,20.3,1.2
...,...,...,...,...,...,...
133,39.976884,116.328984,0.000000,星规路,93.4,10.4
134,39.976920,116.330076,0.000000,,183.6,26.5
135,39.975353,116.330435,0.000000,,11.8,4.2
136,39.975247,116.330439,0.000000,知春路,37.6,3.4


In [9]:
import folium
from folium.plugins import HeatMap
from folium.plugins import MousePosition

def plot_osrm_heatmap(gps_df, original_df):
    folium_map = folium.Map(location=[gps_df['latitude'].mean(), gps_df['longitude'].mean()], zoom_start=14)
    HeatMap(data=gps_df[['latitude', 'longitude']],
            gradient={0.4: 'blue', 0.65: 'cyan'},  
            blur=6, 
            radius=10
            ).add_to(folium_map)

    HeatMap(data=original_df[['lat', 'long']], 
            gradient={0.4: 'yellow', 0.8: 'red'}, 
            blur=6, 
            radius=8
            ).add_to(folium_map)
    MousePosition().add_to(folium_map)
    return folium_map

plot_osrm_heatmap(p161_osrm_responses, person161_kalman)

In [10]:
person161_kalman

Unnamed: 0,person,lat,long,zero,altitude,date_numb_days,date,time,cst_datetime,cst_weekday,lat_filtered,long_filtered,unix_time
0,161,39.926117,116.315750,0,646.325459,39199.088542,2007-04-27,02:07:30,2007-04-27 10:07:30+08:00,4,39.926099,116.315855,1177639650
1,161,39.926067,116.315967,0,646.325459,39199.088762,2007-04-27,02:07:49,2007-04-27 10:07:49+08:00,4,39.926067,116.316004,1177639669
2,161,39.926000,116.316200,0,646.325459,39199.088970,2007-04-27,02:08:07,2007-04-27 10:08:07+08:00,4,39.926034,116.316208,1177639687
3,161,39.926000,116.316417,0,646.325459,39199.089144,2007-04-27,02:08:22,2007-04-27 10:08:22+08:00,4,39.926061,116.316422,1177639702
4,161,39.926200,116.316667,0,646.325459,39199.089387,2007-04-27,02:08:43,2007-04-27 10:08:43+08:00,4,39.926198,116.316644,1177639723
...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,161,39.975283,116.330000,0,157.480315,39199.115856,2007-04-27,02:46:50,2007-04-27 10:46:50+08:00,4,39.975444,116.330049,1177642010
108,161,39.976133,116.330733,0,147.637795,39199.116065,2007-04-27,02:47:08,2007-04-27 10:47:08+08:00,4,39.975879,116.330528,1177642028
109,161,39.975900,116.330750,0,150.918635,39199.116157,2007-04-27,02:47:16,2007-04-27 10:47:16+08:00,4,39.975867,116.330712,1177642036
110,161,39.975767,116.330950,0,177.165354,39199.116366,2007-04-27,02:47:34,2007-04-27 10:47:34+08:00,4,39.975795,116.330841,1177642054


In [11]:
def format_json_file(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)

    formatted_data = json.dumps(data, indent=4)

    with open(file_path, 'w') as f:
        f.write(formatted_data)

# Call the function with the path to your JSON file
format_json_file('match_data_batch_1.json')

## Locally hosted matching attempts

In [2]:
import pandas as pd
import requests
from scripts.KalmanFilter import filter_person_and_date, time_segmentation, kalman_filtering

In [3]:
all_plt_data = pd.read_csv('../flask-app/static/data/all_plt_data.csv')

In [7]:
person161 = filter_person_and_date(all_plt_data, 161, '2007-04-27')
person161_segments = time_segmentation(person161, 60)

# person161_segments[0]

def format_for_osrm(df):
    # Convert datetime to Unix timestamp (seconds since epoch)
    df['unix_time'] = pd.to_datetime(df['cst_datetime']).astype('int64') // 10**9
    coords = ';'.join(f"{lon},{lat}" for lon, lat in zip(df['long'], df['lat']))
    timestamps = ';'.join(df['unix_time'].astype(str))
    return coords, timestamps

def send_osrm_match_request(coords, timestamps):
    url = f"http://127.0.0.1:9000/match/v1/foot/{coords}?steps=true&geometries=geojson&annotations=true&overview=full&timestamps={timestamps}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()  # Returns JSON directly
    else:
        # Raises an exception with the error status and message
        raise Exception(f"Request failed with status {response.status_code}: {response.text}")



# coords, timestamps = format_for_osrm(person161_segments[0])
# match_response = send_osrm_match_request(coords, timestamps)

# print(match_response)

Split indices: [0, 19, 25, 36, 37, 53, 92, 111]
Segment 0: 19 rows
Segment 1: 6 rows
Segment 2: 11 rows
Segment 3: 1 rows
Segment 4: 16 rows
Segment 5: 39 rows
Segment 6: 19 rows


In [9]:
import folium
from folium.plugins import MousePosition
import json
from shapely.geometry import shape
from scripts.MapVisualization import MapVisualization

def parse_osrm_match_response(response):
    # Extract the route geometries from the matchings
    matchings = response.get('matchings', [])
    road_snapped_coords = []

    for match in matchings:
        if 'geometry' in match:
            geom = shape(match['geometry'])  # Convert GeoJSON to Shapely geometry
            road_snapped_coords.extend(list(geom.coords))

    # Create DataFrame for the new coordinates
    snapped_df = pd.DataFrame(road_snapped_coords, columns=['snapped_long', 'snapped_lat'])
    return snapped_df

def add_vanilla_coords(map, original_df):
    folium.PolyLine(locations=original_df[['lat', 'long']], 
                    color="#3480eb",
                    weight=10, 
                    tooltip="Original GPS data").add_to(map)
    return map

def add_snapped_coords(map, snapped_df):
    folium.PolyLine(locations=snapped_df[['snapped_lat', 'snapped_long']], 
                    color="#FF0000",
                    weight=3, 
                    tooltip="Road snapped GPS data").add_to(map)
    return map


# Snap all segments to a road
snapped_dfs = []
for segment in person161_segments:
    coords, timestamps = format_for_osrm(segment)
    try:
        match_response = send_osrm_match_request(coords, timestamps)
        snapped_df = parse_osrm_match_response(match_response)
        snapped_dfs.append(snapped_df)
    except Exception as e:
        print(f"Failed to process segment: {e}")

# Plot the snapped dfs compared to original ones
folium_map = folium.Map(location=[person161['lat'].mean(), 
                                  person161['long'].mean()])

for person161_segment in person161_segments:
    folium.PolyLine(locations=person161_segment[['lat', 'long']], 
                    color="#3480eb",
                    weight=10, 
                    tooltip="Original GPS data").add_to(folium_map)

for snapped_df in snapped_dfs:
    # Convert DataFrame columns to list of [lat, long] pairs
    snapped_coord_list = snapped_df[['snapped_lat', 'snapped_long']].values.tolist()
    
    # Add polyline to the map
    folium.PolyLine(
        locations=snapped_coord_list,
        color="#FF0000",
        weight=3,
        tooltip="Road snapped GPS data"
    ).add_to(folium_map)

MousePosition(position="topright").add_to(folium_map)
folium_map.fit_bounds([[snapped_dfs[0]['snapped_lat'].min(), snapped_dfs[0]['snapped_long'].min()], 
                       [snapped_dfs[0]['snapped_lat'].max(), snapped_dfs[0]['snapped_long'].max()]])
folium_map

Failed to process segment: Request failed with status 400: {"message":"Could not match the trace.","code":"NoMatch"}
Failed to process segment: Request failed with status 400: {"message":"Number of coordinates needs to be at least two.","code":"InvalidOptions"}


## Note: Match service size limit
Can't send too many trace coordinates at once

In [10]:
coords, timestamps = format_for_osrm(person161)
match_response = send_osrm_match_request(coords, timestamps)
snapped_df = parse_osrm_match_response(match_response)

# Create a folium map
folium_map = folium.Map(location=[person161['lat'].mean(), person161['long'].mean()], zoom_start=14)

# Add the original GPS data to the map
folium.PolyLine(locations=person161[['lat', 'long']], 
                    color="#3480eb",
                    weight=10, 
                    tooltip="Original GPS data").add_to(folium_map)

# Add the road snapped GPS data to the map
folium.PolyLine(locations=snapped_df[['snapped_lat', 'snapped_long']], 
                    color="#FF0000",
                    weight=3, 
                    tooltip="Road snapped GPS data").add_to(folium_map)

MousePosition(position="topright").add_to(folium_map)
folium_map.fit_bounds([[snapped_df['snapped_lat'].min(), snapped_df['snapped_long'].min()],
                       [snapped_df['snapped_lat'].max(), snapped_df['snapped_long'].max()]])
folium_map

Exception: Request failed with status 400: {"message":"Too many trace coordinates","code":"TooBig"}

In [28]:
person161_segments[0]

Unnamed: 0,person,lat,long,zero,altitude,date_numb_days,date,time,cst_datetime,cst_weekday,time_diff,unix_time
0,161,39.926117,116.31575,0,646.325459,39199.088542,2007-04-27,02:07:30,2007-04-27 10:07:30+08:00,4,,1177639650
1,161,39.926067,116.315967,0,646.325459,39199.088762,2007-04-27,02:07:49,2007-04-27 10:07:49+08:00,4,19.0,1177639669
2,161,39.926,116.3162,0,646.325459,39199.08897,2007-04-27,02:08:07,2007-04-27 10:08:07+08:00,4,18.0,1177639687
3,161,39.926,116.316417,0,646.325459,39199.089144,2007-04-27,02:08:22,2007-04-27 10:08:22+08:00,4,15.0,1177639702
4,161,39.9262,116.316667,0,646.325459,39199.089387,2007-04-27,02:08:43,2007-04-27 10:08:43+08:00,4,21.0,1177639723
5,161,39.926383,116.316783,0,646.325459,39199.089502,2007-04-27,02:08:53,2007-04-27 10:08:53+08:00,4,10.0,1177639733
6,161,39.926383,116.317117,0,646.325459,39199.089826,2007-04-27,02:09:21,2007-04-27 10:09:21+08:00,4,28.0,1177639761
7,161,39.926433,116.317333,0,646.325459,39199.09,2007-04-27,02:09:36,2007-04-27 10:09:36+08:00,4,15.0,1177639776
8,161,39.9264,116.317517,0,646.325459,39199.090104,2007-04-27,02:09:45,2007-04-27 10:09:45+08:00,4,9.0,1177639785
9,161,39.926317,116.3177,0,646.325459,39199.090243,2007-04-27,02:09:57,2007-04-27 10:09:57+08:00,4,12.0,1177639797
