In [21]:
import pandas as pd
import numpy as np
import datetime

In [26]:
# Read the raw GPS records and give the columns descriptive names.
# NOTE(review): read_csv treats the first line of the file as a header, and that
# header is then replaced by the names below. If the CSV has no header row, its
# first record is silently lost -- confirm against the file.
gps_column_names = [
    'SupplierID', 'tripID', 'routeID', 'timestamp',
    'sequence number', 'lat', 'lon', 'heading',
    'speed', 'modality', 'accuracy', 'waypoint',
]
row_GPS_df = pd.read_csv('2020_10_01.csv')
row_GPS_df.columns = gps_column_names

# 'timestamp' is kept exactly as read from the file; the line below would turn
# it into datetime objects (presumably from epoch seconds -- TODO confirm).
#row_GPS_df['timestamp'] = row_GPS_df['timestamp'].apply(lambda x: datetime.datetime.fromtimestamp(x))

# Preview the first three records.
row_GPS_df.head(3)

Unnamed: 0,SupplierID,tripID,routeID,timestamp,sequence number,lat,lon,heading,speed,modality,accuracy,waypoint
0,Tracefy,c16de6ff-e7c2-4103-9b44-953a5be48986,0,1601596742,2,51.983225,5.903753,59.0,36.2,2,9.5,
1,Tracefy,c16de6ff-e7c2-4103-9b44-953a5be48986,0,1601596772,3,51.983417,5.909458,77.0,42.7,2,9.5,
2,Tracefy,fd2ff301-5a9b-4416-98aa-f6ecbcb97ab3,0,1601596684,1,52.354713,4.784918,56.0,0.0,2,7.7,


In [23]:
# Build the frame used for trajectory plotting: trip id plus coordinates only.
#
# The rows are first put in chronological order because the trajectory plot
# connects each trip's points in row order; without this, a file that is not
# already time-ordered would produce zig-zagging paths. 'mergesort' is stable,
# so fixes that share a timestamp keep their original file order.
trip_data = (
    row_GPS_df
    .sort_values('timestamp', kind='mergesort')
    .loc[:, ['tripID', 'lat', 'lon']]
    # remove rows with missing values (no trip id or no coordinate)
    .dropna()
)

# get the last 5000 rows of the data (the most recent fixes, given the sort above)
#trip_data = trip_data.tail(5000)

In [24]:
# plot trajectories

import plotly.graph_objects as go

# Build ONE trace per trip instead of a separate markers trace and lines trace:
# this halves the number of traces in the figure and makes a trip's markers and
# its connecting line share the same colour.
trajectory_traces = []
for trip_id, coordinates in trip_data.groupby('tripID')[['lat', 'lon']]:
    # A connecting line only makes sense when the trip has at least two points.
    has_path = len(coordinates) > 1

    trajectory_traces.append(go.Scattermapbox(
        lat=coordinates['lat'],
        lon=coordinates['lon'],
        mode='lines+markers' if has_path else 'markers',
        marker=dict(size=5),
        line=dict(width=1),
        name=trip_id
    ))

# Create the figure from all trip traces at once
fig = go.Figure(data=trajectory_traces)

# Update layout
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=6.5,
    # Center to show the desired region
    mapbox_center=dict(lat=52, lon=5),
    showlegend=False,
    width=800,  # Adjust the width of the plot
    height=600  # Adjust the height of the plot
)

# Save the plot as an HTML fragment: full_html=False writes only the <div>, and
# include_plotlyjs='cdn' loads plotly.js from the CDN instead of embedding it,
# which keeps the file small.
fig.write_html('trajectory_map_test.html', include_plotlyjs='cdn', full_html=False)


In [32]:
# calculate raw OD pairs

def identify_origin_and_destination(dataframe):
    """Find the first and last located GPS fix of every trip.

    Rows are grouped by ``tripID`` and ordered by ``timestamp``; the earliest
    row of a trip is its origin and the latest row its destination. Rows with
    a missing ``lat`` or ``lon`` are ignored, so an origin/destination is
    always a real coordinate; a trip without any located row is left out.
    Rows of a trip that share a timestamp keep their input order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``tripID``, ``timestamp``, ``lat`` and
        ``lon``. The frame itself is not modified.

    Returns
    -------
    origin_list : list of (lat, lon) tuples
        One origin per trip, ordered by sorted ``tripID``.
    destination_list : list of (lat, lon) tuples
        One destination per trip, in the same order as ``origin_list``.
    origin_destination_pairs : list of ((lat, lon), (lat, lon)) tuples
        ``(origin, destination)`` for every trip, in the same order.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    origin_list = []
    destination_list = []
    origin_destination_pairs = []

    # A fix without coordinates cannot serve as an origin or a destination.
    located_fixes = dataframe.dropna(subset=['lat', 'lon'])

    # Sort once for the whole frame instead of once per trip. groupby keeps the
    # row order inside each group, and 'mergesort' is stable, so rows with equal
    # timestamps stay in their input order (deterministic endpoints).
    chronological = located_fixes.sort_values('timestamp', kind='mergesort')

    # Iterate over each trip group (groups come out ordered by tripID)
    for _, trip_fixes in chronological.groupby('tripID'):
        lats = trip_fixes['lat']
        lons = trip_fixes['lon']

        # Origin = earliest fix, destination = latest fix
        origin = (lats.iloc[0], lons.iloc[0])
        destination = (lats.iloc[-1], lons.iloc[-1])

        origin_list.append(origin)
        destination_list.append(destination)
        origin_destination_pairs.append((origin, destination))

    return origin_list, destination_list, origin_destination_pairs


# Compute the origin, destination and (origin, destination) pair of every trip
# from the full GPS frame; the three lists are index-aligned (one entry per trip).
origin_list, destination_list, origin_destination_pairs = identify_origin_and_destination(row_GPS_df)

In [40]:
# Sanity check: show the first trip's origin, destination and OD pair, one per line.
print(
    origin_list[0],
    destination_list[0],
    origin_destination_pairs[0],
    sep='\n',
)

(51.5620133, 5.076075)
(51.5675633, 5.0941933)
((51.5620133, 5.076075), (51.5675633, 5.0941933))


In [56]:
# plot origins and destinations on the map

# Only the first MAX_OD_PAIRS trips are drawn to keep the figure light.
MAX_OD_PAIRS = 1000
od_sample = origin_destination_pairs[:MAX_OD_PAIRS]

# Split the pairs into (lat, lon) points per role. Comprehensions (rather than
# zip(*...)) so that an empty sample simply yields empty lists.
origin_points = [origin for origin, _ in od_sample]
destination_points = [destination for _, destination in od_sample]

# Create an empty figure
fig = go.Figure()

# One trace holding ALL origins and one holding ALL destinations, instead of
# two single-point traces per trip (which produced up to 2000 traces).
fig.add_trace(go.Scattermapbox(
    lat=[lat for lat, _ in origin_points],
    lon=[lon for _, lon in origin_points],
    mode='markers',
    marker=dict(size=10, color='blue', opacity=0.7),
    name="Origin"
))
fig.add_trace(go.Scattermapbox(
    lat=[lat for lat, _ in destination_points],
    lon=[lon for _, lon in destination_points],
    mode='markers',
    marker=dict(size=10, color='red', opacity=0.7),
    name="Destination"
))

# Update layout
fig.update_layout(
    mapbox_style="carto-positron-nolabels",  # Carto Positron basemap without labels
    mapbox_zoom=6.5,
    # Center to show the desired region
    mapbox_center=dict(lat=52, lon=5),
    showlegend=False,
    # width=600,  # Adjusted width
    # height=400  # Adjusted height
)

# Save the plot as an HTML fragment: full_html=False writes only the <div>, and
# include_plotlyjs='cdn' loads plotly.js from the CDN instead of embedding it.
fig.write_html('origin_destination_map.html', include_plotlyjs='cdn', full_html=False)

# show figure
fig.show()