In [None]:
# GPS Geospatial Analysis
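**Objective:** clean the GPS points, visualize the route, and compute simple per-segment analytics (distance and speed between consecutive points).

**Data:** `../assets/data/gps_sample.csv` (must contain `timestamp`, `latitude`, and `longitude` columns).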


In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt

# Read the GPS sample (timestamps parsed as datetimes) and discard any row
# that is missing a coordinate.
df = (
    pd.read_csv('../assets/data/gps_sample.csv', parse_dates=['timestamp'])
    .dropna(subset=['latitude', 'longitude'])
)
df.head()


In [None]:
# Create a GeoDataFrame of point geometries in WGS84 (EPSG:4326).
# points_from_xy builds the geometry column in one vectorised call; note the
# argument order is x=longitude, y=latitude.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['longitude'], df['latitude']),
    crs="EPSG:4326",
)
gdf.to_file("gps_points.geojson", driver="GeoJSON")  # optional export

# GeoDataFrame.plot returns the Axes it drew on; label it directly rather than
# going through pyplot's implicit "current axes".
ax = gdf.plot(figsize=(10,6))
ax.set_title("GPS Points - Route")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()


In [None]:
# simple segmenting: compute distance between consecutive points (great-circle approx via haversine)
import numpy as np

def haversine(lat1, lon1, lat2, lon2, radius_km=6371):
    """Great-circle distance between two points using the haversine formula.

    Works element-wise: each coordinate may be a scalar, a list/tuple, a NumPy
    array, or a pandas Series (anything NumPy ufuncs accept), as long as the
    four inputs broadcast against each other.

    Parameters
    ----------
    lat1, lon1 : latitude / longitude of the start point(s), in degrees.
    lat2, lon2 : latitude / longitude of the end point(s), in degrees.
    radius_km : sphere radius in kilometres; defaults to 6371 (mean Earth radius).

    Returns
    -------
    Distance(s) in kilometres, shaped like the broadcast inputs.
    """
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    # Subtract in degrees first (as ufuncs, so plain lists work too) and convert
    # afterwards; this keeps precision for the tiny deltas between GPS fixes.
    dphi = np.radians(np.subtract(lat2, lat1))
    dlambda = np.radians(np.subtract(lon2, lon1))
    a = np.sin(dphi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(dlambda / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal
    # points), which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    return 2 * radius_km * np.arcsin(np.sqrt(a))

# Pair every fix with its predecessor. All "previous" values -- including the
# time delta -- are taken BEFORE the first row is dropped; computing the time
# delta afterwards would leave the first kept row without a predecessor and
# give it a zero interval (and therefore an infinite speed).
# NOTE: this cell reassigns `df`, so re-running it on its own drops one more row;
# re-run from the load cell instead.
df['prev_lat'] = df['latitude'].shift()
df['prev_lon'] = df['longitude'].shift()
df['time_diff_s'] = df['timestamp'].diff().dt.total_seconds()

# The first row has no predecessor. `.copy()` makes the result an independent
# frame so the column assignments below do not write into a slice.
df = df.dropna(subset=['prev_lat']).copy()

# haversine is built from NumPy ufuncs, so whole columns can be passed at once
# instead of applying it row by row.
df['dist_km'] = haversine(df['prev_lat'], df['prev_lon'], df['latitude'], df['longitude'])

# Speed is undefined when no time elapsed (duplicate or out-of-order
# timestamps): mask those intervals to NaN rather than dividing by zero.
elapsed_h = df['time_diff_s'].where(df['time_diff_s'] > 0) / 3600
df['computed_speed_kmph'] = df['dist_km'] / elapsed_h
df[['timestamp','latitude','longitude','dist_km','computed_speed_kmph']].head()


In [None]:
# Time series of the per-segment speed estimate.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(df['timestamp'], df['computed_speed_kmph'])
ax.set(
    xlabel="Timestamp",
    ylabel="Speed (km/h)",
    title="Computed Speed Over Time",
)
plt.show()


In [None]:
## Insights
- **TODO:** report the average speed, the number of stops, and any anomalous segments (e.g. implausibly high computed speeds).
