In [1]:
import pandas as pd
import numpy as np
import multiprocessing as mp
from math import radians, sin, cos, sqrt, atan2
import folium
from folium.plugins import HeatMap

In [2]:
# Path of the combined trajectory CSV, relative to the working directory.
TRAJECTORIES_CSV = "combined_trajectories.csv"
df = pd.read_csv(TRAJECTORIES_CSV)

In [3]:
# Preview the first five rows of the loaded table.
df.head(n=5)

Unnamed: 0,latitude,longitude,altitude,date,time,individual_id,trajectory_id
0,39.984702,116.318417,492.0,2023-10-23,02:53:04,1,1
1,39.984683,116.31845,492.0,2023-10-23,02:53:10,1,1
2,39.984686,116.318417,492.0,2023-10-23,02:53:15,1,1
3,39.984688,116.318385,492.0,2023-10-23,02:53:20,1,1
4,39.984655,116.318263,492.0,2023-10-23,02:53:25,1,1


In [4]:
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along the surface of the sphere, in kilometres.  NaN
        inputs propagate to a NaN result.
    """
    R = 6371.0  # approximate radius of earth in km

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2

    # Mathematically 0 <= a <= 1, but floating-point rounding can push it a
    # hair above 1 for (near-)antipodal points, or below 0 for out-of-range
    # latitudes, and the square roots below would then raise
    # "math domain error".  Plain comparisons are used so that a NaN `a`
    # is left untouched and still propagates.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c

    return distance



In [5]:
def calculate_total_distance(user_data):
    """Sum the distances between consecutive GPS points of one table.

    Parameters
    ----------
    user_data : pandas.DataFrame
        Table with 'latitude' and 'longitude' columns, in the units
        ``calculate_distance`` expects.  Rows are used in the order given;
        nothing is sorted or split by trajectory here.

    Returns
    -------
    numpy.float64
        Sum of ``calculate_distance`` over every pair of adjacent rows;
        0.0 when the table has fewer than two rows.
    """
    # Pull each column out once as plain Python floats.  Indexing row by row
    # with ``.iloc[i]`` builds a temporary Series on every access, which
    # dominates the runtime on long trajectories.
    lats = user_data['latitude'].tolist()
    lons = user_data['longitude'].tolist()

    # Pair every point with its successor: (p0, p1), (p1, p2), ...
    distances = [
        calculate_distance(lat1, lon1, lat2, lon2)
        for lat1, lon1, lat2, lon2 in zip(lats, lons, lats[1:], lons[1:])
    ]
    return np.sum(distances)

In [6]:
def calculate_all_total_distances(df):
    """Compute the total distance for every individual using a process pool.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an 'individual_id' column plus whatever columns
        ``calculate_total_distance`` reads.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'individual_id' value, with columns
        'individual_id' and 'total_distance'.
    """
    users = df['individual_id'].unique()

    # The ``with`` block tears the worker processes down even when a task
    # raises; without it an exception from ``.get()`` would skip the cleanup
    # and leave the workers running.
    with mp.Pool(mp.cpu_count()) as pool:
        pending = [
            pool.apply_async(
                calculate_total_distance,
                args=(df[df['individual_id'] == user],),
            )
            for user in users
        ]
        # Collect inside the block: leaving it terminates the pool.
        total_distances = [task.get() for task in pending]

    return pd.DataFrame({'individual_id': users, 'total_distance': total_distances})


In [None]:
# Total distance per individual, printed as a plain-text table.
total_distances = calculate_all_total_distances(df=df)
print(total_distances)

In [7]:
# Bounding box around Beijing city: [min lat, min lon, max lat, max lon].
beijing_box = [39.4437, 116.0886, 41.055, 117.5147]
lat_min, lon_min, lat_max, lon_max = beijing_box

# Keep only the GPS points whose coordinates lie inside the box (edges included).
inside_lat = df['latitude'].between(lat_min, lat_max)
inside_lon = df['longitude'].between(lon_min, lon_max)
beijing_df = df[inside_lat & inside_lon]

# Base map centred on Beijing; the heat layer shows how densely the GPS
# points cluster in different regions of the city.
beijing_heatmap = folium.Map(location=[39.9042, 116.4074], zoom_start=10)
heat_points = beijing_df[['latitude', 'longitude']].values
heat_layer = HeatMap(heat_points, radius=10)
heat_layer.add_to(beijing_heatmap)

# Write the map out as an HTML file in the working directory.
beijing_heatmap.save('beijing_heatmap.html')
