In [4]:
#Imports
import pandas as pd  # noqa: F401
import numpy as np  # noqa: F401
import duckdb  # noqa: F401
import matplotlib.pyplot as plt # noqa: F401
from geopy.distance import geodesic
from datetime import datetime, timedelta
import os # noqa: F401
import tqdm 
import folium
from folium.plugins import AntPath
from folium.plugins import HeatMap

# Generating

In [None]:
df = pd.read_parquet('sampled_data/20230327.parquet', columns= ['deviceid', 'date', 'time', 'lon', 'lat'])

df['datetime'] = pd.to_datetime(df['date'].astype(str) + ' ' + df['time'].astype(str), format='%d.%m.%Y %H:%M:%S')

df = df.sort_values(by=['deviceid', 'datetime']).reset_index(drop=True)

# Thresholds
D_thres = 200  # meters

# DISTRIBUTION OF SPEEDS (SAME? FOR THE ZONES?).
# REMOVE OUTLIERS -> OPEN MOBILE CELL TOWER NETWORK PUBLIC MAP.
# DIFFERENT ALGS INSIDE TOWERS, (POSITION INSIDE OR CLOSE THE CELL TOWER).
# FOR EVERY POSITION, DISTANCE TO 3 CLOSETS TOWERS.
# BINS IN ZONES AREAS OF ZONES (MIN, MAX, AVG) (DISCRETIZE THE SPACE BY THE TYPE OF MOVEMENT).
# FURTHER REFINING. -> REMOVING THE NOISE.
# NOISE OF THE SYSTEM IS ON ANOTHER LEVEL.
# PEOPLE WHO START ON A ZONE THEN TRAVEL, DISTANCES, SPEEDS...
# DEPENDING ON THE LOCATION YOU CAN ASSUME THERE ARE MODES OF TRANSPORTATION.

T_thres = timedelta(minutes=20)
stay_points = []

# Process per device
for device_id, group in tqdm.tqdm(df.groupby('deviceid'), desc="Processing devices"):
    points = group[['datetime', 'lat', 'lon']].values
    i = 0
    while i < len(points):
        j = i + 1
        while j < len(points):
            dist = geodesic((points[i][1], points[i][2]), (points[j][1], points[j][2])).meters
            if dist > D_thres:
                delta_t = points[j][0] - points[i][0]
                if delta_t > T_thres:
                    lat_mean = group.iloc[i:j]['lat'].mean()
                    lon_mean = group.iloc[i:j]['lon'].mean()
                    stay_points.append({
                        'deviceid': device_id,
                        'arrival_time': points[i][0],
                        'leave_time': points[j][0],
                        'stay_lat': lat_mean,
                        'stay_lon': lon_mean,
                        'duration_min': delta_t.total_seconds() / 60
                    })
                break
            j += 1
        i = j

stay_df = pd.DataFrame(stay_points)

stay_df.to_parquet('stay_points.parquet', index=False)

## Mapping

In [27]:
def add_line_from_to(initial:list[int, int], final:list[int, int], m:folium.Map):
    # Use the folium AntPath plugin to draw a line between two points
    AntPath(
        locations=[initial, final],
        dash_array=[20, 20],
        delay=1000,
        color="#A00000",
        pulse_color="#A00000",
        weight=5,
        tooltip="From start to finish"
    ).add_to(m)
    
stay_points = pd.read_parquet('stay_points.parquet')
unique_devices = stay_points['deviceid'].unique()
unique_devices = unique_devices[:10]  # Limit to 10 devices for demonstration
os.makedirs('maps', exist_ok=True)
for device in unique_devices:
    device_df = stay_points[stay_points['deviceid'] == device]
    m = folium.Map(location=[device_df['stay_lat'].mean(), device_df['stay_lon'].mean()])
    for i in range(len(device_df)-1):
        initial = (device_df.iloc[i]['stay_lat'], device_df.iloc[i]['stay_lon'])
        final = (device_df.iloc[i+1]['stay_lat'], device_df.iloc[i+1]['stay_lon'])
        add_line_from_to(initial, final, m)
    m.save(f'maps/{device}.html')
def mapping(stay_points:pd.DataFrame) -> None:
    heat_map_geo = folium.Map(location=[stay_points['stay_lat'].mean(), stay_points['stay_lon'].mean()])
    heat_data = [[row['stay_lat'], row['stay_lon']] for index, row in stay_points.iterrows()]

    HeatMap(heat_data).add_to(heat_map_geo)
    heat_map_geo.save('maps/heat_map.html')

    heat_map_temp = folium.Map(location=[stay_points['stay_lat'].mean(), stay_points['stay_lon'].mean()])
    heat_data_temp = [[row['stay_lat'], row['stay_lon'], row['duration_min']] for index, row in stay_points.iterrows()]
    HeatMap(heat_data_temp, radius=15).add_to(heat_map_temp)
    heat_map_temp.save('maps/heat_map_temp.html')



In [28]:
import geopandas as gpd
url = 'maps/minimalist_coning.geojson'
# Add polygons to the map
gdf = gpd.read_file(url)
stay_gdf = gpd.GeoDataFrame(
    stay_points,
    geometry=gpd.points_from_xy(stay_points['stay_lon'], stay_points['stay_lat']),
    crs="EPSG:4326"
)
if gdf.crs != "EPSG:4326":
    gdf = gdf.to_crs(epsg=4326)

joined = gpd.sjoin(stay_gdf, gdf[['ID', 'geometry']], how="left", predicate="within")

# Rename the matched polygon ID as 'zone'
joined = joined.rename(columns={'ID': 'zone'})

# Now you have stay_points with an additional 'zone' column
stay_points_with_zone = joined.drop(columns=['geometry', 'index_right'])

display(stay_points_with_zone)

m = folium.Map(location=[46.2,14.5], zoom_start=12)

folium.GeoJson(
    gdf,
).add_to(m)

# Save the map to an HTML file
m.save('maps/minimalist_coning_map.html')

Unnamed: 0,deviceid,arrival_time,leave_time,stay_lat,stay_lon,duration_min,zone
0,00004ffb429cf710edc0412030866c3352e240919b399b...,2023-03-27 09:43:15,2023-03-27 10:07:35,46.249851,14.358830,24.333333,403
1,00009f53f961b9d0b605fdad2c5591a5c9daa972029123...,2023-03-27 02:58:20,2023-03-27 06:17:38,46.063030,14.577990,199.300000,1784
2,00009f53f961b9d0b605fdad2c5591a5c9daa972029123...,2023-03-27 07:30:30,2023-03-27 08:31:24,46.053051,14.504079,60.900000,468
3,00009f53f961b9d0b605fdad2c5591a5c9daa972029123...,2023-03-27 08:46:53,2023-03-27 10:09:02,46.129570,14.558150,82.150000,1685
4,00009f53f961b9d0b605fdad2c5591a5c9daa972029123...,2023-03-27 10:19:02,2023-03-27 10:50:18,46.053051,14.504080,31.266667,468
...,...,...,...,...,...,...,...
2098163,ffffd05c6bad1e20dea241138288bc056615d34aceab4d...,2023-03-27 06:38:05,2023-03-27 07:21:48,46.239681,14.369960,43.716667,401
2098164,ffffd05c6bad1e20dea241138288bc056615d34aceab4d...,2023-03-27 07:21:48,2023-03-27 09:33:49,46.242779,14.375050,132.016667,397
2098165,ffffd05c6bad1e20dea241138288bc056615d34aceab4d...,2023-03-27 09:33:49,2023-03-27 10:24:33,46.243881,14.370830,50.733333,397
2098166,ffffd05c6bad1e20dea241138288bc056615d34aceab4d...,2023-03-27 10:26:27,2023-03-27 13:23:23,46.248562,14.371650,176.933333,396
