In [1]:
import datetime
import json
import os
import subprocess

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
from tqdm import tqdm

# Show the value of every top-level expression statement in a cell,
# not only the last one (IPython's default is "last_expr").
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Print the kernel's working directory; the relative paths used in this
# notebook are resolved against it.
%pwd

'/home/cseadmin/dz/TrafficFlowModel/data_process/gis_pipeline'

In [2]:
# --- Locations (relative to the notebook's working directory) ---
DATA_PATH = "../../data/"
TAXI_DATA_PATH = "../../data/taxi_after_proc/clean202006"
DATASET = "sz_taxi_202006"

# --- Study-area bounding box in decimal degrees (see `contains`) ---
MIN_LAT, MAX_LAT = 22.5311, 22.5517
MIN_LNG, MAX_LNG = 114.0439, 114.0633

# --- Inclusive range of day numbers to process ---
START_DAY, END_DAY = 1, 30

# --- Temporal parameters ---
DOWNSAMPLING_INTERVAL = 5  # seconds: minimum spacing between kept GPS fixes
TRAJ_SPLIT_INTERVAL = 600  # seconds: a larger gap starts a new trajectory
FLOW_AGG_INTERVAL_MINUTE = 5  # presumably minutes (per the name); not used in this section

def contains(lat, lng):
    """Tell whether a coordinate lies inside the configured bounding box.

    Both edges of each range are inclusive. The limits are read from the
    module-level MIN_LAT / MAX_LAT / MIN_LNG / MAX_LNG constants.
    """
    return (
        MIN_LAT <= lat <= MAX_LAT
        and MIN_LNG <= lng <= MAX_LNG
    )

def notify(msg):
    """Send a best-effort push notification through notify.run.

    The payload is ``"<msg> | <current local timestamp>"`` and is POSTed with
    the ``curl`` command-line tool.

    Args:
        msg: Text to send. It is passed to ``curl`` as a single argument, so
            quotes, ``$`` or backticks in it are transmitted verbatim instead
            of being interpreted by a shell.

    Returns:
        None. Failures are not raised: a non-zero ``curl`` exit status is
        ignored and a missing ``curl`` binary only prints a warning, so a
        long-running cell is never aborted by a failed notification.
    """
    # NOTE(review): the channel id works like a credential. Prefer setting
    # NOTIFY_RUN_CHANNEL in the environment; the literal is kept only as a
    # backward-compatible fallback.
    channel = os.environ.get("NOTIFY_RUN_CHANNEL", "J0budaR2THarZw0OqS5O")
    notify_url = f"https://notify.run/{channel}"
    message = f"{msg} | {datetime.datetime.now()}"
    try:
        # An argv list (no shell) avoids the quoting/injection problems of
        # building a command string around arbitrary message text.
        subprocess.run(["curl", notify_url, "-d", message], check=False)
    except OSError as exc:
        print(f"notify: could not run curl ({exc})")

In [3]:
# Peek at one taxi file (whichever os.listdir returns first) to inspect
# the columns that the cells below rely on.
taxi_files = os.listdir(TAXI_DATA_PATH)
if taxi_files:
    taxi_file = taxi_files[0]
    df_taxi = pd.read_pickle(os.path.join(TAXI_DATA_PATH, taxi_file))

df_taxi

Unnamed: 0,lat,lng,gps_time,speed
4556480,22.621574,114.156810,2020-06-08 13:32:15,7
4579770,22.621704,114.156600,2020-06-08 13:32:36,15
4601596,22.622225,114.155785,2020-06-08 13:32:56,16
4604167,22.622246,114.155690,2020-06-08 13:32:58,17
4910696,22.622255,114.155480,2020-06-08 13:33:03,12
...,...,...,...,...
3410211,22.569810,114.117760,2020-06-08 23:55:54,63
3429444,22.572780,114.117880,2020-06-08 23:56:13,58
3458226,22.577087,114.116590,2020-06-08 23:56:43,60
3467801,22.578468,114.115870,2020-06-08 23:56:53,62


In [None]:
# Export the filtered, downsampled GPS fixes of every taxi file to
# <DATA_PATH>/<DATASET>/fmm_<DATASET>/gps.csv, a ';'-separated point file
# with the header "id;x;y;time;speed" (x = longitude, y = latitude).
fmm_dir = os.path.join(DATA_PATH, DATASET, f"fmm_{DATASET}")

timedelta_downsampling = pd.Timedelta(seconds=DOWNSAMPLING_INTERVAL)
timedelta_traj_split = pd.Timedelta(seconds=TRAJ_SPLIT_INTERVAL)


def _flush_trajectory(out_file, lines, traj_id):
    """Write one buffered trajectory and return the id for the next one.

    A buffer with fewer than two points is discarded and ``traj_id`` is
    returned unchanged, so written trajectory ids stay consecutive.
    """
    if len(lines) > 1:
        out_file.write("".join(lines))
        return traj_id + 1
    return traj_id


traj_counter = 0
# The context manager closes the file even if a taxi file fails to load.
with open(os.path.join(fmm_dir, "gps.csv"), "w") as gps_file:
    gps_file.write("id;x;y;time;speed\n")

    for taxi_file in tqdm(sorted(os.listdir(TAXI_DATA_PATH))):
        taxi_path = os.path.join(TAXI_DATA_PATH, taxi_file)

        # NOTE(review): the second '-'-separated field of the filename part
        # before the first '_' is compared against START_DAY/END_DAY --
        # confirm it really is the day of month for these file names.
        date = int(taxi_file.split("_")[0].split("-")[1])
        if date < START_DAY or date > END_DAY:
            continue
        # Skip files under 100 bytes (presumably pickles with no rows).
        if os.path.getsize(taxi_path) < 100:
            continue
        # NOTE(review): read_pickle can execute arbitrary code; only use it
        # on files produced by this project's own preprocessing.
        df_taxi = pd.read_pickle(taxi_path)
        if df_taxi.empty:
            continue

        line_buffer = []
        # Start one split interval before the first fix so the first point
        # passes the downsampling check without counting as a gap.
        last_time = df_taxi.iloc[0]["gps_time"] - timedelta_traj_split
        for row in df_taxi.itertuples():
            # row[0] is the index; columns are read by position and assumed
            # to be (lat, lng, gps_time, speed) -- TODO confirm for all files.
            lat, lng, gps_time, speed = row[1], row[2], row[3], row[4]

            if not contains(lat, lng):
                continue
            gap = gps_time - last_time
            # Downsample: drop fixes closer than DOWNSAMPLING_INTERVAL
            # seconds to the last kept fix.
            if gap < timedelta_downsampling:
                continue
            if speed > 60:  # drop speed > 60km/h
                continue
            # A gap longer than TRAJ_SPLIT_INTERVAL seconds ends the current
            # trajectory; this point starts the next one.
            if gap > timedelta_traj_split:
                traj_counter = _flush_trajectory(gps_file, line_buffer, traj_counter)
                line_buffer = []

            last_time = gps_time
            line_buffer.append(f"{traj_counter};{lng};{lat};{gps_time};{speed}\n")

        # Flush the trajectory still buffered when the file ends; without
        # this the last trajectory of every taxi would be silently lost.
        traj_counter = _flush_trajectory(gps_file, line_buffer, traj_counter)

notify("Finish fmm dataset.")

 15%|█▍        | 92431/617790 [08:11<39:16, 222.95it/s]  

In [None]:
# Run the fmm command-line tools on the exported GPS points:
#   1. ubodt_gen precomputes the upper-bounded origin-destination table,
#   2. fmm map-matches gps.csv against the road network.
# https://github.com/cyang-kth/fmm/issues/166
# https://github.com/cyang-kth/fmm/blob/master/example/osmnx_example/README.md

fmm_dir = os.path.join(DATA_PATH, DATASET, f"fmm_{DATASET}")
network_path = os.path.join(fmm_dir, "edges.shp")
ubodt_path = os.path.join(fmm_dir, "ubodt.txt")
gps_path = os.path.join(fmm_dir, "gps.csv")
match_path = os.path.join(fmm_dir, "mr.txt")
log_path = os.path.join(fmm_dir, "fmm.log")

# Network options shared by both tools.
network_args = [
    "--network", network_path,
    "--network_id", "fid",
    "--source", "u",
    "--target", "v",
]

# check=True raises CalledProcessError on a non-zero exit status, so fmm is
# never run against a missing or stale ubodt.txt after a failed ubodt_gen.
ubodt_result = subprocess.run(
    ["ubodt_gen", *network_args,
     "--output", ubodt_path,
     "--delta", "0.03",
     "--use_omp"],
    check=True,
)

# stdout and stderr of fmm both go to fmm.log (the former `> log 2>&1`).
with open(log_path, "w") as fmm_log:
    fmm_result = subprocess.run(
        ["fmm",
         "--ubodt", ubodt_path,
         *network_args,
         "--gps", gps_path,
         "--gps_point",
         "-k", "8",
         "-r", "0.003",
         "-e", "0.0005",
         "--output", match_path,
         "--use_omp",
         "--output_fields", "id,opath,cpath,mgeom"],
        stdout=fmm_log,
        stderr=subprocess.STDOUT,
        check=True,
    )