In [4]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
import json
import osmnx as ox

# Make IPython display the value of every top-level expression statement in a
# cell, not only the last one. Side effect: calls whose return value is not
# wanted in the output (e.g. file.write) must be assigned to a variable.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

%pwd

'/home/cseadmin/data/cys/TrafficFlowModel/data_process'

In [5]:
# Latitude/longitude limits of the rectangle that `contains` tests against.
minlat = 22.5310
maxlat = 22.5397
minlng = 114.0442
maxlng = 114.0633


def contains(lat, lng):
    """Tell whether the point (lat, lng) lies inside the bounding box.

    The box is closed: points exactly on an edge count as inside.

    Args:
        lat: latitude of the point, compared against minlat/maxlat.
        lng: longitude of the point, compared against minlng/maxlng.

    Returns:
        True when both coordinates are within their limits, False otherwise.
    """
    # Reject on latitude first so longitude is only examined when needed.
    if not (minlat <= lat <= maxlat):
        return False
    return minlng <= lng <= maxlng

In [6]:
# Mon. to Fri.
# Build ../data/fmm_data/gps.csv (header "id;x;y;time") from the per-file taxi
# CSVs: keep only points inside the bounding box, thin them to at least 30 s
# spacing, and start a new trajectory whenever consecutive kept points are more
# than 600 s apart. Trajectories with a single point are discarded.
# NOTE(review): the weekday filter assumes the second "-"-separated field of the
# file-name prefix is a day number with 2..6 = Mon..Fri -- confirm against the
# naming scheme of the merged files.
merged_dir="../data/taxi_after_proc/merged"

timedelta_30=pd.Timedelta(seconds=30)    # minimum spacing between kept samples
timedelta_600=pd.Timedelta(seconds=600)  # a larger gap ends the current trajectory

traj_counter=0 # this is global now: ids stay unique across all input files

# `with` guarantees the output is flushed and closed even if a file fails to parse.
with open("../data/fmm_data/gps.csv", "w") as gps_file:
    # Return values of write() are assigned so they are never echoed as output.
    write_length=gps_file.write("id;x;y;time\n")

    for taxi_file in tqdm(sorted(os.listdir(merged_dir))):
        date=int(taxi_file.split("_")[0].split("-")[1])
        if date<2 or date>6:
            continue
        taxi_path=os.path.join(merged_dir, taxi_file)
        if os.path.getsize(taxi_path)<100: # skip files too small to hold data
            continue
        df_taxi=pd.read_csv(taxi_path, parse_dates=["gps_time"])
        if df_taxi.empty:
            continue

        line_buffer=[]
        # Seed last_time 600 s before the first sample so that sample passes
        # both the 30 s resampling test and the 600 s gap test.
        last_time=df_taxi.iloc[0]["gps_time"]+pd.Timedelta(seconds=-600)
        for row in df_taxi.itertuples():
            # row[0] is the index; assumes the first three columns are
            # latitude, longitude, gps_time -- TODO confirm column order.
            lat, lng, gps_time=row[1], row[2], row[3]
            if not contains(lat, lng):
                continue
            if gps_time-last_time<timedelta_30: # resample: drop <30s
                continue
            if gps_time-last_time>timedelta_600:
                if len(line_buffer)>1: # only store length>1 traj
                    write_length=gps_file.write("".join(line_buffer))
                    traj_counter+=1
                line_buffer=[]

            last_time=gps_time

            line_buffer.append(f"{traj_counter};{lng};{lat};{gps_time}\n")

        # Fix: flush the trajectory still buffered when the file ends. Without
        # this, the last trajectory of every input file was silently dropped
        # because the buffer is reset at the start of the next file.
        if len(line_buffer)>1:
            write_length=gps_file.write("".join(line_buffer))
            traj_counter+=1

  8%|▊         | 21010/252072 [00:00<00:00, 383130.56it/s]


In [3]:
%%bash

# Generate the UBODT file for the edges.shp network (one option per line).
# References:
#   https://github.com/cyang-kth/fmm/issues/166
#   https://github.com/cyang-kth/fmm/blob/master/example/osmnx_example/README.md
ubodt_gen \
    --network ../data/fmm_data/edges.shp \
    --network_id fid \
    --source u \
    --target v \
    --output ../data/fmm_data/ubodt.txt \
    --delta 0.03 \
    --use_omp

[info][ubodt_gen_app_config.cpp:42 ] Start reading ubodt configuration from arguments
[info][ubodt_gen_app_config.cpp:70 ] Finish with reading ubodt arg configuration
[info][ubodt_gen_app_config.cpp:74 ] ----    Print configuration   ----
[info][network_config.cpp:6  ] NetworkConfig
[info][network_config.cpp:7  ] File name: ../data/fmm_data/edges.shp 
[info][network_config.cpp:8  ] ID name: fid 
[info][network_config.cpp:9  ] Source name: u 
[info][network_config.cpp:10 ] Target name: v 
[info][ubodt_gen_app_config.cpp:76 ] Delta 0.03
[info][ubodt_gen_app_config.cpp:77 ] Output file ../data/fmm_data/ubodt.txt
[info][ubodt_gen_app_config.cpp:78 ] Log level 2-info
[info][ubodt_gen_app_config.cpp:79 ] Use omp true
[info][ubodt_gen_app_config.cpp:80 ] ---- Print configuration done ----
[info][ubodt_gen_app_config.cpp:97 ] Validating configuration for UBODT construction
[info][ubodt_gen_app_config.cpp:118] Validating done.
[info][network.cpp:72 ] Read network from file ../data/fmm_data/edge

bash: line 1: fg: no job control
ERROR 4: Unable to open EPSG support file gcs.csv.  Try setting the GDAL_DATA environment variable to point to the directory containing EPSG csv files.


In [10]:
%%bash

# Run fmm on gps.csv against the edges.shp network using the generated UBODT.
# Results go to mr.txt; stdout and stderr are both redirected to fmm.log.
nohup fmm \
    --ubodt ../data/fmm_data/ubodt.txt \
    --network ../data/fmm_data/edges.shp \
    --network_id fid \
    --source u \
    --target v \
    --gps ../data/fmm_data/gps.csv \
    --gps_point \
    -k 8 \
    -r 0.003 \
    -e 0.0005 \
    --output ../data/fmm_data/mr.txt \
    --use_omp \
    --output_fields id,opath,cpath,mgeom \
    > ../data/fmm_data/fmm.log 2>&1