# GPS probe map-matching for multiple .csv input files in `raw_dir` within the date range `start_date` to `end_date`
### The csv file names must indicate the dates.
### To work with a single input file, use gpsProbeMatching.ipynb

In [1]:
import warnings
warnings.filterwarnings("ignore")
from datetime import datetime ,date

import sys,os
sys.path.append('py/') 

import shutil
from functions import initialize, get_points_within_target_region, df2gdf, plot_map, check_dir, check_dir, get_all_files_from_dir
from preprocess import preprocess_data
from map_matching import map_match_csv2gpx, map_match_csv2gpx_multithread
from generate_route_by_pyroutelib import generate_osm_routes_main
from config import max_threads, OUTPUT_DIR, INPUT_DIR, map_matched_gps_probe

from get_raw_files import get_raw_files_main

### Define Input and Output locations

In [2]:
# Root directory that holds the raw GPS probe csv files.
raw_dir = '/home/bidur/map_match_gps_data/raw_phl'

# Sub-directory of raw_dir where the csv files prepared for map-matching are saved.
mapmatching_input_dir = raw_dir + '/phl_sample_clean/'

# Sub-directory of raw_dir that receives a copy of each final map-matched csv.
backup_dir = raw_dir + '/bak/'


### CSV prepare for input to  map-matching
__Assumptions:__
- start_date < end_date
- raw input files are named with date in __yyyymmdd__ format. Ex. __NPL_data_20190404.csv__

- The original csv files in *raw_dir* contain many fields.
- The fields of interest are extracted from each csv with: 
         cut -d ',' -f 1,2,3,4,6,7,8,9 NPL_data_20190404.csv >  NPL_20190704.csv
- The timestamp field is created by joining several fields using *py/get_raw_files.py*
    - csv files with desired fields *(ap_id, timestamp, latitude, longitude)* are saved at : *mapmatching_input_dir*


Files from __raw_dir__ are processed and saved in __mapmatching_input_dir__

In [3]:

# Alternative locations, kept for reference:
#   raw_dir = '/mnt/lv/heromiya/PHLMobilityData/'    -> original raw file location
#   mapmatching_input_dir = '/mnt/lv/bidur/PHL_raw_data/clean_input/test/'    -> csv prepared in the required format (ap_id, ) for map-matching

# Date range of the raw files to prepare. The raw file names carry the date,
# e.g. NPL_data_20190404.csv corresponds to the start date below.
start_date, end_date = date(2019, 4, 4), date(2019, 4, 8)

# Prepare the csv inputs for map-matching: reads from raw_dir and saves
# the prepared files in mapmatching_input_dir.
get_raw_files_main(
    raw_dir,
    mapmatching_input_dir,
    start_date,
    end_date,
)
  


 The following files are prepared: 
csv preparation for map-matching input complete


csv from __mapmatching_input_dir__ are processed and final csv are saved in __backup_dir__

In [4]:

# Input csv files to map-match; the pipeline below runs once per file.
# NOTE: the list is currently hard-coded. To process every file found in
# mapmatching_input_dir, use the commented-out call instead.
#arr_input_csv = get_all_files_from_dir(mapmatching_input_dir)
arr_input_csv = [
    '/home/bidur/map_match_gps_data/raw_phl/2file.csv',
    #'/home/bidur/map_match_gps_data/raw_phl/2222.csv',
    #'/home/bidur/map_match_gps_data/raw_phl/1643_1.csv',
]
print(arr_input_csv)

for gps_csv in arr_input_csv:

    # To skip a particular day, filter on the date embedded in the file name, e.g.:
    #if '20190203' in gps_csv:
    #    continue

    print(gps_csv, 'START ', str( datetime.now() ))

    # 1. remove old data and create necessary directories
    initialize()

    # 2. anonymize ap_id column to int value (disabled here: anonymize=False),
    #    clip points within boundary
    gdf_probe_clipped, gdf_target = get_points_within_target_region(gps_csv, anonymize=False, display_plot=False)
    #gdf_probe_clipped, gdf_target = get_points_within_target_region(gps_csv, anonymize=True, display_plot=False)

    # 3. Preprocess: cleaning data & applying sampling
    df_sample = preprocess_data()

    # 4. map matching with osm roads using graphhopper (multithreaded)
    df_mapped_route = map_match_csv2gpx_multithread(df_sample)

    # Use this for normal execution without multithreading
    # (if user permissions do not allow multithreading):
    #df_mapped_route = map_match_csv2gpx(df_sample)

    # 5. save final csv to backup dir, keeping the input file's name
    check_dir(backup_dir)  # create directory if it does not exist
    csv_name = os.path.basename(gps_csv)
    # os.path.join keeps the path valid whether or not backup_dir ends with a separator
    final_data_path = os.path.join(backup_dir, csv_name)
    # map_matched_gps_probe comes from config; presumably the path of the
    # pipeline's final output file -- verify against py/config.py
    shutil.copyfile(map_matched_gps_probe, final_data_path)

    print(gps_csv, 'END ', str( datetime.now() ))
    print('Saved at: ', final_data_path)
    print('__________________________________________________________________')


print("ALL TASK COMPLETED!!")

['/home/bidur/map_match_gps_data/raw_phl/2file.csv']
/home/bidur/map_match_gps_data/raw_phl/2file.csv START  2020-11-16 09:58:33.708343
6443 JUMP >  threshold(km/hr): 100 time taken(hr): 0.0011111111111111111  dist_covered: 0.35807186207479186
ap_id (all): 2  ap_id (No JUMPS): 1

completed:  java -jar matching-web/target/graphhopper-map-matching-web-1.0-SNAPSHOT.jar import map-data/philippines-latest.osm.pbf
<< multithreaded_process() -> START 2020-11-16 09:59:52.170761
Route Points cannot be generated as all the selected ap_id's have very few input data points


PLEASE PROVIDE NEW INPUT WITH SUFFICIENT DATA 


1  csv file prepared and saved in  /home/bidur/map_match_gps_data/input/csv/1
< apply_map_matching_multithread() >
Current Working Directory  /home/bidur/map_match_gps_data/map-matching-master
GPX_DIR:  /home/bidur/map_match_gps_data/map-matching-master/matching-web/src/test/resources/target/1

 Thread #1 completed:  java -jar matching-web/target/graphhopper-map-matching-web-1.0

In [5]:
# Leftover debugging aid: the IPython %tb line magic re-prints the most recent exception traceback.
%tb

No traceback available to show.


#### Output summary for a sample input for 20190703
- about 9 hr 30 min 
- 10% = 525 ap_ids
- 411 ap_ids in final output 