In [1]:
import os
import pandas as pd

from tqdm.auto import tqdm
from utils.utils import load_gps_data
from utils.find_travel_time import FindTravelTime

In [2]:
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and dtype warnings
# raised by any library used below will not be shown either.
warnings.filterwarnings("ignore")

In [3]:
# Directory handed to load_gps_data as the location of the GPS input data.
gps_data_dir = 'E:/data/gps'
# Output roots: index 0 receives the start_stop CSVs, index 1 the stop_start CSVs.
save_path = ['E:/data/start_stop', 'E:/data/stop_start'] # change to your own directories for saving the start_stop and stop_start dataframes

In [4]:
def init_data_dict(roads_list):
    """Build an empty per-road buffer for both travel directions.

    Parameters
    ----------
    roads_list : iterable
        Road identifiers; each one is formatted into the dictionary keys.

    Returns
    -------
    dict
        Maps "<road>_start_stop" and "<road>_stop_start" (in that order for
        every road) to its own new empty list.
    """
    directions = ("start_stop", "stop_start")
    # A comprehension evaluates `[]` once per key, so no list is shared.
    return {
        f"{road_id}_{direction}": []
        for road_id in roads_list
        for direction in directions
    }

def init_save_directory(save_path, roads_list):
    """Create the output directory tree for both travel directions.

    For each of the first two entries of ``save_path`` the root directory and
    one sub-directory per road are created. Missing parent directories are
    created as well, and directories that already exist are left untouched,
    so the function can be called repeatedly.

    Parameters
    ----------
    save_path : sequence of str
        ``save_path[0]`` and ``save_path[1]`` are the two output roots.
    roads_list : iterable
        Road identifiers; each becomes a sub-directory name under both roots.

    Raises
    ------
    FileExistsError
        If one of the target paths already exists but is not a directory.
    """
    for base_dir in (save_path[0], save_path[1]):
        # makedirs also builds missing parents, which plain os.mkdir cannot do.
        os.makedirs(base_dir, exist_ok=True)
        for road in roads_list:
            os.makedirs(os.path.join(base_dir, f"{road}"), exist_ok=True)

def _save_travel_time_chunk(data_dict, save_path, month, day, year):
    """Write every buffered list of frames in ``data_dict`` to one CSV each.

    Keys are expected in the form "<road>_start_stop" / "<road>_stop_start".
    start_stop data goes under ``save_path[0]``, everything else under
    ``save_path[1]``, in the road's sub-directory, named
    "<month>_<day>_<year>.csv".
    """
    file_name = f"{month}_{day}_{year}.csv"
    for key, frames in data_dict.items():
        if not frames:
            # pd.concat raises on an empty list; nothing to write for this key.
            continue
        road_id, _, direction = key.partition('_')
        base_dir = save_path[0] if direction == 'start_stop' else save_path[1]
        df_save = pd.concat(frames, axis='rows', ignore_index=True)
        df_save.to_csv(os.path.join(base_dir, road_id, file_name), index=False)
        del df_save


def create_travel_time(gps_data_dir, save_path, month_start, month_stop, day_load=1):
    """Compute per-road travel-time tables day by day and save them as CSV.

    For every day of every month in ``month_start..month_stop`` (inclusive,
    year fixed to 2019) the GPS data is loaded, a ``FindTravelTime`` object is
    built from it, and for each road in the hard-coded road list the two
    frames it returns are buffered. After ``day_load`` days the buffers are
    concatenated and written, one CSV per road and direction, named after the
    last day in the chunk. Days left over at the end of a month are written
    before the next month starts, so a file never spans two months.

    Parameters
    ----------
    gps_data_dir : str
        Passed unchanged to ``load_gps_data``.
    save_path : sequence of str
        ``save_path[0]`` is the root for start_stop output, ``save_path[1]``
        the root for stop_start output.
    month_start, month_stop : int
        First and last month to process (1-12, inclusive). If
        ``month_start > month_stop`` nothing is processed.
    day_load : int, default 1
        Number of days accumulated in memory before writing to disk.

    Raises
    ------
    ValueError
        If a requested month is outside 1-12. This is checked up front:
        month 0 or a negative month would otherwise index the day-count
        table from the end, and month 13+ would fail only after all
        earlier months had been processed.
    """
    month_numbers = range(month_start, month_stop + 1)
    if month_numbers and (month_numbers[0] < 1 or month_numbers[-1] > 12):
        raise ValueError(
            f"month_start and month_stop must be within 1-12, "
            f"got {month_start} and {month_stop}"
        )

    # init for load gps data function
    year = '2019'
    roads_list = [1, 2, 4, 7, 9, 32, 35, 41, 304, 331]
    # days per month for the fixed (non-leap) year above
    days_count_list = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    months_list = [f"{i:02d}" for i in month_numbers]

    # number of days currently buffered in data_dict
    amount_day = 0

    # init data dict for save dataframe
    data_dict = init_data_dict(roads_list)
    init_save_directory(save_path, roads_list)

    for month in months_list:
        print(f"============= [month {month}] =============")
        days_list = [f"{i:02d}" for i in range(1, days_count_list[int(month) - 1] + 1)]
        for day in tqdm(days_list, desc='day: '):
            df_gps = load_gps_data(gps_data_dir, [day], [month], [year])
            find_travel_time = FindTravelTime(df_gps)

            for road in roads_list:
                find_travel_time.set_road(road)
                # NOTE(review): the argument 1 is defined by FindTravelTime;
                # its meaning is not visible here - confirm before changing.
                df_start_stop, df_stop_start = find_travel_time.find_travel_time_select_hour(1)
                data_dict[f"{road}_start_stop"].append(df_start_stop)
                data_dict[f"{road}_stop_start"].append(df_stop_start)

                # drop the local names; the buffers keep the frames alive
                del df_start_stop, df_stop_start

            amount_day += 1

            # flush once the requested number of days has been buffered
            if amount_day >= day_load:
                _save_travel_time_chunk(data_dict, save_path, month, day, year)
                data_dict = init_data_dict(roads_list)
                amount_day = 0

            # clear data in memory
            del df_gps
            del find_travel_time

        # flush a partial chunk so that files never mix two months
        if amount_day != 0:
            print("save the rest of the data!!")
            _save_travel_time_chunk(data_dict, save_path, month, day, year)
            data_dict = init_data_dict(roads_list)
            amount_day = 0

    print("=====> Create dataset complete. <=====")

In [6]:
# Build the travel-time CSVs for months 5 through 8 (inclusive). day_load is
# left at its default of 1, so the results are written after every day.
# The recorded run below took roughly 50-60 minutes per month.
create_travel_time(gps_data_dir, save_path, 5, 8)



day: 100%|██████████| 31/31 [51:17<00:00, 99.28s/it] 




day: 100%|██████████| 30/30 [58:15<00:00, 116.51s/it]




day: 100%|██████████| 31/31 [51:30<00:00, 99.68s/it]




day: 100%|██████████| 31/31 [49:58<00:00, 96.73s/it] 




