# Import Libraries

In [1]:
import warnings
# Ignore every FutureWarning raised after this point (process-wide warning filter).
# NOTE(review): presumably installed before the library imports below so that
# FutureWarnings emitted at import time are hidden as well — confirm.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
import numpy as np
import pandas as pd
import tqdm
import yaml
import glob
import os
import gc

from datetime import datetime
from IPython.display import clear_output

In [3]:
from src.components.feature_extraction import feature_extraction
from src.utils import get_root_directory

In [4]:
# Get root directory of the project (resolved by the project helper in src.utils).
# NOTE(review): no visible cell reads `root_dir`; the data paths further down are
# hard-coded relative to the notebook ("../data/...") — confirm whether they
# should be built from `root_dir` instead.
root_dir = get_root_directory()

In [5]:
# Lift pandas' display limits so DataFrames are rendered without truncation.
# Every option listed here is set to None (i.e. "no limit" / auto-detect).
_UNLIMITED_DISPLAY_OPTIONS = (
    'display.max_colwidth',  # show the full content of each cell
    'display.max_columns',   # show all columns
    'display.width',         # no fixed display width for the DataFrame
    'display.max_rows',      # show all rows
)

for _display_option in _UNLIMITED_DISPLAY_OPTIONS:
    pd.set_option(_display_option, None)

# Extract Traffic Signal Profile

## FDOT D5

In [6]:
# Configuration: FDOT D5 signal IDs to process.
# IDs are kept as strings; the (currently commented-out) extraction loop
# interpolates them into file paths and passes them as `signal_id=`.
signal_ids = [
    # 1xxx
    "1285", "1290", "1300", "1315", "1325", "1330",
    "1455", "1470", "1490", "1500", "1555",
    "1707", "1725", "1790", "1795", "1960",
    # 2xxx
    "2055", "2485", "2665",
    # Disabled entry, kept for reference:
    # "D5I-3000"
]

In [7]:
# NOTE: this rebinds `signal_ids`, replacing the full FDOT D5 list defined in the
# previous cell. Any later cell that reads `signal_ids` sees only signal "1500".
# Skip (or remove) this cell to work with the complete list.
signal_ids = ["1500"]

In [8]:
# for signal_id in signal_ids:
#     print(f"Processing Signal ID: {signal_id}")
#     print("=" * 40)

#     # Define the filepaths
#     filepaths = f"../data/interim/atspm/fdot_d5/event_data/{signal_id}/*.pkl"
#     filepaths = [p for p in glob.glob(filepaths)][1:]  # Skips the first path returned by glob; glob does not sort its results, so which file is skipped depends on the file system

#     # Extract dates from filepaths
#     dates = [os.path.basename(filepath).split(".")[0] for filepath in filepaths]

#     for date in dates:
#         print(f"  Processing Date: {date}")

#         try:
#             # Parse the date string into a datetime object
#             date_object = datetime.strptime(date, '%Y-%m-%d')

#             # Extract day, month, and year
#             day = date_object.day
#             month = date_object.month
#             year = date_object.year

#             # if not ((month == 6) and (day in [1, 2])):
#             #     continue

#             if month != 6:
#                 continue

#             # Extract Traffic Signal Profile
#             # traffic_signal_profile = feature_extraction.TrafficSignalProfile(day=day, 
#             #                                                                  month=month, 
#             #                                                                  year=year)

#             # df_vehicle_phase_profile_id = traffic_signal_profile.extract_vehicle_phase_profile(signal_id=signal_id)
#             # df_vehicle_cycle_profile_id = traffic_signal_profile.extract_vehicle_cycle_profile(signal_id=signal_id)

#             # df_pedestrian_phase_profile_id = traffic_signal_profile.extract_pedestrian_phase_profile(signal_id=signal_id)
#             # df_pedestrian_cycle_profile_id = traffic_signal_profile.extract_pedestrian_cycle_profile(signal_id=signal_id)

#             # # Extract signal features
#             # signal_feature_extract = feature_extraction.SignalFeatureExtract(day=day, 
#             #                                                                  month=month, 
#             #                                                                  year=year)

#             # print("   # Extracting SPaT")
#             # df_spat_id = signal_feature_extract.extract_spat(signal_id=signal_id)

#             # Extract traffic features
#             traffic_feature_extract = feature_extraction.TrafficFeatureExtract(day=day, 
#                                                                                month=month,
#                                                                                year=year)
#             # # Volume
#             # print("   # Extracting Volume")
#             # df_volume_id = traffic_feature_extract.extract_volume(signal_id=signal_id, 
#             #                                                       with_countbar=False)
            
#             # # Occupancy
#             # print("   # Extracting Occupancy")
#             # df_occupancy_id = traffic_feature_extract.extract_occupancy(signal_id=signal_id)

#             # # Split Failure
#             # print("   # Extracting Split Failure")
#             # df_split_failure_id = traffic_feature_extract.extract_split_failure(signal_id=signal_id, 
#             #                                                                     purdue_standard=True)

#             # # Headway
#             # print("   # Extracting Headway")
#             # df_headway_id = traffic_feature_extract.extract_headway(signal_id=signal_id)

#             # # Conflict
#             # print("   # Extracting Conflict")
#             # df_conflict_id = traffic_feature_extract.extract_conflict(signal_id=signal_id)

#             # # Gap
#             # print("   # Extracting Gap")
#             # df_gap_id = traffic_feature_extract.extract_gap(signal_id=signal_id)
            
#             # # Red Light Running
#             # print("   # Extracting Red Light Running")
#             # df_red_running_id = traffic_feature_extract.extract_red_running(signal_id=signal_id, 
#             #                                                                 with_countbar=False)

#             # # Pedestrian Activity
#             # print("   # Extracting Pedestrian Activity")
#             # df_pedestrian_activity_id = traffic_feature_extract.extract_pedestrian_activity(signal_id=signal_id)

#             # # Pedestrian Delay
#             # print("   # Extracting Pedestrian Delay")
#             # df_pedestrian_delay_id = traffic_feature_extract.extract_pedestrian_delay(signal_id=signal_id)

#             # # Vehicle-Pedestrian Conflict Intensity
#             # print("   # Extracting Vehicle-Pedestrian Conflict Intensity")
#             # df_turn_conflict_intensity_id = traffic_feature_extract.extract_turn_conflict_intensity(signal_id=signal_id)

#             print("\n")

#         except Exception as e:
#             print(f"Error Processing Date {date}: {e}")

#     # Explicitly call garbage collector
#     gc.collect()

#     # Clear output after processing each Signal ID
#     clear_output(wait=True)

### Join Data

In [11]:
# # Define the directory paths
# dirpaths = "../data/production/atspm/fdot_d5/feature_extraction/feature/*"
# dirpaths = [dirpath for dirpath in glob.glob(dirpaths)]

# # Initialize dictionary to hold DataFrames
# dict_join = {f"df_{os.path.basename(dirpath)}": pd.DataFrame() for dirpath in dirpaths}

# # Iterate through each directory path
# for dirpath in tqdm.tqdm(dirpaths):
#     print(dirpath)
#     key = os.path.basename(dirpath)  # Extract the last part of the path for the key
#     event_types = ["vehicle_signal", "vehicle_traffic", "pedestrian_traffic"]

#     for event_type in event_types:
#         event_path = f"{dirpath}/{event_type}"

#         # Check if the event path exists to avoid errors
#         if not os.path.exists(event_path):
#             continue

#         features = os.listdir(event_path)

#         for feature in features:
#             if feature != "gap":
#                 continue
#             feature_path = f"{event_path}/{feature}"

#             signal_ids = os.listdir(feature_path)

#             df = pd.DataFrame()
#             for signal_id in signal_ids:
#                 signal_path = f"{feature_path}/{signal_id}"
#                 filepaths = glob.glob(f"{signal_path}/*")

#                 # Read and concatenate all files for the current signal ID
#                 for filepath in filepaths:
#                     df = pd.concat([df, pd.read_pickle(filepath)], axis=0, ignore_index=True)

#             # Merge or concatenate with the corresponding DataFrame in dict_join
#             common_columns = list(set(df.columns).intersection(set(dict_join[f"df_{key}"].columns)))

#             if not common_columns:
#                 # If no common columns, concatenate along axis=1
#                 dict_join[f"df_{key}"] = pd.concat([dict_join[f"df_{key}"], df], axis=1)
#             else:
#                 common_columns = [
#                     common_column for common_column in common_columns if "channelNos" not in common_column
#                 ]
                
#                 # If common columns exist, perform a left merge
#                 dict_join[f"df_{key}"] = pd.merge(dict_join[f"df_{key}"], df, on=common_columns, how="left")

In [12]:
# for key in ["cycle", "hourly"]:
#     columns = [column for column in dict_join[f"df_{key}"].columns if "_" not in column]
#     dict_join[f"df_{key}"] = dict_join[f"df_{key}"][columns]

#     dict_join[f"df_{key}"].to_csv(f"../data/production/atspm/fdot_d5/feature_extraction/feature/{key}/{key}.csv", 
#                                   index=False)

## FDOT D7

In [None]:
# # Configurations
# signal_ids = [
#     "1067", "1068", "1301", "1392", "1435", "1439", "1445", "1501", "1506"
#  ]

In [None]:
# for signal_id in signal_ids:
#     print(f"Processing Signal ID: {signal_id}")
#     print("=" * 40)

#     # Define the filepaths
#     filepaths = f"../data/interim/atspm/fdot_d7/event_data/{signal_id}/*.pkl"
#     filepaths = [p for p in glob.glob(filepaths)][1:]  # Skips the first path returned by glob; glob does not sort its results, so which file is skipped depends on the file system

#     # Extract dates from filepaths
#     dates = [os.path.basename(filepath).split(".")[0] for filepath in filepaths]

#     for date in dates:
#         print(f"  Processing Date: {date}")

#         try:
#             # Parse the date string into a datetime object
#             date_object = datetime.strptime(date, '%Y-%m-%d')

#             # Extract day, month, and year
#             day = date_object.day
#             month = date_object.month
#             year = date_object.year

#             # Extract Traffic Signal Profile
#             traffic_signal_profile = feature_extraction.TrafficSignalProfile(day=day, 
#                                                                              month=month, 
#                                                                              year=year)
#             df_vehicle_phase_profile_id = traffic_signal_profile.extract_vehicle_phase_profile(signal_id=signal_id)
#             df_vehicle_cycle_profile_id = traffic_signal_profile.extract_vehicle_cycle_profile(signal_id=signal_id)

#             df_pedestrian_phase_profile_id = traffic_signal_profile.extract_pedestrian_phase_profile(signal_id=signal_id)
#             df_pedestrian_cycle_profile_id = traffic_signal_profile.extract_pedestrian_cycle_profile(signal_id=signal_id)

#             # Extract signal features
#             signal_feature_extract = feature_extraction.SignalFeatureExtract(day=day, 
#                                                                              month=month, 
#                                                                              year=year)

#             print("   # Extracting SPaT")
#             df_spat_id = signal_feature_extract.extract_spat(signal_id=signal_id)

#             # Extract traffic features
#             traffic_feature_extract = feature_extraction.TrafficFeatureExtract(day=day, 
#                                                                                month=month,
#                                                                                year=year)
#             # Volume
#             print("   # Extracting Volume")
#             df_volume_id = traffic_feature_extract.extract_volume(signal_id=signal_id, 
#                                                                   with_countbar=True)
            
#             # Occupancy
#             print("   # Extracting Occupancy")
#             df_occupancy_id = traffic_feature_extract.extract_occupancy(signal_id=signal_id)

#             # # Split Failure
#             # print("   # Extracting Split Failure")
#             # df_split_failure_id = traffic_feature_extract.extract_split_failure(signal_id=signal_id)
            
#             # Red Light Running
#             print("   # Extracting Red Light Running")
#             df_red_running_id = traffic_feature_extract.extract_red_running(signal_id=signal_id, 
#                                                                             with_countbar=True)

#         except Exception as e:
#             print(f"Error Processing Date {date}: {e}")

#     # Explicitly call garbage collector
#     gc.collect()

#     # Clear output after processing each Signal ID
#     clear_output(wait=True)