# Import Libraries

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
import numpy as np
import pandas as pd

import tqdm
import glob
import os

In [3]:
# Lift every pandas display limit so DataFrames are rendered in full.
# Each option is set to None, which removes the corresponding cap.
for _display_option in (
    "display.max_colwidth",  # show the whole content of each cell (no truncation)
    "display.max_columns",   # show every column
    "display.width",         # no fixed character width for the rendered frame
    "display.max_rows",      # show every row
):
    pd.set_option(_display_option, None)

# Load Data

In [4]:
# Identifier of the signal to analyse, kept as a string because it is
# interpolated into the data directory path when the feature files are listed.
signal_id = "1435"

In [5]:
# Load and merge all data
#
# For the selected signal, every date has one SPaT feature file and one
# red-light-running feature file, stored under parallel directory trees.
# Both directories are derived from `signal_id`, so changing the signal in the
# cell above changes what is loaded here (the read paths used to be hard-coded).
feature_dir = "../data/production/atspm/fdot_d7/feature_extraction/feature/cycle"
spat_dir = f"{feature_dir}/vehicle_signal/spat/{signal_id}"
red_light_running_dir = f"{feature_dir}/vehicle_traffic/red_light_running/{signal_id}"

# glob returns files in arbitrary order; sort so the row order is reproducible.
filepaths = sorted(glob.glob(f"{spat_dir}/*.pkl"))

# get dates (file name without extension, e.g. "2024-01-01.pkl" -> "2024-01-01")
dates = [os.path.basename(filepath).split(".")[0] for filepath in filepaths]

# Columns shared by both feature tables; together they identify one cycle.
merge_keys = ["signalID", "cycleNo", "date", "cycleBegin", "cycleEnd", "cycleLength"]

# NOTE(security): unpickling can execute arbitrary code. Only load files that
# were produced by the trusted feature-extraction pipeline.
frames = []
for date in tqdm.tqdm(dates):
    df_spat_date = pd.read_pickle(f"{spat_dir}/{date}.pkl")
    df_red_light_running_date = pd.read_pickle(f"{red_light_running_dir}/{date}.pkl")

    # Inner join (pandas default): keeps only cycles present in both tables.
    df_data_date = pd.merge(df_spat_date, df_red_light_running_date, on=merge_keys)

    frames.append(df_data_date)

# Concatenate once instead of growing the frame inside the loop (avoids
# repeated copying). Fall back to an empty frame when no file was found,
# because pd.concat raises on an empty list.
df_data = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()

print(df_data.shape)
df_data.head(1)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 92.64it/s]

(16685, 54)





Unnamed: 0,signalID,cycleNo,cycleBegin,cycleEnd,cycleLength,greenRatioPhase1,yellowRatioPhase1,redClearanceRatioPhase1,redRatioPhase1,greenRatioPhase2,yellowRatioPhase2,redClearanceRatioPhase2,redRatioPhase2,greenRatioPhase4,yellowRatioPhase4,redClearanceRatioPhase4,redRatioPhase4,greenRatioPhase5,yellowRatioPhase5,redClearanceRatioPhase5,redRatioPhase5,greenRatioPhase6,yellowRatioPhase6,redClearanceRatioPhase6,redRatioPhase6,greenRatioPhase7,yellowRatioPhase7,redClearanceRatioPhase7,redRatioPhase7,greenRatioPhase8,yellowRatioPhase8,redClearanceRatioPhase8,redRatioPhase8,greenRatioPhase3,yellowRatioPhase3,redClearanceRatioPhase3,redRatioPhase3,date,redClearanceRunCntPhase1,redClearanceRunFlagPhase1,redRunCntPhase1,redRunFlagPhase1,redClearanceRunCntPhase2,redClearanceRunFlagPhase2,redRunCntPhase2,redRunFlagPhase2,redClearanceRunCntPhase5,redClearanceRunFlagPhase5,redRunCntPhase5,redRunFlagPhase5,redClearanceRunCntPhase6,redClearanceRunFlagPhase6,redRunCntPhase6,redRunFlagPhase6
0,1435,2,2024-01-01 00:02:38.300,2024-01-01 00:05:38.400,180.1,0.055,0.0305,0.0111,0.9034,0.3609,0.0305,0.0111,0.5974,0.2499,0.0267,0.0117,0.7118,0.0527,0.0305,0.0111,0.9056,0.3631,0.0305,0.0111,0.5952,0.1388,0.0244,0.0111,0.8257,0.2882,0.0267,0.0117,0.6735,0.1749,0.0267,0.0111,0.7873,2024-01-01,[0],0,[1],1,"[0, 0, 0]",0,"[1, 0, 0]",1,[0],0,[1],1,"[0, 0, 0]",0,"[1, 0, 0]",1


In [6]:
# Report the total number of missing cells across the whole merged frame:
# per-column null counts are summed into a single number.
print("\nMissing Values in Data:", df_data.isna().sum().sum(), sep="\n")


Missing Values in Data:
0


In [7]:
# Distinct values of the `date` column as a plain Python list.
# This rebinds `dates`, which previously held the date strings parsed from the
# file names, to the values actually present in the merged data.
dates = df_data["date"].unique().tolist()
len(dates)

30

# Data Processing

In [8]:
# df_data.columns.values

In [9]:
# Drop the red-light-running count columns: keep every column whose name does
# not contain "Cnt".
columns = list(filter(lambda name: "Cnt" not in name, df_data.columns))

df_data = df_data.loc[:, columns]
df_data.shape

(16685, 46)