# Import Libraries

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
import numpy as np
import pandas as pd
import psutil
import tqdm
import yaml
import glob
import os
import gc

from datetime import datetime
from multiprocessing import Pool
from IPython.display import clear_output

In [3]:
from src.components.feature_extraction import data_quality_check
from src.utils import get_root_directory

In [4]:
# Get root directory of the project
root_dir = get_root_directory()

In [5]:
# Maximize Column Display 
pd.set_option('display.max_colwidth', None)     # Display all content within each cell without truncation
pd.set_option('display.max_columns', None)      # Display all columns
pd.set_option('display.width', None)            # Display entire width of DataFrame is displayed

pd.set_option('display.max_rows', None)         # Display all rows

In [6]:
# Get the current CPU usage as a percentage
cpu_usage = psutil.cpu_percent(interval=1)  # Interval of 1 second
print(f"Current CPU usage: {cpu_usage}%")

# Get the per-core usage
cpu_per_core = psutil.cpu_percent(interval=1, percpu=True)
print(f"CPU usage per core: {cpu_per_core}")

# Get the total number of cores
cpu_cores = psutil.cpu_count()
print(f"Total CPU cores: {cpu_cores}")

Current CPU usage: 17.2%
CPU usage per core: [0.0, 100.0, 2.0, 0.0, 2.0, 1.0, 9.1, 1.0, 1.0, 0.0, 0.0, 1.0]
Total CPU cores: 12


# Extract Traffic Signal Profile

## FDOT D5

In [7]:
# Configurations
signal_ids = [
    "1285", "1290",
    "1300", "1315", "1325", "1330", 
    "1455", "1470", "1490",
    "1500", "1555",
    "1707", "1725", "1790", "1795", 
    "1960",
    "2055", 
    "2485", 
    "2665", 
    # "D5I-3000"
]

In [8]:
for signal_id in signal_ids:
    print(f"Processing Signal ID: {signal_id}")
    print("=" * 40)

    # Define the filepaths
    filepaths = f"../data/interim/atspm/fdot_d5/event_data/{signal_id}/*.pkl"
    filepaths = [p for p in glob.glob(filepaths)][1:]  # Exclude first file, if needed

    # Extract dates from filepaths
    dates = [os.path.basename(filepath).split(".")[0] for filepath in filepaths]

    for date in dates:
        print(f"  Processing Date: {date}")

        try:
            # Parse the date string into a datetime object
            date_object = datetime.strptime(date, '%Y-%m-%d')

            # Extract day, month, and year
            day = date_object.day
            month = date_object.month
            year = date_object.year

            if month != 6:
                continue

            # Check data quality
            checker = data_quality_check.DataQualityCheck(event_type="vehicle_signal")
            checker.check_data_quality(signal_id=signal_id, 
                                       day=day, month=month, year=year)

        except Exception as e:
            print(f"Error Processing Date {date}: {e}")

    # Explicitly call garbage collector
    gc.collect()

    # Clear output after processing each Signal ID
    clear_output(wait=True)

Processing Signal ID: 2665
  Processing Date: 2024-06-01
  Processing Date: 2024-06-02
  Processing Date: 2024-06-03
  Processing Date: 2024-06-04
  Processing Date: 2024-06-05
  Processing Date: 2024-06-06
  Processing Date: 2024-06-07
  Processing Date: 2024-06-11
  Processing Date: 2024-06-12
  Processing Date: 2024-06-13
  Processing Date: 2024-06-14
  Processing Date: 2024-06-15
  Processing Date: 2024-06-16
  Processing Date: 2024-06-17
  Processing Date: 2024-06-18
  Processing Date: 2024-06-19
  Processing Date: 2024-06-20
  Processing Date: 2024-06-21
  Processing Date: 2024-06-22
  Processing Date: 2024-06-23
  Processing Date: 2024-06-24
  Processing Date: 2024-06-25
  Processing Date: 2024-06-26
  Processing Date: 2024-06-27
  Processing Date: 2024-06-28
  Processing Date: 2024-06-29
  Processing Date: 2024-06-30
  Processing Date: 2024-09-18
  Processing Date: 2024-09-19
  Processing Date: 2665
Error Processing Date 2665: time data '2665' does not match format '%Y-%m-%d'


## FDOT D7