In [85]:
import os
import csv
import pandas as pd

In [86]:
# Define a numeric-parsing helper, then set the file paths for the all-links delays, incident-link delays, feeder-links list, and NCE durations
def safe_float(value):
    """Convert ``value`` to a float, falling back to 0.0 when it cannot be parsed.

    Parameters
    ----------
    value : object
        Usually a CSV cell, i.e. a string or ``None``.

    Returns
    -------
    float
        ``float(value)``, or 0.0 when ``value`` is ``None``, the empty string,
        or anything ``float()`` rejects.
    """
    try:
        if value is None or value == "":
            return 0.0
        return float(value)
    except (TypeError, ValueError):
        # ValueError: unparsable text such as "abc".
        # TypeError: objects float() cannot handle at all, such as a list.
        return 0.0

# Input locations (relative paths)
all_links = '../data/link_delays'
feeder_links = "../data/feeder_links/feeders.csv"
NCE_duration = "../data/change_events/NCE_Durations.csv"

# Output location for the filtered incident/feeder link delay files
incident_link_delays = "../data/incident_analysis/delay/incident_link_delays"

# exist_ok=True creates the directory only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs(incident_link_delays, exist_ok=True)

# Maps seed -> set of link ids; filled from the feeder_links CSV further down.
feeder_data = {}

In [77]:
# Build the seed -> link-id mapping: the incident link and both feeder links of
# every CSV row are collected into the set belonging to that row's seed.
# newline='' leaves newline handling to the csv module, as its documentation
# recommends for files passed to csv readers.
with open(feeder_links, 'r', newline='') as f:
    for row in csv.DictReader(f):
        seed_links = feeder_data.setdefault(row['Seed'], set())
        candidates = (row['Incident Link'], row['Feeder 1'], row['Feeder 2'])
        # Blank cells are skipped so empty ids never enter the set.
        seed_links.update(link_id for link_id in candidates if link_id)

# Baseline delay file; a filtered copy is written into every seed's output folder.
baseline_path = os.path.join(all_links, "Seed 000", "0-0-000.delay.csv")

### CODE TO WRITE INCIDENT LINK DELAY CSVs

In [78]:
def _write_filtered_delays(source_file, target_file, relevant_links):
    """Write the rows of ``source_file`` whose 'Link Id' is in ``relevant_links``.

    Blank time-bin cells are written as "0.0", and a 'Total Delay [hours]'
    column is filled with the sum of all time-bin columns divided by 3600.

    Parameters
    ----------
    source_file : str
        Path of the delay CSV to read.
    target_file : str
        Path of the filtered CSV to create (overwritten if present).
    relevant_links : collection of str
        Link ids to keep.
    """
    with open(source_file, 'r', newline='') as f_in, \
            open(target_file, 'w', newline='') as f_out:
        reader = csv.DictReader(f_in)

        # Work on a copy: appending to reader.fieldnames itself would also
        # change the column list the reader uses to build each row.
        fieldnames = list(reader.fieldnames)
        if 'Total Delay [hours]' not in fieldnames:
            fieldnames.append('Total Delay [hours]')

        # Time-bin columns are recognised by the ':' in their header. The list
        # depends only on the header, so it is built once per file.
        delay_columns = [col for col in reader.fieldnames if ':' in col]

        writer = csv.DictWriter(f_out, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            if row['Link Id'] not in relevant_links:
                continue
            for col in delay_columns:
                if not row.get(col):
                    row[col] = "0.0"
            row['Total Delay [hours]'] = sum(safe_float(row[col]) for col in delay_columns) / 3600
            writer.writerow(row)


for seed, relevant_links in feeder_data.items():
    seed_directory = f"Seed {seed}"
    source_path = os.path.join(all_links, seed_directory)
    target_path = os.path.join(incident_link_delays, seed_directory)
    os.makedirs(target_path, exist_ok=True)

    # Filtered copy of the baseline run, restricted to this seed's links
    _write_filtered_delays(
        baseline_path,
        os.path.join(target_path, "0-0-000.delay.csv"),
        relevant_links,
    )

    # Filtered copies of every delay file found in the seed's source directory
    for delay_file in os.listdir(source_path):
        source_file = os.path.join(source_path, delay_file)
        if not os.path.isfile(source_file):
            # Sub-directories (e.g. notebook checkpoint folders) cannot be
            # opened as CSV files, so they are skipped.
            continue
        _write_filtered_delays(
            source_file,
            os.path.join(target_path, delay_file),
            relevant_links,
        )

### ADD LABELS TO THE LINKS TO SPECIFY IF THEY ARE INCIDENT OR FEEDER LINKS

In [79]:
def get_characterizations(feeder_clean_path):
    """Group the rows of the feeder CSV by their 'Seed' value.

    Parameters
    ----------
    feeder_clean_path : str
        Path to a CSV with the columns 'Seed', 'Incident Link', 'Feeder 1'
        and 'Feeder 2'.

    Returns
    -------
    dict
        Maps each seed to a list (in file order) of dicts holding the
        'Incident Link', 'Feeder 1' and 'Feeder 2' values of one row.
    """
    role_columns = ('Incident Link', 'Feeder 1', 'Feeder 2')
    by_seed = {}
    with open(feeder_clean_path, 'r') as handle:
        for record in csv.DictReader(handle):
            roles = {column: record[column] for column in role_columns}
            by_seed.setdefault(record['Seed'], []).append(roles)
    return by_seed

In [80]:
# Adding the time utility functions here
def add_time(time_str, minutes_to_add):
    """Shift an 'HH:MM:SS' string by ``minutes_to_add`` minutes.

    Overflowing minutes carry into the hours; hours are not wrapped at 24 and
    the seconds are passed through unchanged.
    """
    hours, minutes, seconds = (int(part) for part in time_str.split(':'))
    carry, minutes = divmod(minutes + minutes_to_add, 60)
    hours += carry
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

def round_down(time_str):
    """Floor an 'HH:MM:SS' string to the start of its 15-minute interval.

    The seconds are always reset to "00".
    """
    hours, minutes, _seconds = [int(piece) for piece in time_str.split(':')]
    quarter_start = minutes - minutes % 15
    return f"{hours:02d}:{quarter_start:02d}:00"

def time_to_minutes_since_start(time_str):
    """Return the whole minutes represented by an 'HH:MM:SS' string.

    The seconds component is parsed but does not contribute to the result.
    """
    hours, minutes, _seconds = (int(field) for field in time_str.split(':'))
    return 60 * hours + minutes

def is_time_format(s):
    """Return True when ``s`` is a string of three ':'-separated integers (HH:MM:SS).

    Only the shape is checked; the individual values are not range-validated.
    Non-string input (``None``, numbers, NaN, ...) returns False rather than
    raising, so the function can be applied safely to arbitrary column labels.
    """
    if not isinstance(s, str):
        return False
    try:
        # Distinct names keep the argument from being overwritten by the seconds.
        _hours, _minutes, _seconds = map(int, s.split(':'))
    except ValueError:
        # Raised for non-integer parts and for a wrong number of parts.
        return False
    return True

In [81]:
def compute_row_delay(row, start_period, end_period):
    """Sum a row's time-labelled values inside a window and divide by 3600.

    Parameters
    ----------
    row : pandas.Series
        Row whose index holds the column labels; only labels accepted by
        ``is_time_format`` are considered.
    start_period, end_period : str
        'HH:MM:SS' bounds of the window. Both ends are inclusive.

    Returns
    -------
    The sum of the selected values divided by 3600.
    """
    window_start = time_to_minutes_since_start(start_period)
    window_end = time_to_minutes_since_start(end_period)

    selected = []
    for label in row.index:
        if not is_time_format(label):
            continue
        if window_start <= time_to_minutes_since_start(label) <= window_end:
            selected.append(label)

    return row[selected].sum() / 3600

def compute_corrected_row_delay(row, end_period, duration_minutes):
    """Sum a row's time-labelled values in a fixed-length window, divided by 3600.

    The window starts at ``end_period`` (inclusive) and stops
    ``duration_minutes`` later (exclusive). Only index labels accepted by
    ``is_time_format`` are considered.
    """
    window_start = time_to_minutes_since_start(end_period)
    window_stop = window_start + duration_minutes

    def in_window(label):
        # Non-time labels are rejected before any time parsing is attempted.
        if not is_time_format(label):
            return False
        return window_start <= time_to_minutes_since_start(label) < window_stop

    columns_to_sum = list(filter(in_window, row.index))
    return row[columns_to_sum].sum() / 3600
    
def compute_post_incident_delay(row, end_period, duration_minutes):
    """Sum the time-labelled values of ``row`` that follow ``end_period``.

    Parameters
    ----------
    row : pandas.Series
        Row whose index holds the column labels.
    end_period : str
        'HH:MM:SS' label at which the window begins (inclusive).
    duration_minutes : int
        Window length in minutes; the far end is exclusive.

    Returns
    -------
    The sum of the selected values divided by 3600.
    """
    first_minute = time_to_minutes_since_start(end_period)
    limit_minute = first_minute + duration_minutes

    # Pair every time-formatted label with its minute offset, then keep the
    # labels whose offset falls inside [first_minute, limit_minute).
    timed_labels = [
        (label, time_to_minutes_since_start(label))
        for label in row.index
        if is_time_format(label)
    ]
    chosen = [label for label, minute in timed_labels if first_minute <= minute < limit_minute]
    return row[chosen].sum() / 3600

In [82]:
def process_files(incident_link_delays, char_dict):
    """Label the rows of every delay file, in place, with their link role.

    For each seed folder, every CSV row whose 'Link Id' equals the incident
    link or one of the feeder links of a characterization entry is rewritten
    once per matching entry with four extra columns: 'Type' (the matching role
    name) plus the entry's 'Incident Link', 'Feeder 1' and 'Feeder 2'. Rows
    matching no entry are dropped.

    Parameters
    ----------
    incident_link_delays : str
        Directory containing one sub-folder per seed; the seed is the text
        after the last space in the folder name.
    char_dict : dict
        Maps seed -> list of dicts with the keys 'Incident Link', 'Feeder 1'
        and 'Feeder 2' (the shape returned by ``get_characterizations``).

    Notes
    -----
    The function is safe to run repeatedly: label columns already present in a
    file are reused instead of being appended a second time.
    """
    label_columns = ['Type', 'Incident Link', 'Feeder 1', 'Feeder 2']

    for seed_dir in os.listdir(incident_link_delays):
        path = os.path.join(incident_link_delays, seed_dir)
        if not os.path.isdir(path):
            continue  # stray file next to the seed folders

        seed = seed_dir.split(" ")[-1]
        entries = char_dict.get(seed)
        if entries is None:
            # Leave the folder untouched instead of failing halfway through a
            # file that has already been read.
            print(f"Skipping '{seed_dir}': no characterization for seed '{seed}'.")
            continue

        for delay_file in os.listdir(path):
            delay_file_path = os.path.join(path, delay_file)
            if not os.path.isfile(delay_file_path):
                continue  # nested directories cannot be read as CSV

            new_rows = []
            with open(delay_file_path, 'r', newline='') as f_in:
                reader = csv.DictReader(f_in)
                fieldnames = list(reader.fieldnames or [])
                for row in reader:
                    link_id = row['Link Id']
                    for entry in entries:
                        # First role of this entry whose link equals the row's
                        # link; other entries are still checked afterwards.
                        role = next(
                            (typ for typ, associated_link in entry.items() if associated_link == link_id),
                            None,
                        )
                        if role is None:
                            continue
                        new_row = dict(row)
                        new_row['Type'] = role
                        new_row['Incident Link'] = entry['Incident Link']
                        new_row['Feeder 1'] = entry['Feeder 1']
                        new_row['Feeder 2'] = entry['Feeder 2']
                        new_rows.append(new_row)

            # Only append label columns the file does not have yet; otherwise a
            # second run would write each of them twice in the header.
            output_fields = fieldnames + [col for col in label_columns if col not in fieldnames]

            # Overwrite the file with the labelled rows
            with open(delay_file_path, 'w', newline='') as f_out:
                writer = csv.DictWriter(f_out, fieldnames=output_fields)
                writer.writeheader()
                writer.writerows(new_rows)

In [83]:
def main():
    """Label the incident-link delay files with the feeder characterizations.

    Reads the characterizations from ``feeder_links``, applies them to every
    file under ``incident_link_delays`` and prints a completion message.
    """
    characterizations = get_characterizations(feeder_links)
    process_files(incident_link_delays, characterizations)
    print("Processing complete.")


if __name__ == "__main__":
    main()

Processing complete.


In [84]:
# Load NCE_Durations.csv
nce_df = pd.read_csv(NCE_duration)

for root, _, files in os.walk(incident_link_delays):
    delay_files = [name for name in files if name.endswith('.delay.csv')]
    if not delay_files:
        continue

    # The seed is the number after the space in the folder name ("Seed <n>");
    # it is the same for every file in the folder, so it is parsed once.
    current_seed = int(os.path.basename(root).split(" ")[1])

    # Keep only the NCE rows belonging to this seed
    filtered_nce_df = nce_df[nce_df['Seed'] == current_seed]

    for file in delay_files:
        file_path = os.path.join(root, file)
        delay_df = pd.read_csv(file_path)

        # Attach the NCE columns of the matching incident link to every row.
        # NOTE(review): how="left" keeps rows without an NCE match; their
        # start/end times would be NaN and round_down would fail on them.
        # Confirm every incident link has an NCE entry.
        matched_rows = delay_df.merge(filtered_nce_df, on="Incident Link", how="left")

        # Align the incident start/end with the 15-minute column labels
        matched_rows["Incident Start"] = matched_rows["Start Time [HH:MM:SS]"].apply(round_down)
        matched_rows["Incident End"] = matched_rows["End Time [HH:MM:SS]"].apply(round_down)

        # During-incident window: start and end bins both included.
        matched_rows["Delay During Incident [hours]"] = matched_rows.apply(
            lambda row: compute_row_delay(row, row["Incident Start"], row["Incident End"]),
            axis=1,
        )
        # Post-incident windows: 30 minutes each, starting 15 and 45 minutes
        # after the rounded incident end.
        matched_rows["Post-Incident Delay (0-30 mins) [hours]"] = matched_rows.apply(
            lambda row: compute_post_incident_delay(row, add_time(row["Incident End"], 15), 30),
            axis=1,
        )
        matched_rows["Post-Incident Delay (30-60 mins) [hours]"] = matched_rows.apply(
            lambda row: compute_post_incident_delay(row, add_time(row["Incident End"], 45), 30),
            axis=1,
        )

        # Drop columns whose *name* is repeated. Transposing and calling
        # drop_duplicates() compares column *values* instead, which silently
        # removes any column whose contents equal an earlier one (for example
        # two all-zero time bins, or two identical delay results).
        matched_rows = matched_rows.loc[:, ~matched_rows.columns.duplicated()]

        matched_rows = matched_rows.drop(
            columns=["Start Time [HH:MM:SS]", "End Time [HH:MM:SS]"],
            errors='ignore',
        )
        matched_rows.to_csv(file_path, index=False)

print("Processing completed.")

Processing completed.
