In [1]:
import pandas as pd
import numpy as np

# Assumption: No preventative, only one maintenance type.

In [29]:
fault_df = pd.read_csv('../data/raw/fault_data.csv')

# Parse DateTime and filter necessary data
fault_df['DateTime'] = pd.to_datetime(fault_df['DateTime'], format='%Y-%m-%d %H:%M', errors='coerce')

# Extract year, month, and day for grouping and high-demand season determination
fault_df['Year'] = fault_df['DateTime'].dt.year
fault_df['Month'] = fault_df['DateTime'].dt.month
fault_df['Day'] = fault_df['DateTime'].dt.date

# Calculate internal cost
internal_fault_types = fault_df['Fault'].unique()
internal_cost = 750000 * len([fault for fault in internal_fault_types if fault != 'NF'])

# Calculate external cost
# Define high-demand months
high_demand_months = [1, 2, 6, 7, 8]

external_cost = 0
same_day_faults = set()  # Track faults already addressed on the same day
current_month = None
for _, fault in fault_df.iterrows():
    if fault['Fault'] != 'NF':  # Only consider faults
        print(same_day_faults)
        # Reset same_day_faults when a new month starts
        if current_month != fault['Month']:
            same_day_faults = set()
            current_month = fault['Month']

        # Check if the fault type on the same day has already been addressed
        if (fault['Day'], fault['Fault']) not in same_day_faults:
            # Determine the cost based on the month
            if int(fault['Month']) in high_demand_months:
                external_cost += 150000
            else:
                external_cost += 50000
            # Mark this fault type as addressed for the day
            same_day_faults.add((fault['Day'], fault['Fault']))


# Output results
internal_cost, external_cost

set()
{(datetime.date(2014, 5, 14), 'GF')}
{(datetime.date(2014, 5, 14), 'GF')}
{(datetime.date(2014, 5, 14), 'GF')}
{(datetime.date(2014, 5, 14), 'GF')}
{(datetime.date(2014, 5, 14), 'GF')}
{(datetime.date(2014, 5, 14), 'GF')}
{(datetime.date(2014, 6, 4), 'MF')}
{(datetime.date(2014, 6, 4), 'MF')}
{(datetime.date(2014, 6, 4), 'MF')}
{(datetime.date(2014, 6, 5), 'MF'), (datetime.date(2014, 6, 4), 'MF')}
{(datetime.date(2014, 6, 5), 'MF'), (datetime.date(2014, 6, 4), 'MF'), (datetime.date(2014, 6, 5), 'FF')}
{(datetime.date(2014, 6, 5), 'MF'), (datetime.date(2014, 6, 4), 'MF'), (datetime.date(2014, 6, 8), 'AF'), (datetime.date(2014, 6, 5), 'FF')}
{(datetime.date(2014, 6, 5), 'MF'), (datetime.date(2014, 6, 4), 'MF'), (datetime.date(2014, 6, 9), 'AF'), (datetime.date(2014, 6, 5), 'FF'), (datetime.date(2014, 6, 8), 'AF')}
{(datetime.date(2014, 6, 5), 'MF'), (datetime.date(2014, 6, 4), 'MF'), (datetime.date(2014, 6, 9), 'AF'), (datetime.date(2014, 6, 9), 'MF'), (datetime.date(2014, 6, 5), '

(3750000, 8750000)

In [30]:
# Calculate total external costs per fault type based on the updated rules
external_costs_per_fault = {fault_type: 0 for fault_type in fault_df['Fault'].unique() if fault_type != 'NF'}
same_day_faults = set()  # Track faults already addressed on the same day
current_month = None  # Keep track of the current month

for _, fault in fault_df.iterrows():
    if fault['Fault'] != 'NF':  # Only consider faults
        # Reset same_day_faults when a new month starts
        if current_month != fault['Month']:
            same_day_faults = set()
            current_month = fault['Month']

        # Check if the fault type on the same day has already been addressed
        if (fault['Day'], fault['Fault']) not in same_day_faults:
            # Determine the cost based on the month
            if int(fault['Month']) in high_demand_months:
                external_costs_per_fault[fault['Fault']] += 150000
            else:
                external_costs_per_fault[fault['Fault']] += 50000
            # Mark this fault type as addressed for the day
            same_day_faults.add((fault['Day'], fault['Fault']))

# Output the external costs per fault type
external_costs_per_fault


{'GF': 300000, 'MF': 1250000, 'FF': 2250000, 'AF': 3050000, 'EF': 1900000}