# In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def process_folder(folder_path, c_values, c_total, BOD1, COD1, BOD31, COD31, Ammonia1, Ammonia31, c_cod_results, c_bod_results):
    BODp = 0.7
    CODp = 0.75

    for filename in os.listdir(folder_path):
        if not filename.endswith(".pkl"):
            continue
        file_path = os.path.join(folder_path, filename)
        c_list = []
        data = []

        try:
            data = pd.read_pickle(file_path)
        except pd.errors.EmptyDataError:
            print(f"Warning: Empty DataFrame in file {file_path}")
            continue

        if data.empty or any(col not in data.columns for col in ['bod1', 'cod1', 'bod31', 'cod31', 'snh1', 'snh31']):
            print(f"Warning: Missing columns in file {file_path}")
            continue

        data['BODut'] = abs((50 - data["bod31"].min()) / (data["bod1"].max() - data["bod31"].min()))
        data['BODlt'] = abs((25 - data["bod31"].min()) / (data["bod1"].max() - data["bod31"].min()))

        data['CODut'] = abs((250 - data["cod31"].min()) / (data["cod1"].max() - data["cod31"].min()))
        data['CODlt'] = abs((125 - data["cod31"].min()) / (data["cod1"].max() - data["cod31"].min()))



        data["LIN_BODe"] = abs((data.bod31 - data["bod31"].min()) / (data["bod1"].max() - data["bod31"].min()))
        data["LIN_CODe"] = abs((data.cod31 - data["cod31"].min()) / (data["cod1"].max() - data["cod31"].min()))
        data["LIN_BODi"] = abs((data.bod1 - data["bod31"].min()) / (data["bod1"].max() - data["bod31"].min()))
        data["LIN_CODi"] = abs((data.cod1 - data["cod31"].min()) / (data["cod1"].max() - data["cod31"].min()))


        data['BODut-BODeffl'] = (data.BODut - data["LIN_BODe"])
        data['CODut-CODeffl'] = (data.CODut - data["LIN_CODe"])
        data['BODlt-BODeffl'] = (data.BODlt - data["LIN_BODe"])
        data['CODlt-CODeffl'] = (data.CODlt - data["LIN_CODe"])

        data["bodp"] = -(BODp * data.LIN_BODi) + data.LIN_BODi - data.LIN_BODe
        data["codp"] = -(CODp * data.LIN_CODi) + data.LIN_CODi - data.LIN_CODe

        data["max1"] = data[['BODlt-BODeffl', 'bodp']].max(axis=1)
        data['c_BOD'] = data[['BODut-BODeffl', 'max1']].min(axis=1)
        data["max2"] = data[['CODlt-CODeffl', 'codp']].max(axis=1)
        data['c_COD'] = data[['CODut-CODeffl', 'max2']].min(axis=1)

        c_bod_list = data['c_BOD'].tolist()
        c_cod_list = data['c_COD'].tolist()

        # FLAGS
        flag_BOD = 'F' if data['BODut-BODeffl'].min() < 0 else 'C'
        flag_COD = 'F' if data['CODut-CODeffl'].min() < 0 else 'C'
        flag_BODlt = 'F' if data['BODlt-BODeffl'].min() < 0 else 'C'
        flag_CODlt = 'F' if data['CODlt-CODeffl'].min() < 0 else 'C'
        flag_reduction_bod = 'F' if data["bodp"].min() < 0 else 'C'
        flag_reduction_cod = 'F' if data["codp"].min() < 0 else 'C'

        for c_bod, c_cod in zip(c_bod_list, c_cod_list):
            failure_type = "COMPLIANT"
            failure_source = None

            if c_bod < 0:
                if flag_BOD == 'F':
                    failure_type = "Maximum failure"
                    failure_source = "BOD"
                else:
                    if flag_BODlt == 'F' and flag_reduction_bod == 'F':
                        failure_type = "LUT failure"
                        failure_source = "BOD"

            if c_cod < 0:
                if flag_COD == 'F':
                    failure_type = "Maximum failure"
                    failure_source = "COD"
                else:
                    if flag_CODlt == 'F' and flag_reduction_cod == 'F':
                        failure_type = "LUT failure"
                        failure_source = "COD"

            # Determine the source of failure if both conditions are met
            if failure_type != "COMPLIANT" and failure_source is None:
                if c_bod < 0 and c_cod < 0:
                    failure_source = "BOTH"
                elif c_bod < 0:
                    failure_source = "BOD"
                elif c_cod < 0:
                    failure_source = "COD"

            c_list.append((min(c_bod, c_cod), failure_type, failure_source))


            if not c_list:
                print(f"Warning: No valid rows in file {file_path}")
                continue

        c_values.append(min(c_list)[0])
        c_total.append(c_list)
        BOD1.append(data['bod1'].tolist())
        COD1.append(data['cod1'].tolist())
        BOD31.append(data['bod31'].tolist())
        COD31.append(data['cod31'].tolist())
        Ammonia1.append(data['snh1'].tolist())
        Ammonia31.append(data['snh31'].tolist())
        c_cod_results.append(c_cod_list)
        c_bod_results.append(c_bod_list)

# Scenario result folders: all four live under the same base_results directory.
_BASE_RESULTS_DIR = (
    'C:/Users/c0070810/OneDrive - Newcastle University/NEW WORK- OMAR/CSTR/'
    'hybrid aeration n1 - mech aeration/composite sample/cstr - gpsx/base_results'
)
folder_path_1 = _BASE_RESULTS_DIR + '/baseline'
folder_path_2 = _BASE_RESULTS_DIR + '/1.3'
folder_path_3 = _BASE_RESULTS_DIR + '/1.5'
folder_path_4 = _BASE_RESULTS_DIR + '/1.9'

# Result accumulators: ten independent empty lists per scenario, filled in
# place by process_folder.
(
    c_values_baseline, c_total_baseline, c_cod_baseline, c_bod_baseline,
    BOD1_baseline, COD1_baseline, BOD31_baseline, COD31_baseline,
    Ammonia1_baseline, Ammonia31_baseline,
) = ([] for _ in range(10))

(
    c_values_shift130, c_total_shift130, c_cod_shift130, c_bod_shift130,
    BOD1_shift130, COD1_shift130, BOD31_shift130, COD31_shift130,
    Ammonia1_shift130, Ammonia31_shift130,
) = ([] for _ in range(10))

(
    c_values_shift150, c_total_shift150, c_cod_shift150, c_bod_shift150,
    BOD1_shift150, COD1_shift150, BOD31_shift150, COD31_shift150,
    Ammonia1_shift150, Ammonia31_shift150,
) = ([] for _ in range(10))

(
    c_values_shift190, c_total_shift190, c_cod_shift190, c_bod_shift190,
    BOD1_shift190, COD1_shift190, BOD31_shift190, COD31_shift190,
    Ammonia1_shift190, Ammonia31_shift190,
) = ([] for _ in range(10))

# Process each folder.
# The two score lists are passed by keyword: process_folder expects
# c_cod_results before c_bod_results, and passing them positionally in the
# opposite order silently stores COD scores in the BOD list (and vice versa).
process_folder(
    folder_path_1, c_values_baseline, c_total_baseline,
    BOD1_baseline, COD1_baseline, BOD31_baseline, COD31_baseline,
    Ammonia1_baseline, Ammonia31_baseline,
    c_cod_results=c_cod_baseline, c_bod_results=c_bod_baseline,
)
process_folder(
    folder_path_2, c_values_shift130, c_total_shift130,
    BOD1_shift130, COD1_shift130, BOD31_shift130, COD31_shift130,
    Ammonia1_shift130, Ammonia31_shift130,
    c_cod_results=c_cod_shift130, c_bod_results=c_bod_shift130,
)
process_folder(
    folder_path_3, c_values_shift150, c_total_shift150,
    BOD1_shift150, COD1_shift150, BOD31_shift150, COD31_shift150,
    Ammonia1_shift150, Ammonia31_shift150,
    c_cod_results=c_cod_shift150, c_bod_results=c_bod_shift150,
)
process_folder(
    folder_path_4, c_values_shift190, c_total_shift190,
    BOD1_shift190, COD1_shift190, BOD31_shift190, COD31_shift190,
    Ammonia1_shift190, Ammonia31_shift190,
    c_cod_results=c_cod_shift190, c_bod_results=c_bod_shift190,
)

# Define failure types and sources
def extract_failure_types(c_total):
    """Flatten per-file row results into parallel label lists.

    Args:
        c_total: Iterable of per-file lists whose items are 3-tuples
            ``(value, failure_type, failure_source)``.

    Returns:
        A tuple ``(failure_types, failure_sources)`` holding one entry per row
        across all files, in file order and then row order.
    """
    rows = [row for per_file in c_total for row in per_file]
    failure_types = [kind for _, kind, _ in rows]
    failure_sources = [origin for _, _, origin in rows]
    return failure_types, failure_sources

# Flatten the row-level failure labels of each scenario, in the order
# baseline, shift130, shift150, shift190.
(
    (failure_types_baseline, failure_sources_baseline),
    (failure_types_shift130, failure_sources_shift130),
    (failure_types_shift150, failure_sources_shift150),
    (failure_types_shift190, failure_sources_shift190),
) = [
    extract_failure_types(scenario_rows)
    for scenario_rows in (
        c_total_baseline,
        c_total_shift130,
        c_total_shift150,
        c_total_shift190,
    )
]

# Combine the values and failure types into a list of tuples for each dataset
def combine_data(c_values, failure_types, failure_sources):
    """Pair up the three inputs element-wise.

    Returns a list of ``(value, failure_type, failure_source)`` tuples; the
    result is as long as the shortest input.
    """
    return [*zip(c_values, failure_types, failure_sources)]

def _worst_row_labels(c_total):
    """Return the failure type and source of each file's lowest-scoring row.

    ``c_values_*`` holds one score per file, whereas the flattened
    ``failure_types_*`` / ``failure_sources_*`` lists hold one label per row.
    Zipping those together pairs file *i*'s score with row *i*'s labels.  This
    helper produces one label pair per file, taken from the row that yields
    that file's minimum score, so the combined tuples line up.
    """
    worst_types = []
    worst_sources = []
    for c_list in c_total:
        _, failure_type, failure_source = min(c_list, key=lambda row: row[0])
        worst_types.append(failure_type)
        worst_sources.append(failure_source)
    return worst_types, worst_sources


combined_data_baseline = combine_data(c_values_baseline, *_worst_row_labels(c_total_baseline))
combined_data_shift130 = combine_data(c_values_shift130, *_worst_row_labels(c_total_shift130))
combined_data_shift150 = combine_data(c_values_shift150, *_worst_row_labels(c_total_shift150))
combined_data_shift190 = combine_data(c_values_shift190, *_worst_row_labels(c_total_shift190))

# Print the results
def print_combined_data(label, combined_data):
    """Print a header line for *label* followed by one line per result tuple.

    Args:
        label: Heading printed after a blank line, followed by a colon.
        combined_data: Iterable of ``(value, failure_type, failure_source)``
            tuples.
    """
    report = [f"\n{label}:"]
    report.extend(
        f"Value: {value}, Failure Type: {failure_type}, Failure Source: {failure_source}"
        for value, failure_type, failure_source in combined_data
    )
    print("\n".join(report))

# Report every scenario in the same order the folders were processed.
for _scenario_label, _scenario_rows in (
    ("Baseline", combined_data_baseline),
    ("Shift130", combined_data_shift130),
    ("Shift150", combined_data_shift150),
    ("Shift190", combined_data_shift190),
):
    print_combined_data(_scenario_label, _scenario_rows)


