In [1]:
from google.colab import drive
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def load_excel_data(file_path, sheet_name=None):
    """
    Read one worksheet of an Excel workbook into a pandas DataFrame.

    Parameters:
    - file_path (str): Location of the Excel workbook.
    - sheet_name (str or int, default None): Sheet to read, by name or by
      position. When None, pandas' own default (the first sheet) applies.

    Returns:
    - DataFrame: The sheet contents, or None if the file is missing or any
      other error occurs (a message is printed in either case).
    """
    # Forward sheet_name only when the caller supplied one, so that the
    # None case falls back to pandas' default rather than passing None on.
    read_kwargs = {} if sheet_name is None else {"sheet_name": sheet_name}

    try:
        return pd.read_excel(file_path, **read_kwargs)
    except FileNotFoundError:
        print("File not found. Please provide a valid file path.")
    except Exception as e:
        print("An error occurred:", e)

    # Reached only after one of the handlers above has reported a failure.
    return None

def aggregate_csvs_prj(folder_path, id_part_index=4):
    """
    Combine every ``results*.csv`` file in a folder into a single DataFrame.

    Each matching file is read with its second line (file line index 1) and
    its last line skipped, and gets an "ID" column holding one dot-separated
    piece of its own filename.

    Parameters:
    - folder_path (str): Folder to scan. Sub-folders are not searched.
    - id_part_index (int, default 4): Position, after splitting the filename
      on ".", of the piece that is stored in the "ID" column.

    Returns:
    - DataFrame: Row-wise concatenation of all matching files, in sorted
      filename order. Each file's original row index is kept.

    Raises:
    - ValueError: If the folder contains no matching file.
    - IndexError: If a matching filename has too few dot-separated pieces.
    """
    from warnings import simplefilter
    # NOTE: this alters the process-wide warning filters, so pandas
    # PerformanceWarnings stay silenced after the function returns.
    simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

    # os.listdir returns names in arbitrary order; sort them so the row
    # order of the combined frame is reproducible between runs and machines.
    matching = sorted(
        name for name in os.listdir(folder_path)
        if name.startswith("results") and name.endswith(".csv")
    )
    if not matching:
        # pd.concat would raise a ValueError here anyway; raise the same type
        # with a message that says which folder was empty.
        raise ValueError(
            f"No 'results*.csv' files found in folder: {folder_path}"
        )

    dataframes = []
    for filename in matching:
        pieces = filename.split(".")
        if not -len(pieces) <= id_part_index < len(pieces):
            raise IndexError(
                f"Cannot take piece {id_part_index} of filename {filename!r}: "
                f"it only has {len(pieces)} dot-separated pieces"
            )
        file_id = pieces[id_part_index]

        # skipfooter is only supported by the python parsing engine.
        df = pd.read_csv(
            os.path.join(folder_path, filename),
            skiprows=[1],
            skipfooter=1,
            engine='python',
        )
        df["ID"] = file_id
        dataframes.append(df)

    # Concatenate the per-file frames into a single frame
    return pd.concat(dataframes)


def load_dirs(input_dirs, dir_meta_cols, FIPS_crosswalk_file):
    """
    Load the results CSVs from several directories into one DataFrame.

    Each directory is aggregated with ``aggregate_csvs_prj``; the matching
    metadata dictionary is then written onto every row of that directory's
    frame, one column per key.

    Parameters:
    - input_dirs (list): List of input directories.
    - dir_meta_cols (list): List of dictionaries, one per directory and in
      the same order as input_dirs, mapping column name -> constant value.
      Entries beyond len(input_dirs) are ignored.
    - FIPS_crosswalk_file (str): Path to the crosswalk file. Currently
      unused: no crosswalk merge is performed in this function. The
      parameter is kept so existing calls keep working.

    Returns:
    - agg_df0 (DataFrame): Concatenation of all directories' frames with the
      metadata columns added and underscores in "ID" replaced by spaces.

    Raises:
    - IndexError: If dir_meta_cols has fewer entries than input_dirs.
    """
    # Fail before reading any files rather than part-way through the loop.
    if len(dir_meta_cols) < len(input_dirs):
        raise IndexError(
            f"dir_meta_cols has {len(dir_meta_cols)} entries but "
            f"{len(input_dirs)} input directories were given"
        )

    agg_dfs = []

    # zip stops at the shorter sequence, which after the check above is
    # always input_dirs.
    for directory, meta in zip(input_dirs, dir_meta_cols):
        dfi = aggregate_csvs_prj(directory)
        for key, val in meta.items():
            dfi[key] = val
        agg_dfs.append(dfi)

    agg_df0 = pd.concat(agg_dfs)
    agg_df0['ID'] = agg_df0['ID'].str.replace("_", " ")

    return agg_df0

def load_summarized_demographic_data(path_dict, summary_col):
  """
  Read the CSVs listed in path_dict['fname'], collapse the per-age columns
  (Age0 .. Age99) of each into a single row total stored in `summary_col`,
  tag each file's rows with the matching entry of path_dict['year'] in a
  'Baseline Year' column, and return all files stacked into one DataFrame
  with the per-age columns removed.
  """
  age_columns = [f"Age{age}" for age in range(100)]

  frames = []
  for position, csv_path in enumerate(path_dict['fname']):
    frame = pd.read_csv(csv_path)
    # Row total across every age column
    frame[summary_col] = frame[age_columns].sum(axis=1)
    frame['Baseline Year'] = path_dict['year'][position]
    # Only the total is kept; the per-age breakdown is discarded
    frames.append(frame.drop(columns=age_columns))

  return pd.concat(frames)


# Labels of health-incidence outcomes (mortality, asthma, ER visits, hospital
# admissions, work/school loss days, ...). Most carry a "PM", "O3" or "Total"
# qualifier in the name.
# The list is not referenced by the other cells shown here.
# NOTE(review): presumably these match column names in the COBRA results CSVs
# -- confirm against the input files before selecting columns with it.
incidence_vars = [
    'Total Mortality(low estimate)',
    'Total Mortality(high estimate)',
    'PM Mortality, All Cause (low)',
    'PM Mortality, All Cause (high)',
    'PM Infant Mortality',
    'Total O3 Mortality',
    'O3 Mortality (Short-term exposure)',
    'O3 Mortality (Long-term exposure)',
    'Total Asthma Symptoms',
    'PM Asthma Symptoms, Albuterol use',
    'O3 Asthma Symptoms, Chest Tightness',
    'O3 Asthma Symptoms, Cough',
    'O3 Asthma Symptoms, Shortness of Breath',
    'O3 Asthma Symptoms, Wheeze',
    'Total Incidence, Asthma',
    'PM Incidence, Asthma',
    'O3 Incidence, Asthma',
    'Total Incidence, Hay Fever/Rhinitis',
    'PM Incidence, Hay Fever/Rhinitis',
    'O3 Incidence, Hay Fever/Rhinitis',
    'Total ER Visits, Respiratory',
    'PM ER Visits, Respiratory',
    'O3 ER Visits, Respiratory',
    'Total Hospital Admits, All Respiratory',
    'PM Hospital Admits, All Respiratory',
    'O3 Hospital Admits, All Respiratory',
    'PM Nonfatal Heart Attacks',
    'PM Minor Restricted Activity Days',
    'PM Work Loss Days',
    'PM Incidence Lung Cancer',
    'PM HA Cardio Cerebro and Peripheral Vascular Disease',
    'PM HA Alzheimers Disease',
    'PM HA Parkinsons Disease',
    'PM Incidence Stroke',
    'PM Incidence Out of Hospital Cardiac Arrest',
    'PM ER visits All Cardiac Outcomes',
    'O3 ER Visits, Asthma',
    'O3 School Loss Days, All Cause'
]


In [2]:
from google.colab import drive
import os

# Mount Google Drive into the Colab filesystem so the project folder below is
# reachable.
drive.mount('/content/drive') # Comment this out if running ipynb locally
# Project root. Every relative path used in the later cells is resolved
# against this directory, because the process working directory is switched
# to it on the next line.
wdir = '/content/drive/MyDrive/gpDept-ResearchDept/LNG Air Pollution/LNG Health - COBRA project/git_repo/Permit-To-Kill-COBRA-Research/' # Replace this with your working directory path
os.chdir(wdir)

Mounted at /content/drive


In [3]:

# Method parameters ===========================================================

# Batch-specific metadata stamped onto every row loaded from that batch's
# directory; all three batches share the same "discount rate" value.
dir_meta_cols = [
    {"discount rate": 2, "config_id": batch_id}
    for batch_id in ("a.finalData.01", "a.finalData.02", "a.finalData.03")
]
# Project-specific metadata to add to each row
prj_meta_cols = [
    "Project",
    "Terminal",
    "Project Status",
    "DOE NFTA Authorization Status",
]

# Manually assigned directories and file paths ================================
# Folder holding the input spreadsheets and the per-batch result directories
results_dir0 = "Version 5 analysis"

# Project data spreadsheet
f0 = 'Version 5 analysis/240610-FinalData-AllProjects.xlsx'

# FIPS crosswalk
FIPS_crosswalk_file = "COBRA (from desktop version) - v5.1/data dictionary/SOURCEINDX to FIPS crosswalk.csv"

# Script ======================================================================
# One input directory per batch: <results_dir0>/<config_id>
input_dirs = [f'{results_dir0}/{meta["config_id"]}' for meta in dir_meta_cols]

# 1. Aggregate CSVs ===========================================================
agg_df0 = (
    load_dirs(input_dirs, dir_meta_cols, FIPS_crosswalk_file)
    .sort_values(by=['config_id', 'ID'])
)

In [4]:
# 2. Add discounted $ columns (from a 2023 baseline) ==========================
dol_cols = [i for i in agg_df0.columns if "$" in i]
d = 0.02  # discount rate applied per year in the formula below

# Years between each row's "ID" (treated as a year) and the 2023 baseline.
# "ID" is stored as text, so each value is converted with int().
years_from_baseline = agg_df0['ID'].map(int) - 2023

# Multiplier 1 / (1 + d)^(year - 2023). It is the same for every dollar
# column, so it is computed once for the whole frame instead of once per row
# and per column inside DataFrame.apply.
discount_factor = 1 / (1 + d) ** years_from_baseline

# The columns to discount are picked by position within dol_cols.
# NOTE(review): these positions depend on the column order of the input CSVs
# -- confirm they still select the intended columns if that layout changes.
for col in [dol_cols[i] for i in [0, 1, 2, 3, 10, 16]]:
    # Create a new column name for the discounted values
    discounted_col = f'{col} DISCOUNTED'

    # Element-wise product; both operands share agg_df0's index
    agg_df0[discounted_col] = agg_df0[col] * discount_factor

In [5]:
# 3. Save combined dataframe as a CSV

agg_df_f0 = results_dir0 + "/a.finalData.results/a.finalData.01-03.combined_results.csv"
# to_csv does not create missing folders; make sure the output folder exists
# so the cell also works when the results folder has not been created yet.
os.makedirs(os.path.dirname(agg_df_f0), exist_ok=True)
agg_df0.to_csv(agg_df_f0, index=False)