In [1]:
from google.colab import drive
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def sum_by_scenario_and_year(df, colset):
    """
    Process the given DataFrame by pivoting, summing, and resetting the index.

    Args:
        df (pd.DataFrame): The DataFrame to process.
        colset (list): List of columns to include in the pivot table.

    Returns:
        pd.DataFrame: The processed DataFrame.
    """
    pivoted = -df.pivot_table(index=['config_id', 'ID'], values=colset, aggfunc='sum')
    pivoted.reset_index(inplace=True)
    pivoted['ID'] = pivoted['ID'].apply(lambda x: int(x))
    return pivoted

def filter_and_cumsum(df, colset, cumsum_year):
    """
    Filter the DataFrame for a given year and pivot to sum values.

    Args:
        df (pd.DataFrame): The DataFrame to filter and pivot.
        colset (list): List of columns to include in the pivot table.
        cumsum_year (int): The year to sum up to

    Returns:
        pd.DataFrame: The filtered and pivoted DataFrame.
    """
    filtered = df.loc[df['ID'] < cumsum_year + 1]
    pivoted = filtered.pivot_table(index=['config_id'], values=colset, aggfunc='sum')
    pivoted['CumYear'] = cumsum_year
    return pivoted

def combine_and_format(dfs, colset):
    """
    Combine multiple DataFrames and format them for output.

    Args:
        dfs (list): List of DataFrames to combine.
        colset (list): List of columns to include in the final output.

    Returns:
        pd.DataFrame: The combined and formatted DataFrame.
    """
    combined = pd.concat(dfs)
    combined.reset_index(inplace=True)
    melted = combined.melt(id_vars=['config_id', 'CumYear'], var_name='Var', value_name='Value')
    formatted = melted.pivot(index='Var', columns=['config_id', 'CumYear'], values='Value')
    return formatted.loc[colset, [
        ('a.finalData.01', 2030), ('a.finalData.01', 2040), ('a.finalData.01', 2050),
        ('a.finalData.02', 2030), ('a.finalData.02', 2040), ('a.finalData.02', 2050),
        ('a.finalData.03', 2030), ('a.finalData.03', 2040), ('a.finalData.03', 2050)
    ]]


def load_summarized_demographic_data(path_dict, summary_col):
  """
  Load csvs based on fnames in path_dict, calculate the total values for each field
  regardless of age, and drop age columns
  """
  AgeCols = ['Age0'] + ['Age'+str(i) for i in np.arange(1, 100)]

  out_dfs = []

  for i in np.arange(len(path_dict['fname'])):
    out_df0 = pd.read_csv(path_dict['fname'][i])
    out_df0[summary_col] = out_df0[AgeCols].sum(axis=1)
    out_df0['Baseline Year'] = path_dict['year'][i]
    out_df0.drop(columns=AgeCols, inplace=True)
    out_dfs.append(out_df0)

  return pd.concat(out_dfs)

def add_per_million(df, vars):
    df_out = df
    for var in vars:
        new_col_name = var + ' PER MILLION'
        df_out[new_col_name] = df_out[var] / (df_out['Total Population'] / 1000000)
    return df_out

def create_rank_list(df, scenario, eval_years, eval_vars, n_perlist, cutoffs={}):
    # Create rank list based on sorting and "top n"
    for yr in eval_years:
        filtered = df[((df['config_id'] == scenario) & (df['ID'] == yr))]
        filtered2 = filtered[(['County, State', 'ID'] + eval_vars)]
        for i in eval_vars:
            if len(cutoffs) == 0:
                rank_lists[yr][i] = filtered2.sort_values(by=i).head(n_perlist)
            else:
                filtered3 = filtered2[filtered2[i] <= -cutoffs[i]]
                rank_lists[yr][i] = filtered3.sort_values(by=i)


incidence_vars = [
    'Total Mortality(low estimate)',
    'Total Mortality(high estimate)',
    'PM Mortality, All Cause (low)',
    'PM Mortality, All Cause (high)',
    'PM Infant Mortality',
    'Total O3 Mortality',
    'O3 Mortality (Short-term exposure)',
    'O3 Mortality (Long-term exposure)',
    'Total Asthma Symptoms',
    'PM Asthma Symptoms, Albuterol use',
    'O3 Asthma Symptoms, Chest Tightness',
    'O3 Asthma Symptoms, Cough',
    'O3 Asthma Symptoms, Shortness of Breath',
    'O3 Asthma Symptoms, Wheeze',
    'Total Incidence, Asthma',
    'PM Incidence, Asthma',
    'O3 Incidence, Asthma',
    'Total Incidence, Hay Fever/Rhinitis',
    'PM Incidence, Hay Fever/Rhinitis',
    'O3 Incidence, Hay Fever/Rhinitis',
    'Total ER Visits, Respiratory',
    'PM ER Visits, Respiratory',
    'O3 ER Visits, Respiratory',
    'Total Hospital Admits, All Respiratory',
    'PM Hospital Admits, All Respiratory',
    'O3 Hospital Admits, All Respiratory',
    'PM Nonfatal Heart Attacks',
    'PM Minor Restricted Activity Days',
    'PM Work Loss Days',
    'PM Incidence Lung Cancer',
    'PM HA Cardio Cerebro and Peripheral Vascular Disease',
    'PM HA Alzheimers Disease',
    'PM HA Parkinsons Disease',
    'PM Incidence Stroke',
    'PM Incidence Out of Hospital Cardiac Arrest',
    'PM ER visits All Cardiac Outcomes',
    'O3 ER Visits, Asthma',
    'O3 School Loss Days, All Cause'
]


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
"""
Read in combined results
"""

# Script ======================================================================
results_dir0 = "/content/drive/MyDrive/gpDept-ResearchDept/LNG Air Pollution/LNG Health - COBRA project/Version 5 analysis"
agg_df_f0 = results_dir0 + "/a.finalData.results/a.finalData.01-03.combined_results.csv"
agg_df0 = pd.read_csv(agg_df_f0)

## Table 4. Cumulative health impacts measured out from 2023 in each scenario

In [None]:
"""
Table 4. Cumulative health impacts measured out from 2023 in each scenario
Table Export
-----------------
Calculates cumsums for 2030, 2040, and 2050 for key variables
And formats as a nice table
"""

import pandas as pd

# Run settings ===============================================================
colset0 = ['Total Mortality(high estimate)', 'Total Mortality(low estimate)',
           '$ Total Health Benefits(high estimate) DISCOUNTED', '$ Total Health Benefits(low estimate) DISCOUNTED',
           'Total Incidence, Asthma',
          #  '$ Total Incidence, Asthma DISCOUNTED',
           'Total Asthma Symptoms',
          #  '$ Total Asthma Symptoms DISCOUNTED',
           'PM Work Loss Days', 'O3 School Loss Days, All Cause'
]

xlsx_out = results_dir0 + "/a.finalData.results/a.finalData.briefing_table.cum_impacts.xlsx"
million_usd_unit = True
save_xlsx = False

# Process the data
processed_data = sum_by_scenario_and_year(agg_df0, colset0)

# Filter and pivot for each year
sum2030 = filter_and_cumsum(processed_data, colset0, 2030)
sum2040 = filter_and_cumsum(processed_data, colset0, 2040)
sum2050 = filter_and_cumsum(processed_data, colset0, 2050)

# Combine and format the results
sums = combine_and_format([sum2030, sum2040, sum2050], colset0)

if million_usd_unit:
    usd_unit = [i for i in colset0 if ('$'  in i)]
    for i in usd_unit:
        sums.loc[i] = sums.loc[i] / 1000000
        sums.rename(index={i: f'{i} (million USD)'}, inplace=True)

if save_xlsx:
    # Output the results to an Excel file
    sums.to_excel(xlsx_out)

sums

config_id,a.finalData.01,a.finalData.01,a.finalData.01,a.finalData.02,a.finalData.02,a.finalData.02,a.finalData.03,a.finalData.03,a.finalData.03
CumYear,2030,2040,2050,2030,2040,2050,2030,2040,2050
Var,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Total Mortality(high estimate),782.223578,2536.196,4467.131,657.185393,1942.334,3357.249,500.092737,1222.091776,2018.821
Total Mortality(low estimate),545.290937,1754.848,3106.086,469.862359,1381.971,2399.16,369.01471,911.17962,1515.067
$ Total Health Benefits(high estimate) DISCOUNTED (million USD),11831.981795,36688.54,62206.03,9994.588312,28304.21,47093.08,7654.088058,18035.781157,28710.24
$ Total Health Benefits(low estimate) DISCOUNTED (million USD),8502.617626,26120.52,44444.65,7359.921041,20708.62,34562.37,5809.376895,13799.559022,22081.21
"Total Incidence, Asthma",3103.794014,9199.03,15723.76,2764.113667,7548.931,12655.49,2249.663266,5246.289821,8447.3
Total Asthma Symptoms,494926.822344,1493056.0,2556049.0,438517.802431,1218118.0,2046078.0,354253.698587,838108.703886,1352061.0
PM Work Loss Days,26543.639829,85913.05,151815.4,21351.163838,62785.01,108682.3,15166.515216,35466.864291,57953.18
"O3 School Loss Days, All Cause",239688.168503,708122.3,1209982.0,217825.762636,596169.9,1000390.0,181615.067283,430939.30017,697443.1


In [3]:
pip install kaleido

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1


In [None]:
"""
Figure 2. Annual and cumulative premature mortality estimates due to the LNG build-out (high)
"""

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Settings ====================================================================

vars = ['Total Mortality(high estimate)', 'Total Mortality(low estimate)',
        '$ Total Mortality(high estimate)', '$ Total Mortality(low estimate)',
         'PM Mortality, All Cause (high)',  '$ PM Mortality, All Cause (high)',
        'O3 Mortality (Long-term exposure)', '$ O3 Mortality (Long-term exposure)',
        '$ Total Health Benefits(high estimate)', '$ Total Health Benefits(low estimate)']

# Script =====================================================================

sum = - agg_df0.pivot_table(
    index=['config_id', 'ID'],
    values=vars,
    aggfunc='sum'
)

sum.reset_index(inplace=True)
sum['ID'] = sum['ID'].apply(lambda x: int(x))
sum['config_id'] = pd.Categorical(sum['config_id'], categories=['a.finalData.03', 'a.finalData.02', 'a.finalData.01' ])
name_map = {
    'a.finalData.01': 'Full Buildout',
    'a.finalData.02': 'No New Permits',
    'a.finalData.03': 'Operating Projects Only'
}
sum['config_id'].replace(name_map, inplace=True)

# Create a cumulative mortalities column
sum['Cumulative mortalities'] = sum.groupby('config_id')['Total Mortality(high estimate)'].cumsum()

# Create subplots with shared x-axis
fig = make_subplots(rows=2, cols=1, shared_xaxes=False,
                    subplot_titles=("Annual Premature Deaths", "Cumulative Premature Deaths"))

color_map = {
    status: color for status, color in zip(sum['config_id'].unique(), ['#F7BE00', '#D54400', '#003B4A'])
}

# First subplot: Total Mortality
for config_id in sum['config_id'].unique():
    df = sum[sum['config_id'] == config_id]
    fig.add_trace(go.Scatter(x=df['ID'], y=df['Total Mortality(high estimate)'],
                             mode='lines+markers', marker_color=color_map[config_id],
                             name=config_id),
                  row=1, col=1)

# Second subplot: Cumulative Mortalities
for config_id in sum['config_id'].unique():
    df = sum[sum['config_id'] == config_id]
    fig.add_trace(go.Scatter(x=df['ID'], y=df['Cumulative mortalities'],
                             mode='lines+markers', marker_color=color_map[config_id],
                             name=config_id, showlegend=False),
                  row=2, col=1)

# Update x-axis label
fig.update_xaxes(title_text="Year", row=2, col=1)

# Update y-axis labels
fig.update_yaxes(title_text="Annual Deaths (high estimate)", row=1, col=1)
fig.update_yaxes(title_text="Cumulative Deaths (high estimate)", row=2, col=1)

# Update layout
fig.update_layout(height=600, width=700)
fig.update_yaxes(range=[0, 220], row=1, col=1)

fig.write_image(results_dir0 + "/a.finalData.results/timeline_impacts.svg", engine="kaleido")
fig.write_image(results_dir0 + "/a.finalData.results/timeline_impacts.pdf", engine="kaleido")
fig.write_image(results_dir0 + "/a.finalData.results/timeline_impacts.jpg")

fig.show()


In [None]:
"""
Appendix. Annual and cumulative premature mortality estimates due to the LNG build-out (low)
"""

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Settings ====================================================================

vars = ['Total Mortality(high estimate)', 'Total Mortality(low estimate)',
        '$ Total Mortality(high estimate)', '$ Total Mortality(low estimate)',
         'PM Mortality, All Cause (high)',  '$ PM Mortality, All Cause (high)',
        'O3 Mortality (Long-term exposure)', '$ O3 Mortality (Long-term exposure)',
        '$ Total Health Benefits(high estimate)', '$ Total Health Benefits(low estimate)']

# Script =====================================================================

sum = - agg_df0.pivot_table(
    index=['config_id', 'ID'],
    values=vars,
    aggfunc='sum'
)

sum.reset_index(inplace=True)
sum['ID'] = sum['ID'].apply(lambda x: int(x))
sum['config_id'] = pd.Categorical(sum['config_id'], categories=['a.finalData.03', 'a.finalData.02', 'a.finalData.01' ])
name_map = {
    'a.finalData.01': 'Full Buildout',
    'a.finalData.02': 'No New Permits',
    'a.finalData.03': 'Operating Projects Only'
}
sum['config_id'].replace(name_map, inplace=True)

# Create a cumulative mortalities column
sum['Cumulative mortalities'] = sum.groupby('config_id')['Total Mortality(low estimate)'].cumsum()

# Create subplots with shared x-axis
fig = make_subplots(rows=2, cols=1, shared_xaxes=False,
                    subplot_titles=("Annual Premature Deaths", "Cumulative Premature Deaths"))

color_map = {
    status: color for status, color in zip(sum['config_id'].unique(), ['#F7BE00', '#D54400', '#003B4A'])
}

# First subplot: Total Mortality
for config_id in sum['config_id'].unique():
    df = sum[sum['config_id'] == config_id]
    fig.add_trace(go.Scatter(x=df['ID'], y=df['Total Mortality(low estimate)'],
                             mode='lines+markers', marker_color=color_map[config_id],
                             name=config_id),
                  row=1, col=1)

# Second subplot: Cumulative Mortalities
for config_id in sum['config_id'].unique():
    df = sum[sum['config_id'] == config_id]
    fig.add_trace(go.Scatter(x=df['ID'], y=df['Cumulative mortalities'],
                             mode='lines+markers', marker_color=color_map[config_id],
                             name=config_id, showlegend=False),
                  row=2, col=1)

# Update x-axis label
fig.update_xaxes(title_text="Year", row=2, col=1)

# Update y-axis labels
fig.update_yaxes(title_text="Annual Deaths (low estimate)", row=1, col=1)
fig.update_yaxes(title_text="Cumulative Deaths (low estimate)", row=2, col=1)

# Update layout
fig.update_layout(height=600, width=700)
fig.update_yaxes(range=[0, 220], row=1, col=1)

fig.write_image(results_dir0 + "/a.finalData.results/timeline_impacts-low.svg", engine="kaleido")
fig.write_image(results_dir0 + "/a.finalData.results/timeline_impacts-low.pdf", engine="kaleido")
fig.write_image(results_dir0 + "/a.finalData.results/timeline_impacts-low.jpg")

fig.show()


In [None]:
[(i, j) for i, j in enumerate(agg_df0.columns)]

[(0, 'ID'),
 (1, 'destindx'),
 (2, 'FIPS'),
 (3, 'State'),
 (4, 'County'),
 (5, 'Base PM 2.5'),
 (6, 'Control PM 2.5'),
 (7, 'Delta PM 2.5'),
 (8, 'Base O3'),
 (9, 'Control O3'),
 (10, 'Delta O3'),
 (11, '$ Total Health Benefits(low estimate)'),
 (12, '$ Total Health Benefits(high estimate)'),
 (13, 'Total Mortality(low estimate)'),
 (14, '$ Total Mortality(low estimate)'),
 (15, 'Total Mortality(high estimate)'),
 (16, '$ Total Mortality(high estimate)'),
 (17, 'PM Mortality, All Cause (low)'),
 (18, '$ PM Mortality, All Cause (low)'),
 (19, 'PM Mortality, All Cause (high)'),
 (20, '$ PM Mortality, All Cause (high)'),
 (21, 'PM Infant Mortality'),
 (22, '$ PM Infant Mortality'),
 (23, 'Total O3 Mortality'),
 (24, '$ Total O3 Mortality'),
 (25, 'O3 Mortality (Short-term exposure)'),
 (26, '$ O3 Mortality (Short term exposure)'),
 (27, 'O3 Mortality (Long-term exposure)'),
 (28, '$ O3 Mortality (Long-term exposure)'),
 (29, 'Total Asthma Symptoms'),
 (30, '$ Total Asthma Symptoms'),
 (3

## Most Impacted Counties analysis

In [4]:
"""
Create lists of top 10 most impacted counties due to absolute mortalities and per million
"""


# Table configuration ========================================================
eval_yrs = [2023, 2030]
scenario = 'a.finalData.01'
eval_vars = ['Total Mortality(high estimate)', 'Total Mortality(high estimate) PER MILLION']
# Top 10 settings
# n_perlist = 10
# cutoffs = {}

# Absolute cut-off test
n_perlist = -1
cutoffs = {'Total Mortality(high estimate)': 1,
           'Total Mortality(high estimate) PER MILLION': 10}

# Script =====================================================================
pivot_indices = ['County', 'State', 'config_id', 'ID']
colset0 = pivot_indices + eval_vars
rank_lists = {i: {} for i in eval_yrs}

# Filter DF to just the scenario & year used for ranking
# filtered = agg_df0[((agg_df0['config_id'] == scenario) & (agg_df0['ID'] == eval_yr))]

# Load population data for Mortalities Per Million columns
pop_f0 = "/content/drive/MyDrive/gpDept-ResearchDept/LNG Air Pollution/LNG Health - COBRA project/COBRA (from desktop version) - v5.1/default data/default_YYYY_population_data.csv"
pop_file_info = {'fname': [pop_f0.replace("YYYY", str(i)) for i in eval_yrs],
            'year': [i for i in eval_yrs]}
pop_data = load_summarized_demographic_data(pop_file_info, 'Total Population')

# Merge filtered DF with population data on FIPS
agg_df1 = (agg_df0
           .merge(
               (pop_data[['FIPS', 'Baseline Year', 'Total Population']]),
               left_on=['FIPS', 'ID'],
               right_on=['FIPS', 'Baseline Year'],
               how='inner'
))

# Create County, State column
agg_df1['County, State'] = agg_df1[['County', 'State']].agg(', '.join, axis=1)
agg_df1.drop(['County', 'State'], axis=1, inplace=True)

# Create per million variables
agg_df1 = add_per_million(agg_df1, eval_vars)

# Create rank lists for given eval_vars and given_years
create_rank_list(agg_df1, scenario, eval_yrs, eval_vars, n_perlist, cutoffs)

# Create a dataframe with each county/state as a row and columns indicating with
# rank lists it was part of.
all_counties = [k for i in eval_yrs for j in eval_vars for k in rank_lists[i][j]['County, State'].tolist()]
all_rank_types = [str(i) + ' - ' + j for i in eval_yrs for j in eval_vars for k in np.arange(len(rank_lists[i][j]['County, State']))]

all_counties_df = pd.DataFrame({'County, State': all_counties,
                                'Rank type': all_rank_types})
all_counties_df['Presence'] = "Yes"
pivoted = all_counties_df.pivot(columns='Rank type', index='County, State', values='Presence')
pivoted = pivoted.replace({np.nan: "No"})

# Merge with results dataframe and filter to only the relevant counties
merged = agg_df1.merge(pivoted, on='County, State', how='right')
fig_data = (merged
 [(['County, State', 'config_id', 'ID', 'Total Mortality(high estimate)', 'Total Mortality(high estimate) PER MILLION']
   + pivoted.columns.tolist()
   + ['Total Population']
   )]
)

fig_data

Unnamed: 0,"County, State",config_id,ID,Total Mortality(high estimate),Total Mortality(high estimate) PER MILLION,2023 - Total Mortality(high estimate),2023 - Total Mortality(high estimate) PER MILLION,2030 - Total Mortality(high estimate),2030 - Total Mortality(high estimate) PER MILLION,Total Population
0,"Acadia, Louisiana",a.finalData.01,2023,-0.568960,-8.756210,No,No,Yes,Yes,64977.8570
1,"Acadia, Louisiana",a.finalData.02,2023,-0.568960,-8.756210,No,No,Yes,Yes,64977.8570
2,"Acadia, Louisiana",a.finalData.03,2023,-0.568960,-8.756210,No,No,Yes,Yes,64977.8570
3,"Acadia, Louisiana",a.finalData.01,2030,-1.160714,-17.366492,No,No,Yes,Yes,66836.4232
4,"Acadia, Louisiana",a.finalData.02,2030,-0.825491,-12.350919,No,No,Yes,Yes,66836.4232
...,...,...,...,...,...,...,...,...,...,...
205,"Vermilion, Louisiana",a.finalData.02,2023,-0.375616,-5.906816,No,No,No,Yes,63590.2386
206,"Vermilion, Louisiana",a.finalData.03,2023,-0.375616,-5.906816,No,No,No,Yes,63590.2386
207,"Vermilion, Louisiana",a.finalData.01,2030,-0.739387,-11.078699,No,No,No,Yes,66739.5374
208,"Vermilion, Louisiana",a.finalData.02,2030,-0.528024,-7.911719,No,No,No,Yes,66739.5374


In [5]:
# Create distinct dataframes for different years in the analysis
plot_yr = 2030
save_xlsx = False

d1_columns = [str(eval_yrs[0])+j for j in [" - Total Mortality(high estimate)", " - Total Mortality(high estimate) PER MILLION"]]
d2_columns = [str(eval_yrs[1])+j for j in [" - Total Mortality(high estimate)", " - Total Mortality(high estimate) PER MILLION"]]

d1 = fig_data[((fig_data['ID']==eval_yrs[0])
               & ((fig_data[d1_columns[0]]=="Yes") |
                  (fig_data[d1_columns[1]]=="Yes")))][['County, State', 'ID', 'config_id'] + eval_vars + d1_columns]
d2 = fig_data[((fig_data['ID']==eval_yrs[1])
               & ((fig_data[d2_columns[0]]=="Yes") |
                  (fig_data[d2_columns[1]]=="Yes")))][['County, State', 'ID', 'config_id'] + eval_vars + d2_columns]

for i in d1_columns:
    d1[i] = pd.Categorical(d1[i], categories=['Yes', 'No'])

for i in d2_columns:
    d2[i] = pd.Categorical(d2[i], categories=['Yes', 'No'])

# Create single column with list indication
def list_type_col(row, yr):
    sy = str(yr)
    if row[sy+' - Total Mortality(high estimate)'] == 'Yes' and row[sy+' - Total Mortality(high estimate) PER MILLION'] == 'Yes':
        return 'Both'
    elif row[sy+' - Total Mortality(high estimate)'] == 'Yes':
        return 'Total Mortality'
    elif row[sy+' - Total Mortality(high estimate) PER MILLION'] == 'Yes':
        return 'Total Mortality (per million)'
    else:
        return 'None'

# Apply the function to create the new column
cat_order = ['Total Mortality', 'Total Mortality (per million)', 'Both']

if plot_yr == eval_yrs[0]:
    d3 = d1
elif plot_yr == eval_yrs[1]:
    d3 = d2
else:
    raise ValueError("Invalid plot year")

d3['List type'] = d3.apply(list_type_col, yr=plot_yr, axis=1)
d3['List type'] = pd.Categorical(d3['List type'], categories=cat_order)

# Sort by inclusion in ranking
# d1.sort_values(by=d1_columns+['County, State', 'config_id'], ascending=[True, True, True, False], inplace=True)
# d2.sort_values(by=d2_columns+['County, State', 'config_id'], ascending=[True, True, True, False], inplace=True)
# d1.sort_values(by=d1_columns+['Total Mortality(high estimate)', 'config_id'], ascending=[True, True, True, False], inplace=True)
# d2.sort_values(by=d2_columns+['Total Mortality(high estimate)', 'config_id'], ascending=[True, True, True, False], inplace=True)
d3.sort_values(by=['List type', 'County, State', 'config_id'], ascending=[True, True, False], inplace=True)

if save_xlsx:
    if len(cutoffs) > 0:
        fstr = "{}.{}deaths_{}permillion.top_impacted_counties.xlsx".format(plot_yr,
                                                                            cutoffs['Total Mortality(high estimate)'],
                                                                            cutoffs['Total Mortality(high estimate) PER MILLION'])
    else:
        fstr = "{}.top{}.top_impacted_counties.xlsx".format(plot_yr, n_perlist)
    f0 = results_dir0 + '/a.finalData.results/top_impacted_counties/' + fstr
    d3.to_excel(f0, index=False)

d3.head()

Unnamed: 0,"County, State",ID,config_id,Total Mortality(high estimate),Total Mortality(high estimate) PER MILLION,2030 - Total Mortality(high estimate),2030 - Total Mortality(high estimate) PER MILLION,List type
23,"Bexar, Texas",2030,a.finalData.03,-1.218993,-0.509708,Yes,No,Total Mortality
22,"Bexar, Texas",2030,a.finalData.02,-1.854462,-0.775423,Yes,No,Total Mortality
21,"Bexar, Texas",2030,a.finalData.01,-2.140375,-0.894974,Yes,No,Total Mortality
29,"Brazoria, Texas",2030,a.finalData.03,-0.647071,-1.394132,Yes,No,Total Mortality
28,"Brazoria, Texas",2030,a.finalData.02,-1.061924,-2.287944,Yes,No,Total Mortality


In [6]:
# Create a dataframe where values can be combined as stacked bars (not overlapping mortalities)
def custom_diff(series):
    diff_series = series.diff().fillna(0)  # Calculate the difference and fill NaN with 0
    diff_series.iloc[0] = series.iloc[0]  # Set the first value to be the original first value
    return diff_series

d3['bottom_y'] = - d3.groupby('County, State')['Total Mortality(high estimate) PER MILLION'].transform(lambda x: custom_diff(x))
d3['top_y'] = - d3.groupby('County, State')['Total Mortality(high estimate)'].transform(lambda x: custom_diff(x))

# Calculate column widths
counts = d3.groupby('List type').size()
column_widths = counts/counts.sum()
column_widths = column_widths.loc[cat_order].tolist()

# Substitute config_id names with project status
d3['config_id'].replace({'a.finalData.01': 'Full Buildout',
                         'a.finalData.02': 'No New Permits',
                         'a.finalData.03': 'Operating Projects Only'}, inplace=True)
print(len(d3['County, State'].unique()))

35


  counts = d3.groupby('List type').size()


In [17]:
"""
Create bar charts
"""

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Define a color map for each project status
color_map = {
    status: color for status, color in zip(d3['config_id'].unique(), ['#003B4A', '#D54400', '#F7BE00'])
}


title_map = {"Both": "Overlap Counties & Parishes:<br>1+ Premature Deaths, Absolute,<br>and 10+ Premature Deaths Per Mil.", "Total Mortality": "Counties & Parishes With 1+ Premature Deaths, Absolute",
             "Total Mortality (per million)": "Counties & Parishes With<br>10+ Premature Deaths Per Mil."}
subplot_titles = [title_map[i] for i in cat_order] + ["", "", ""]

# Create subplots
fig = make_subplots(rows=2, cols=3, shared_xaxes=True, shared_yaxes=True,
                    vertical_spacing=0.025, horizontal_spacing=0.025, column_widths = column_widths,
                    subplot_titles=subplot_titles)

# First bar chart
for status in d3['config_id'].unique():
    for i in np.arange(3):
        t = d3['List type'].unique()[i]
        showlegend = True if ((i == 2)) else False
        subset = d3[((d3['config_id'] == status) & (d3['List type'] == t))]
        fig.add_trace(go.Scatter(x=subset['County, State'], y=-subset["Total Mortality(high estimate)"],
                                 marker=dict(symbol="diamond", color=color_map[status], size=10),
                            name=status, showlegend=showlegend,
                                 mode='markers'), row=1, col=(i+1))

# Second bar chart
for status in d3['config_id'].unique():
    for i in np.arange(3):
        t = d3['List type'].unique()[i]
        subset = d3[((d3['config_id'] == status) & (d3['List type'] == t))]
        fig.add_trace(go.Scatter(x=subset['County, State'], y=-subset["Total Mortality(high estimate) PER MILLION"],
                            name=status, marker=dict(symbol="diamond", color=color_map[status], size=10),
                                 mode='markers', showlegend=False), row=2, col=(i+1))

# Update layout for stacked bars
fig.update_layout(height=800, width=1000, title_text="",
                  yaxis=dict(title='Annual Premature Deaths (high)', titlefont=dict(size=12)),
                  yaxis4=dict(title='Annual Premature Deaths Per Million (high)', titlefont=dict(size=12)),
                  xaxis4=dict(tickangle=-90),
                  xaxis5=dict(tickangle=-90),
                  xaxis6=dict(tickangle=-90),
                  legend=dict(
                            yanchor="top",
                            y=0.425,
                            xanchor="left",
                            x=0.035,
                            traceorder="reversed"
                        )
                  )
fig.layout.annotations[0].update(font=dict(size=10.5))
fig.layout.annotations[1].update(font=dict(size=10.5))
fig.layout.annotations[2].update(font=dict(size=10.5))


fig.write_image(results_dir0 + "/a.finalData.results/mortality_criteria_counties.svg", engine="kaleido")
fig.write_image(results_dir0 + "/a.finalData.results/mortality_criteria_counties.pdf", engine="kaleido")
fig.write_image(results_dir0 + "/a.finalData.results/mortality_criteria_counties.jpg")
fig.show()


## Making the "Pause" permanent analysis

In [None]:
"""
Figure 2. Annual and cumulative premature mortality estimates due to the LNG build-out
"""

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Settings ====================================================================

cumsum = True
vars = ['Total Mortality(high estimate)', 'Total Mortality(low estimate)',
           '$ Total Health Benefits(high estimate) DISCOUNTED', '$ Total Health Benefits(low estimate) DISCOUNTED',
           'Total Incidence, Asthma',
           'Total Asthma Symptoms',
           'PM Work Loss Days', 'O3 School Loss Days, All Cause'
]
subplot_titles = ['Premature Deaths (high estimate)', 'Premature Deaths (low estimate)',
           '$ Health Costs (high estimate)', '$ Health Costs (low estimate)',
           'Childhood Asthma Cases',
           'Asthma Symptom Occurrences',
           'Work Loss Days', 'School Loss Days'
]

yrs = [2030, 2040, 2050]
scenario_names = ['No New Permits', 'Operating Projects Only']
# scenario_names = ["No New Permits"]


# Script =====================================================================

# Group by scenario and year, and take total sum
sum = - agg_df0.pivot_table(
    index=['config_id', 'ID'],
    values=vars,
    aggfunc='sum'
)

sum.reset_index(inplace=True)
sum['ID'] = sum['ID'].apply(lambda x: int(x))

# If cumsum = True, convert values from annual to cumulative sums
if cumsum:
    # Create cumulative columns
    for var in vars:
        sum[var] = sum.groupby('config_id')[var].cumsum()

# Filter DF down to only the years we are plotting
filtered = sum[sum['ID'].isin(yrs)]
plot_vars = {}

# Function to calculate difference between different scenarios
def custom_diff(group, var):
    print(group)
    diff_series = - group[var].diff().fillna(0)  # Calculate the difference and fill NaN with 0
    print(diff_series)
    iloc_stop = len(scenario_names) + 1
    diff_series = diff_series.iloc[1:(iloc_stop)]  # Remove the first value (NaN)
    diff_series.iloc[1] = diff_series.iloc[0] + diff_series.iloc[1]
    print(diff_series)
    return(diff_series)

for var in vars:
    diff = filtered.groupby('ID').apply(custom_diff, var)
    plot_vars[var] = diff.droplevel(1).reset_index()
    if len(scenario_names) > 1:
        plot_vars[var]['Scenario'] = np.tile(scenario_names, int(len(plot_vars[var])/2))
    else:
        plot_vars[var]['Scenario'] = scenario_names[0]

# Create subplots
vars_reshape = np.reshape(np.array(vars), (-1, 2))
nrows = vars_reshape.shape[0]
ncols = vars_reshape.shape[1]

fig = make_subplots(rows=nrows, cols=ncols, shared_xaxes=True,
                    vertical_spacing=0.1, horizontal_spacing=0.08,
                    subplot_titles=subplot_titles)

# color_map = {
#     scenario: color for scenario, color in zip(scenario_names, ['#E1BE6A', '#40B0A6'])
# }

color_map = {
    status: color for status, color in zip(scenario_names, ['#D54400','#003B4A'])
}

for i in np.arange(nrows):
    for j in np.arange(ncols):
        for k in ['No New Permits', 'Operating Projects Only']:
            if i == 0 and j == 0:
                show_legend = True
            else:
                show_legend = False
            var0 = vars_reshape[i, j]
            plot_df = plot_vars[var0]
            plot_df = plot_df[plot_df['Scenario'] == k]
            fig.add_trace(go.Scatter(x=plot_df['ID'], y=-plot_df[var0], mode='markers',

                                name=k, marker=dict(symbol="diamond", color=color_map[k], size=10), showlegend=show_legend),
                           row=(i+1), col=(j+1))

# Update layout for stacked bars
fig.update_layout(barmode='stack', height=800, width=1000, title_text="Avoided impacts",
                  yaxis=dict(title='# mortalities', range=[-2800, 200]),
                  yaxis2=dict(title='', range=[-2800, 200]),
                  yaxis3=dict(title='$ USD', range=[-38000000000, 3000000000]),
                  yaxis4=dict(title='', range=[-38000000000, 3000000000]),
                  yaxis5=dict(title='# onset cases', range=[-9000, 800]),
                  yaxis6=dict(title='', range=[-1400000, 120000]),
                  yaxis7=dict(title='# days lost', range=[-110000, 9000]),
                  yaxis8=dict(title='', range=[-600000, 47000]),
                  )

fig.add_annotation(
    xref="x domain",
    yref="y domain",
    x=1.06,
    y=-3.1,
    text="# occurrences",
    textangle=-90,
    showarrow=False,
    font=dict(size=13)
)

fig.write_image(results_dir0 + "/a.finalData.results/restricting_buildout.svg", engine="kaleido")
fig.write_image(results_dir0 + "/a.finalData.results/restricting_buildout.pdf", engine="kaleido")
fig.write_image(results_dir0 + "/a.finalData.results/restricting_buildout.jpg")

fig.show()


         config_id    ID  $ Total Health Benefits(high estimate) DISCOUNTED  \
7   a.finalData.01  2030                                       1.183198e+10   
35  a.finalData.02  2030                                       9.994588e+09   
63  a.finalData.03  2030                                       7.654088e+09   

    $ Total Health Benefits(low estimate) DISCOUNTED  \
7                                       8.502618e+09   
35                                      7.359921e+09   
63                                      5.809377e+09   

    O3 School Loss Days, All Cause  PM Work Loss Days  Total Asthma Symptoms  \
7                    239688.168503       26543.639829          494926.822344   
35                   217825.762636       21351.163838          438517.802431   
63                   181615.067283       15166.515216          354253.698587   

    Total Incidence, Asthma  Total Mortality(high estimate)  \
7               3103.794014                      782.223578   
35         

In [None]:
filtered

Unnamed: 0,config_id,ID,$ Total Health Benefits(high estimate) DISCOUNTED,$ Total Health Benefits(low estimate) DISCOUNTED,"O3 School Loss Days, All Cause",PM Work Loss Days,Total Asthma Symptoms,"Total Incidence, Asthma",Total Mortality(high estimate),Total Mortality(low estimate)
7,a.finalData.01,2030,2453707000.0,1727552000.0,44283.474512,5599.328259,95330.534005,581.286861,162.736573,111.344762
17,a.finalData.01,2040,2679784000.0,1918499000.0,49160.062485,6360.933364,104032.505641,638.196326,189.862803,132.447241
27,a.finalData.01,2050,2579123000.0,1862892000.0,52229.903375,6992.244757,110761.264765,677.707682,197.457351,138.967562
35,a.finalData.02,2030,1850944000.0,1340918000.0,36589.012613,4004.7545,76231.818575,466.988476,122.126268,86.030408
45,a.finalData.02,2040,1968998000.0,1448296000.0,39565.115299,4423.361175,80953.14048,499.144682,138.819238,99.54828
55,a.finalData.02,2050,1906656000.0,1412334000.0,42120.52136,4884.258956,86398.60493,531.048949,145.277681,104.909748
63,a.finalData.03,2030,1047936000.0,802676700.0,24105.320527,1962.328206,47342.322495,292.381516,68.488551,51.130963
73,a.finalData.03,2040,1118599000.0,866510300.0,26080.006443,2167.186322,50228.752876,312.79521,78.170251,59.157895
83,a.finalData.03,2050,1082480000.0,842432300.0,27783.501892,2392.988079,53675.313884,333.007477,81.727958,62.124841
