In [1]:
# Import libraries for loading data, analysis and visualization
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from pathlib import Path
from os.path import join

# Load review data

In [2]:
path = Path.cwd()
ROOT_DIR = path.parent.absolute()

In [3]:
# Root for data directory
r_fp = join(ROOT_DIR, 'data')

# All papers
all_papers = pd.read_csv(join(r_fp, 'all_papers.csv'))

# Papers that measure equity
eqm = pd.read_csv(join(r_fp, 'measure_equity.csv'))

In [4]:
# Drop rows & columns that are all na from csv
all_papers = all_papers.dropna(how='all')
all_papers = all_papers.dropna(axis=1, how='all')

eqm = eqm.dropna(how='all')
eqm = eqm.dropna(axis=1, how='all')

In [25]:
# Update census areas to small census areas
eqm.loc[eqm['whom_dist'] == 'Census Areas',
        'whom_dist'] = 'Small Census Areas'

# Summarize review

In [5]:
# Get drop columns
# There are columns where "Yes" values indicate 
# papers should not be included in our review sample
drop_no_equity = all_papers['drop_no_equity'] == 'Yes'
drop_no_dist_equity = all_papers['drop_no_dist_equity'] == 'Yes'
drop_no_flood = all_papers['drop_no_flood'] == 'Yes'
drop_no_eng = all_papers['drop_no_eng'] == 'Yes'
drop_equity_use_not_fair = all_papers['drop_equity_use_not_fair'] == 'Yes'
drop_not_quant = all_papers['drop_not_quant'] == 'Yes'

# Keep track in dataframes and print rows in each
# no_acc_df = all_papers.loc[drop_no_acc]
no_eq_df = all_papers.loc[drop_no_equity]
no_dist_eq = all_papers.loc[drop_no_dist_equity]
no_flood = all_papers.loc[drop_no_flood]
no_eng = all_papers.loc[drop_no_eng]
eq_not_fair = all_papers.loc[drop_equity_use_not_fair]

drop_types = ['No Equity', 'No Outcome Equity',
              'No Flood Focus', 'No English', 'No Equity As Fairness']
dfs = [no_eq_df, no_dist_eq, no_flood,
       no_eng, eq_not_fair]

for dt, df in zip(drop_types, dfs):
    print('Dropped ' + str(len(df)) + ' Papers Because ' + dt)
    
# Equity measured/defined filling in na values
all_papers['eq_m'] = np.where(all_papers['equity_measure_flood'] == 'Yes',
                              'Yes', 'No')
all_papers['eq_d'] = np.where(all_papers['equity_defined'] == 'Yes',
                              'Yes', 'No')
    
# Subset df to qualitative/review entries
# And quantitative entries that are the "true" review
qual_df = all_papers.loc[drop_not_quant &
                         (~drop_no_equity) &
                         (~drop_no_dist_equity) &
                         (~drop_no_flood) & (~drop_no_eng) &
                         (~drop_equity_use_not_fair)]
qut_df = all_papers.loc[(all_papers['analysis_type'] == 'Quantitative') &
                        (~drop_no_equity) &
                        (~drop_no_dist_equity) &
                        (~drop_no_flood) & (~drop_no_eng) &
                        (~drop_equity_use_not_fair)]

# Print rows in each
print(str(len(qual_df)) + ' Qualitative/Review/Theory/Comment Papers')
print(str(len(qut_df)) + ' Quantitative Papers')

# Check mutual exclusive coding
mut_ex = (len(all_papers) - len(qut_df) - len(qual_df) - 
          len(no_eq_df) - len(no_dist_eq) - len(no_flood) - len(no_eng) -
          len(eq_not_fair))
print('Mutually Exclusive Coding if 0: ' + str(mut_ex))

Dropped 1 Papers Because No Equity
Dropped 1 Papers Because No Outcome Equity
Dropped 35 Papers Because No Flood Focus
Dropped 1 Papers Because No English
Dropped 17 Papers Because No Equity As Fairness
68 Qualitative/Review/Theory/Comment Papers
99 Quantitative Papers
Mutually Exclusive Coding if 0: 0


## Summarize review sample definitions

In [6]:
# Get boolean flag for quantitative and qualitative assessments
qut_df['emp_s'] = 'Quantitative'
qual_df['emp_s'] = 'Qualitative'

# Subset to qual and quant papers that were coded
sub_papers = pd.concat([qut_df, qual_df], axis=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qut_df['emp_s'] = 'Quantitative'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qual_df['emp_s'] = 'Qualitative'


In [7]:
print("Proportion of Papers with Equity Defined")
print(sub_papers.groupby(['eq_d']).size()/len(sub_papers))
print("Number of Papers with Equity Defined")
print(sub_papers.groupby(['eq_d']).size())

print("Propotion of Papers with Different Equity Definition Codes")
print(sub_papers.groupby(['equity_defined_cat']).size()/len(sub_papers))

print("Number of Papers with Different Equity Definition Codes")
print(sub_papers.groupby(['equity_defined_cat']).size())

Proportion of Papers with Equity Defined
eq_d
No     0.520958
Yes    0.479042
dtype: float64
Number of Papers with Equity Defined
eq_d
No     87
Yes    80
dtype: int64
Propotion of Papers with Different Equity Definition Codes
equity_defined_cat
0.0    0.682635
1.0    0.077844
2.0    0.125749
3.0    0.077844
4.0    0.017964
5.0    0.017964
dtype: float64
Number of Papers with Different Equity Definition Codes
equity_defined_cat
0.0    114
1.0     13
2.0     21
3.0     13
4.0      3
5.0      3
dtype: int64


## Summarize qualitative papers definitions

In [8]:
print("Proportion of Papers with Equity Defined")
print(qual_df.groupby(['eq_d']).size()/len(qual_df))
print("Number of Papers with Equity Defined")
print(qual_df.groupby(['eq_d']).size())

print("Propotion of Papers with Different Equity Definition Scores")
print(qual_df.groupby(['equity_defined_cat']).size()/len(qual_df))

print("Number of Papers with Different Equity Definition Scores")
print(qual_df.groupby(['equity_defined_cat']).size())

Proportion of Papers with Equity Defined
eq_d
No     0.514706
Yes    0.485294
dtype: float64
Number of Papers with Equity Defined
eq_d
No     35
Yes    33
dtype: int64
Propotion of Papers with Different Equity Definition Scores
equity_defined_cat
0.0    0.661765
1.0    0.044118
2.0    0.088235
3.0    0.132353
4.0    0.044118
5.0    0.029412
dtype: float64
Number of Papers with Different Equity Definition Scores
equity_defined_cat
0.0    45
1.0     3
2.0     6
3.0     9
4.0     3
5.0     2
dtype: int64


## Summarize quantitative papers definitions

In [9]:
print("Proportion of Papers with Equity Defined")
print(qut_df.groupby(['eq_d']).size()/len(qut_df))
print("Number of Papers with Equity Defined")
print(qut_df.groupby(['eq_d']).size())

print("Propotion of Papers with Different Equity Definition Scores")
print(qut_df.groupby(['equity_defined_cat']).size()/len(qut_df))

print("Number of Papers with Different Equity Definition Scores")
print(qut_df.groupby(['equity_defined_cat']).size())

print("Number of Assessments That Measure Equity by Definition")
print((qut_df.groupby(['equity_defined_cat', 'eq_m']).size()))

print("Proportion of Assessments That Measure Equity by Definition")
print((qut_df.groupby(['equity_defined_cat', 'eq_m']).size()/
       qut_df.groupby(['equity_defined_cat']).size()))

Proportion of Papers with Equity Defined
eq_d
No     0.525253
Yes    0.474747
dtype: float64
Number of Papers with Equity Defined
eq_d
No     52
Yes    47
dtype: int64
Propotion of Papers with Different Equity Definition Scores
equity_defined_cat
0.0    0.696970
1.0    0.101010
2.0    0.151515
3.0    0.040404
5.0    0.010101
dtype: float64
Number of Papers with Different Equity Definition Scores
equity_defined_cat
0.0    69
1.0    10
2.0    15
3.0     4
5.0     1
dtype: int64
Number of Assessments That Measure Equity by Definition
equity_defined_cat  eq_m
0.0                 No      52
                    Yes     17
1.0                 No       3
                    Yes      7
2.0                 No       1
                    Yes     14
3.0                 Yes      4
5.0                 No       1
dtype: int64
Proportion of Assessments That Measure Equity by Definition
equity_defined_cat  eq_m
0.0                 No      0.753623
                    Yes     0.246377
1.0               

# Map equity metrics onto taxonomy

In [26]:
# 1) Equity in what
# 2) Equity for whom
# 3) Equity why
# Color by ethically grounded
import plotly.graph_objects as go

eqm_plot = eqm[eqm['meas_scale'] == 'Yes']

# Create dimensions
what_dim = go.parcats.Dimension(
    values=eqm_plot['what_dist'],
)

whom_dim = go.parcats.Dimension(
    values=eqm_plot['whom_dist'],
)

why_dim = go.parcats.Dimension(
    values=eqm_plot['equity_why'],
)


# Create parcats trace
color = np.where(eqm_plot['equity_why'] == 'Not Stated',
                  0, 1)
colorscale = [[0, '#D3D3D3'], [1, '#009E73']]

fig = go.Figure(data = [go.Parcats(dimensions=[what_dim,
                                               whom_dim,
                                               why_dim],
        line={'color': color,
              'colorscale': colorscale},
        hoveron='color', hoverinfo='count',
        tickfont={'size': 16, 'family': 'Times'},
        arrangement='freeform')]
              )
fig.update_layout(
    autosize=True)

fig.show()

In [11]:
# Total number of equity metrics
len(eqm_plot)

27

In [12]:
# Total number of papers with equity metrics
len(eqm_plot['doi'].unique())

14

In [13]:
# Seeing how many studies use multiple equity metrics, and how many of them
eqm_plot.groupby(['doi']).size().value_counts()

1    10
2     2
7     1
6     1
dtype: int64

In [14]:
# Looking at the papers with multiple equity metrics
eqm_plot.groupby(['doi']).size().sort_values(ascending=False).head(4)

doi
10.1007/s10584-022-03395-y         7
10.1111/risa.13527                 6
10.1016/j.gloenvcha.2020.102050    2
10.3390/w12113152                  2
dtype: int64

In [15]:
# Outcomes proportions
eqm_plot.groupby(['what_dist']).size()/len(eqm_plot)

what_dist
Adaptation Funding    0.037037
Adaptation Impact     0.037037
Benefits              0.629630
Exposure              0.074074
Recovery              0.074074
Risk                  0.111111
Vulnerability         0.037037
dtype: float64

In [16]:
# Scales proportions
eqm_plot.groupby(['whom_dist']).size()/len(eqm_plot)

whom_dist
Census Areas    0.111111
Individuals     0.518519
Large Areas     0.296296
Regions         0.074074
dtype: float64

In [17]:
# Why proportions
eqm_plot.groupby(['equity_why']).size()/len(eqm_plot)

equity_why
Ad Hoc                           0.037037
Interpret a Policy Mandate       0.037037
Invoke a Moral Principle         0.703704
Not Stated                       0.185185
Reflect Community Preferences    0.037037
dtype: float64

In [18]:
# Why numbers
eqm_plot.groupby(['equity_why']).size()

equity_why
Ad Hoc                            1
Interpret a Policy Mandate        1
Invoke a Moral Principle         19
Not Stated                        5
Reflect Community Preferences     1
dtype: int64

In [19]:
# Outcomes proportions for ethically grounded
eqm_eth = eqm_plot[eqm_plot['equity_why'] != 'Not Stated']
eqm_eth.groupby(['what_dist']).size()/len(eqm_eth)

what_dist
Adaptation Funding    0.045455
Adaptation Impact     0.045455
Benefits              0.727273
Exposure              0.045455
Recovery              0.045455
Risk                  0.090909
dtype: float64

In [20]:
# Whom proportions for ethically grounded
eqm_eth.groupby(['whom_dist']).size()/len(eqm_eth)

whom_dist
Census Areas    0.090909
Individuals     0.545455
Large Areas     0.363636
dtype: float64

In [21]:
# Why proportions for ethically grounded
eqm_eth.groupby(['equity_why']).size()/len(eqm_eth)

equity_why
Ad Hoc                           0.045455
Interpret a Policy Mandate       0.045455
Invoke a Moral Principle         0.863636
Reflect Community Preferences    0.045455
dtype: float64

In [22]:
eqm_eth.groupby(['what_dist',
                 'whom_dist',
                 'equity_why']).size()/len(eqm_eth)

what_dist           whom_dist     equity_why                   
Adaptation Funding  Census Areas  Interpret a Policy Mandate       0.045455
Adaptation Impact   Individuals   Reflect Community Preferences    0.045455
Benefits            Individuals   Ad Hoc                           0.045455
                                  Invoke a Moral Principle         0.363636
                    Large Areas   Invoke a Moral Principle         0.318182
Exposure            Individuals   Invoke a Moral Principle         0.045455
Recovery            Individuals   Invoke a Moral Principle         0.045455
Risk                Census Areas  Invoke a Moral Principle         0.045455
                    Large Areas   Invoke a Moral Principle         0.045455
dtype: float64

In [23]:
eqm_eth.groupby(['what_dist',
                 'equity_why']).size()/len(eqm_eth)

what_dist           equity_why                   
Adaptation Funding  Interpret a Policy Mandate       0.045455
Adaptation Impact   Reflect Community Preferences    0.045455
Benefits            Ad Hoc                           0.045455
                    Invoke a Moral Principle         0.681818
Exposure            Invoke a Moral Principle         0.045455
Recovery            Invoke a Moral Principle         0.045455
Risk                Invoke a Moral Principle         0.090909
dtype: float64

In [27]:
import plotly.graph_objects as go

# Replot with small text for editing outside
# plotly defaults

# 1) Equity in what
# 2) Equity for whom
# 3) Equity why
# Color by ethically grounded
import plotly.graph_objects as go

eqm_plot = eqm[eqm['meas_scale'] == 'Yes']

# Create dimensions
what_dim = go.parcats.Dimension(
    values=eqm_plot['what_dist'],
)

whom_dim = go.parcats.Dimension(
    # Helps for the image touch-up
    values=eqm_plot['whom_dist'].str[0],
)

why_dim = go.parcats.Dimension(
    values=eqm_plot['equity_why'],
)


# Create parcats trace
color = np.where(eqm_plot['equity_why'] == 'Not Stated',
                  0, 1)
colorscale = [[0, '#D3D3D3'], [1, '#009E73']]

fig = go.Figure(data = [go.Parcats(dimensions=[what_dim,
                                               whom_dim,
                                               why_dim],
        line={'color': color,
              'colorscale': colorscale},
        hoveron='color', hoverinfo='count',
        tickfont={'size': 1, 'family': 'Times'},
        arrangement='freeform')]
              )
fig.update_layout(
    autosize=True)

fig.show()

# Compare equity metrics with non-equity metrics

In [30]:
# Link eqm dataframe with whether a study defined equity or not
# All studies with equity metrics do this
# Not always the case for non-equity metrics even
# if they say results have equity implications
eq_defs = dict(zip(sub_papers['doi'],
                   sub_papers['equity_defined_cat']))
eqm['eq_d'] = eqm['doi'].map(eq_defs)

eqm['eq_fair'] = 'Unclear'
eqm.loc[eqm['eq_d'] == 2,
        'eq_fair'] = 'Env. Justice'
eqm.loc[eqm['eq_d'] == 3,
        'eq_fair'] = 'Dist. Justice'

eqm['whom_dist_p'] = eqm['whom_dist'].copy()
eqm.loc[eqm['whom_dist'] == 'Zip Codes',
        'whom_dist_p'] = 'Small Census Areas'
eqm.loc[eqm['whom_dist'] == 'County',
        'whom_dist_p'] = 'Large Areas'

# 1) Equity in what
# 2) Equity for whom
# 3) Equity why
# Color by ethically grounded
import plotly.graph_objects as go


# Create dimensions
def_dim = go.parcats.Dimension(
    values=eqm['eq_fair'],
)

what_dim = go.parcats.Dimension(
    values=eqm['what_dist'],
)

whom_dim = go.parcats.Dimension(
    values=eqm['whom_dist_p'],
)


# Create parcats trace
color = np.where(eqm['meas_scale'] != 'Yes',
                  0, 1)
colorscale = [[0, '#D3D3D3'], [1, 'salmon']]

fig = go.Figure(data = [go.Parcats(dimensions=[def_dim,
                                               what_dim,
                                               whom_dim],
        line={'color': color,
              'colorscale': colorscale},
        hoveron='color', hoverinfo='count',
        tickfont={'size': 16, 'family': 'Times'},
        arrangement='freeform')]
              )
fig.update_layout(
    autosize=True)

fig.show()

In [20]:
eqm.groupby(['eq_fair', 'meas_scale']).size()

eq_fair        meas_scale
Dist. Justice  Yes           16
Env. Justice   No            29
               Yes            1
Unclear        No            26
               Yes           10
dtype: int64

In [22]:
len(eqm[eqm['meas_scale'] == 'No']['doi'].unique())

28

In [32]:
# Link eqm dataframe with whether a study defined equity or not
# All studies with equity metrics do this
# Not always the case for non-equity metrics even
# if they say results have equity implications
eq_defs = dict(zip(sub_papers['doi'],
                   sub_papers['equity_defined_cat']))
eqm['eq_d'] = eqm['doi'].map(eq_defs)

eqm['eq_fair'] = 'Unclear'
eqm.loc[eqm['eq_d'] == 2,
        'eq_fair'] = 'Env. Justice'
eqm.loc[eqm['eq_d'] == 3,
        'eq_fair'] = 'Dist. Justice'

eqm['whom_dist_p'] = eqm['whom_dist'].copy()
eqm.loc[eqm['whom_dist'] == 'Zip Codes',
        'whom_dist_p'] = 'Small Census Areas'
eqm.loc[eqm['whom_dist'] == 'County',
        'whom_dist_p'] = 'Large Areas'

# 1) Equity in what
# 2) Equity for whom
# 3) Equity why
# Color by ethically grounded
import plotly.graph_objects as go


# Create dimensions
def_dim = go.parcats.Dimension(
    values=eqm['eq_fair'],
)

what_dim = go.parcats.Dimension(
    values=eqm['what_dist'].str[-3:],
)

whom_dim = go.parcats.Dimension(
    values=eqm['whom_dist_p'],
)


# Create parcats trace
color = np.where(eqm['meas_scale'] != 'Yes',
                  0, 1)
colorscale = [[0, '#D3D3D3'], [1, 'salmon']]

fig = go.Figure(data = [go.Parcats(dimensions=[def_dim,
                                               what_dim,
                                               whom_dim],
        line={'color': color,
              'colorscale': colorscale},
        hoveron='color', hoverinfo='count',
        tickfont={'size': 1, 'family': 'Times'},
        arrangement='freeform')]
              )
fig.update_layout(
    autosize=True)

fig.show()

# Summary stats of coded categories

In [38]:
eqm_plot['eq_fair'].value_counts()/len(eqm_plot)

Unclear          0.470588
Env. Justice     0.294118
Dist. Justice    0.235294
Name: eq_fair, dtype: float64

In [39]:
print('What is Being Distributed Proportions')
print(eqm_plot['what_dist'].value_counts()/len(eqm_plot))

print('What is Being Distributed Totals')
print(eqm_plot['what_dist'].value_counts())

print('Proportions by Direct/Implicit')
print(eqm_plot.groupby(['meas_scale', 'what_dist']).size()/eqm_plot.groupby(['what_dist']).size())

What is Being Distributed Proportions
Exposure                  0.367647
Benefits                  0.264706
Risk                      0.102941
Recovery                  0.102941
Vulnerability             0.073529
Adaptation Instruments    0.044118
Adaptation Impact         0.029412
Adaptation Cost           0.014706
Name: what_dist, dtype: float64
What is Being Distributed Totals
Exposure                  25
Benefits                  18
Risk                       7
Recovery                   7
Vulnerability              5
Adaptation Instruments     3
Adaptation Impact          2
Adaptation Cost            1
Name: what_dist, dtype: int64
Proportions by Direct/Implicit
meas_scale  what_dist             
No          Adaptation Impact         0.500000
            Adaptation Instruments    1.000000
            Exposure                  0.920000
            Recovery                  0.714286
            Risk                      0.571429
            Vulnerability             0.800000
Yes    

In [41]:
print(eqm_plot['whom_dist'].value_counts()/len(eqm_plot))

Census Areas    0.367647
Individuals     0.338235
Large Areas     0.176471
Regions         0.058824
Zip Codes       0.029412
Neighborhood    0.014706
County          0.014706
Name: whom_dist, dtype: float64


In [42]:
print('How is it Distributed Proportions')
print(eqm_plot['how_dist_fair'].value_counts()/len(eqm_plot))

print('How is it Distributed Totals')
print(eqm_plot['how_dist_fair'].value_counts())

How is it Distributed Proportions
Other Considerations                 0.500000
Distributive; Policy                 0.308824
Distributive                         0.058824
Other Considerations; Policy         0.058824
Distributive; Procedural             0.029412
Distributive; Recognition; Policy    0.014706
Other Considerations; Procedural     0.014706
Policy; Recognition                  0.014706
Name: how_dist_fair, dtype: float64
How is it Distributed Totals
Other Considerations                 34
Distributive; Policy                 21
Distributive                          4
Other Considerations; Policy          4
Distributive; Procedural              2
Distributive; Recognition; Policy     1
Other Considerations; Procedural      1
Policy; Recognition                   1
Name: how_dist_fair, dtype: int64


In [45]:
eqm_plot[eqm_plot['meas_scale'] == 'Yes']['eq_fair'].value_counts()/len(eqm_plot[eqm_plot['meas_scale'] == 'Yes'])

Dist. Justice    0.571429
Unclear          0.392857
Env. Justice     0.035714
Name: eq_fair, dtype: float64

## Subsets of review referenced in paper 

In [67]:
# Referencing modal EJ measurement framework
eqm_plot[eqm_plot['eq_fair'] == 'Env. Justice'].groupby(['what_dist', 'whom_dist', 'how_dist_fair']).size()

what_dist               whom_dist     how_dist_fair                    
Adaptation Impact       Individuals   Distributive; Recognition; Policy    1
Adaptation Instruments  Census Areas  Other Considerations                 2
Exposure                Census Areas  Other Considerations                 9
                        Individuals   Other Considerations                 1
                        Large Areas   Other Considerations                 1
                        Regions       Other Considerations                 1
Recovery                Census Areas  Other Considerations                 1
Vulnerability           Census Areas  Other Considerations                 1
                                      Other Considerations; Policy         1
dtype: int64

In [20]:
len(eqm_plot[(eqm_plot['what_dist'] == 'Exposure') &
         (eqm_plot['whom_dist'] == 'Census Areas') &
         (eqm_plot['how_dist_fair'] == 'Other Considerations')])

12