In [None]:
# Import libraries for loading data, analysis and visualization
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from pathlib import Path
from os.path import join

# Load review data

In [None]:
# The project root is taken to be the directory one level above
# the notebook's current working directory
path = Path.cwd()
ROOT_DIR = path.parents[0].absolute()

In [None]:
# Root for data directory
r_fp = join(ROOT_DIR, 'data')

# All papers
all_papers = pd.read_csv(join(r_fp, 'all_papers.csv'))

# Papers that measure equity
# Later cells compare the `equity_why` column against the literal string
# 'None'. pandas 2.0+ treats 'None' as a default NA token in read_csv,
# which would turn that code into NaN and silently defeat every
# `== 'None'` / `!= 'None'` comparison below. Pin the NA tokens explicitly
# (the pandas defaults without 'None') so the file parses identically
# on every pandas version.
eqm_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
                 '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA',
                 'NULL', 'NaN', 'n/a', 'nan', 'null']
eqm = pd.read_csv(join(r_fp, 'measure_equity.csv'),
                  keep_default_na=False,
                  na_values=eqm_na_values)

In [None]:
# Clean up csv artifacts in both tables: remove rows that are entirely
# NA first, then columns that are entirely NA
all_papers = all_papers.dropna(axis=0, how='all').dropna(axis=1, how='all')
eqm = eqm.dropna(axis=0, how='all').dropna(axis=1, how='all')

# Summarize review

In [None]:
# Exclusion flags: a "Yes" in any of these columns means the paper
# should not be included in our review sample
drop_no_equity = all_papers['drop_no_equity'].eq('Yes')
drop_no_dist_equity = all_papers['drop_no_dist_equity'].eq('Yes')
drop_no_flood = all_papers['drop_no_flood'].eq('Yes')
drop_no_eng = all_papers['drop_no_eng'].eq('Yes')
drop_equity_use_not_fair = all_papers['drop_equity_use_not_fair'].eq('Yes')
drop_not_quant = all_papers['drop_not_quant'].eq('Yes')

# One dataframe per exclusion reason, so the size of each can be reported
no_eq_df = all_papers.loc[drop_no_equity]
no_dist_eq = all_papers.loc[drop_no_dist_equity]
no_flood = all_papers.loc[drop_no_flood]
no_eng = all_papers.loc[drop_no_eng]
eq_not_fair = all_papers.loc[drop_equity_use_not_fair]

# Labels and dataframes are kept in matching order
drop_types = ['No Equity', 'No Outcome Equity',
              'No Flood Focus', 'No English', 'No Equity As Fairness']
dfs = [no_eq_df, no_dist_eq, no_flood, no_eng, eq_not_fair]

for reason, dropped in zip(drop_types, dfs):
    print(f'Dropped {len(dropped)} Papers Because {reason}')

# Equity measured/defined as strict Yes/No flags
# (anything other than "Yes", including NA, becomes "No")
for flag_col, source_col in (('eq_m', 'equity_measure_flood'),
                             ('eq_d', 'equity_defined')):
    all_papers[flag_col] = np.where(all_papers[source_col] == 'Yes',
                                    'Yes', 'No')

# A paper is excluded when any of the five exclusion flags is set
excluded = (drop_no_equity | drop_no_dist_equity | drop_no_flood |
            drop_no_eng | drop_equity_use_not_fair)

# Qualitative/review entries: flagged as not quantitative, not excluded
qual_df = all_papers.loc[drop_not_quant & ~excluded]
# Quantitative entries (the "true" review): coded Quantitative, not excluded
qut_df = all_papers.loc[all_papers['analysis_type'].eq('Quantitative') &
                        ~excluded]

# Print rows in each
print(f'{len(qual_df)} Qualitative/Review/Theory/Comment Papers')
print(f'{len(qut_df)} Quantitative Papers')

# Check mutual exclusive coding: if every paper lands in exactly one
# bucket, the bucket sizes sum to the total and the remainder is 0
mut_ex = len(all_papers) - sum(len(frame)
                               for frame in [qut_df, qual_df] + dfs)
print(f'Mutually Exclusive Coding if 0: {mut_ex}')

## Summarize review sample definitions

In [None]:
# Label each subset with its assessment type.
# qut_df and qual_df are row selections taken from all_papers, so setting
# a column on them in place raises pandas' SettingWithCopyWarning.
# .assign() returns new frames with the column added, which avoids the
# warning and keeps this cell safe to re-run.
qut_df = qut_df.assign(emp_s='Quantitative')
qual_df = qual_df.assign(emp_s='Qualitative')

# Subset to qual and quant papers that were coded
sub_papers = pd.concat([qut_df, qual_df], axis=0)

In [None]:
# Tally the coded sample once, then report each tally as a share
# of all coded papers and as a raw count
n_sub = len(sub_papers)
eq_d_counts = sub_papers.groupby(['eq_d']).size()
eq_cat_counts = sub_papers.groupby(['equity_defined_cat']).size()

for heading, table in (
    ("Proportion of Papers with Equity Defined", eq_d_counts / n_sub),
    ("Number of Papers with Equity Defined", eq_d_counts),
    ("Propotion of Papers with Different Equity Definition Codes",
     eq_cat_counts / n_sub),
    ("Number of Papers with Different Equity Definition Codes",
     eq_cat_counts),
):
    print(heading)
    print(table)

## Summarize qualitative papers definitions

In [None]:
# Same summary as for the full sample, restricted to qualitative papers
n_qual = len(qual_df)
qual_eq_d_counts = qual_df.groupby(['eq_d']).size()
qual_eq_cat_counts = qual_df.groupby(['equity_defined_cat']).size()

for heading, table in (
    ("Proportion of Papers with Equity Defined", qual_eq_d_counts / n_qual),
    ("Number of Papers with Equity Defined", qual_eq_d_counts),
    ("Propotion of Papers with Different Equity Definition Scores",
     qual_eq_cat_counts / n_qual),
    ("Number of Papers with Different Equity Definition Scores",
     qual_eq_cat_counts),
):
    print(heading)
    print(table)

## Summarize quantitative papers definitions

In [None]:
# Same summary as for the full sample, restricted to quantitative papers
n_qut = len(qut_df)
qut_eq_d_counts = qut_df.groupby(['eq_d']).size()
qut_eq_cat_counts = qut_df.groupby(['equity_defined_cat']).size()
# Cross-tab of definition code against whether equity was measured
qut_cat_by_measure = qut_df.groupby(['equity_defined_cat', 'eq_m']).size()

for heading, table in (
    ("Proportion of Papers with Equity Defined", qut_eq_d_counts / n_qut),
    ("Number of Papers with Equity Defined", qut_eq_d_counts),
    ("Propotion of Papers with Different Equity Definition Scores",
     qut_eq_cat_counts / n_qut),
    ("Number of Papers with Different Equity Definition Scores",
     qut_eq_cat_counts),
    ("Number of Assessments That Measure Equity by Definition",
     qut_cat_by_measure),
    # Each (definition code, measured) count is divided by the count
    # for its definition code
    ("Proportion of Assessments That Measure Equity by Definition",
     qut_cat_by_measure / qut_eq_cat_counts),
):
    print(heading)
    print(table)

# Map equity indicators onto taxonomy

In [None]:
# Keep only the rows whose equity_why is something other than 'None'
# (plotly.graph_objects is already imported as `go` in the first cell)
eqm_plot = eqm.loc[eqm['equity_why'].ne('None')]

# One parallel-categories dimension per taxonomy column, left to right
what_dim, whom_dim, other_dim, why_dim = (
    go.parcats.Dimension(values=eqm_plot[column])
    for column in ('what_dist', 'whom_dist', 'other_dist', 'equity_why')
)

# Single ribbon colour for every path.
# From https://cran.r-project.org/web/packages/khroma/vignettes/tol.html
# use teal from 'vibrant'
# Which will contrast in next plot with #BBBBBB 'grey'
color = '#009988'

parcats_trace = go.Parcats(
    dimensions=[what_dim, whom_dim, other_dim, why_dim],
    line=dict(color=color),
    hoveron='color',
    hoverinfo='count',
    tickfont=dict(size=16, family='Times'),
    arrangement='freeform',
)
fig = go.Figure(data=[parcats_trace])
fig.update_layout(autosize=True)

fig.show()

In [None]:
# Total number of equity indicators (row count of eqm_plot)
eqm_plot.shape[0]

In [None]:
# Total number of papers with equity indicators (distinct doi values;
# dropna=False keeps a missing doi counted as its own value)
eqm_plot['doi'].nunique(dropna=False)

In [None]:
# Indicators per study, then how many studies share each per-study count
indicators_per_doi = eqm_plot.groupby(['doi']).size()
indicators_per_doi.value_counts()

In [None]:
# The four studies with the most equity indicators, largest first
doi_indicator_counts = eqm_plot.groupby(['doi']).size()
doi_indicator_counts.sort_values(ascending=False).head(4)

In [None]:
# Outcomes: number of indicators per what_dist value
by_outcome = eqm_plot.groupby(by=['what_dist'])
by_outcome.size()

In [None]:
# Scales: number of indicators per whom_dist value
by_scale = eqm_plot.groupby(by=['whom_dist'])
by_scale.size()

In [None]:
# Other: number of indicators per other_dist value
by_other = eqm_plot.groupby(by=['other_dist'])
by_other.size()

In [None]:
# Why: number of indicators per equity_why value
by_why = eqm_plot.groupby(by=['equity_why'])
by_why.size()

In [None]:
# Total summary: indicator counts for every observed combination
# of the four taxonomy columns
taxonomy_cols = ['what_dist', 'whom_dist', 'other_dist', 'equity_why']
eqm_plot.groupby(taxonomy_cols).size()

In [None]:
# What and why summary: indicator counts per (what_dist, equity_why) pair
what_why_cols = ['what_dist', 'equity_why']
eqm_plot.groupby(what_why_cols).size()

In [None]:
# Replot with small text for editing outside plotly defaults:
# same figure as before, but the two middle dimensions are reduced to the
# first character of their labels and the tick font is shrunk to size 1
# (plotly.graph_objects is already imported as `go` in the first cell)

# Create dimensions
what_dim = go.parcats.Dimension(values=eqm_plot['what_dist'])
whom_dim = go.parcats.Dimension(values=eqm_plot['whom_dist'].str.get(0))
other_dim = go.parcats.Dimension(values=eqm_plot['other_dist'].str.get(0))
why_dim = go.parcats.Dimension(values=eqm_plot['equity_why'])

# Single ribbon colour for every path.
# From https://cran.r-project.org/web/packages/khroma/vignettes/tol.html
# use teal from 'vibrant'
# Which will contrast in next plot with #BBBBBB 'grey'
color = '#009988'

parcats_trace = go.Parcats(
    dimensions=[what_dim, whom_dim, other_dim, why_dim],
    line=dict(color=color),
    hoveron='color',
    hoverinfo='count',
    tickfont=dict(size=1, family='Times'),
    arrangement='freeform',
)
fig = go.Figure(data=[parcats_trace])
fig.update_layout(autosize=True)

fig.show()

# Compare equity indicators with similar measurements

In [None]:
# Some measurements list several "other" characteristics separated by
# semicolons. Split them and explode so each characteristic gets its own
# row (in a new other_dist_a column), then trim surrounding whitespace.
eqm_e = (
    eqm
    .assign(other_dist_a=eqm['other_dist'].str.split(';'))
    .explode('other_dist_a')
)
eqm_e['other_dist_a'] = eqm_e['other_dist_a'].str.strip()

In [None]:
# Row count after exploding other_dist
eqm_e.shape[0]

In [None]:
# Attach each study's equity-definition code to its measurements via doi.
# Author's note: all studies with equity indicators define equity; that is
# not always the case for similar measurements, even if they say results
# have equity implications.
eq_defs = dict(zip(sub_papers['doi'],
                   sub_papers['equity_defined_cat']))
eqm_e['eq_d'] = eqm_e['doi'].map(eq_defs)

# Translate definition codes into labels; rows with any other code
# (or no matching doi) stay 'Unclear'
eqm_e['eq_fair'] = 'Unclear'
for code, label in ((1, 'Inequality'),
                    (2, 'Env. Justice'),
                    (3, 'Dist. Justice')):
    eqm_e.loc[eqm_e['eq_d'] == code, 'eq_fair'] = label

# Pooled version of whom_dist for plotting: selected scales are merged
# into broader groups, everything else is carried over unchanged
eqm_e['whom_dist_p'] = eqm_e['whom_dist'].copy()
for scale, pooled in (('Zip Codes', 'Small Census Areas'),
                      ('County', 'Large Areas'),
                      ('Regions', 'Large Areas')):
    eqm_e.loc[eqm_e['whom_dist'] == scale, 'whom_dist_p'] = pooled

# Color by indicators
# (plotly.graph_objects is already imported as `go` in the first cell)

# Create dimensions
def_dim, what_dim, whom_dim, other_dim = (
    go.parcats.Dimension(values=eqm_e[column])
    for column in ('eq_fair', 'what_dist', 'whom_dist_p', 'other_dist_a')
)

# Rows whose equity_why is 'None' get 0 (grey); all other rows get 1 (teal)
color = np.where(eqm_e['equity_why'] == 'None',
                 0, 1)
colorscale = [[0, '#BBBBBB'], [1, '#009988']]

parcats_trace = go.Parcats(
    dimensions=[def_dim, what_dim, whom_dim, other_dim],
    line=dict(color=color, colorscale=colorscale),
    hoveron='color',
    hoverinfo='count',
    tickfont=dict(size=16, family='Times'),
    arrangement='freeform',
)
fig = go.Figure(data=[parcats_trace])
fig.update_layout(autosize=True)

fig.show()

In [None]:
# Row counts for every observed combination of the plotted dimensions
compare_cols = ['eq_fair', 'what_dist', 'whom_dist_p', 'other_dist_a']
eqm_e.groupby(compare_cols).size()

In [None]:
# Row counts per (definition label, equity_why) pair
fair_why_cols = ['eq_fair', 'equity_why']
eqm_e.groupby(fair_why_cols).size()

In [None]:
# Number of distinct doi values among rows whose equity_why is 'None'
# (dropna=False keeps a missing doi counted as its own value)
eqm_e.loc[eqm_e['equity_why'] == 'None', 'doi'].nunique(dropna=False)

In [None]:
# Number of (exploded) rows whose equity_why is 'None'
int((eqm_e['equity_why'] == 'None').sum())

In [None]:
# Redo for touch up: same comparison figure, with what_dist cut to its
# last three characters, whom_dist_p cut to its first character, and the
# tick font shrunk to size 1
# (plotly.graph_objects is already imported as `go` in the first cell)

# Create dimensions
def_dim = go.parcats.Dimension(values=eqm_e['eq_fair'])
what_dim = go.parcats.Dimension(
    values=eqm_e['what_dist'].str.slice(start=-3))
whom_dim = go.parcats.Dimension(values=eqm_e['whom_dist_p'].str.get(0))
other_dim = go.parcats.Dimension(values=eqm_e['other_dist_a'])

# Rows whose equity_why is 'None' get 0 (grey); all other rows get 1 (teal)
color = np.where(eqm_e['equity_why'] == 'None',
                 0, 1)
colorscale = [[0, '#BBBBBB'], [1, '#009988']]

parcats_trace = go.Parcats(
    dimensions=[def_dim, what_dim, whom_dim, other_dim],
    line=dict(color=color, colorscale=colorscale),
    hoveron='color',
    hoverinfo='count',
    tickfont=dict(size=1, family='Times'),
    arrangement='freeform',
)
fig = go.Figure(data=[parcats_trace])
fig.update_layout(autosize=True)

fig.show()