In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import dash
from jupyter_dash import JupyterDash
#import dash_core_components as dcc
from dash import dcc
from dash import html
#import dash_core_components as dcc
#import dash_html_components as html
from dash.dependencies import Input, Output
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
%%capture
# Load the GSS 2018 extract. Every string listed in na_values is parsed as NaN.
gss = pd.read_csv(
    "https://github.com/jkropko/DS-6001/raw/master/localdata/gss2018.csv",
    encoding='cp1252',
    na_values=[
        'IAP',
        'IAP,DK,NA,uncodeable',
        'NOT SURE',
        'DK',
        'IAP, DK, NA, uncodeable',
        '.a',
        "CAN'T CHOOSE",
    ],
)

# Columns kept for the dashboard.
mycols = [
    'id', 'wtss', 'sex', 'educ', 'region', 'age', 'coninc',
    'prestg10', 'mapres10', 'papres10', 'sei10', 'satjob',
    'fechld', 'fefam', 'fepol', 'fepresch', 'meovrwrk',
]

# Select, give the columns readable names, and make age numeric.
# Mapping keys that are not among the selected columns ('fehire', 'fejobaff')
# are simply ignored by rename.
gss_clean = (
    gss[mycols]
    .rename(columns={
        'wtss': 'weight',
        'educ': 'education',
        'coninc': 'income',
        'prestg10': 'job_prestige',
        'mapres10': 'mother_job_prestige',
        'papres10': 'father_job_prestige',
        'sei10': 'socioeconomic_index',
        'fechld': 'relationship',
        'fefam': 'male_breadwinner',
        'fehire': 'hire_women',
        'fejobaff': 'preference_hire_women',
        'fepol': 'men_bettersuited',
        'fepresch': 'child_suffer',
        'meovrwrk': 'men_overwork',
    })
    # The top-coded label '89 or older' becomes '89' so the column casts to float.
    .assign(age=lambda frame: frame['age'].replace({'89 or older': '89'}).astype('float'))
)

# Introductory Markdown shown at the top of the dashboard: a short literature
# summary followed by a description of the data source.
# NOTE(review): "up to 80 years" looks overstated for the GSS — confirm the
# survey's start date against NORC's documentation before publishing.
markdown_text = '''
Examining the gender pay gap in the United States attempts to discern the difference in pay between men and women that is a direct effect of being a woman, rather than the effect of job selection, 
schedule selection, or other contributions. Meara, K., Pastore, F. & Webster, A.'s ['The gender pay gap in the USA: a matching study'](https://doi.org/10.1007/s00148-019-00743-8) from 2020's Journal of Population Economics 
attempts to use a "matching estimator" to control for the effects of job selection, parenthood, schedule selection, and other contributions. 
Their work finds that there are interaction effects between gender and other variables such as part-time work. England, P., Levine, A., & Mishel, E.'s 2020 article ['Progress toward gender equality in the United States has slowed or stalled'](https://www.pnas.org/doi/epdf/10.1073/pnas.1918891117) further describes how, 
even with the reversed gap in higher education attainment by women, the gender wage gap has not closed, and progress has stalled since 2018.


The data used in this dashboard is from the [General Social Survey](https://gssdataexplorer.norc.org/variables/) (GSS), a nationally representative survey of adults from the National Opinion Research Center (NORC) at the University of Chicago.
For up to 80 years, questions have been asked of representative samples of the US population. Included in the GSS are topics related to psychological well-being, social mobility, and survey questions related to gender, education, income, and job prestige: the topics of this dashboard.
'''

# Summary table: per-sex means of income, job prestige, socioeconomic index
# and years of education, rounded to two decimals and given display names.
gss_display = (
    gss_clean
    .groupby('sex')
    .agg({
        'income': 'mean',
        'job_prestige': 'mean',
        'socioeconomic_index': 'mean',
        'education': 'mean',
    })
    .rename(columns={
        'income': 'Income',
        'job_prestige': 'Job Prestige',
        'socioeconomic_index': 'Socioeconomic Index',
        'education': 'Years of Education',
    })
    .round(2)
    # Name the group index 'Sex' so it becomes a 'Sex' column on reset.
    .rename_axis('Sex')
    .reset_index()
)
# Income is a currency column: render it as dollars with thousands separators.
gss_display['Income'] = gss_display['Income'].apply(lambda amount: f'${amount:,.2f}')

table_p2 = ff.create_table(gss_display)

# Count each male_breadwinner response by sex, then reshape to long form
# (one row per sex/response pair) for a grouped bar chart.
m_breadwinner = (
    pd.crosstab(gss_clean.sex, gss_clean.male_breadwinner)
    .reset_index()
    .melt(
        id_vars='sex',
        value_vars=['agree', 'disagree', 'strongly agree', 'strongly disagree'],
        value_name='count',
    )
)

# Store the responses as an ordered categorical running from
# "strongly disagree" to "strongly agree".
m_breadwinner['male_breadwinner'] = m_breadwinner['male_breadwinner'].astype(
    pd.CategoricalDtype(
        categories=["strongly disagree", "disagree", "agree", "strongly agree"],
        ordered=True,
    )
)

fig_p3 = px.bar(
    m_breadwinner,
    x="male_breadwinner",
    y="count",
    color='sex',
    barmode='group',
    labels={
        'male_breadwinner': 'It is much better for everyone involved if<br /> the man is the achiever outside the home and the woman takes care of the home and family.',
        'count': 'Count',
    },
    width=900,
    height=300,
)
# Axis-title font sizes (x: 12, y: 10).
fig_p3.update_layout(
    xaxis={'title': {'font': {'size': 12}}},
    yaxis={'title': {'font': {'size': 10}}},
)
# Force the x-axis categories into the agreement order rather than data order.
fig_p3.update_xaxes(
    categoryorder='array',
    categoryarray=['strongly disagree', 'disagree', 'agree', 'strongly agree'],
)

# Income against job prestige, coloured by sex, with a LOWESS trendline per
# group; education and socioeconomic index are added to the hover tooltip.
fig_p4 = px.scatter(
    gss_clean,
    x='job_prestige',
    y='income',
    color='sex',
    trendline='lowess',
    hover_data=['education', 'socioeconomic_index'],
    labels={'job_prestige': 'Job Prestige', 'income': 'Income'},
    width=400,
    height=400,
)

# Two side-by-side box plots by sex (income, then job prestige). They differ
# only in the measured column and its axis label, so both come from one
# template; the legend is hidden because the x-axis already names the groups.
box_income, box_jobprestige = (
    px.box(
        gss_clean,
        x='sex',
        y=measure,
        labels={'sex': '', measure: axis_label},
        color='sex',
        width=400,
        height=400,
    ).update_layout(showlegend=False)
    for measure, axis_label in (('income', 'Income'), ('job_prestige', 'Job Prestige'))
)

# Income by sex within six job-prestige bands, drawn as faceted box plots.
#
# .copy() makes df_p6 an independent frame; without it, adding the jp_cat
# column below writes onto a slice of gss_clean and pandas emits a
# SettingWithCopyWarning.
df_p6 = gss_clean[['income', 'sex', 'job_prestige']].copy()

# Split job prestige into 6 equal-width bins labelled '1'..'6'. Because labels
# are supplied, pd.cut already returns an ordered categorical with exactly
# these categories, so no further conversion is needed.
df_p6['jp_cat'] = pd.cut(df_p6['job_prestige'], 6, labels=['1', '2', '3', '4', '5', '6'])

# Drop rows with missing values, then sort by band so the facets are created
# in band order.
df_p6 = df_p6.dropna().sort_values(by='jp_cat')

fig_box = px.box(df_p6, x='sex', y='income', color='sex',
                 facet_col='jp_cat', facet_col_wrap=2,
                 labels={'sex': '', 'income': 'Income', 'jp_cat': 'Job Prestige Level'},
                 width=1000, height=800)

# Horizontally centre the figure title position.
fig_box.update(layout=dict(title=dict(x=0.5)))

the_title = "The Gender Wage Gap in the United States: A Dashboard"

# Create the dashboard application.
# Notebook alternative: JupyterDash(__name__, external_stylesheets=external_stylesheets)
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Page layout, top to bottom: title, introductory Markdown, the summary table,
# the grouped bar chart, a row of three equal-width panels (scatter plus two
# box plots), and finally the faceted box plots next to a photo panel.
app.layout = html.Div(
    [
        html.H1(the_title),

        dcc.Markdown(children=markdown_text),

        html.H4("Income, occupational prestige, socioeconomic index, and years of education for men and for women"),
        dcc.Graph(figure=table_p2),

        html.H4("Responses to men as household earners and women as homemakers"),
        dcc.Graph(figure=fig_p3),

        # Row of three panels, each 32% wide and floated left.
        html.Div(
            [
                html.H5("Income vs. job prestige for men and women"),
                html.P(
                    children="The income for women is lower than for men at every level of job prestige.",
                    className="header-description",
                ),
                dcc.Graph(figure=fig_p4),
            ],
            style={'width': '32%', 'float': 'left', 'backgroundColor': '#f2f2f2'},
        ),
        html.Div(
            [
                html.H5("Income distribution for men and women"),
                html.P(
                    children="Income distributions are similar; although men's 75th quantile is greater.",
                    className="header-description",
                ),
                dcc.Graph(figure=box_income),
            ],
            style={'width': '32%', 'float': 'left', 'backgroundColor': '#f2f2f2'},
        ),
        html.Div(
            [
                html.H5("Job prestige distribution for men and women"),
                html.P(
                    children="Job prestige distributions for men and women are similar.",
                    className="header-description",
                ),
                dcc.Graph(figure=box_jobprestige),
            ],
            style={'width': '32%', 'float': 'left', 'backgroundColor': '#f2f2f2'},
        ),

        # Faceted box plots on the left, photo panel floated right.
        html.Div(
            [
                html.H4("Income distribution for men and women by job prestige level"),
                dcc.Graph(figure=fig_box),
            ],
            style={'width': '45%', 'float': 'left', 'backgroundColor': '#f2f2f2'},
        ),
        html.Div(
            [
                html.P(
                    children="In the 1940s, my grandmother, preparing to become a teacher by taking chemistry at Manchester College.",
                    className="header-description",
                ),
                # NOTE(review): this looks like a signed third-party CDN link
                # (`oh`/`oe` query parameters) and may stop resolving —
                # consider serving the image from a local assets/ folder.
                html.Img(src="https://scontent.forf1-4.fna.fbcdn.net/v/t1.6435-9/52080008_10215557580990058_8736795737351782400_n.jpg?_nc_cat=107&ccb=1-7&_nc_sid=730e14&_nc_ohc=CiUMuR_ySdQAX87Iix2&_nc_ht=scontent.forf1-4.fna&oh=00_AfD0WnmnUHmzOdC27I3pHtxIgLjS9_cDXIkLHEau_Yd2TA&oe=646E71AA", height="400"),
            ],
            style={'width': '32%', 'float': 'right', 'backgroundColor': '#f2f2f2'},
        ),
    ],
    style={'backgroundColor': '#f2f2f2', 'padding': '5px', 'width': '1600px'},
)


if __name__ == '__main__':
    # Newer Dash releases document app.run() as the entry point and deprecate
    # run_server(); prefer run() and fall back to run_server() on versions
    # that do not provide it.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)