# Plotly Interactive Graphics for "The Problem with Blaming Men for Not Working"

-------

*Brian Dew*<br>
*November 9, 2017*<br>
*Center for Economic and Policy Research*

-----

*Goal*: Calculate employment rates for less educated men and women and compare trends in these rates since 2000.

## 1. Prepare data for analysis

The data source is the CEPR Current Population Survey Outgoing Rotation Group (CPS ORG) uniform extracts. Details on this data source can be found here: http://ceprdata.org/cps-uniform-data-extracts/cps-outgoing-rotation-group/

### 1.1. Import libraries

In [1]:
import itertools
import os

import numpy as np
import pandas as pd

# Directory holding the yearly CEPR ORG extracts (cepr_org_<year>.dta) and
# recession_dates.csv. Keep only the line for the machine in use uncommented.
#data_loc = 'C:/Working/econ_data/micro/data'         # Windows
data_loc = '/home/domestic-ra/Working/CPS_ORG/Data/' # Linux

### 1.2. Set Variables

In [2]:
# Columns to load from each yearly CPS ORG extract
cols = ['year', 'female', 'age', 'educ', 'empl', 'orgwgt']

# (label, value of the `female` column) for each gender
gender = [
    ('Male', 0),
    ('Female', 1),
]

# Years to process; range() excludes its stop value, hence the "+ 1"
years = range(1990, 2017 + 1)

# Inclusive (youngest, oldest) bounds of each age group
ages = [
    (25, 34),
    (35, 44),
    (45, 54),
    (25, 54),
]

# Values of the `educ` column pooled into each education group
educs = [
    ['LTHS', 'HS'],
    ['College', 'Advanced'],
]

### 1.3. Filter data and calculate employment rate

In [3]:
# Weighted employment rate (percent, one decimal) for every
# gender x age group x education group, for every year in `years`.
# Results are collected as {year: {column name: rate}} and turned into a
# DataFrame once at the end; this replaces DataFrame.set_value, which was
# removed in pandas 1.0.
rates = {}
for y in years:
    yr = pd.to_datetime('{}-01-01'.format(y))
    # os.path.join works whether or not data_loc ends with a path separator
    filename = os.path.join(data_loc, 'cepr_org_{}.dta'.format(y))
    df = pd.read_stata(filename, columns=cols).dropna()
    rates[yr] = {}
    for (sex, female), (age_min, age_max), educ_group in itertools.product(
            gender, ages, educs):
        # Subgroup column name (col_name), e.g. 'Male: 25-34: LTHS or HS'
        col_name = '{}: {}-{}: {}'.format(
            sex, age_min, age_max, ' or '.join(educ_group))
        # Filter CPS to subgroup (age bounds are inclusive)
        dft = df[df['age'].between(age_min, age_max) &
                 (df['female'] == female) &
                 df['educ'].isin(educ_group)]
        # Employment rate = mean of `empl` weighted by `orgwgt`
        epop = np.average(dft['empl'].astype(float), weights=dft['orgwgt'])
        rates[yr][col_name] = round(epop * 100, 1)

# One row per year (indexed by January 1 of that year), one column per subgroup
data = pd.DataFrame.from_dict(rates, orient='index')

### 1.4 Convert to plot-friendly components

In [4]:
# Add a '<column>_text' column of hover labels for every "high school or
# less" series. Example label:
#   Male 25-34<br>High School Diploma <br>or Less: 1990: 86.5
educ_label = '<br>High School Diploma <br>or Less: '

# Iterate over a snapshot of the column names because columns are added inside
# the loop. Skipping existing *_text columns keeps the cell safe to re-run.
for group in list(data.columns):
    parts = group.split(': ')
    if group.endswith('_text') or 'HS' not in parts[2]:
        continue
    # Distinct local names so the `gender` and `ages` lists defined in
    # section 1.2 are not overwritten (they are still needed to re-run 1.3).
    sex_label, age_label = parts[0], parts[1]
    hover_texts = []
    # Series.items() yields (index label, value) pairs; it replaces both
    # DataFrame.iteritems (removed in pandas 2.0) and positional values[i].
    for idxval, data_val in data[group].items():
        year = str(idxval)[0:4]
        if '2017' in year:
            year = '2017 (preliminary)'
        hover_texts.append('{} {}{}{}: {}'.format(
            sex_label, age_label, educ_label, year, data_val))
    # Whole-column assignment replaces per-cell DataFrame.set_value
    # (removed in pandas 1.0).
    data['{}_text'.format(group)] = hover_texts

In [5]:
# Spot-check one series: the 'Male: 35-44: LTHS or HS' employment-rate column
data['Male: 35-44: LTHS or HS']

1990-01-01    86.2
1991-01-01    84.2
1992-01-01    82.7
1993-01-01    83.2
1994-01-01    83.0
1995-01-01    83.1
1996-01-01    83.9
1997-01-01    84.7
1998-01-01    85.3
1999-01-01    86.2
2000-01-01    85.5
2001-01-01    84.4
2002-01-01    82.9
2003-01-01    82.8
2004-01-01    83.6
2005-01-01    84.0
2006-01-01    84.6
2007-01-01    84.5
2008-01-01    82.8
2009-01-01    77.7
2010-01-01    77.3
2011-01-01    78.2
2012-01-01    78.3
2013-01-01    78.8
2014-01-01    80.7
2015-01-01    80.6
2016-01-01    81.5
2017-01-01    82.1
Name: Male: 35-44: LTHS or HS, dtype: float64

### 1.5 Read recession dates into memory for recession bars

In [6]:
# Recession dates table, indexed by its 'Date' column; used for the shaded
# recession bars in the plots. os.path.join works whether or not data_loc
# ends with a path separator.
rec_dates = pd.read_csv(os.path.join(data_loc, 'recession_dates.csv'),
                        index_col='Date')

## 2. Plot the results

Using plotly, generate interactive graphics for each age subgroup (25-34, 35-44, 45-54).

### 2.1 Plotly library and credentials

In [7]:
# config2 is a local module that must provide the attributes `id` and `key`
# passed to py.sign_in below.
import config2 # Contains plotly id and api key
# NOTE(review): plotly.plotly moved to the separate chart_studio package in
# plotly 4 — confirm the installed plotly version before running.
import plotly.plotly as py
# The star import supplies the Data and Figure names used by the plotting cells.
from plotly.graph_objs import *
py.sign_in(config2.id, config2.key)

# CEPR colors for figures
dark_blue = 'rgb(37, 64, 97)'
light_blue = 'rgb(79, 129, 189)'

### 2.2 Generate plot for age group 25-34

In [9]:
# Employment-rate figure for ages 25-34, high school diploma or less.
# Columns of `data` plotted in this figure:
men_group = 'Male: 25-34: LTHS or HS'
fem_group = 'Female: 25-34: LTHS or HS'
men_rates = data[men_group]
fem_rates = data[fem_group]

# Line traces; hover shows the prepared '<column>_text' labels
men = {
  "x": data.index,
  "y": men_rates.values,
  "hoverinfo": "text",
  "line": {"color": dark_blue, "width": 4},
  "mode": "lines",
  "name": "Men 25-34 <br>High School Diploma <br>or Less",
  "text": data['{}_text'.format(men_group)].values,
  "type": "scatter",
  "visible": True,
}
women = {
  "x": data.index,
  "y": fem_rates.values,
  "hoverinfo": "text",
  "line": {"color": light_blue, "width": 4},
  "mode": "lines",
  "name": "Women 25-34  <br>High School Diploma <br>or Less",
  "text": data['{}_text'.format(fem_group)].values,
  "type": "scatter",
  "visible": True,
}

# Shaded recession bars: filled area built from the recession dates table
rec_bars = {
  "x": rec_dates.index,
  "y": rec_dates.values,
  "fill": "tozeroy",
  "fillcolor": "rgba(211, 211, 211, 0.5)",
  "hoverinfo": "none",
  "mode": "none",
  "name": "Recession_Bar",
  "type": "scatter",
}

# Labelled markers at each series' maximum, minimum and latest value.
# .iloc[-1] selects the last row by position; a bare [-1] on a date-indexed
# Series is deprecated positional access in pandas 2.x.
men_val = {
  "x": [men_rates.idxmax(), men_rates.idxmin(), data.index[-1]],
  "y": [men_rates.max(), men_rates.min(), men_rates.iloc[-1]],
  "hoverinfo": "none",
  "marker": {"color": dark_blue, "line": {"width": 0}, "size": 10},
  "mode": "markers+text",
  "name": "High",
  "text": [men_rates.max(), men_rates.min(), round(men_rates.iloc[-1], 1)],
  "textfont": {"color": "rgb(127, 127, 127)"},
  "textposition": ["top center", "bottom center", "top center"],
  "type": "scatter",
}
fem_val = {
  "x": [fem_rates.idxmax(), fem_rates.idxmin(), data.index[-1]],
  "y": [fem_rates.max(), fem_rates.min(), fem_rates.iloc[-1]],
  "hoverinfo": "none",
  "marker": {"color": light_blue, "size": 10},
  "mode": "markers+text",
  "name": "Low",
  "text": [fem_rates.max(), fem_rates.min(), round(fem_rates.iloc[-1], 1)],
  "textfont": {"color": "rgb(127, 127, 127)"},
  "textposition": ["top center", "bottom center", "top center"],
  "type": "scatter",
}

# Figure accepts a plain list of traces; the graph_objs.Data wrapper is
# deprecated in plotly 3+.
data_py = [rec_bars, men, women, men_val, fem_val]

layout = {
  "annotations": [
    # Series labels, placed in data coordinates
    {
      "x": "1993-01-01",
      "y": 87,
      "font": {"color": "rgb(127, 127, 127)", "size": 22},
      "showarrow": False,
      "text": "Men",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": "1993-03-01",
      "y": 63.83,
      "font": {"color": "rgb(127, 127, 127)", "size": 22},
      "showarrow": False,
      "text": "Women",
      "xref": "x",
      "yref": "y"
    },
    # Figure title, placed in paper coordinates above the plot area
    {
      "x": -0.083,
      "y": 1.22,
      "align": "left",
      "font": {"color": "rgb(68, 68, 68)", "size": 18},
      "showarrow": False,
      "text": "Employment Rate Trends<br>Age 25–34, High School Diploma or Less",
      "xref": "paper",
      "yref": "paper"
    },
    # Source note below the plot; a literal <br> tag gives the line break
    # (the HTML-escaped form is not reliably rendered as a break).
    {
      "x": -0.066,
      "y": -0.202,
      "align": "left",
      "font": {"color": "rgb(127, 127, 127)"},
      "showarrow": False,
      "text": "Source and notes: CEPR Current Population Survey Outgoing Rotation<br>Group (CPS ORG). Shaded bars represent recessions.",
      "xref": "paper",
      "yref": "paper"
    }
  ],
  "autosize": False,
  "dragmode": "zoom",
  "height": 380,
  "hovermode": "closest",
  "margin": {"r": 0, "t": 60, "b": 50, "l": 40, "pad": 0},
  "paper_bgcolor": "rgb(255, 252, 245)",
  "plot_bgcolor": "rgb(255, 252, 245)",
  "showlegend": False,
  "title": "",
  "width": 480,
  "xaxis": {
    "autorange": False,
    "fixedrange": True,
    "range": ["1989-01-01", "2019-03-01"],
    "showgrid": False,
    "showspikes": False,
    "tickfont": {"color": "rgb(127, 127, 127)"},
    "title": "",
    "type": "date"
  },
  "yaxis": {
    "autorange": False,
    "range": [50, 91],
    "showspikes": False,
    "tickfont": {"color": "rgb(127, 127, 127)"},
    "title": "Percentage Employed",
    "titlefont": {"color": "rgb(127, 127, 127)"},
    "type": "linear"
  }
}
fig = Figure(data=data_py, layout=layout)
plot_url = py.plot(fig, filename="prob_men2")

### 2.3 Generate plot for age group 35-44

In [10]:
# Employment-rate figure for ages 35-44, high school diploma or less.
# Columns of `data` plotted in this figure:
men_group = 'Male: 35-44: LTHS or HS'
fem_group = 'Female: 35-44: LTHS or HS'
men_rates = data[men_group]
fem_rates = data[fem_group]

# Line traces; hover shows the prepared '<column>_text' labels
men = {
  "x": data.index,
  "y": men_rates.values,
  "hoverinfo": "text",
  "line": {"color": dark_blue, "width": 4},
  "mode": "lines",
  "name": "Men 35-44 <br>High School Diploma <br>or Less",
  "text": data['{}_text'.format(men_group)].values,
  "type": "scatter",
  "visible": True,
}
women = {
  "x": data.index,
  "y": fem_rates.values,
  "hoverinfo": "text",
  "line": {"color": light_blue, "width": 4},
  "mode": "lines",
  "name": "Women 35-44  <br>High School Diploma <br>or Less",
  "text": data['{}_text'.format(fem_group)].values,
  "type": "scatter",
  "visible": True,
}

# Shaded recession bars: filled area built from the recession dates table
rec_bars = {
  "x": rec_dates.index,
  "y": rec_dates.values,
  "fill": "tozeroy",
  "fillcolor": "rgba(211, 211, 211, 0.5)",
  "hoverinfo": "none",
  "mode": "none",
  "name": "Recession_Bar",
  "type": "scatter",
}

# Labelled markers at each series' maximum, minimum and latest value.
# .iloc[-1] selects the last row by position; a bare [-1] on a date-indexed
# Series is deprecated positional access in pandas 2.x.
men_val = {
  "x": [men_rates.idxmax(), men_rates.idxmin(), data.index[-1]],
  "y": [men_rates.max(), men_rates.min(), men_rates.iloc[-1]],
  "hoverinfo": "none",
  "marker": {"color": dark_blue, "line": {"width": 0}, "size": 10},
  "mode": "markers+text",
  "name": "High",
  "text": [men_rates.max(), men_rates.min(), round(men_rates.iloc[-1], 1)],
  "textfont": {"color": "rgb(127, 127, 127)"},
  "textposition": ["top center", "bottom center", "top center"],
  "type": "scatter",
}
fem_val = {
  "x": [fem_rates.idxmax(), fem_rates.idxmin(), data.index[-1]],
  "y": [fem_rates.max(), fem_rates.min(), fem_rates.iloc[-1]],
  "hoverinfo": "none",
  "marker": {"color": light_blue, "size": 10},
  "mode": "markers+text",
  "name": "Low",
  "text": [fem_rates.max(), fem_rates.min(), round(fem_rates.iloc[-1], 1)],
  "textfont": {"color": "rgb(127, 127, 127)"},
  "textposition": ["top center", "bottom center", "top center"],
  "type": "scatter",
}

# Figure accepts a plain list of traces; the graph_objs.Data wrapper is
# deprecated in plotly 3+.
data_py = [rec_bars, men, women, men_val, fem_val]

layout = {
  "annotations": [
    # Series labels, placed in data coordinates
    {
      "x": "1993-01-01",
      "y": 87,
      "font": {"color": "rgb(127, 127, 127)", "size": 22},
      "showarrow": False,
      "text": "Men",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": "1993-03-01",
      "y": 63.83,
      "font": {"color": "rgb(127, 127, 127)", "size": 22},
      "showarrow": False,
      "text": "Women",
      "xref": "x",
      "yref": "y"
    },
    # Figure title, placed in paper coordinates above the plot area
    {
      "x": -0.083,
      "y": 1.22,
      "align": "left",
      "font": {"color": "rgb(68, 68, 68)", "size": 18},
      "showarrow": False,
      "text": "Employment Rate Trends<br>Age 35–44, High School Diploma or Less",
      "xref": "paper",
      "yref": "paper"
    },
    # Source note below the plot; a literal <br> tag gives the line break
    # (the HTML-escaped form is not reliably rendered as a break).
    {
      "x": -0.066,
      "y": -0.202,
      "align": "left",
      "font": {"color": "rgb(127, 127, 127)"},
      "showarrow": False,
      "text": "Source and notes: CEPR Current Population Survey Outgoing Rotation<br>Group (CPS ORG). Shaded bars represent recessions.",
      "xref": "paper",
      "yref": "paper"
    }
  ],
  "autosize": False,
  "dragmode": "zoom",
  "height": 380,
  "hovermode": "closest",
  "margin": {"r": 0, "t": 60, "b": 50, "l": 40, "pad": 0},
  "paper_bgcolor": "rgb(255, 252, 245)",
  "plot_bgcolor": "rgb(255, 252, 245)",
  "showlegend": False,
  "title": "",
  "width": 480,
  "xaxis": {
    "autorange": False,
    "fixedrange": True,
    "range": ["1989-01-01", "2019-03-01"],
    "showgrid": False,
    "showspikes": False,
    "tickfont": {"color": "rgb(127, 127, 127)"},
    "title": "",
    "type": "date"
  },
  "yaxis": {
    "autorange": False,
    "range": [50, 91],
    "showspikes": False,
    "tickfont": {"color": "rgb(127, 127, 127)"},
    "title": "Percentage Employed",
    "titlefont": {"color": "rgb(127, 127, 127)"},
    "type": "linear"
  }
}
fig = Figure(data=data_py, layout=layout)
plot_url = py.plot(fig, filename="prob_men3")

### 2.4 Generate plot for age group 45-54

In [11]:
# Employment-rate figure for ages 45-54, high school diploma or less.
# Columns of `data` plotted in this figure:
men_group = 'Male: 45-54: LTHS or HS'
fem_group = 'Female: 45-54: LTHS or HS'
men_rates = data[men_group]
fem_rates = data[fem_group]

# Line traces; hover shows the prepared '<column>_text' labels
men = {
  "x": data.index,
  "y": men_rates.values,
  "hoverinfo": "text",
  "line": {"color": dark_blue, "width": 4},
  "mode": "lines",
  "name": "Men 45-54 <br>High School Diploma <br>or Less",
  "text": data['{}_text'.format(men_group)].values,
  "type": "scatter",
  "visible": True,
}
women = {
  "x": data.index,
  "y": fem_rates.values,
  "hoverinfo": "text",
  "line": {"color": light_blue, "width": 4},
  "mode": "lines",
  "name": "Women 45-54  <br>High School Diploma <br>or Less",
  "text": data['{}_text'.format(fem_group)].values,
  "type": "scatter",
  "visible": True,
}

# Shaded recession bars: filled area built from the recession dates table
rec_bars = {
  "x": rec_dates.index,
  "y": rec_dates.values,
  "fill": "tozeroy",
  "fillcolor": "rgba(211, 211, 211, 0.5)",
  "hoverinfo": "none",
  "mode": "none",
  "name": "Recession_Bar",
  "type": "scatter",
}

# Labelled markers at each series' maximum, minimum and latest value.
# .iloc[-1] selects the last row by position; a bare [-1] on a date-indexed
# Series is deprecated positional access in pandas 2.x.
men_val = {
  "x": [men_rates.idxmax(), men_rates.idxmin(), data.index[-1]],
  "y": [men_rates.max(), men_rates.min(), men_rates.iloc[-1]],
  "hoverinfo": "none",
  "marker": {"color": dark_blue, "line": {"width": 0}, "size": 10},
  "mode": "markers+text",
  "name": "High",
  "text": [men_rates.max(), men_rates.min(), round(men_rates.iloc[-1], 1)],
  "textfont": {"color": "rgb(127, 127, 127)"},
  "textposition": ["top center", "bottom center", "top center"],
  "type": "scatter",
}
fem_val = {
  "x": [fem_rates.idxmax(), fem_rates.idxmin(), data.index[-1]],
  "y": [fem_rates.max(), fem_rates.min(), fem_rates.iloc[-1]],
  "hoverinfo": "none",
  "marker": {"color": light_blue, "size": 10},
  "mode": "markers+text",
  "name": "Low",
  "text": [fem_rates.max(), fem_rates.min(), round(fem_rates.iloc[-1], 1)],
  "textfont": {"color": "rgb(127, 127, 127)"},
  "textposition": ["top center", "bottom center", "top center"],
  "type": "scatter",
}

# Figure accepts a plain list of traces; the graph_objs.Data wrapper is
# deprecated in plotly 3+.
data_py = [rec_bars, men, women, men_val, fem_val]

layout = {
  "annotations": [
    # Series labels, placed in data coordinates
    {
      "x": "1993-01-01",
      "y": 83,
      "font": {"color": "rgb(127, 127, 127)", "size": 22},
      "showarrow": False,
      "text": "Men",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": "1993-03-01",
      "y": 67,
      "font": {"color": "rgb(127, 127, 127)", "size": 22},
      "showarrow": False,
      "text": "Women",
      "xref": "x",
      "yref": "y"
    },
    # Figure title, placed in paper coordinates above the plot area
    {
      "x": -0.083,
      "y": 1.22,
      "align": "left",
      "font": {"color": "rgb(68, 68, 68)", "size": 18},
      "showarrow": False,
      "text": "Employment Rate Trends<br>Age 45–54, High School Diploma or Less",
      "xref": "paper",
      "yref": "paper"
    },
    # Source note below the plot; a literal <br> tag gives the line break
    # (the HTML-escaped form is not reliably rendered as a break).
    {
      "x": -0.066,
      "y": -0.202,
      "align": "left",
      "font": {"color": "rgb(127, 127, 127)"},
      "showarrow": False,
      "text": "Source and notes: CEPR Current Population Survey Outgoing Rotation<br>Group (CPS ORG). Shaded bars represent recessions.",
      "xref": "paper",
      "yref": "paper"
    }
  ],
  "autosize": False,
  "dragmode": "zoom",
  "height": 380,
  "hovermode": "closest",
  "margin": {"r": 0, "t": 60, "b": 50, "l": 40, "pad": 0},
  "paper_bgcolor": "rgb(255, 252, 245)",
  "plot_bgcolor": "rgb(255, 252, 245)",
  "showlegend": False,
  "title": "",
  "width": 480,
  "xaxis": {
    "autorange": False,
    "fixedrange": True,
    "range": ["1989-01-01", "2019-03-01"],
    "showgrid": False,
    "showspikes": False,
    "tickfont": {"color": "rgb(127, 127, 127)"},
    "title": "",
    "type": "date"
  },
  "yaxis": {
    "autorange": False,
    "range": [50, 91],
    "showspikes": False,
    "tickfont": {"color": "rgb(127, 127, 127)"},
    "title": "Percentage Employed",
    "titlefont": {"color": "rgb(127, 127, 127)"},
    "type": "linear"
  }
}
fig = Figure(data=data_py, layout=layout)
plot_url = py.plot(fig, filename="prob_men4")

### 2.5 Generate Figure 4: employment rate change since 2000

In [12]:
# Grouped bar chart of the change in employment rates since 2000, by age
# group, for men and women with a high school diploma or less.
# The bar values are hard-coded. For example, -3.4 for men aged 35-44 equals
# the 2017 value minus the 2000 value of 'Male: 35-44: LTHS or HS'
# (82.1 - 85.5).
# NOTE(review): uid/xsrc/ysrc look like identifiers exported from an existing
# plotly chart and grid — confirm they are still wanted.
trace1 = {
  "x": ["Age 25-34", "Age 35-44", "Age 45-54"],
  "y": ["-8.2", "-3.4", "-3.3"],
  "hoverinfo": "y+name",
  "marker": {
    "color": "rgb(37, 64, 97)",
    "line": {"color": "rgb(37, 64, 97)"}
  },
  "name": "Men",
  "type": "bar",
  "uid": "d59cb9",
  "xsrc": "brianwdew:21:78a5f6",
  "ysrc": "brianwdew:21:30581b"
}
trace2 = {
  "x": ["Age 25-34", "Age 35-44", "Age 45-54"],
  "y": ["-6.9", "-9.7", "-6.7"],
  "hoverinfo": "y+name",
  "marker": {"color": "rgb(79, 129, 189)"},
  "name": "Women",
  "type": "bar",
  "uid": "efb5ad",
  "xsrc": "brianwdew:21:78a5f6",
  "ysrc": "brianwdew:21:c9f951"
}

# Named data_py, as in the other plot cells, so the `data` DataFrame used by
# the earlier cells is not overwritten. A plain list of traces is accepted by
# Figure; the graph_objs.Data wrapper is deprecated in plotly 3+.
data_py = [trace1, trace2]

layout = {
  "annotations": [
    # Series labels
    {
      "x": -0.413157894737,
      "y": -6.31364152697,
      "font": {"size": 14},
      "showarrow": False,
      "text": "Men",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": 0.27,
      "y": 0.4386,
      "font": {"size": 14},
      "showarrow": False,
      "text": "Women",
      "xref": "paper",
      "yref": "paper"
    },
    # Value labels, one per bar, positioned by hand in data coordinates
    {
      "x": -0.179783367258,
      "y": -9.0028708134,
      "showarrow": False,
      "text": "-8.2",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": 0.126017737714,
      "y": -7.65197755671,
      "showarrow": False,
      "text": "-6.9",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": 0.83418871765,
      "y": -4.04210526316,
      "showarrow": False,
      "text": "-3.4",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": 1.12389502762,
      "y": -10.4456300988,
      "showarrow": False,
      "text": "-9.7",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": 1.82401861006,
      "y": -3.81062943387,
      "showarrow": False,
      "text": "-3.3",
      "xref": "x",
      "yref": "y"
    },
    {
      "x": 2.12981971503,
      "y": -7.34796048329,
      "showarrow": False,
      "text": "-6.7",
      "xref": "x",
      "yref": "y"
    },
    # Figure title, placed in paper coordinates above the plot area
    {
      "x": -0.0263157894737,
      "y": 1.32386363636,
      "align": "left",
      "font": {"color": "rgb(68, 68, 68)", "size": 14},
      "showarrow": False,
      "text": "Employment Rate Change Since 2000<br>High School Diploma or Less",
      "xref": "paper",
      "yref": "paper"
    },
    # Source note below the plot; literal <br> tags give the line breaks
    # (the HTML-escaped form is not reliably rendered as a break).
    {
      "x": -0.0236842105263,
      "y": -0.277009222661,
      "align": "left",
      "showarrow": False,
      "text": "Source and notes: CEPR Current Population Survey Outgoing Rotation<br>Group (CPS ORG). Change from 2000 to preliminary 2017 annual<br>average of January to September values.",
      "xanchor": "left",
      "xref": "paper",
      "yref": "paper"
    }
  ],
  "autosize": False,
  "bargap": 0.4,
  "bargroupgap": 0.2,
  "barmode": "group",
  "height": 350,
  "margin": {"r": 0, "t": 70, "b": 60, "l": 20},
  "paper_bgcolor": "rgb(255, 252, 245)",
  "plot_bgcolor": "rgb(255, 252, 245)",
  "showlegend": False,
  "title": "",
  "width": 400,
  "xaxis": {
    "anchor": "y",
    "autorange": True,
    "domain": [0, 1],
    "position": 1,
    "range": [-0.558011049724, 2.5],
    "showspikes": False,
    "showticklabels": True,
    "side": "top",
    "type": "category"
  },
  "yaxis": {
    "autorange": True,
    "domain": [0, 1],
    "range": [-11.0217679699, 0],
    "showgrid": False,
    "showspikes": False,
    "showticklabels": False,
    "title": "",
    "type": "linear",
    "zeroline": True,
    "zerolinecolor": "rgb(224, 224, 224)"
  }
}
fig = Figure(data=data_py, layout=layout)
plot_url = py.plot(fig, filename="Plot 22")