In [144]:
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_bootstrap_components as dbc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import plotly.express as px

import pandas as pd
import numpy as np
import pyreadstat
import re



# Load the survey data.
# `df` is the frame exactly as read from the SPSS file and `meta` is the
# metadata object returned alongside it; `df_copy` is what
# set_value_labels() builds from the two (per the pyreadstat docs: the same
# data with value codes swapped for their labels).
fpath = 'data/ATP W42.sav'

df, meta = pyreadstat.read_sav(fpath)

df_copy = pyreadstat.set_value_labels(df, meta)

In [18]:
""" 
data cleaning, transformation

"""

# Matches the leading item code of a column label: everything from the start
# of the label up to and including the first period, plus any whitespace that
# follows it. The pattern is anchored and applied once, so periods that occur
# later in the question text are left untouched.
pattern = re.compile(r'^.+?\.\s*')

# Column name -> display label with the leading item code removed.
label_dict = {}

for key, value in meta.column_names_to_labels.items():
    # A missing label (None) is treated as empty instead of raising in re.sub.
    cleaned = pattern.sub('', value, count=1).strip() if value else ''
    # Fall back to the column name so every entry is a non-empty string.
    label_dict[key] = cleaned or key


# helper function used to group survey items by thematic subject matter code (e.g. names containing 'RQ')
def list_helper(theme_code, columns=None):
    """Return the column names that contain ``theme_code``.

    Parameters
    ----------
    theme_code : str
        Case-sensitive substring to look for. A match anywhere in the name
        counts, not only at the start.
    columns : iterable of str, optional
        Names to search. Defaults to ``df.columns``.

    Returns
    -------
    list of str
        The matching names, in the order they were supplied.
    """
    if columns is None:
        columns = df.columns
    return [name for name in columns if theme_code in name]


# Survey columns grouped by theme. Most groups are collected by matching a
# code fragment against the column names; two are spelled out by hand.
past_future = ['PAST_W42', 'FUTURE_W42', 'SC1_W42']
policy, confidence = list_helper('POLICY'), list_helper('CONF')
rq_form1, pw_form2 = list_helper('RQ'), list_helper('PQ')
scm4, scm5 = list_helper('SCM4'), list_helper('SCM5')
# columns whose name begins with 'Q' followed by a digit (Q6, Q7, etc.)
q = list(filter(re.compile("^Q[0-9]").search, df.columns))
pop, knowledge = list_helper('POP'), list_helper('KNOW')
demographics = list_helper('F_')
weight = ['WEIGHT_W42']


def _dropdown_options(columns):
    """Build dcc.Dropdown 'options' entries for the given column names.

    Entries follow the order of ``label_dict``; each one pairs the cleaned
    label ('label') with the column name ('value').
    """
    wanted = set(columns)
    return [{'label': text, 'value': name}
            for name, text in label_dict.items()
            if name in wanted]


# lists of {'label', 'value'} dicts to be used with the dcc.Dropdown() property 'options'
policy_dropdown = _dropdown_options(policy)

demo_dropdown = _dropdown_options(demographics)


SyntaxError: invalid syntax (<ipython-input-141-fb0be0b6c1a8>, line 1)

In [103]:
# function generates a static bar chart to be used as visuals
def frequency_chart(array1, array2):
    """Build a weighted percentage bar chart of ``array2`` within ``array1``.

    Cross-tabulates two columns of ``df_copy``, summing ``WEIGHT_W42`` per
    cell and normalising each row so that it adds up to 100.

    Parameters
    ----------
    array1 : str
        Column of ``df_copy`` whose categories become the rows (one bar each).
    array2 : str
        Column of ``df_copy`` whose categories become the bar series.

    Returns
    -------
    plotly figure
        Horizontal bar chart with the percentage on the x-axis.

    Raises
    ------
    KeyError
        If either name is missing from ``df_copy``,
        ``meta.variable_value_labels`` or (for ``array2``) ``label_dict``.
    """
    # The string 'sum' is used instead of the builtin: pandas deprecates
    # passing builtin callables as aggfunc.
    table = pd.crosstab(df_copy[array1],
                        df_copy[array2],
                        values=df_copy.WEIGHT_W42, aggfunc='sum', dropna=True,
                        normalize='index')

    # Order rows/columns as listed in the metadata, keeping only the labels
    # that actually occur in the table (.loc raises KeyError on absent labels).
    row_order = [label for label in meta.variable_value_labels[array1].values()
                 if label in table.index]
    col_order = [label for label in meta.variable_value_labels[array2].values()
                 if label in table.columns]
    temp_df = table.loc[row_order, col_order] * 100

    # Percentages go on x so that the "Frequency (%)" axis title below labels
    # the numeric axis (same orientation as the dashboard callback).
    fig = px.bar(temp_df, x=temp_df.columns, y=temp_df.index)

    fig.update_layout(
        title={
            'text': label_dict[array2],
            'y': 1,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(
                size=12)},

        xaxis_title="Frequency (%)",
        yaxis_title=None,
        legend=dict(
            title=label_dict[array2],
            yanchor="bottom",
            y=.9,
            xanchor="center",
            x=0)
        )

    return fig

#### TODO: decide what the following cell should be used for (it currently only displays `demo_dropdown`)

In [171]:
# Scratch cell. The commented-out loop below would print, for every variable
# that has value labels, its column label followed by the value-label mapping.
# As it stands the cell only displays `demo_dropdown`.
# for k, v in meta.variable_value_labels.items():
#     print(f'''{meta.column_names_to_labels[k]}:
#     {v}
    
#     ''')
demo_dropdown

[{'label': 'Metropolitan area indicator coded from FIPS', 'value': 'F_METRO'},
 {'label': 'Census region based on self-reported zipcode',
  'value': 'F_CREGION'},
 {'label': 'Age category', 'value': 'F_AGECAT'},
 {'label': 'Sex', 'value': 'F_SEX'},
 {'label': 'Education level category', 'value': 'F_EDUCCAT'},
 {'label': 'Education level category 2', 'value': 'F_EDUCCAT2'},
 {'label': ' Are you of Hispanic, Latino, or Spanish origin, such as Mexican, Puerto Rican or Cuban? - Includes RACE backcodes',
  'value': 'F_HISP'},
 {'label': 'Combining race', 'value': 'F_RACECMB'},
 {'label': 'Race-Ethnicity', 'value': 'F_RACETHN'},
 {'label': ' Where were you born?', 'value': 'F_NATIVITY'},
 {'label': 'Citizenship', 'value': 'F_CITIZEN'},
 {'label': 'Marital status', 'value': 'F_MARITAL'},
 {'label': 'Religion', 'value': 'F_RELIG'},
 {'label': ' Would you describe yourself as a born-again or evangelical Christian, or not?',
  'value': 'F_BORN'},
 {'label': 'Religious service attendance', 'value

In [243]:
# Display names of the survey themes, in the same order as `theme_labels`.
theme_categories = ['Social impact of scientific developments',
                    'Policy decisions on scientific issues',
                    'Confidence in public figures',
                    'Opinions on Medical, Environmental & Nutrition research scientists',
                    'Opinions on Medical Doctors, Environmental Health Specialists & Dieticians',
                    'Importance of scientific issues',
                    'Opinions on research scientists',
                    'Questions regarding scientific research',
                    "Solving the country's problems",
                    'General scientific knowledge']

# Column lists for each theme. The first theme uses `past_future`
# (PAST_W42, FUTURE_W42, SC1_W42), the list defined in the data-cleaning cell.
theme_labels = [past_future, policy, confidence, rq_form1, pw_form2, scm4, scm5, q, pop, knowledge]

# Theme display name -> list of column names belonging to that theme.
theme_select_dropdown = dict(zip(theme_categories, theme_labels))

# Preview the radio-item options generated for the default theme.
[{'label': i, 'value': i} for i in theme_select_dropdown['Social impact of scientific developments']]

[{'label': 'PAST_W42', 'value': 'PAST_W42'},
 {'label': 'FUTURE_W42', 'value': 'FUTURE_W42'},
 {'label': 'SC1_W42', 'value': 'SC1_W42'}]

In [250]:
"""
Dash app

"""

# Dash application object; asset files matching the pattern are not served.
app = JupyterDash(__name__, assets_ignore='.*bootstrap-journal.css.*')

# Page layout: navbar, introduction text, then the "Exploring by demographic"
# section with two dropdowns (demographic, theme), the bar chart and the radio
# items that pick the survey item to plot.
layout = html.Div([
        dbc.Container([
            dbc.NavbarSimple(
                brand="Science and Society",
                brand_href="#",
                color="primary",
                dark=True,
                fluid=True
            ),
            html.Br(),

            dbc.Row([
                dbc.Col(
                    [html.H4("Introduction"),
                     html.P("""\
                     In 2019, the Pew Research Center conducted a survey of 4,464 adults living in households
                     in the United States. Part of their American Trends Panel, the survey measured respondent
                     attitudes regarding a number of topics, from trust in researchers and the scientific process
                     to whether or not scientists should be involved with guiding public policy decisions.
                     This dashboard's purpose is to provide the user with the ability to examine these trends themselves.
                    """)
                    ],
                    lg=8,
                )
            ]),
            html.Br(),
            html.H4(children=['Exploring by demographic']),
            html.Hr(),

            html.Div([
                dbc.Row([
                    dbc.Col([
                        # clearable=False: the callbacks index by the selected
                        # value, so an emptied dropdown (None) must not occur.
                        dcc.Dropdown(
                            id = 'xaxis-column',
                            options = demo_dropdown,
                            value = 'F_AGECAT',
                            clearable = False
                        ),
                    ]),
                    dbc.Col([
                        dcc.Dropdown(
                            id = 'theme-selection',
                            options = [{'label': k, 'value': k} for k in theme_select_dropdown.keys()],
                            value = 'Social impact of scientific developments',
                            clearable = False
                        )
                    ])
                ]),

                dbc.Row([
                    dbc.Col([
                        dcc.Graph(id='indicator-bar',
                                  config={'displayModeBar': False})
                        ]),
                    dbc.Col([
                        # options are filled in by the theme-selection callback
                        dcc.RadioItems(id='yaxis-column',
                                      value = 'PAST_W42')
                    ])
                ])
            ]),
        ])

])


app.layout = layout


@app.callback(
    Output('yaxis-column', 'options'),
    [Input('theme-selection', 'value')])
def set_theme_options(selected_theme):
    """Return the radio-item options for the selected theme.

    Parameters
    ----------
    selected_theme : str or None
        Current value of the 'theme-selection' dropdown.

    Returns
    -------
    list of dict
        One ``{'label', 'value'}`` entry per column name of the theme. The
        list is empty when the theme is missing from
        ``theme_select_dropdown`` (e.g. the dropdown was cleared).
    """
    # .get() avoids a KeyError in the callback when no theme is selected.
    columns = theme_select_dropdown.get(selected_theme, [])
    return [{'label': i, 'value': i} for i in columns]


@app.callback(
    Output('indicator-bar', 'figure'),
    [Input('xaxis-column', 'value'),
     Input('yaxis-column', 'value')])
def update_graph(x_axis, y_axis):
    """Redraw the bar chart for the selected demographic and survey item.

    Cross-tabulates ``df_copy[x_axis]`` against ``df_copy[y_axis]``, summing
    ``WEIGHT_W42`` per cell and normalising each row to 100.

    Parameters
    ----------
    x_axis : str or None
        Value of the 'xaxis-column' dropdown; its categories become the rows.
    y_axis : str or None
        Value of the 'yaxis-column' radio items; its categories become the
        bar series.

    Returns
    -------
    plotly figure
        Horizontal bar chart with the percentage on the x-axis.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When either input is empty, leaving the current figure in place.
    KeyError
        If a selected name is missing from ``df_copy`` or
        ``meta.variable_value_labels``.
    """
    # Imported locally so the guard does not depend on the import cell.
    from dash.exceptions import PreventUpdate

    # A cleared or unset component reports None; keep the current figure
    # instead of failing on df_copy[None].
    if not x_axis or not y_axis:
        raise PreventUpdate

    # The string 'sum' is used instead of the builtin: pandas deprecates
    # passing builtin callables as aggfunc.
    table = pd.crosstab(df_copy[x_axis],
                        df_copy[y_axis],
                        values=df_copy.WEIGHT_W42, aggfunc='sum', dropna=True,
                        normalize='index')

    # Order rows/columns as listed in the metadata, keeping only the labels
    # that actually occur in the table (.loc raises KeyError on absent labels).
    row_order = [label for label in meta.variable_value_labels[x_axis].values()
                 if label in table.index]
    col_order = [label for label in meta.variable_value_labels[y_axis].values()
                 if label in table.columns]
    new_df = table.loc[row_order, col_order] * 100

    fig = px.bar(new_df, x=new_df.columns, y=new_df.index)

    fig.update_layout(
        title={
            'text': None,
            'y': 1,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {
                'size': 18}},

        margin=dict(l=20, r=20, t=20, b=20),

        xaxis_title="Frequency (%)",
        yaxis_title=None,
        legend=dict(
            title=None,
            yanchor="bottom",
            y=.9,
            xanchor="center",
            x=0
        ))

    return fig






# Start the Dash server (debug mode) only when this code runs as the main module.
if '__main__' == __name__:
    app.run_server(debug=True)

Dash app running on http://127.0.0.1:8050/


In [None]:
# @app.callback(
#     Output('yaxis-column', 'value'),
#     [Input('yaxis-column', 'options')])
# def set_theme_value(available_options):
#         return available_options[0]['value']
    
    
# @app.callback(
#     Output('display-selected-values', 'children'),
#     [Input('countries-radio', 'value'),
#      Input('cities-radio', 'value')])
# def set_display_children(selected_country, selected_city):
#         return u'{} is a city in {}'.format(selected_city, selected_country)

### TEMPORARY PLACEMENT OF GRAPH CALLBACK