# Welcome to your Quantitative Social Sciences Analysis Toolkit!

Use this notebook to perform the following operations:

- load data
- make histograms of variables of interest
- calculate group differences on variables of interest
- calculate correlations between variables of interest

In [1]:
import pandas as pd
import logging

# Load the survey data once; every later cell reads this module-level frame.
# The path is relative to the notebook's working directory.
# NOTE(review): low_memory=False is presumably here to avoid mixed-dtype
# inference on this wide file -- confirm against the pandas read_csv docs.
df = pd.read_csv(
    'data/anes_pilot_2018.csv',
    low_memory=False,
)

In [97]:
%matplotlib inline 

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from matplotlib import pyplot as plt

@interact(x=df.columns)
def categorical_plot(x='gender'):
    """Draw a bar chart of the number of rows in each group of column ``x``.

    ``interact`` builds the selector for ``x`` from ``df.columns``. The
    function returns whatever the pandas ``plot`` call returns.
    """
    figure = plt.figure(figsize=(10, 5))
    axes = figure.gca()
    group_sizes = df.groupby(x).size()
    return group_sizes.plot(kind='bar', ax=axes)

pass

interactive(children=(Dropdown(description='x', index=284, options=('version', 'caseid', 'weight', 'weight_sps…

In [58]:
%matplotlib inline 

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from matplotlib import pyplot as plt

def cast(s):
    """Convert ``s`` to ``float``, or return ``None`` when it cannot be converted.

    Only the errors ``float()`` raises for bad input are handled:
    ``TypeError`` (e.g. ``None``), ``ValueError`` (e.g. ``"abc"``) and
    ``OverflowError`` (an integer too large for a float). Anything else,
    notably ``KeyboardInterrupt``, propagates so a long-running cell can
    still be interrupted.
    """
    try:
        return float(s)
    except (TypeError, ValueError, OverflowError):
        return None

@interact(dependent=df.columns.sort_values(), independent=df.columns.sort_values())
def comparison_plot(dependent='ftblack', independent='ftantifa', bins=list(range(10,100,5))):
    """Plot ``dependent`` against ``independent`` using rows where both are present.

    When ``independent`` has more than 15 distinct values a scatter plot is
    drawn. Otherwise both columns are coerced to numbers and one translucent
    histogram of ``dependent`` is overlaid per ``independent`` group.

    Parameters
    ----------
    dependent, independent : str
        Column names of ``df``; ``interact`` is given the sorted column list
        for both selectors.
    bins
        Forwarded unchanged to the pandas histogram call.
        NOTE(review): ``interact`` presumably turns the list default into a
        dropdown, so this likely arrives as a single bin count -- confirm.

    Returns
    -------
    The matplotlib axes that were drawn on.

    NOTE(review): a later cell defines another ``comparison_plot`` and another
    ``cast``. This function deliberately does not call ``cast`` so that its
    behaviour does not depend on which cell was executed last.
    """
    max_hist_groups = 15
    # NOTE(review): values >= 900 look like survey missing-data codes --
    # confirm with the codebook.
    sentinel_floor = 900

    _, ax = plt.subplots(figsize=(10, 5))

    # Work on just the selected columns instead of copying the whole frame on
    # every widget change; dict.fromkeys de-duplicates when both names match.
    selected = list(dict.fromkeys([dependent, independent]))
    dff = df[selected].dropna().copy()

    # Report the number of rows that are actually plotted: rows missing a
    # value in either selected column have been dropped.
    msg = "nrows: {}; nrows after:{}".format(str(len(df[dependent])), str(len(dff)))
    logging.warning(msg)

    if dff[independent].nunique() > max_hist_groups:
        ax.scatter(dff[independent], dff[dependent])
        ax.set_xlabel(independent)
        ax.set_ylabel(dependent)
        return ax

    try:
        # Values that cannot be parsed as numbers become NaN.
        numeric = dff.apply(pd.to_numeric, errors='coerce')
        if numeric[dependent].mean() < sentinel_floor:
            numeric = numeric[numeric[dependent] < sentinel_floor]
        dff = numeric
    except Exception as e:
        # Best effort: if the numeric clean-up fails, plot the uncleaned rows.
        logging.warning(e)

    dff.groupby(independent)[dependent].plot(ax=ax, kind='hist', alpha=.33, bins=bins)
    ax.set_xlabel(dependent)
    ax.legend(title=independent)
    return ax


interactive(children=(Dropdown(description='dependent', index=141, options=('acaapprove', 'acaapprove_page_tim…

In [62]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols


def cast(s):
    """Convert ``s`` to ``int``, or return ``None`` when it cannot be converted.

    Follows ``int()`` semantics: floats are truncated toward zero
    (``3.9 -> 3``) and non-integer strings such as ``"3.5"`` are rejected.
    Only ``TypeError``, ``ValueError`` and ``OverflowError`` (raised for
    infinities) are handled; anything else, notably ``KeyboardInterrupt``,
    propagates so a long-running cell can still be interrupted.

    NOTE(review): an earlier cell defines a float-returning function with the
    same name; whichever cell ran last wins in the kernel.
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        return None


@interact(dependent=df.columns.sort_values(), independent=df.columns.sort_values())
def comparison_plot(dependent='birthyr', independent='gender'):
    """Regress ``dependent`` on ``independent`` and cross-tabulate the two columns.

    Rows missing either column are dropped and both columns are passed through
    ``cast``. An OLS model is fitted with the statsmodels formula API:
    ``independent`` is treated as categorical (``C(...)``) when it has fewer
    than 10 distinct values, and as a plain term otherwise. The model summary
    is printed.

    Parameters
    ----------
    dependent, independent : str
        Column names of ``df``; ``interact`` is given the sorted column list
        for both selectors.

    Returns
    -------
    A ``pd.crosstab`` of the cleaned ``independent`` by ``dependent`` values,
    or ``None`` when anything in the analysis raises (the error is logged).

    NOTE(review): an earlier cell defines a plotting function with this same
    name, and a float-returning ``cast``; whichever cell ran last wins in the
    kernel. Despite its name this function draws nothing.
    """
    max_categories = 10
    # NOTE(review): values >= 900 look like survey missing-data codes --
    # confirm with the codebook.
    sentinel_floor = 900

    # Work on just the selected columns instead of copying the whole frame on
    # every widget change; dict.fromkeys de-duplicates when both names match.
    selected = list(dict.fromkeys([dependent, independent]))
    dff = df[selected].dropna().copy()

    # Report the rows left after dropping those missing either selected column.
    msg = "nrows: {}; nrows after:{}".format(str(len(df[dependent])), str(len(dff)))
    logging.warning(msg)

    try:
        for column in selected:
            # cast() returns None for unusable values; to_numeric turns those
            # into NaN so they can be dropped before counting categories.
            dff[column] = pd.to_numeric(dff[column].apply(cast), errors='coerce')
        dff = dff.dropna()

        if dff[dependent].mean() < sentinel_floor:
            dff = dff[dff[dependent] < sentinel_floor]

        if dff[independent].nunique() < max_categories:
            formula = '{} ~ C({})'.format(dependent, independent)
        else:
            formula = '{} ~ {}'.format(dependent, independent)
        results = ols(formula, data=dff).fit()

        print(results.summary())
        # Tabulate the same cleaned rows the model was fitted on.
        return pd.crosstab(dff[independent], dff[dependent])

    except Exception as e:
        logging.error(e)

interactive(children=(Dropdown(description='dependent', index=20, options=('acaapprove', 'acaapprove_page_timi…