# Load and preprocess 2012 data

Over time we will look at other years as well. For now, the goal is to explore the features of a single year.

---

In [1]:
%pylab --no-import-all inline
import warnings

import pandas as pd
%matplotlib inline

Populating the interactive namespace from numpy and matplotlib


## Load the data.

---

If this fails, make sure you have saved your own copy of the data at `../data/anes_timeseries_2012.dta`, then retry.

In [2]:
# Path to the Stata file, relative to this notebook.
file = "../data/anes_timeseries_2012.dta"

# Survey items kept for the analysis; every other column in the file is ignored.
good_columns = ['campfin_limcorp', # "Should gov be able to limit corporate contributions"
                'pid_self', # Your own party identification
                'spsrvpr_ssself', # Government services
                'defsppr_self', # Defense spending
                'inspre_self', 
                'gun_control', # Gun control
                'guarpr_self',  # Is the gov't responsible for standard of living?
                'immig_policy', 
                'aidblack_self', 
                'envjob_self', 
                'aa_uni', 
                'fedspend_ss', 
                'fedspend_schools', 
                'fedspend_scitech', 
                'fedspend_crime', 
                'fedspend_welfare', 
                'envir_gwarm', 
                'gayrt_marry', 
                'penalty_favdpen', 
                'relig_churchoft', # Do you go to church often?
                'dem_edu',  # Not really about parties.
                'dem_veteran', # Are you a veteran?
                'budget_rdefctax', 
                'budget_rdefmil', 
                'patriot_amident', 
                'milln_milltax', 
                'fairjob_opin', 
                'immigpo_jobs', # Do immigrants take jobs?
                'wiretap_warrant',
               ]  # TODO: add voted for
                  # TODO: for or against a progressive tax scheme
                  # TODO: add diplomacy and military intervention


# Ask the reader for just the selected columns instead of loading the whole
# file and discarding most of it; read_stata returns them in the order given,
# so the result matches pd.read_stata(file)[good_columns].
df_raw = pd.read_stata(file, columns=good_columns)

## Clean the data
---

In [3]:
def convert_to_int(s):
    """Turn ANES data entry into an integer.

    The text before the first '.' is parsed as an integer, so labelled
    answers such as "1. Some label" reduce to their numeric code.

    Parameters
    ----------
    s : object
        A single cell value. Strings are parsed; anything without a
        ``partition`` method (e.g. a float or NaN) is passed through.

    Returns
    -------
    int, float or object
        The parsed integer; ``np.nan`` (after emitting a warning) when the
        leading text is not an integer; or ``s`` itself for non-strings.

    >>> convert_to_int("1. Govt should provide many fewer services")
    1
    >>> convert_to_int("2")
    2
    """
    try:
        code = s.partition('.')[0]
        return int(code)
    except ValueError:
        # Requires the module-level `import warnings`; without it this branch
        # raised NameError instead of degrading to NaN.
        warnings.warn("Couldn't convert: "+s)
        return np.nan
    except AttributeError:
        # Not a string (no .partition): leave the value untouched.
        return s

def negative_to_nan(value):
    """Replace a negative code with NaN; return anything >= 0 unchanged.

    ANES uses negative numbers for the various kinds of non-answer
    (for example, a question that does not apply to the respondent),
    so they are treated as missing here.
    """
    # NaN fails the comparison as well, so it stays NaN.
    if not (value >= 0):
        return np.nan
    return value

def lib1_cons2_neutral3(x):
    """Recode a question whose neutral answer is coded 3.

    A code of 1 is returned as-is; every other code is shifted down by
    three, so 2 becomes -1 and 3 becomes 0.
    """
    if x == 1:
        return x
    return x - 3

def liblow_conshigh(x):
    """Flip a scale on which the liberal answer has the low code.

    Negating the code reverses the ordering of the responses.
    """
    flipped = -x
    return flipped

def dem_edu_special_treatment(x):
    """Treat code 95 ("Other") and any negative code as missing."""
    if x == 95:
        return np.nan
    if x < 0:
        return np.nan
    return x

def pid_self_special_treatment(x):
    """Recode party identification, keeping only codes 1, 2 and 3.

    Codes 1-3 are passed through ``lib1_cons2_neutral3``; any other
    code becomes NaN.
    """
    if x in {1, 2, 3}:
        return lib1_cons2_neutral3(x)
    return np.nan

# Recoding function for each column that needs one. Columns missing from
# this mapping are left as parsed.
transforms = {
    'campfin_limcorp': lib1_cons2_neutral3,
    'pid_self': pid_self_special_treatment,
    'defsppr_self': liblow_conshigh,
    'inspre_self': liblow_conshigh,
    'gun_control': lib1_cons2_neutral3,
    'guarpr_self': liblow_conshigh,
    'aidblack_self': liblow_conshigh,
    'envjob_self': liblow_conshigh,
    'aa_uni': lib1_cons2_neutral3,
    'fedspend_ss': lib1_cons2_neutral3,
    'fedspend_schools': lib1_cons2_neutral3,
    'fedspend_scitech': lib1_cons2_neutral3,
    'fedspend_crime': lib1_cons2_neutral3,
    'fedspend_welfare': lib1_cons2_neutral3,
    'envir_gwarm': liblow_conshigh,
    'gayrt_marry': liblow_conshigh,
    'budget_rdefctax': lib1_cons2_neutral3,
    'budget_rdefmil': lib1_cons2_neutral3,
    'milln_milltax': lib1_cons2_neutral3,
    # Only codes 1 and 2 are kept (negated); anything else becomes NaN.
    'fairjob_opin': lambda x: np.nan if x not in {1, 2} else -x,
    'wiretap_warrant': lib1_cons2_neutral3,
    'dem_edu': dem_edu_special_treatment,
}

# Parse every cell into its integer code, then blank out the negative
# (non-answer) codes.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour of
# DataFrame.map; it is kept here so the notebook still runs on older pandas.
df = df_raw.applymap(convert_to_int)
df = df.applymap(negative_to_nan)

# Transform each column that has a recoding registered in `transforms`.
# An explicit membership test is used instead of catching KeyError so that a
# KeyError raised inside a transform is not silently swallowed, which would
# leave the column unrecoded without any sign of failure.
for column in df.columns:
    if column in transforms:
        # na_action='ignore' leaves missing values untouched.
        df[column] = df[column].map(transforms[column], na_action='ignore')

In [4]:
# Tell the reader which variable holds the cleaned data.
print("Variables now available: df")

Variables now available: df
