# Load and preprocess 1992 data

Other years will be examined over time; for now, the goal is to explore the features of a single year (1992).

---

In [1]:
%pylab --no-import-all inline
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


## Load the data.

---

If this fails, make sure you have saved your own copy of the data at `../data/interim/1992data.dta` (the path the next cell reads), then retry.

In [2]:
# Path is relative to the notebook's working directory.
file = "../data/interim/1992data.dta"
# Keep the untouched Stata frame around; later cells compare against it.
df_rawest = pd.read_stata(file)

In [3]:
# ANES variable codes to keep. The order matters: the cleaning cell pairs
# these positionally with human-readable column names.
good_columns = [
    # Disabled: 'campfin_limcorp' -- "Should gov be able to limit corporate contributions"
    'V923634',  # Your own party identification

    'V923732',  # Abortion
    'V926115',  # Moral Relativism
    'V926118',  # "Newer" lifestyles
    'V926116',  # Moral tolerance
    'V926117',  # Traditional Families
    'V925924',  # Gay Job Discrimination
    'V925928',  # Gay Adoption
    'V925926',  # Gay Military Service

    'V923716',  # National health insurance
    'V923718',  # Guaranteed Job
    'V923701',  # Services/Spending

    # NOTE(review): this is the only code with a V90 prefix (the rest are V92),
    # and the df.head() preview shows it missing in every displayed row --
    # confirm this is the intended variable.
    'V900464',  # Affirmative Action -- 1-5; 7 is other
    'V926126',  # Racial attitudes: renamed RacialWorkWayUp in the cleaning cell
    'V926129',  # Racial attitudes: renamed RacialGenerational in the cleaning cell
    'V926127',  # Racial attitudes: renamed RacialDeserve in the cleaning cell
    'V926128',  # Racial attitudes: renamed RacialTryHarder in the cleaning cell
]

# Restrict the raw frame to the selected variables, in the order listed above.
df_raw = df_rawest[good_columns]

## Clean the data
---

In [4]:
def convert_to_int(s):
    """Turn ANES data entry into an integer.

    String entries have the form "<code>. <label>"; only the text before
    the first '.' is parsed as the integer code.

    >>> convert_to_int("1. Govt should provide many fewer services")
    1
    >>> convert_to_int("2")
    2

    Parameters
    ----------
    s : object
        A raw cell value. Strings are parsed; anything without a
        ``partition`` method (e.g. a float or NaN) is passed through.

    Returns
    -------
    int
        The leading code, when ``s`` is a string that starts with one.
    float
        ``np.nan`` (after emitting a warning) when ``s`` is a string whose
        leading part is not an integer.
    object
        ``s`` itself, unchanged, when it is not string-like.
    """
    # Imported locally because the notebook's import cell does not bring in
    # `warnings`; without this the ValueError branch died with a NameError
    # instead of warning and returning NaN.
    import warnings

    try:
        return int(s.partition('.')[0])
    except ValueError:
        # String-like, but the part before the first '.' is not an integer.
        warnings.warn("Couldn't convert: "+s)
        return np.nan
    except AttributeError:
        # No .partition(): already numeric (or missing), so leave it alone.
        return s

def negative_to_nan(value):
    """Replace negative codes with NaN and pass everything else through.

    ANES marks several kinds of non-answer (for example, a question that
    does not pertain to the respondent) with negative numbers. Any value
    that does not compare ``>= 0`` -- which includes NaN itself -- is
    returned as ``np.nan``.
    """
    if value >= 0:
        return value
    return np.nan

def lib1_cons2_neutral3(x):
    """Recode a question whose neutral answer is coded 3.

    A response of 1 is returned as-is; every other response is shifted
    down by three (so 2 becomes -1 and 3 becomes 0).
    """
    if x == 1:
        return x
    return -3 + x

def liblow_conshigh(x):
    """Reverse a scale's ordering by flipping the sign of the response.

    Meant for questions where the liberal response is the low number.
    """
    flipped = -x
    return flipped

def not_informative_to_nan(x):
    """Replace the non-informative codes 8, 9 and 0 with NaN.

    ANES uses 8, 9, and 0 for various non-answers (for example, a question
    that does not pertain to the respondent). Any other value is returned
    unchanged.
    """
    if x in {8, 9, 0}:
        return np.nan
    return x


# Reduce every "<code>. <label>" entry to its numeric code.
df = df_raw.applymap(convert_to_int)

# Party ID is kept out of the generic 8/9/0 -> NaN pass: its raw labels start
# at "0. STRONG DEMOCRAT", so 0 is a real answer for that variable.
non_pid_columns = [column for column in df.columns if column != 'V923634']
df[non_pid_columns] = df[non_pid_columns].applymap(not_informative_to_nan)

# Explicit code -> readable-name mapping, so each name sits next to the ANES
# variable it labels instead of relying on two lists staying in the same order.
readable_names = {
    'V923634': "PartyID",

    'V923732': "Abortion",
    'V926115': "MoralRelativism",
    'V926118': "NewerLifestyles",
    'V926116': "MoralTolerance",
    'V926117': "TraditionalFamilies",
    'V925924': "GayJobDiscrimination",
    'V925928': "GayAdoption",
    'V925926': "GayMilitaryService",

    'V923716': "NationalHealthInsurance",
    'V923718': "StandardOfLiving",
    'V923701': "ServicesVsSpending",

    'V900464': "AffirmativeAction",
    'V926126': "RacialWorkWayUp",
    'V926129': "RacialGenerational",
    'V926127': "RacialDeserve",
    'V926128': "RacialTryHarder",
}
df = df.rename(columns=readable_names)

# Code so that liberal is lower numbers

# 7: other minor party, 8: apolitical, 9: NA
df.loc[:, 'PartyID'] = df.PartyID.apply(lambda x: np.nan if x >= 7 else x)

# Abortion additionally drops code 7 before the sign flip.
df.loc[:, 'Abortion'] = df.Abortion.apply(lambda x: np.nan if x in {7, 8, 9, 0} else -x)

# These scales get their sign flipped with the shared helper.
# NOTE(review): the previous per-column comments did not match the columns
# they were attached to (they looked copied from another questionnaire);
# confirm each scale's direction against the ANES 1992 codebook.
sign_flipped_columns = [
    'NewerLifestyles',
    'TraditionalFamilies',
    'ServicesVsSpending',
    'RacialTryHarder',
    'RacialWorkWayUp',
]
for flipped_column in sign_flipped_columns:
    df.loc[:, flipped_column] = df[flipped_column].apply(liblow_conshigh)

In [5]:
# Announce the name of the cleaned frame this notebook leaves in the namespace.
print("Variables now available: df")

Variables now available: df


In [6]:
# Sanity check: distribution of the raw party-ID labels before recoding.
df_rawest.V923634.value_counts()

0. STRONG DEMOCRAT                      444
1. WEAK DEMOCRAT                        432
2. INDEPENDENT-DEMOCRAT                 353
5. WEAK REPUBLICAN                      349
4. INDEPENDENT-REPUBLICAN               305
3. INDEPENDENT-INDEPENDENT              287
6. STRONG REPUBLICAN                    275
8. APOLITICAL                            26
9. NA                                     9
7. OTHER-MINOR PARTY, REFUSES TO SAY      5
Name: V923634, dtype: int64

In [7]:
# Sanity check: after recoding, codes 7-9 should no longer be counted (they are NaN).
df.PartyID.value_counts()

0.0    444
1.0    432
2.0    353
5.0    349
4.0    305
3.0    287
6.0    275
Name: PartyID, dtype: int64

In [8]:
# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,PartyID,Abortion,MoralRelativism,NewerLifestyles,MoralTolerance,TraditionalFamilies,GayJobDiscrimination,GayAdoption,GayMilitaryService,NationalHealthInsurance,StandardOfLiving,ServicesVsSpending,AffirmativeAction,RacialWorkWayUp,RacialGenerational,RacialDeserve,RacialTryHarder
0,6.0,-2.0,5.0,-1.0,3.0,-1.0,5.0,5.0,5.0,7.0,6.0,-3.0,,-1.0,4.0,5.0,-1.0
1,4.0,-3.0,3.0,-1.0,3.0,-2.0,5.0,5.0,,6.0,6.0,-4.0,,-2.0,4.0,5.0,-2.0
2,6.0,-1.0,5.0,-1.0,5.0,-1.0,5.0,5.0,5.0,2.0,,-4.0,,-4.0,2.0,4.0,-4.0
3,0.0,-1.0,2.0,-3.0,2.0,-2.0,1.0,4.0,1.0,1.0,1.0,-7.0,,-1.0,1.0,1.0,-4.0
4,1.0,-1.0,1.0,,5.0,-1.0,2.0,5.0,5.0,,,,,-1.0,1.0,2.0,-1.0


In [9]:
# Persist the cleaned frame. to_csv is called with its defaults, so the row
# index is written as the first (unnamed) CSV column.
df.to_csv("../data/processed/1992.csv")