# Load and preprocess 1988 data

We will, over time, look over other years. Our current goal is to explore the features of a single year.

---

In [50]:
%pylab --no-import-all inline
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


## Load the data.

---

If this fails, make sure you've saved your own copy of the data at `../data/interim/1988data.dta` (relative to this notebook), then retry.

In [51]:
# Location of the 1988 Stata file, given relative to the notebook's working directory.
file = "../data/interim/1988data.dta"
# Read the full Stata file; every later cell derives its data from this frame.
df_rawest = pd.read_stata(file)

In [76]:
# Variable codes kept for the analysis, grouped by topic.
# The order matters: the cleaning cell pairs this list positionally
# with a list of readable column names when renaming.
good_columns = [
    'V880274',  # Party Identification
    
    # Moral / social items
    'V880395',  # Abortion
    'V880951',  # Moral Relativism
    'V880954',  # "Newer Lifestyles"
    'V880952',  # Moral Tolerance
    'V880953',  # Traditional Families
    'V880852',  # Gay Job Discrimination
    
    # Government role / spending items
    'V880318',  # National Health Insurance
    'V880323',  # Guaranteed Job
    'V880302',  # Services/Spending
    
    # Race items
    'V880857',  # Affirmative Action
    'V880961',  # Racial Resentment 1
    'V880962',  # Racial Resentment 2
    'V880963',  # Racial Resentment 3
    'V880964',  # Racial Resentment 4
]
# Keep only the selected variables from the raw frame.
df_raw = df_rawest[good_columns]

## Clean the data
---

In [86]:
def convert_to_int(s):
    """Turn an ANES data entry into an integer.

    Entries look like ``"<code>. <label>"`` or a bare ``"<code>"``; the
    text before the first ``'.'`` is parsed as the integer code.

    Parameters
    ----------
    s : str or any non-string value
        A single cell of the raw data.

    Returns
    -------
    int
        The numeric code, when ``s`` is a string with a parseable prefix.
    float
        ``np.nan`` when ``s`` is a string whose prefix is not an integer
        (a warning is emitted in that case).
    object
        ``s`` itself, unchanged, when it is not a string (e.g. already a
        number or NaN) and therefore has no ``partition`` method.

    >>> convert_to_int("1. Govt should provide many fewer services")
    1
    >>> convert_to_int("2")
    2
    """
    # Imported locally because the notebook's import cell does not bring
    # `warnings` into scope; without this the ValueError branch raised
    # NameError instead of warning.
    import warnings

    try:
        return int(s.partition('.')[0])
    except ValueError:
        # A string, but its prefix is not a number: report it and treat as missing.
        warnings.warn("Couldn't convert: "+s)
        return np.nan
    except AttributeError:
        # Not a string (no .partition): pass the value through untouched.
        return s
    
def not_informative_to_nan(x):
    """Map non-informative answer codes to a missing value.

    The codes 0, 8 and 9 are used by ANES for various non-answers
    (for example, a question that does not pertain to the respondent).
    Those become ``np.nan``; any other value is returned unchanged.
    """
    if x in {0, 8, 9}:
        return np.nan
    return x

def _elementwise(frame, func):
    """Apply ``func`` to every cell of ``frame`` and return the resulting frame.

    ``DataFrame.applymap`` was deprecated in pandas 2.1 in favour of
    ``DataFrame.map``; use whichever one this pandas version provides.
    """
    cellwise = frame.map if hasattr(frame, 'map') else frame.applymap
    return cellwise(func)


def _negate(x):
    """Flip the sign of a single answer (NaN stays NaN)."""
    return -x


# Step 1: parse every raw entry into its integer code.
df = _elementwise(df_raw, convert_to_int)

# Step 2: blank out the non-answer codes 8, 9 and 0 everywhere except party
# ID, which is handled separately below because 0 is a real answer there.
non_pid_columns = [col for col in df.columns if col != 'V880274']
df[non_pid_columns] = _elementwise(df[non_pid_columns], not_informative_to_nan)

# Abortion additionally treats code 7 as a non-answer.
# Columns are replaced with ``df[col] = ...`` rather than ``df.loc[:, col] = ...``
# so the new values never have to be cast into the old column's dtype.
df['V880395'] = df['V880395'].apply(lambda x: np.nan if x in {7, 8, 9, 0} else x)

# Step 3: code so that liberal is higher numbers.
# Party ID -- 7: other minor party, 8: apolitical, 9: NA are dropped; the
# remaining codes are negated.
df['V880274'] = df['V880274'].apply(lambda x: np.nan if x >= 7 else -x)

# Items whose scale is simply negated.
flipped_columns = [
    'V880951',  # 1: moral relativism, 5: no relativism
    'V880952',  # Tolerance. 1: tolerance, 7: not
    'V880852',  # protect gays against discrimination

    'V880318',  # Gov't insurance?
    'V880323',  # Gov't guaranteed job?

    'V880857',  # affirmative action
    'V880962',  # Racial support
    'V880964',  # Systemic factors?
]
for col in flipped_columns:
    df[col] = df[col].apply(_negate)

# Step 4: give the columns readable names. The mapping is spelled out per
# variable code so it cannot drift out of step with the order of another list.
readable_names = {
    'V880274': "PartyID",

    'V880395': "Abortion",
    'V880951': "MoralRelativism",
    'V880954': "NewerLifestyles",
    'V880952': "MoralTolerance",
    'V880953': "TraditionalFamilies",
    'V880852': "GayJobDiscrimination",

    'V880318': "NationalHealthInsurance",
    'V880323': "StandardOfLiving",
    'V880302': "ServicesVsSpending",

    'V880857': "AffirmativeAction",
    'V880961': "RacialResentment1",
    'V880962': "RacialResentment2",
    'V880963': "RacialResentment3",
    'V880964': "RacialResentment4",
}
df = df.rename(columns=readable_names)

In [87]:
# Announce the name of the cleaned frame produced by the cell above.
print("Variables now available: df")

Variables now available: df


In [88]:
# Sanity check: distribution of the raw party-ID labels, to compare with the recoded PartyID.
df_rawest.V880274.value_counts()

1. WEAK DEM         359
0. STRONG DEM       355
5. WEAK REP         281
6. STRONG REP       279
4. IND-REP          270
2. IND-DEM          240
3. IND-IND          215
8. APOLITICAL        33
9. NA                 7
7. OTH-MINOR PTY      1
Name: V880274, dtype: int64

In [89]:
# Recoded party ID: codes 7-9 became NaN (value_counts omits them) and the rest were negated.
df.PartyID.value_counts()

-1.0    359
 0.0    355
-5.0    281
-6.0    279
-4.0    270
-2.0    240
-3.0    215
Name: PartyID, dtype: int64

In [90]:
# Abortion after cleaning: codes 0, 7, 8 and 9 were set to NaN, so only substantive answers remain.
df.Abortion.value_counts()

4.0    711
2.0    662
3.0    370
1.0    253
Name: Abortion, dtype: int64

In [91]:
# Preview the first rows of the cleaned, renamed frame.
df.head()

Unnamed: 0,PartyID,Abortion,MoralRelativism,NewerLifestyles,MoralTolerance,TraditionalFamilies,GayJobDiscrimination,NationalHealthInsurance,StandardOfLiving,ServicesVsSpending,AffirmativeAction,RacialResentment1,RacialResentment2,RacialResentment3,RacialResentment4
0,-1.0,1.0,-2.0,1.0,-2.0,1.0,-5.0,-2.0,-2.0,6.0,-4.0,1.0,-4.0,1.0,-4.0
1,-4.0,4.0,,2.0,-2.0,2.0,-5.0,,,5.0,-2.0,1.0,-4.0,2.0,-4.0
2,-5.0,4.0,,,,,,-7.0,-5.0,4.0,,,,,
3,-2.0,2.0,-5.0,1.0,-2.0,2.0,-1.0,-3.0,-3.0,3.0,-5.0,4.0,-2.0,2.0,-2.0
4,-5.0,2.0,,4.0,-2.0,2.0,-1.0,-6.0,-4.0,4.0,-5.0,4.0,-4.0,4.0,-1.0


In [92]:
# Save the cleaned frame; with the default settings the row index is written as the first column.
df.to_csv("../data/processed/1988.csv")