# Machine Learning with Voter Preferences

In [1]:
# Arshiya Sabzevari, Colton Ragland, Noumik Thadani, Trevor Huis in 't Veld

In [2]:
import warnings
warnings.simplefilter("ignore")
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

In [3]:
# Select matplotlib's inline backend so figures are rendered in the notebook output.
%matplotlib inline

## Data Prep
Clean the remainder of the data set so that it fits the needs of our analysis.

In [4]:
# Read the CSV and keep only the rows that have a value in every column.
data = pd.read_csv("cces18_common_vv2.csv").dropna()

# Report the (rows, columns) that remain, then preview the first rows.
print(data.shape)
data.head()

(31945, 31)


Unnamed: 0,birthyr,gender,educ,race,marstat,region,National Economy,Income,Trump Approval,Congress Approval,...,M4A,Repeal ACA,Paris Accord,TPP,Ideology,sexuality,inputstate,EPA CO2 Regulate,Iran Deal,Party
0,1950,2,6,2,3.0,1,4.0,3.0,4.0,4.0,...,1.0,2.0,2.0,2.0,2.0,1.0,34,1.0,2.0,1.0
3,1970,2,5,1,6.0,2,4.0,3.0,4.0,3.0,...,1.0,2.0,2.0,2.0,4.0,1.0,27,1.0,2.0,1.0
4,1971,2,4,1,3.0,4,1.0,1.0,1.0,2.0,...,2.0,1.0,1.0,1.0,4.0,1.0,8,2.0,1.0,3.0
5,1957,2,5,3,1.0,4,3.0,2.0,4.0,4.0,...,1.0,2.0,2.0,2.0,2.0,1.0,53,1.0,2.0,1.0
6,1973,2,5,1,3.0,4,4.0,5.0,4.0,4.0,...,1.0,2.0,2.0,2.0,3.0,1.0,53,1.0,2.0,1.0


In [None]:
# 'Party' is the label to predict; every other column except 'Ideology'
# is kept as a model input.
# NOTE(review): the preview of `data` shows an 'Unnamed: 0' column, which looks
# like a saved row index. It is not dropped here, so it is passed to the model
# as a feature — confirm this is intended.
data_Y = data['Party']
data_X = data.drop(columns=['Party', 'Ideology'])

# Sanity-check the shapes of the inputs and the label, then preview the inputs.
print(data_X.shape)
print(data_Y.shape)
data_X.head()

## Data Exploration
A closer look at our data set, to better understand the features.

In [None]:
# Print the distinct values found in each feature column, one array per column,
# in the same order as the columns of data_X.
for column_name in data_X.columns:
    distinct_values = data_X[column_name].unique()
    print(distinct_values)

## Neural Network
Train a neural network (`MLPClassifier`) and measure its cross-validated accuracy to assess our model.

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Fixed seed for the network's random weight initialisation and batch
# shuffling, so the accuracy printed below is the same on every run.
SEED = 42

# Standardise the features, then fit the neural network. Keeping the scaler
# inside the pipeline means it is re-fitted on the training part of each fold.
scaler = StandardScaler()
clf = MLPClassifier(random_state=SEED)
pipeline = Pipeline(steps = [('scaler', scaler), ('clf', clf)])

# Hyper-parameters to search: the width of the single hidden layer and the
# activation function (6 x 3 = 18 candidate settings).
param_grid = {'clf__hidden_layer_sizes': [10, 20, 30, 40, 50, 60],
             'clf__activation': ['logistic', 'tanh', 'relu']}

# Inner loop: 5-fold grid search selects the best setting.
grid_search = GridSearchCV(pipeline, param_grid, cv=5)

# Outer loop: 5-fold cross-validation of the whole search (nested CV), so each
# score comes from data that was not used to choose the hyper-parameters.
scores = cross_val_score(grid_search, data_X, data_Y, cv=5)
print("Accuracy:", scores.mean()*100)

## Final Model
Fit the neural network classifier on the full data set to create a final model that predicts the party of newly entered records, giving insight into voter preferences, decisions and outlooks.

In [None]:
import pickle

# Fixed seed for the network's random weight initialisation and batch
# shuffling, so re-running this cell produces the same fitted model.
SEED = 42

# Same pipeline as in the evaluation step: standardise, then neural network.
scaler = StandardScaler()
clf = MLPClassifier(random_state=SEED)
pipeline = Pipeline(steps = [('scaler', scaler), ('clf', clf)])

# Same search space as in the evaluation step.
param_grid = {'clf__hidden_layer_sizes': [10, 20, 30, 40, 50, 60],
             'clf__activation': ['logistic', 'tanh', 'relu']}

# Search the grid with 5-fold cross-validation on the full data set.
grid_search = GridSearchCV(pipeline, param_grid, cv=5)
final = grid_search.fit(data_X, data_Y)

# The final model is the fitted grid search itself; `final` is kept as an alias.
final_model = final

# Persist the fitted model. The with-block guarantees the file is flushed and
# closed before any later cell tries to read it back.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(final_model, model_file)

In [None]:
# One respondent to classify, written as a single row of feature values.
# NOTE(review): the values are passed by position, so they must match the
# number and order of the columns the model was trained on (data_X) —
# confirm against data_X.columns.
record = [[1970, 2, 5, 3, 2, 2,
           3, 3, 4, 3, 3, 2,
           2, 2, 1, 3, 1, 2, 1, 2, 1, 2, 1, 1, 1, 43, 1,
           3, 2]]

# Label printed for each party code; any other code is reported as 'Not sure'.
party_labels = {1: 'Democrat',
                2: 'Republican',
                3: 'Independent',
                4: 'Other'}

# Load the model. The with-block closes the file handle once loading is done.
# pickle.load can run arbitrary code, so only load a file this notebook wrote.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

prediction = loaded_model.predict(record)

# predict() returns one entry per input row. Take the single entry explicitly
# rather than testing the whole array in an `if`, which only works when the
# array has exactly one element.
predicted_code = prediction[0]
print(party_labels.get(predicted_code, 'Not sure'))