In [48]:
# System Modules
import os
import sys
import datetime as dt
sys.path.append('../')

# Data Science Modules
import numpy as np
import pandas as pd
from plotnine import *  # Polluting name space sorry!!
import matplotlib.pyplot as plt
%matplotlib inline

# SQL Modules
import sqlalchemy as sa
from sqlalchemy import create_engine
from utilities import sql_utils as su

# Deep Learning Modules
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
# from keras.preprocessing.text import Tokenizer

# from utilities import model_eval_utils as meu

DWH = os.getenv('CD_DWH')
engine = create_engine(DWH)

pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [49]:
QUERY = su.get_sql_as_string('../notebooks/sql/campaign_finance_and_election_results_VN')
with engine.begin() as conn:
    df = pd.read_sql(QUERY, conn)
#     df.set_index('candidate_name', inplace=True)

In [50]:
df

Unnamed: 0,candidate_name,total_transaction,recipient_candidate_office,is_winner
0,larryaceves,468831.9,Superintendent of Public Instruction,0
1,danteacosta,47600.0,Other,1
2,danteacosta,1168239.91,State Assembly,1
3,richardalarcon,54881.17,Other,0
4,richardalarcon,1374456.08,State Assembly,0
5,richardalarcon,629978.18,State Senate,0
6,travisallen,73050.0,Governor,1
7,travisallen,150000.0,Other,1
8,travisallen,1726041.36,State Assembly,1
9,peteramundson,19925.0,State Assembly,0


In [51]:
df.shape

(293, 4)

# Data Preparation

In [52]:
df.groupby('recipient_candidate_office').count()['candidate_name']

recipient_candidate_office
Attorney General                          6
Board of Equalization                     6
Board of Supervisors                      2
City Council Member                       1
Governor                                  7
Insurance Commissioner                    3
Lieutenant Governor                       8
Other                                    70
Secretary of State                        4
State Assembly                          135
State Controller                          6
State Senate                             37
State Treasurer                           4
Superintendent of Public Instruction      4
Name: candidate_name, dtype: int64

In [53]:
filtered_candidates = df[(df['recipient_candidate_office'] == ('State Senate')) | (df['recipient_candidate_office'] == ('State Assembly')) | (df['recipient_candidate_office'] == ('Other'))]
filtered_candidates.shape
# processed_data = pd.get_dummies(data, columns=['rank'])

(242, 4)

In [54]:
# One-hot encode recipient candidate office
filtered_candidates = pd.get_dummies(filtered_candidates, columns=['recipient_candidate_office'])

In [55]:
filtered_candidates.head(10)

Unnamed: 0,candidate_name,total_transaction,is_winner,recipient_candidate_office_Other,recipient_candidate_office_State Assembly,recipient_candidate_office_State Senate
1,danteacosta,47600.0,1,1,0,0
2,danteacosta,1168239.91,1,0,1,0
3,richardalarcon,54881.17,0,1,0,0
4,richardalarcon,1374456.08,0,0,1,0
5,richardalarcon,629978.18,0,0,0,1
7,travisallen,150000.0,1,1,0,0
8,travisallen,1726041.36,1,0,1,0
9,peteramundson,19925.0,0,0,1,0
10,michaeldantonovich,1822447.13,0,0,0,1
11,joaquinarambula,57114.3,1,1,0,0


In [65]:
# Split up data into inputs (X) and outputs (Y)
X = np.array(filtered_candidates)[:, (1, 3, 4, 5)]
# X = np.array(filtered_candidates)[:, 1]
X.astype('float32')
y = keras.utils.to_categorical(filtered_candidates["is_winner"],2)

print("Shape of X:", X.shape)
print("\nShape of y:", y.shape)
print("\nFirst 5 rows of X")
print(X[:5])
print("\nFirst 5 rows of y")
print(y[:5])

Shape of X: (242, 4)

Shape of y: (242, 2)

First 5 rows of X
[[47600.0 1 0 0]
 [1168239.91 0 1 0]
 [54881.17 1 0 0]
 [1374456.08 0 1 0]
 [629978.18 0 0 1]]

First 5 rows of y
[[ 0.  1.]
 [ 0.  1.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]]


In [66]:
#  Split up into training and testing sets
(X_train, X_test) = X[50:], X[:50]
(y_train, y_test) = y[50:], y[:50]

# Shape of training set (need for model input shape)
print('x_train shape:', X_train.shape)

# Number of training and test samples
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

x_train shape: (192, 4)
192 train samples
50 test samples


# Model Architecture

In [87]:
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(4,)))
model.add(Dropout(.3))
model.add(Dense(128, activation='relu'))
model.add(Dropout(.1))
model.add(Dense(2, activation='softmax'))

# Compiling the model
model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_49 (Dense)             (None, 256)               1280      
_________________________________________________________________
dropout_33 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_50 (Dense)             (None, 128)               32896     
_________________________________________________________________
dropout_34 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_51 (Dense)             (None, 2)                 258       
Total params: 34,434
Trainable params: 34,434
Non-trainable params: 0
_________________________________________________________________


In [88]:
model.fit(X_train, y_train, epochs=10, batch_size=30, verbose=0)

<keras.callbacks.History at 0x1c1da12e80>

In [89]:
score = model.evaluate(X_train, y_train)
print("\n Training Accuracy:", score[1])
score = model.evaluate(X_test, y_test)
print("\n Testing Accuracy:", score[1])


 Training Accuracy: 0.713541666667

 Testing Accuracy: 0.660000001192


Future Steps:
- Add candidate's party status
- Incorporate candidate's incumbency status
- Re-run analysis and represent candidate's total donation amount as percentage of overall money spent in the race