In [24]:
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
import statsmodels.formula.api as sm
from sklearn import linear_model
from sklearn import metrics
import pandas as pd
import numpy as np
import os

In [25]:
# Load the merged training table from ./data relative to the working directory.
# os.path.join is used instead of a hand-written '\data\merged_train.csv'
# literal: that literal only worked on Windows and relied on the invalid
# escape sequences '\d' and '\m', which newer Python versions warn about.
data = pd.read_csv(os.path.join(os.getcwd(), 'data', 'merged_train.csv'))

**Choose Columns**

In [26]:
# Positional indices of the feature columns; they are passed to DataFrame.iloc
# when the train/test splits are built.
# To use every column from position 3 through 15 instead, swap in:
#   columns = np.array([i for i in range(3, 16)], dtype=np.intp)
columns = np.asarray(
    (3, 4, 5, 6, 7, 10, 11, 12, 14, 15),
    dtype=np.intp,
)

# Number of selected predictors; evaluate() uses it for adjusted R-squared.
k = columns.shape[0]

**Model Evaluation**

In [27]:
def evaluate(y_test, y_pred, n_features=None):
    """Summarise regression quality as a printable, multi-line report.

    Parameters
    ----------
    y_test : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_test``.
    n_features : int, optional
        Number of predictors the model was trained on, needed for the
        adjusted R-squared. When omitted, the notebook-level ``k`` is used,
        so existing ``evaluate(y_test, y_pred)`` calls keep working.

    Returns
    -------
    str
        Four newline-separated lines: root mean square error, mean absolute
        error, R-squared and adjusted R-squared.
    """
    if n_features is None:
        # Backward-compatible fallback to the global set in the column-selection cell.
        n_features = k

    rmse = metrics.mean_squared_error(y_test, y_pred) ** 0.5
    mae = metrics.mean_absolute_error(y_test, y_pred)
    r2 = metrics.r2_score(y_test, y_pred)

    n = len(y_test)
    residual_dof = n - n_features - 1
    if residual_dof > 0:
        adjusted_r2 = 1 - ((1 - r2) * (n - 1) / residual_dof)
    else:
        # Adjusted R-squared is undefined without positive residual degrees
        # of freedom; report NaN rather than dividing by zero or returning a
        # meaningless number.
        adjusted_r2 = float('nan')

    # `report` rather than `eval`, which would shadow the builtin.
    report = [
        'Root Mean Square Error: {}'.format(rmse),
        'Mean Absolute Error: {}'.format(mae),
        'R-Squared: {}'.format(r2),
        'Adjusted R-Squared: {}'.format(adjusted_r2),
    ]
    return '\n'.join(report)

**Training Model**

In [28]:
def train_model(x, y):
    """Fit a standardise-then-LassoCV pipeline on ``x`` and ``y``.

    The pipeline has two steps: ``'scalar'`` (a ``StandardScaler``) followed
    by ``'clf'`` (a ``LassoCV`` constructed with ``cv=3``).

    Returns the fitted ``Pipeline``.
    """
    scaling_step = ('scalar', StandardScaler())
    lasso_step = ('clf', linear_model.LassoCV(cv=3))
    pipeline = Pipeline([scaling_step, lasso_step])
    # Pipeline.fit returns the pipeline itself, so the fitted object is returned.
    return pipeline.fit(x, y)

**Democratic Votes**

In [29]:
# Train on 75% of the rows and evaluate on the remainder, with the
# 'Democratic' column as the target.
# random_state pins the shuffle so that a Restart & Run All reproduces the
# same split and therefore the same metrics; without it every run reported
# different numbers.
x_train, x_test, y_train, y_test = train_test_split(
    data.iloc[:, columns],
    data['Democratic'],
    train_size=0.75,
    random_state=42,
)
model = train_model(x_train, y_train)
y_pred = model.predict(x_test)
print(evaluate(y_test, y_pred))

Root Mean Square Error: 13703.07222250221
Mean Absolute Error: 7478.2924258083785
R-Squared: 0.9502101749818938
Adjusted R-Squared: 0.9484813616132095


**Republican Votes**

In [30]:
# Train on 75% of the rows and evaluate on the remainder, with the
# 'Republican' column as the target.
# random_state pins the shuffle so that a Restart & Run All reproduces the
# same split and therefore the same metrics; without it every run reported
# different numbers.
x_train, x_test, y_train, y_test = train_test_split(
    data.iloc[:, columns],
    data['Republican'],
    train_size=0.75,
    random_state=42,
)
model = train_model(x_train, y_train)
y_pred = model.predict(x_test)
print(evaluate(y_test, y_pred))

Root Mean Square Error: 17535.825593461173
Mean Absolute Error: 7269.7053375883315
R-Squared: 0.8843426330830064
Adjusted R-Squared: 0.8803267522872775
