# Including states as unscaled dummy variables

In [33]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import explained_variance_score,mean_absolute_error
from sklearn.model_selection import train_test_split

print(os.getcwd())

df = pd.read_csv('../data/step3_output.csv')
df.drop(df.columns[[0,1]],axis=1, inplace=True)

outputColumns = ['AdultWeekend','AdultWeekday','projectedDaysOpen', 'daysOpenLastYear',]
unusedColumns = ['Name', 'summit_elev','base_elev','kmeans',  ]
dummyColumns = ['state']
dummies = pd.get_dummies(df[dummyColumns], columns=dummyColumns)

df = pd.concat([df, dummies], axis=1)
df.drop(columns=dummyColumns, inplace=True)

Input = df.drop( unusedColumns + outputColumns , axis=1)
Output = df[outputColumns]

X_train, X_test, y_train, y_test = train_test_split(Input, Output, test_size=0.25, random_state=25)

from sklearn.compose import ColumnTransformer

ct = ColumnTransformer([
        ('somename', StandardScaler(), dummies.columns)
    ], remainder='passthrough')

steps = [
    ('scale', ct),
    ('estimator', LinearRegression())
]
model3 = Pipeline(steps)
model3.fit(Input, Output)
y_pred = model3.predict(X_test)

print('Interecpt:' , model3.named_steps['estimator'].intercept_, '\n')
coefficients = dict(zip(X_test.columns.tolist(), model3.named_steps['estimator'].coef_[0]))
for key, value in sorted(coefficients.items(),key=lambda item: abs(item[1]), reverse = True):
    print ('%s : %s' % (key, round(coefficients[key],3)))

model_3_scores = (explained_variance_score(y_test,y_pred,multioutput='raw_values')[0], mean_absolute_error(y_test,y_pred, multioutput='raw_values')[0])

C:\Users\jeffk\OneDrive\Springboard\GuidedCapstone\notebooks
Interecpt: [38.11736904 32.72518073 81.95423158 76.35319022] 

state_New Mexico : -35.494
yearsOpen : -4.18
quad : -2.948
state_New York : -2.74
state_Illinois : 2.142
NightSkiing_ac : 2.133
state_Minnesota : -2.115
state_New Jersey : -2.108
averageSnowfall : 2.101
state_Idaho : -2.095
state_North Carolina : 1.924
state_Ohio : 1.902
state_Rhode Island : -1.801
state_Oregon : 1.759
state_Michigan : 1.728
state_Colorado : 1.686
state_Nevada : -1.66
state_Alaska : 1.496
state_Missouri : 1.485
TerrainParks : 1.348
state_Indiana : 1.151
vertical_drop : -1.069
state_Montana : 1.046
state_Connecticut : -0.964
Runs : 0.95
fastQuads : 0.871
state_Maine : 0.845
state_Arizona : -0.793
fastSixes : -0.784
state_Massachusetts : 0.743
state_Pennsylvania : 0.718
state_Iowa : -0.695
surface : -0.586
triple : 0.543
state_South Dakota : 0.522
state_Maryland : 0.401
trams : 0.355
SkiableTerrain_ac : -0.352
state_California : 0.331
double : -0.27