In [1]:
# Loading libraries required and reading dataset into python
import pandas as pd
import re
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from scipy.stats import skew
from scipy import stats
from scipy.stats.stats import pearsonr
from scipy.stats import norm
# Modeling
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, mean_squared_log_error
from sklearn.metrics import mean_squared_error as mae,r2_score
from sklearn import linear_model
import sklearn.model_selection as ms
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from mlxtend.regressor import StackingCVRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

In [None]:
df = pd.read_csv('vehicles.csv')
df.columns

In [None]:
drop_columns = ['url','region_url','region','vin', 'lat','long','description','county','image_url']
df = df.drop(columns= drop_columns)

In [None]:
df.isna().sum()

In [None]:
df['price'].describe()

In [None]:
new_df = df[df['price'] > 1000]
price_df = new_df[new_df['price'] <= 150000]
rec_df = price_df[price_df['odometer'] > 1000]
rec_df.shape

In [None]:
rec_df['paint_color'].fillna('unkown',inplace = True)
rec_df['condition'].fillna('unknown',inplace = True)

In [None]:
missing = rec_df.isna().sum()
missing = missing[missing>0]
missing_percent = missing/rec_df.shape[0] * 100
df_missing = pd.DataFrame([missing, missing_percent], index = ['total', 'missing percent']).T
df_missing.sort_values(['missing percent'], ascending = [False])

In [None]:
rec_df = rec_df.dropna()
rec_df.shape

In [None]:
rec_df.dtypes

In [None]:
rec_df['year'] = rec_df['year'].astype(int)
rec_df['odometer'] = rec_df['odometer'].astype(int)

In [None]:
#histogram of SalePrice to see the distribution 
fig, (ax1, ax2) = plt.subplots(1,2,figsize=(14,4))
sns.distplot(rec_df['price'], ax = ax1)
ax1.set_ylabel('Frequency')
ax1.set_title('Car Price Distribution')
#QQ-plot
stats.probplot(rec_df['price'], plot=plt)
plt.show()

In [None]:
rec_df['price_log'] = np.log(rec_df['price'])
#histogram of SalePrice to see the distribution after log transformation
fig, (ax1, ax2) = plt.subplots(1,2,figsize=(14,4))
sns.distplot(rec_df['price_log'], ax = ax1)
(mu, sigma) = norm.fit(rec_df['price_log'])
print( '\n mu = {:.2f} and sigma = {:.2f}\n'.format(mu, sigma))
plt.legend(['Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)],
            loc='best')
ax1.set_ylabel('Frequency')
ax1.set_title('Car Price Distribution')
#QQ-plot
stats.probplot(rec_df['price_log'], plot=plt)
plt.show()

In [None]:
#histogram of the car mileage to see the distribution 
fig, (ax1, ax2) = plt.subplots(1,2,figsize=(14,4))
sns.distplot(rec_df['odometer'], ax = ax1)
ax1.set_xlabel('Mileage')
ax1.set_ylabel('Frequency')
ax1.set_title('Car Mileage Distribution')
#QQ-plot
stats.probplot(rec_df['odometer'], plot=plt)
plt.show()

In [None]:
rec_df['odometer_log'] = np.log(rec_df['odometer']+10)
#histogram of SalePrice to see the distribution after log transformation
fig, (ax1, ax2) = plt.subplots(1,2,figsize=(14,4))
sns.distplot(rec_df['odometer_log'], ax = ax1)
(mu, sigma) = norm.fit(rec_df['odometer_log'])
print( '\n mu = {:.2f} and sigma = {:.2f}\n'.format(mu, sigma))
plt.legend(['Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)],
            loc='best')
ax1.set_xlabel('Mileage')
ax1.set_ylabel('Frequency')
ax1.set_title('Log Car Mileage Distribution')
#QQ-plot
stats.probplot(rec_df['odometer_log'], plot=plt)
plt.show()

In [None]:
# How expensive are the cars? 
print('The average age of the cars is {:,.0f} years old.'.format(2020 - rec_df.year.mean()))
print('The average mileage of the cars is {:,.0f} miles.'.format(rec_df.odometer.mean()))
print('The cheapest car is for ${:,.0f} dollars and the most expensive is for ${:,.0f}.'.format(
    rec_df.price.min(), rec_df.price.max()))
print('The average car price is ${:,.0f}, while median is ${:,.0f}.'.format(
    rec_df.price.mean(), rec_df.price.median()))
print('-' * 30)
rec_df.price.hist(bins=75, rwidth=.8, figsize=(14,4))
plt.title('How expensive are the cars?')
plt.show()

In [None]:
categori = ['object']
categorical = rec_df.select_dtypes(include= categori)
categorical= categorical.drop(columns = ['model','manufacturer','state'],axis =1)

In [None]:
fig, axes = plt.subplots(round(len(categorical.columns) / 4), 3, figsize=(20,15))

for i, ax in enumerate(fig.axes):
    if i < len(categorical.columns):
        ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=45)
        sns.countplot(x=categorical.columns[i], alpha=0.7, data=categorical, ax=ax,palette='Blues_d')

fig.tight_layout()

In [None]:
fig = plt.figure(figsize= (15,5))
plt.subplot(1,2,1)
plt.title('model')
rec_df['manufacturer'].value_counts().sort_values().nlargest(10).plot(kind = 'bar')
plt.xticks(rotation =90)
plt.subplot(1,2,2)
plt.title('model')
rec_df['model'].value_counts().sort_values().nlargest(10).plot(kind = 'bar')

In [None]:
# Box Plot for Categorical Features
f = pd.melt(rec_df, id_vars=['price'], value_vars=sorted(categorical))
g = sns.FacetGrid(f, col='variable', col_wrap=3, sharex=False, sharey=False, size=4)
g = g.map(sns.boxplot, 'value', 'price')
[plt.setp(ax.get_xticklabels(), rotation=90) for ax in g.axes.flat]
g.fig.tight_layout()
plt.show()

In [None]:
rec_df.isna().sum()

In [None]:
rec_df = rec_df.drop(columns=['id','price','odometer'],axis =1)

In [None]:
df_final = rec_df.sample(frac = 0.10)
df_final.shape

In [None]:
#df_final.to_csv('cleaned_cars.csv')

In [2]:
df_final = pd.read_csv('cleaned_cars.csv',index_col=0)
#df_final = df_final.drop(columns = ['id'],axis = 1)
df_final.head()

Unnamed: 0_level_0,year,manufacturer,model,condition,cylinders,fuel,title_status,transmission,drive,size,type,paint_color,state,price_log,odometer_log
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
483253,2007,bmw,x3,excellent,6 cylinders,gas,clean,automatic,4wd,mid-size,SUV,blue,oh,8.682708,11.856586
435863,2015,subaru,outback 2.5i premium,good,4 cylinders,gas,clean,automatic,4wd,mid-size,SUV,silver,or,9.740674,11.377816
130114,2006,nissan,titan,unknown,8 cylinders,gas,clean,automatic,rwd,full-size,pickup,red,fl,9.258654,11.971944
167417,1998,dodge,dakota,good,6 cylinders,gas,clean,automatic,rwd,full-size,truck,red,tx,9.104424,10.767853
383754,2010,ford,f-150,excellent,8 cylinders,gas,clean,automatic,4wd,full-size,truck,silver,mt,9.041922,12.037713


In [3]:
df_final  = pd.get_dummies(df_final, drop_first=True)
df_final.columns

Index(['year', 'price_log', 'odometer_log', 'manufacturer_alfa-romeo', 'manufacturer_audi', 'manufacturer_bmw', 'manufacturer_buick', 'manufacturer_cadillac', 'manufacturer_chevrolet', 'manufacturer_chrysler',
       ...
       'state_sd', 'state_tn', 'state_tx', 'state_ut', 'state_va', 'state_vt', 'state_wa', 'state_wi', 'state_wv', 'state_wy'], dtype='object', length=4666)

In [4]:
def printErrors(Yt,predMod):
    """
    This function takes the following inputs:
    1. Yt = the trained predicted values
    2. predMod = the prediction from the fitted model
    It thens prints the errors for the model ie MAE, MSE, RMSLE
    """
    errors = abs(np.expm1(predMod) - np.expm1(Yt))
    print('Mean Absolute Error (MAE): $', round(np.mean(errors), 2))
    print('Mean Squared Error (MSE):', mean_squared_error(Yt, predMod))
    print('Root Mean Square Error (RMSE):', np.sqrt(mean_squared_error(Yt, predMod)))
    print('Root Mean Square Log Error (RMSLE):', np.sqrt(mean_squared_log_error(Yt, predMod)))
    mape = 100 * (errors / np.expm1(Yt))
    print('Mean Absolute Percent Error (MAPE):', round(np.mean(mape), 2), '%.')

In [5]:
def printMod(Xtr,Xte,Ytr, Yte, modSel, modName):
    """
    This function takes the following inputs:
    1. Xtr,Xte,Ytr,Yte = X train data set, X test data set, Y train array, Y test array
    2. modSel is the fitted model
    3. modName is the model name used for plotting i.e. OLS, Lasso, Random Forest
    It then creates a plot for test vs train predicted prices
    """
    y_train_eNet = modSel.predict(Xtr)
    y_test_eNet = modSel.predict(Xte)
    # Plot predictions
    plt.figure(figsize=(12,8))
    plt.scatter(np.expm1(y_train_eNet), np.expm1(Ytr), c='black', marker="o", s=15, label = "Training data")
    plt.scatter(np.expm1(y_test_eNet), np.expm1(Yte), c='orange', marker='o', s=15, label = "Test data")
    plt.title(modName + " Model Train vs Test", fontsize = 20)
    plt.xlabel("Predicted Prices", fontsize = 16)
    plt.ylabel("Actual Prices", fontsize = 16)
    plt.xlim(0, 800000)
    plt.ylim(0, 800000)
    plt.legend(loc = "upper left")
    plt.plot([0, 800000], [0, 800000], c = "grey")
    plt.show()

In [6]:
def rmse_stack(model):
    """
    Perform kfold cross validation on stack model
    k = 5
    """
    kf = KFold(n_splits = 5, shuffle=True, random_state=20).get_n_splits(X_train)
    rmse= np.sqrt(-cross_val_score(model, np.array(X_train), Y_train, scoring="neg_mean_squared_error", cv = kf))
    return(rmse)

In [7]:
kfo = KFold(n_splits=5,shuffle = True, random_state = 32)

In [8]:
df_final.columns

Index(['year', 'price_log', 'odometer_log', 'manufacturer_alfa-romeo', 'manufacturer_audi', 'manufacturer_bmw', 'manufacturer_buick', 'manufacturer_cadillac', 'manufacturer_chevrolet', 'manufacturer_chrysler',
       ...
       'state_sd', 'state_tn', 'state_tx', 'state_ut', 'state_va', 'state_vt', 'state_wa', 'state_wi', 'state_wv', 'state_wy'], dtype='object', length=4666)

In [11]:
df_final.shape

(21739, 4666)

In [12]:
x = df_final.loc[:,df_final.columns != 'price_log'].copy()
y = df_final['price_log'].reset_index(drop=True)
print('train', x.shape, 'y', y.shape)

train (21739, 4665) y (21739,)


In [14]:
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size= 0.3,random_state = 31)

In [15]:
regex = re.compile(r"\[|\]|<", re.IGNORECASE)

X_train.columns = [regex.sub("_", col) if any(x in str(col) for x in set(('[', ']', '<'))) else col for col in X_train.columns.values]

In [32]:
regex = re.compile(r"\[|\]|<", re.IGNORECASE)

X_test.columns = [regex.sub("_", col) if any(x in str(col) for x in set(('[', ']', '<'))) else col for col in X_test.columns.values]

In [33]:
print('train', X_test.shape, 'y', X_train.shape)

train (6522, 4665) y (15217, 4665)


In [34]:
# Scale Data
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns= X_train.columns)
X_test_scaled = pd.DataFrame(scaler.fit_transform(X_test), columns= X_test.columns)
X_train_scaled.head()

Unnamed: 0,year,odometer_log,manufacturer_alfa-romeo,manufacturer_audi,manufacturer_bmw,manufacturer_buick,manufacturer_cadillac,manufacturer_chevrolet,manufacturer_chrysler,manufacturer_datsun,manufacturer_dodge,manufacturer_ferrari,manufacturer_fiat,manufacturer_ford,manufacturer_gmc,manufacturer_harley-davidson,manufacturer_honda,manufacturer_hyundai,manufacturer_infiniti,manufacturer_jaguar,manufacturer_jeep,manufacturer_kia,manufacturer_land rover,manufacturer_lexus,manufacturer_lincoln,manufacturer_mazda,manufacturer_mercedes-benz,manufacturer_mercury,manufacturer_mini,manufacturer_mitsubishi,manufacturer_nissan,manufacturer_pontiac,manufacturer_ram,manufacturer_rover,manufacturer_saturn,manufacturer_subaru,manufacturer_tesla,manufacturer_toyota,manufacturer_volkswagen,manufacturer_volvo,model_/ braun ♿ van,model_08 sienna ce,model_1 series,model_1 series 128i,model_1 series 135 i,model_1 ton crew cab,model_1-series,model_1.8 quattro turbo sedan,model_1.9 z3,model_1000,model_124 pininfarina spider 2000,model_124 spider,model_124 spider classica,model_128,model_128i,model_128i 1-series coupe,model_128i coupe,model_135i,model_135i convertible,model_15,model_15 crew cab,model_150 reg cab,model_1500,model_1500 4wd,model_1500 4x4,model_1500 4x4 big horn,model_1500 avalanche,model_1500 big horn,model_1500 big horn 4x4,model_1500 big horn 4x4 crew,model_1500 bighorn,model_1500 classic,model_1500 classic big horn,model_1500 conversion van,model_1500 crew cab,model_1500 crew cab 4x4,model_1500 crew cab 4x4 limited,model_1500 crew cab laramie,model_1500 crew cab limited 4x4,model_1500 crew cab lt,model_1500 crew cab v8 4x4,model_1500 crew outdoorsman,model_1500 excab,model_1500 express,model_1500 hemi,model_1500 laramie,model_1500 laramie 4x4,model_1500 laramie 5.7l hemi,model_1500 laramie longhorn,model_1500 lone star,model_1500 long bed truck,model_1500 longhorn,model_1500 lt express van,model_1500 mega cab,model_1500 outdoorsman 4x4,model_1500 quad cab,model_1500 quad cab 2004,model_1500 quad cab 4x4,model_1500 quad cab slt,model_1500 quad hemi big hor,model_1500 quadcab,model_1500 rebel,model_1500 reg cab,model_1500 regular cab 4x4,model_1500 sierra,model_1500 sierra gt,model_1500 sierra sle crew cab,model_1500 sierra slt,model_1500 silverado,model_1500 silverado ltz z71,model_1500 slt,model_1500 slt 4wd truck low mi d,model_1500 slt 4x4,model_1500 slt bighorn 4x4,model_1500 slt hemi,model_1500 slt hemi 5.7l v8,model_1500 slt mega cab,model_1500 slt rwd,model_1500 slt slt 4dr quad,model_1500 sport,model_1500 sport 4x4,model_1500 sport 4x4 short box,model_1500 st,model_1500 st quad cab 4x4,model_1500 std cab short bed,model_1500 suburban,model_1500 tradesman,model_1500 tradesman 4wd,model_1500 truck,model_1500 van,model_1500 yukon denali xl,model_1500 yukon slt,model_1500 yukon xl,model_1600,model_2,model_2 series 228i xdrive,model_2-series,model_2.5i sport wagon awd,model_200,model_200 4dr sdn limited,model_200 limit,model_200 limited,model_200 lx,model_200 s,model_200 touring,model_200-series,model_2000 jetta vr6 5sp stick,model_200c,model_200s,model_230i,model_240 sedan,model_240 sx,model_240 wagon,model_240sx,model_240sx s13,model_245 gl wagon,model_250,model_2500,"model_2500 4wd lonestar, diesel, low miles!!",model_2500 4x4,model_2500 4x4 cummins,model_2500 4x4 gas,model_2500 4x4 laramie diesel,model_2500 4x4 long bed,model_2500 4x4 megacab,model_2500 4x4 truck,model_2500 big horn,model_2500 big horn 2500,model_2500 big horn 4x4,model_2500 bighorn crew cab,model_2500 bighorn slt,model_2500 cc slt 4x4,model_2500 crew 4x4 dmax,model_2500 crewcab,model_2500 cummins,model_2500 diesel,model_2500 diesel 4x4,model_2500 hd,model_2500 hd 4x4 short bed,model_2500 hd crew cab,model_2500 hd crew cab ltz,model_2500 laramie,model_2500 laramie 4x4,model_2500 laramie cummins,model_2500 laramie diesel,model_2500 laramie diesel t,model_2500 laramie longhorn,model_2500 laramie slt,model_2500 lone star edition,model_2500 lt crew 4x4 dmax,model_2500 mega cab,model_2500 mega cab limited long,model_2500 megacab laramie,model_2500 outdoorsman,model_2500 quad cab,model_2500 quad cab slt,model_2500 savana cargo,model_2500 silverado,model_2500 slt,model_2500 slt 4x4,model_2500 slt 4x4 diesel,model_2500 slt crew cummins,"model_2500 slt hemi long bed, clean, low miles!!!",model_2500 slt single cab,model_2500 st,model_2500 st 4d 8’,model_2500 st diesel 4x4,model_2500 suburban,model_2500 super crew,model_2500 tradesman,model_2500 tradesman crew cab 4x4,model_2500 van,model_2500 van lwb,model_2500hd,model_2500hd 4x4,model_2500hd crew cab,model_2500hd crew cab 4x4,model_2500hd denali duramax diesel,model_2500hd duramax,model_2500hd ext cab 4x4,model_2500hd sierra denali duramax,model_2500hd slt 2wd,model_280zx,model_280zx turbo,model_3,model_3 hatchback,model_3 hatchback gt,model_3 i sport,model_3 s,model_3 s touring,model_3 sedan,model_3 series,model_3 series 328i,model_3 series 328i 328 i coupe,model_3 series 328i cabriolet,model_3 series 328i convertible,model_3 series 328i coupe,model_3 series 328i sedan,model_3 series 328i xdrive,model_3 series 328ix,model_3 series 328xi,model_3 series 330i,model_3 series x drive,model_3 speed turbo,model_3 sport,model_3-series,model_3-series 325i sedan,model_3.0 cl,model_3.2 tl,model_3.2tl,...,model_x3 xdrive30i,model_x3 xdrive35i,model_x5,model_x5 3.0,model_x5 3.0i,model_x5 3.0si awd,model_x5 4.8i,model_x5 4.8is,model_x5 sdrive35i,model_x5 xdrive30d,model_x5 xdrive35d,model_x5 xdrive35d awd,model_x5 xdrive35d diesel,model_x5 xdrive35i,model_x5 xdrive35i premium,model_x5 xdrive35i sport activity,model_x6,model_x6 m,model_xc 70 awd,model_xc 90,model_xc 90 turbo new,model_xc-90,model_xc60,model_xc60 3.2,model_xc60 awd,model_xc60 t6 awd,model_xc70,model_xc70 2.5t,model_xc70 awd,model_xc70 cross country awd,model_xc70 t6 awd,model_xc90,model_xc90 2.ol turbo 4wd,model_xc90 3.2,model_xc90 awd,model_xc90 fwd 3 row,model_xc90 t6 awd,model_xc90 v8 sport,model_xf,model_xf awd,model_xf premium,model_xf supercharged,model_xg350,model_xg350l,model_xj,model_xj40 vanden plas,model_xj6,model_xj6 series iii,model_xj6-l,model_xj8,model_xj8 vanden plas,model_xj8lv8-290hp,model_xjl,model_xjl portfolio,model_xjr supercharged,model_xjs,model_xjs convertible,model_xjs v12 convertible,model_xk convertible,model_xk8,model_xke 1.5 series,model_xkr convertible,model_xkr coupe,model_xlr,model_xlt 4x4,model_xlt f-350,model_xt5,model_xterra,"model_xterra ""s""",model_xterra 2wd,model_xterra 4wd,model_xterra 4x4,model_xterra off road,model_xterra s,model_xterra se,model_xterra se 4x4,model_xterra x,model_xterra x sport,model_xterra xe,model_xtrail,model_xts,model_xts 3.6l clean,model_xts base,model_xts luxury,model_xts luxury collection,model_xts4,model_xv crosstrek,model_xv crosstrek 2.0i limited,model_xv crosstrek 2.0i premium,model_xv crosstrek hybrid,model_xv crosstrek limited,model_yaris,model_yaris 4d hatchback,model_yaris hatchback,model_yaris ia,model_yaris sedan,model_yucon xl k1500 slt,model_yukon,model_yukon 1500 slt,model_yukon 4x4,model_yukon delali,model_yukon denali,model_yukon denali 4dr grey,model_yukon denali 4x4,model_yukon denali awd,model_yukon denali xl,model_yukon denali xl awd,model_yukon hybrid,model_yukon sl sl 4dr suv,model_yukon sle,model_yukon sle 1500,model_yukon sle 4wd,model_yukon slt,model_yukon slt 1500,model_yukon slt 4wd,model_yukon slt 4x4,model_yukon xl,model_yukon xl 1500 slt,model_yukon xl 4wd,model_yukon xl denali,model_yukon xl denali awd,model_yukon xl slt,model_yukon xl slt 1500,model_yukon xl slt 2x4,model_yukon xl slt 4wd,model_z,model_z-71 4 x 4 1500 truck,model_z28 camaro convertible,model_z3,model_z3 2.3,model_z3 convertible,model_z3 coupe,model_z3 m roadster,model_z3 roadster,model_z4,model_z4 2.5i,model_z4 3.0 6 speed,model_z4 3.0i,model_z4 3.0isi convertible,model_z4 roadster,model_z71,model_z71 avalanche,model_zephyr,model_zephyr z-7,model_zq-8 ext cab sportside,model_zypher,model_￼ accord vp￼ ￼,condition_fair,condition_good,condition_like new,condition_new,condition_salvage,condition_unknown,cylinders_12 cylinders,cylinders_3 cylinders,cylinders_4 cylinders,cylinders_5 cylinders,cylinders_6 cylinders,cylinders_8 cylinders,cylinders_other,fuel_electric,fuel_gas,fuel_hybrid,fuel_other,title_status_lien,title_status_missing,title_status_parts only,title_status_rebuilt,title_status_salvage,transmission_manual,transmission_other,drive_fwd,drive_rwd,size_full-size,size_mid-size,size_sub-compact,type_bus,type_convertible,type_coupe,type_hatchback,type_mini-van,type_offroad,type_other,type_pickup,type_sedan,type_truck,type_van,type_wagon,paint_color_blue,paint_color_brown,paint_color_custom,paint_color_green,paint_color_grey,paint_color_orange,paint_color_purple,paint_color_red,paint_color_silver,paint_color_unkown,paint_color_white,paint_color_yellow,state_al,state_ar,state_az,state_ca,state_co,state_ct,state_dc,state_de,state_fl,state_ga,state_hi,state_ia,state_id,state_il,state_in,state_ks,state_ky,state_la,state_ma,state_md,state_me,state_mi,state_mn,state_mo,state_ms,state_mt,state_nc,state_nd,state_ne,state_nh,state_nj,state_nm,state_nv,state_ny,state_oh,state_ok,state_or,state_pa,state_ri,state_sc,state_sd,state_tn,state_tx,state_ut,state_va,state_vt,state_wa,state_wi,state_wv,state_wy
0,-0.401944,0.875755,-0.016215,-0.09461,-0.1583,-0.11124,-0.125242,-0.434437,-0.126856,-0.011465,-0.182407,0.0,-0.036277,1.993615,-0.230706,-0.014042,-0.238655,-0.156767,-0.085331,-0.054461,-0.217609,-0.132873,-0.011465,-0.107236,-0.08051,-0.112744,-0.149346,-0.066501,-0.064477,-0.067981,-0.233066,-0.098765,-0.229281,-0.057416,-0.064989,-0.160468,-0.01813,-0.298899,-0.15163,-0.081333,0.0,0.0,-0.014042,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.12739,-0.008107,-0.04216,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.008107,0.0,0.0,-0.008107,-0.01813,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.01813,-0.008107,-0.026896,0.0,-0.01813,-0.008107,-0.014042,0.0,-0.008107,-0.008107,0.0,-0.016215,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.014042,0.0,0.0,-0.014042,-0.037174,-0.008107,-0.008107,-0.01813,-0.01813,-0.011465,-0.024327,0.0,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.102758,-0.008107,-0.034413,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.021453,-0.008107,0.0,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.016215,-0.008107,-0.014042,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,0.0,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.044445,-0.011465,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.01813,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,0.0,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.037174,0.0,-0.008107,-0.011465,-0.016215,...,-0.008107,-0.011465,-0.04216,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.021453,0.0,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.008107,-0.014042,0.0,-0.008107,-0.008107,-0.024327,0.0,-0.011465,0.0,-0.008107,-0.021453,-0.008107,-0.008107,-0.01813,-0.008107,-0.008107,-0.008107,-0.021453,-0.008107,-0.014042,-0.008107,-0.011465,-0.014042,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.014042,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.014042,-0.031412,0.0,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.011465,-0.008107,0.0,-0.008107,0.0,0.0,-0.01813,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.016215,-0.016215,-0.008107,-0.008107,-0.008107,-0.033443,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.056253,-0.011465,-0.008107,-0.008107,-0.035358,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.025644,-0.008107,-0.008107,-0.011465,-0.036277,-0.014042,-0.011465,-0.01813,-0.008107,-0.019861,-0.008107,-0.008107,-0.011465,0.0,0.0,0.0,-0.016215,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.011465,-0.011465,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,0.0,0.0,6.470301,-0.663876,-0.359434,-0.046619,-0.034413,-0.293254,-0.021453,-0.041371,-0.668502,-0.096709,-0.720182,1.430967,-0.04937,-0.036277,0.31994,-0.10078,-0.034413,-0.115112,-0.022935,-0.016215,-0.195103,-0.120558,-0.266064,-0.074056,-0.730628,-0.492451,0.892868,-0.657718,-0.125782,-0.033443,-0.150492,-0.221228,-0.192181,-0.146329,-0.05069,-0.060776,2.745775,-0.579247,-0.462978,-0.188474,-0.144917,-0.335071,-0.172355,-0.178132,-0.171545,-0.361412,-0.070848,-0.052609,-0.329217,-0.408264,-0.18922,1.800629,-0.082148,-0.131587,-0.105339,-0.150492,-0.288068,-0.167234,-0.108173,-0.073606,-0.05799,-0.258624,-0.119714,-0.057416,-0.146563,-0.13236,-0.152309,-0.142296,-0.134148,-0.13236,-0.096362,-0.157426,-0.080096,12.113541,-0.215614,-0.1583,-0.115697,-0.085331,-0.130028,-0.17872,-0.056253,-0.077134,-0.07224,-0.128979,-0.087263,-0.080096,-0.247749,-0.210217,-0.123059,-0.172759,-0.190706,-0.08494,-0.124699,-0.069429,-0.172961,-0.255428,-0.042935,-0.165146,-0.102103,-0.111844,-0.208336,-0.057416,-0.052609
1,0.515841,0.430204,-0.016215,-0.09461,-0.1583,-0.11124,-0.125242,-0.434437,-0.126856,-0.011465,-0.182407,0.0,-0.036277,1.993615,-0.230706,-0.014042,-0.238655,-0.156767,-0.085331,-0.054461,-0.217609,-0.132873,-0.011465,-0.107236,-0.08051,-0.112744,-0.149346,-0.066501,-0.064477,-0.067981,-0.233066,-0.098765,-0.229281,-0.057416,-0.064989,-0.160468,-0.01813,-0.298899,-0.15163,-0.081333,0.0,0.0,-0.014042,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.12739,-0.008107,-0.04216,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.008107,0.0,0.0,-0.008107,-0.01813,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.01813,-0.008107,-0.026896,0.0,-0.01813,-0.008107,-0.014042,0.0,-0.008107,-0.008107,0.0,-0.016215,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.014042,0.0,0.0,-0.014042,-0.037174,-0.008107,-0.008107,-0.01813,-0.01813,-0.011465,-0.024327,0.0,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.102758,-0.008107,-0.034413,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.021453,-0.008107,0.0,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.016215,-0.008107,-0.014042,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,0.0,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.044445,-0.011465,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.01813,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,0.0,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.037174,0.0,-0.008107,-0.011465,-0.016215,...,-0.008107,-0.011465,-0.04216,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.021453,0.0,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.008107,-0.014042,0.0,-0.008107,-0.008107,-0.024327,0.0,-0.011465,0.0,-0.008107,-0.021453,-0.008107,-0.008107,-0.01813,-0.008107,-0.008107,-0.008107,-0.021453,-0.008107,-0.014042,-0.008107,-0.011465,-0.014042,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.014042,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.014042,-0.031412,0.0,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.011465,-0.008107,0.0,-0.008107,0.0,0.0,-0.01813,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.016215,-0.016215,-0.008107,-0.008107,-0.008107,-0.033443,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.056253,-0.011465,-0.008107,-0.008107,-0.035358,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.025644,-0.008107,-0.008107,-0.011465,-0.036277,-0.014042,-0.011465,-0.01813,-0.008107,-0.019861,-0.008107,-0.008107,-0.011465,0.0,0.0,0.0,-0.016215,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.011465,-0.011465,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,0.0,0.0,-0.154552,-0.663876,-0.359434,-0.046619,-0.034413,-0.293254,-0.021453,-0.041371,-0.668502,-0.096709,1.388537,-0.698828,-0.04937,-0.036277,0.31994,-0.10078,-0.034413,-0.115112,-0.022935,-0.016215,-0.195103,-0.120558,-0.266064,-0.074056,-0.730628,-0.492451,0.892868,-0.657718,-0.125782,-0.033443,-0.150492,-0.221228,-0.192181,-0.146329,-0.05069,-0.060776,2.745775,-0.579247,-0.462978,-0.188474,-0.144917,-0.335071,-0.172355,-0.178132,-0.171545,-0.361412,-0.070848,-0.052609,-0.329217,-0.408264,-0.18922,1.800629,-0.082148,-0.131587,-0.105339,-0.150492,-0.288068,-0.167234,-0.108173,-0.073606,-0.05799,-0.258624,-0.119714,-0.057416,-0.146563,-0.13236,-0.152309,-0.142296,-0.134148,-0.13236,-0.096362,-0.157426,-0.080096,-0.082552,-0.215614,-0.1583,-0.115697,-0.085331,-0.130028,-0.17872,-0.056253,-0.077134,-0.07224,-0.128979,-0.087263,-0.080096,-0.247749,-0.210217,-0.123059,5.788408,-0.190706,-0.08494,-0.124699,-0.069429,-0.172961,-0.255428,-0.042935,-0.165146,-0.102103,-0.111844,-0.208336,-0.057416,-0.052609
2,1.302513,-2.824672,-0.016215,-0.09461,-0.1583,-0.11124,-0.125242,2.301831,-0.126856,-0.011465,-0.182407,0.0,-0.036277,-0.501601,-0.230706,-0.014042,-0.238655,-0.156767,-0.085331,-0.054461,-0.217609,-0.132873,-0.011465,-0.107236,-0.08051,-0.112744,-0.149346,-0.066501,-0.064477,-0.067981,-0.233066,-0.098765,-0.229281,-0.057416,-0.064989,-0.160468,-0.01813,-0.298899,-0.15163,-0.081333,0.0,0.0,-0.014042,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.12739,-0.008107,-0.04216,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.008107,0.0,0.0,-0.008107,-0.01813,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.01813,-0.008107,-0.026896,0.0,-0.01813,-0.008107,-0.014042,0.0,-0.008107,-0.008107,0.0,-0.016215,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.014042,0.0,0.0,-0.014042,-0.037174,-0.008107,-0.008107,-0.01813,-0.01813,-0.011465,-0.024327,0.0,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.102758,-0.008107,-0.034413,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.021453,-0.008107,0.0,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.016215,-0.008107,-0.014042,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,0.0,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.044445,-0.011465,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.01813,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,0.0,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.037174,0.0,-0.008107,-0.011465,-0.016215,...,-0.008107,-0.011465,-0.04216,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.021453,0.0,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.008107,-0.014042,0.0,-0.008107,-0.008107,-0.024327,0.0,-0.011465,0.0,-0.008107,-0.021453,-0.008107,-0.008107,-0.01813,-0.008107,-0.008107,-0.008107,-0.021453,-0.008107,-0.014042,-0.008107,-0.011465,-0.014042,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.014042,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.014042,-0.031412,0.0,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.011465,-0.008107,0.0,-0.008107,0.0,0.0,-0.01813,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.016215,-0.016215,-0.008107,-0.008107,-0.008107,-0.033443,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.056253,-0.011465,-0.008107,-0.008107,-0.035358,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.025644,-0.008107,-0.008107,-0.011465,-0.036277,-0.014042,-0.011465,-0.01813,-0.008107,-0.019861,-0.008107,-0.008107,-0.011465,0.0,0.0,0.0,-0.016215,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.011465,-0.011465,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,0.0,0.0,-0.154552,1.506306,-0.359434,-0.046619,-0.034413,-0.293254,-0.021453,-0.041371,-0.668502,-0.096709,-0.720182,1.430967,-0.04937,-0.036277,0.31994,-0.10078,-0.034413,-0.115112,-0.022935,-0.016215,-0.195103,-0.120558,-0.266064,-0.074056,-0.730628,2.030657,-1.119987,1.520409,-0.125782,-0.033443,-0.150492,-0.221228,-0.192181,-0.146329,-0.05069,-0.060776,-0.364196,-0.579247,-0.462978,5.305784,-0.144917,-0.335071,-0.172355,-0.178132,-0.171545,-0.361412,-0.070848,-0.052609,-0.329217,-0.408264,-0.18922,1.800629,-0.082148,-0.131587,-0.105339,-0.150492,-0.288068,-0.167234,-0.108173,-0.073606,-0.05799,-0.258624,-0.119714,-0.057416,-0.146563,-0.13236,-0.152309,-0.142296,-0.134148,-0.13236,-0.096362,-0.157426,-0.080096,-0.082552,-0.215614,-0.1583,-0.115697,-0.085331,-0.130028,-0.17872,-0.056253,-0.077134,-0.07224,-0.128979,-0.087263,-0.080096,4.036341,-0.210217,-0.123059,-0.172759,-0.190706,-0.08494,-0.124699,-0.069429,-0.172961,-0.255428,-0.042935,-0.165146,-0.102103,-0.111844,-0.208336,-0.057416,-0.052609
3,1.302513,-2.29957,-0.016215,-0.09461,-0.1583,-0.11124,-0.125242,2.301831,-0.126856,-0.011465,-0.182407,0.0,-0.036277,-0.501601,-0.230706,-0.014042,-0.238655,-0.156767,-0.085331,-0.054461,-0.217609,-0.132873,-0.011465,-0.107236,-0.08051,-0.112744,-0.149346,-0.066501,-0.064477,-0.067981,-0.233066,-0.098765,-0.229281,-0.057416,-0.064989,-0.160468,-0.01813,-0.298899,-0.15163,-0.081333,0.0,0.0,-0.014042,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.12739,-0.008107,-0.04216,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.008107,0.0,0.0,-0.008107,-0.01813,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.01813,-0.008107,-0.026896,0.0,-0.01813,-0.008107,-0.014042,0.0,-0.008107,-0.008107,0.0,-0.016215,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.014042,0.0,0.0,-0.014042,-0.037174,-0.008107,-0.008107,-0.01813,-0.01813,-0.011465,-0.024327,0.0,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.102758,-0.008107,-0.034413,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.021453,-0.008107,0.0,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.016215,-0.008107,-0.014042,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,0.0,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.044445,-0.011465,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.01813,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,0.0,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.037174,0.0,-0.008107,-0.011465,-0.016215,...,-0.008107,-0.011465,-0.04216,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.021453,0.0,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.008107,-0.014042,0.0,-0.008107,-0.008107,-0.024327,0.0,-0.011465,0.0,-0.008107,-0.021453,-0.008107,-0.008107,-0.01813,-0.008107,-0.008107,-0.008107,-0.021453,-0.008107,-0.014042,-0.008107,-0.011465,-0.014042,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.014042,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.014042,-0.031412,0.0,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.011465,-0.008107,0.0,-0.008107,0.0,0.0,-0.01813,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.016215,-0.016215,-0.008107,-0.008107,-0.008107,-0.033443,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.056253,-0.011465,-0.008107,-0.008107,-0.035358,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.025644,-0.008107,-0.008107,-0.011465,-0.036277,-0.014042,-0.011465,-0.01813,-0.008107,-0.019861,-0.008107,-0.008107,-0.011465,0.0,0.0,0.0,-0.016215,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.011465,-0.011465,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,0.0,0.0,-0.154552,-0.663876,-0.359434,-0.046619,-0.034413,3.410017,-0.021453,-0.041371,-0.668502,-0.096709,1.388537,-0.698828,-0.04937,-0.036277,0.31994,-0.10078,-0.034413,-0.115112,-0.022935,-0.016215,-0.195103,-0.120558,-0.266064,-0.074056,1.368686,-0.492451,-1.119987,1.520409,-0.125782,-0.033443,-0.150492,-0.221228,-0.192181,-0.146329,-0.05069,-0.060776,-0.364196,1.726378,-0.462978,-0.188474,-0.144917,-0.335071,-0.172355,-0.178132,-0.171545,2.766922,-0.070848,-0.052609,-0.329217,-0.408264,-0.18922,-0.555362,-0.082148,-0.131587,-0.105339,-0.150492,-0.288068,-0.167234,-0.108173,-0.073606,-0.05799,-0.258624,-0.119714,-0.057416,-0.146563,-0.13236,-0.152309,-0.142296,-0.134148,-0.13236,-0.096362,-0.157426,-0.080096,-0.082552,-0.215614,-0.1583,-0.115697,-0.085331,-0.130028,-0.17872,-0.056253,-0.077134,-0.07224,-0.128979,-0.087263,-0.080096,-0.247749,-0.210217,8.126207,-0.172759,-0.190706,-0.08494,-0.124699,-0.069429,-0.172961,-0.255428,-0.042935,-0.165146,-0.102103,-0.111844,-0.208336,-0.057416,-0.052609
4,0.909177,-1.058292,-0.016215,-0.09461,-0.1583,-0.11124,-0.125242,-0.434437,-0.126856,-0.011465,-0.182407,0.0,-0.036277,-0.501601,-0.230706,-0.014042,4.190145,-0.156767,-0.085331,-0.054461,-0.217609,-0.132873,-0.011465,-0.107236,-0.08051,-0.112744,-0.149346,-0.066501,-0.064477,-0.067981,-0.233066,-0.098765,-0.229281,-0.057416,-0.064989,-0.160468,-0.01813,-0.298899,-0.15163,-0.081333,0.0,0.0,-0.014042,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.12739,-0.008107,-0.04216,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.008107,0.0,0.0,-0.008107,-0.01813,-0.014042,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.01813,-0.008107,-0.026896,0.0,-0.01813,-0.008107,-0.014042,0.0,-0.008107,-0.008107,0.0,-0.016215,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.011465,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,0.0,0.0,-0.014042,0.0,0.0,-0.014042,-0.037174,-0.008107,-0.008107,-0.01813,-0.01813,-0.011465,-0.024327,0.0,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,0.0,-0.102758,-0.008107,-0.034413,0.0,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.021453,-0.008107,0.0,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.008107,-0.016215,-0.008107,-0.014042,-0.008107,-0.016215,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,0.0,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,0.0,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.044445,-0.011465,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.01813,-0.011465,-0.008107,-0.008107,-0.008107,-0.011465,0.0,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.037174,0.0,-0.008107,-0.011465,-0.016215,...,-0.008107,-0.011465,-0.04216,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,0.0,-0.008107,-0.014042,-0.008107,0.0,-0.021453,0.0,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.008107,-0.014042,0.0,-0.008107,-0.008107,-0.024327,0.0,-0.011465,0.0,-0.008107,-0.021453,-0.008107,-0.008107,-0.01813,-0.008107,-0.008107,-0.008107,-0.021453,-0.008107,-0.014042,-0.008107,-0.011465,-0.014042,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,-0.014042,-0.008107,0.0,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,0.0,-0.008107,-0.008107,-0.014042,-0.031412,0.0,-0.008107,-0.008107,-0.016215,-0.008107,0.0,-0.011465,-0.008107,0.0,-0.008107,0.0,0.0,-0.01813,-0.008107,0.0,-0.014042,-0.008107,-0.008107,-0.016215,-0.016215,-0.008107,-0.008107,-0.008107,-0.033443,-0.008107,-0.008107,-0.008107,-0.016215,-0.008107,-0.056253,-0.011465,-0.008107,-0.008107,-0.035358,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.008107,0.0,-0.008107,-0.008107,-0.008107,-0.025644,-0.008107,-0.008107,-0.011465,-0.036277,-0.014042,-0.011465,-0.01813,-0.008107,-0.019861,-0.008107,-0.008107,-0.011465,0.0,0.0,0.0,-0.016215,-0.008107,-0.008107,-0.008107,-0.011465,-0.008107,-0.011465,-0.011465,-0.008107,-0.011465,-0.008107,-0.008107,-0.008107,-0.008107,-0.014042,-0.008107,-0.008107,0.0,0.0,-0.154552,-0.663876,2.782154,-0.046619,-0.034413,-0.293254,-0.021453,-0.041371,-0.668502,-0.096709,1.388537,-0.698828,-0.04937,-0.036277,0.31994,-0.10078,-0.034413,-0.115112,-0.022935,-0.016215,-0.195103,-0.120558,-0.266064,-0.074056,-0.730628,-0.492451,0.892868,-0.657718,-0.125782,-0.033443,-0.150492,-0.221228,-0.192181,-0.146329,-0.05069,-0.060776,-0.364196,-0.579247,2.159931,-0.188474,-0.144917,-0.335071,-0.172355,-0.178132,-0.171545,-0.361412,-0.070848,-0.052609,-0.329217,-0.408264,-0.18922,-0.555362,-0.082148,-0.131587,-0.105339,-0.150492,-0.288068,-0.167234,-0.108173,-0.073606,-0.05799,-0.258624,-0.119714,-0.057416,-0.146563,-0.13236,-0.152309,7.027618,-0.134148,-0.13236,-0.096362,-0.157426,-0.080096,-0.082552,-0.215614,-0.1583,-0.115697,-0.085331,-0.130028,-0.17872,-0.056253,-0.077134,-0.07224,-0.128979,-0.087263,-0.080096,-0.247749,-0.210217,-0.123059,-0.172759,-0.190706,-0.08494,-0.124699,-0.069429,-0.172961,-0.255428,-0.042935,-0.165146,-0.102103,-0.111844,-0.208336,-0.057416,-0.052609


In [18]:
lasMod = linear_model.Lasso(normalize= True)
lasMod = lasMod.set_params(random_state = 28, tol = 0.0001)
lasMod.fit(X_train,y_train) 

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000, normalize=True,
      positive=False, precompute=False, random_state=28, selection='cyclic',
      tol=0.0001, warm_start=False)

In [19]:
lasso_pred = lasMod.predict(X_train_scaled)
printErrors(y_train,lasso_pred)

Mean Absolute Error (MAE): $ 6849.49
Mean Squared Error (MSE): 0.5996604049518053
Root Mean Square Error (RMSE): 0.7743774305542519
Root Mean Square Log Error (RMSLE): 0.07732621029810119
Mean Absolute Percent Error (MAPE): 77.41 %.


In [37]:
alphaRange = 100**np.linspace(10,-2,1000)*0.5
param_grid_lasso = {"max_iter": [500, 1000, 1500],
                     "alpha": alphaRange}

In [40]:
def rmse_cv(model):
    kf = KFold(n_splits = 5, shuffle=True, random_state=42).get_n_splits(X_train_scaled)
    rmse= np.sqrt(-cross_val_score(model, X_train_scaled, y_train, scoring="neg_mean_squared_error", cv = kf))
    return(rmse)

In [41]:
rmse_cv(lasMod)

array([0.7648667 , 0.78611985, 0.78092966, 0.75748503, 0.78235214])

In [42]:
grid_search_lasso = ms.GridSearchCV(lasMod, param_grid_lasso, scoring= 'neg_mean_squared_error', 
                                    cv= kfo, n_jobs=-1, return_train_score = True)

In [None]:
%time grid_search_lasso.fit(X_train_scaled, y_train)

In [None]:
grid_search_lasso.best_params_

In [None]:
grid_lasso = grid_search_lasso.best_estimator_

In [None]:
print("The train set R^2 is: %.5f" % grid_lasso.score(X_train_scaled, y_train))
print("The test set R^2 is is: %.5f" % grid_lasso.score(X_test_scaled, y_test))

In [20]:
# Elastic Net
eNet = linear_model.ElasticNet(normalize=True)
eNet = eNet.set_params(random_state = 42)

#fit baseline elastic net model on Train df
eNet.fit(X_train_scaled,y_train)

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=True, positive=False, precompute=False,
           random_state=42, selection='cyclic', tol=0.0001, warm_start=False)

In [21]:
eNet_pred = eNet.predict(X_test_scaled)
printErrors(y_test,eNet_pred)

Mean Absolute Error (MAE): $ 6929.05
Mean Squared Error (MSE): 0.5974246542353362
Root Mean Square Error (RMSE): 0.7729325030268401
Root Mean Square Log Error (RMSLE): 0.07701917033008812
Mean Absolute Percent Error (MAPE): 76.59 %.


In [22]:
rforest = RandomForestRegressor()
rforest.set_params(random_state=52,n_estimators=10,)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=52, verbose=0, warm_start=False)

In [23]:
#fit model on train data set
rforest.fit(X_train_scaled, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=52, verbose=0, warm_start=False)

In [24]:
#find R2 of model on train and test data sets
print("The train set R^2 is: %.5f" % rforest.score(X_train_scaled, y_train))
print("The test set R^2 is is: %.5f" % rforest.score(X_test_scaled, y_test))

The train set R^2 is: 0.96382
The test set R^2 is is: 0.79700


In [25]:
rforest_pred = rforest.predict(X_test_scaled)
printErrors(y_test, rforest_pred)

Mean Absolute Error (MAE): $ 2553.41
Mean Squared Error (MSE): 0.12127145746860918
Root Mean Square Error (RMSE): 0.3482405166958738
Root Mean Square Log Error (RMSLE): 0.03533063582144988
Mean Absolute Percent Error (MAPE): 25.93 %.


In [26]:
#initialize xgboost
xgb = XGBRegressor()
xgb.set_params(random_state=22,objective='reg:squarederror')

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:squarederror',
             random_state=22, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)

In [27]:
xgb.fit(X_train_scaled, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:squarederror',
             random_state=22, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)

In [35]:
print("Train R^2 is: %.5f" % xgb.score(X_train_scaled, y_train))
print("Test set R^2 is is: %.5f" % xgb.score(X_test_scaled, y_test))

Train R^2 is: 0.80201
Test set R^2 is is: 0.79354


In [36]:
xgb_pred = xgb.predict(X_test_scaled)
printErrors(y_test, xgb_pred)

Mean Absolute Error (MAE): $ 2765.84
Mean Squared Error (MSE): 0.12334021596824704
Root Mean Square Error (RMSE): 0.3511982573536592
Root Mean Square Log Error (RMSLE): 0.03555276618721775
Mean Absolute Percent Error (MAPE): 27.15 %.


In [None]:
alphaRange = 100**np.linspace(10,-2,1000)*0.5
param_grid_lasso = {"max_iter": [500, 1000, 1500],
                     "alpha": alphaRange}

In [None]:
def rmse_cv(model):
    kf = KFold(n_splits = 5, shuffle=True, random_state=42).get_n_splits(X_train)
    rmse= np.sqrt(-cross_val_score(model, X_train, y_train, scoring="neg_mean_squared_error", cv = kf))
    return(rmse)

In [None]:
rmse_cv(lasMod)

In [None]:
grid_search_lasso = ms.GridSearchCV(lasMod, param_grid_lasso, scoring= 'neg_mean_squared_error', 
                                    cv= kfo, n_jobs=-1, return_train_score = True)

In [None]:
%time grid_search_lasso.fit(X_train, y_train)

In [None]:
grid_search_lasso.best_params_

In [None]:
grid_lasso = grid_search_lasso.best_estimator_

In [None]:
print("The train set R^2 is: %.5f" % grid_lasso.score(X_train, y_train))
print("The test set R^2 is is: %.5f" % grid_lasso.score(X_test, y_test))

In [None]:
#Prediction with tuned hyperparameters
grid_lasso_pred = grid_lasso.predict(X_test)
printErrors(y_test,grid_lasso_pred)

In [None]:
printMod(X_train,X_test,y_train,y_test,grid_lasso,"Lasso")

In [None]:
X_train.columns

In [None]:
coeff = pd.DataFrame({"feature":X_train.columns,"coeff":grid_lasso.coef_})
coeff = coeff.sort_values(by = 'coeff', ascending = False)

print(coeff[coeff.coeff!=0].shape)
mod_=coeff[coeff.coeff!=0]
mod_