In [1]:
#Importing libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the forest-fires CSV from the local data directory and preview its first rows.
data_path = "./data/"
data = pd.read_csv(
    data_path + "forestfires.csv",
    sep=',',
)
data.head(5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [3]:
# Show the dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

(517, 13)

In [4]:
# List each column's dtype; `object` columns hold strings and need encoding before modelling.
data.dtypes

X          int64
Y          int64
month     object
day       object
FFMC     float64
DMC      float64
DC       float64
ISI      float64
temp     float64
RH         int64
wind     float64
rain     float64
area     float64
dtype: object

In [5]:
#Encoding Categorical Data
# Ordinal codes for the two text columns: jan..dec -> 1..12 and mon..sun -> 1..7.
MONTH_CODES = {
    name: code
    for code, name in enumerate(
        ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'),
        start=1,
    )
}
DAY_CODES = {
    name: code
    for code, name in enumerate(
        ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'),
        start=1,
    )
}

# Assign the encoded columns back to the frame rather than calling
# `data.month.replace(..., inplace=True)`: an in-place method on a column accessor is
# chained assignment, which warns in pandas 2.x and does not modify `data` at all once
# Copy-on-Write is active.
# `.get(value, value)` passes through anything that is not a known label, so values that
# are already encoded stay as they are and the cell can be re-run safely.
data["month"] = data["month"].map(lambda month: MONTH_CODES.get(month, month))
data["day"] = data["day"].map(lambda day: DAY_CODES.get(day, day))

# The first 12 columns are the predictors; the 13th column (`area`) is the target.
X = data.iloc[:, 0:12].values
y = data.iloc[:, 12].values

array([[ 7. ,  5. ,  3. , ..., 51. ,  6.7,  0. ],
       [ 7. ,  4. , 10. , ..., 33. ,  0.9,  0. ],
       [ 7. ,  4. , 10. , ..., 33. ,  1.3,  0. ],
       ...,
       [ 7. ,  4. ,  8. , ..., 70. ,  6.7,  0. ],
       [ 1. ,  4. ,  8. , ..., 42. ,  4. ,  0. ],
       [ 6. ,  3. , 11. , ..., 31. ,  4.5,  0. ]])

In [6]:
# Standardise every feature to zero mean and unit variance before fitting penalised models.
# NOTE(review): the scaler is fit on all rows before cross-validation, so statistics from the
# validation folds leak into training. Putting scaler + model in a Pipeline would avoid this;
# it is left as-is here because later cells reuse the scaled `X`.
sc = StandardScaler()
X = sc.fit_transform(X)

# Train the model
ridgeR = Ridge()

# Grid-search the ridge penalty over alpha = 0.00, 0.01, ..., 0.99 using repeated 10-fold CV.
# Fixes relative to the earlier version of this cell, which failed on a fresh kernel:
#   * `arange` is reached through the imported `np` namespace,
#   * the estimator handed to the search is `ridgeR` (no `model` was ever defined),
#   * the search is actually fitted, so `search.best_params_` / `search.best_score_` exist.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
grid = dict()
grid['alpha'] = np.arange(0, 1, 0.01)
search = GridSearchCV(ridgeR, grid, scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)
search.fit(X, y)

# Baseline: plain 10-fold CV of the default Ridge model. GridSearchCV works on clones, so
# `ridgeR` is still the untouched default estimator here.
# The scorer returns negated MSE per fold; flip the sign before the square root to get RMSE.
scores = cross_val_score(ridgeR, X, y, scoring="neg_mean_squared_error", cv=10)
rmse_ridge = np.sqrt(-scores)
rmse_ridge

array([ 18.76694494,  14.348571  ,  14.61076237,  10.64319301,
       161.64891464,  18.32219609,  13.23235817,  31.14829684,
       105.32056793,  44.05806137])

In [7]:
# Summarise the per-fold scores left in `scores` by the preceding cross-validation cell.
# The values are negated MSE, so numbers closer to zero are better.
print("Scores:", scores)
print("Mean:", np.mean(scores))
print("Standard deviation:", np.std(scores))

Scores: [  -352.19822254   -205.88148963   -213.47437701   -113.27755746
 -26130.37160333   -335.70286974   -175.09530276   -970.21639585
 -11092.42202812  -1941.11277133]
Mean: -4152.975261776771
Standard deviation: 7990.266194857569


In [8]:


# Lasso regression with alpha = 1, evaluated with 10-fold cross-validation.
lassoR = Lasso(alpha=1)
scores = cross_val_score(
    estimator=lassoR,
    X=X,
    y=y,
    cv=10,
    scoring="neg_mean_squared_error",
)
# The scorer yields negated MSE per fold; negate it back before the root to obtain RMSE.
rmse_lasso = np.sqrt(np.negative(scores))
rmse_lasso

array([ 17.70412289,  13.65773264,  13.742667  ,   9.79033008,
       161.6419245 ,  15.68180125,  13.90736129,  29.47764868,
       105.65526937,  42.81711758])

In [9]:
# Summarise the per-fold scores left in `scores` by the preceding cross-validation cell.
# The values are negated MSE, so numbers closer to zero are better.
print("Scores:", scores)
print("Mean:", np.mean(scores))
print("Standard deviation:", np.std(scores))

Scores: [  -313.43596721   -186.53366074   -188.86089628    -95.85056301
 -26128.111755     -245.91889045   -193.41469812   -868.93177145
 -11163.03594662  -1833.30555769]
Mean: -4121.739970656264
Standard deviation: 8011.217268855091


In [10]:
# Elastic-net regression with alpha = 1, evaluated with 10-fold cross-validation.
elasticR = ElasticNet(alpha=1)
scores = cross_val_score(
    estimator=elasticR,
    X=X,
    y=y,
    cv=10,
    scoring="neg_mean_squared_error",
)
# The scorer yields negated MSE per fold; negate it back before the root to obtain RMSE.
rmse_elastic = np.sqrt(np.negative(scores))
rmse_elastic

array([ 16.45617995,  13.25882451,  13.62181683,   9.36060618,
       161.66085264,  14.76327556,  13.71501122,  28.75943497,
       105.69927954,  42.40366543])

In [11]:
# Summarise the per-fold scores left in `scores` by the preceding cross-validation cell.
# The values are negated MSE, so numbers closer to zero are better.
print("Scores:", scores)
print("Mean:", np.mean(scores))
print("Standard deviation:", np.std(scores))

Scores: [  -270.80585869   -175.79642739   -185.55389379    -87.62094803
 -26134.23127682   -217.95430539   -188.10153273   -827.10509974
 -11172.33769526  -1798.07084164]
Mean: -4105.757787947068
Standard deviation: 8021.178838630566
