# Forest Fires ML Regression Project

In this notebook we will use tree-based regression models and split the data into training and testing sets. For more details about the dataset and the previous models, see [Notebook-01-Linear-Regression](http://localhost:8888/notebooks/Notebook-01-Linear-Regression.ipynb).

In [1]:
# importing frameworks and libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.feature_selection import RFE
from sklearn.model_selection import cross_val_score

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.svm import LinearSVR
from sklearn.svm import SVR

# importing tools and functions
import os
import IPython.display
import scoringfn

# setting notebook for visualization
%matplotlib inline
# Default figure size and base font size for every plot in this notebook.
plt.rcParams.update({'figure.figsize': (7, 5), 'font.size': 12})

In [2]:
# Load the processed data produced in notebook 1 and discard the
# 'Unnamed: 0' column that comes along with the CSV.
data = pd.read_csv('Data/forestfires_processed.csv').drop(columns = ['Unnamed: 0'])
display(data.head())

Unnamed: 0,X,Y,sin(month),sin(day),log(FFMC),DMC,log(DC),log(ISI),temp,log(RH),wind,log(rain),log(area)
0,7,5,0.14112,-0.958924,4.468204,26.2,4.55703,1.808289,8.2,3.951244,6.7,0.0,0.0
1,7,4,-0.544021,0.909297,4.517431,35.4,6.507427,2.04122,18.0,3.526361,0.9,0.0,0.0
2,7,4,-0.544021,-0.279415,4.517431,43.7,6.533643,2.04122,14.6,3.526361,1.3,0.0,0.0
3,8,6,0.14112,-0.958924,4.529368,33.3,4.363099,2.302585,8.3,4.584967,4.0,0.182322,0.0
4,8,6,0.14112,0.656987,4.503137,51.3,4.636669,2.360854,11.4,4.60517,1.8,0.0,0.0


In [3]:
class Evaluate_Model():
    """Score one regressor on a frame whose target column is 'log(area)'.

    Every column other than 'log(area)' is used as a feature. On construction
    the frame is split into train and test partitions with
    ``train_test_split`` (default split size, ``random_state = 42``).

    Parameters
    ----------
    model : estimator
        Object handed to ``cross_val_score`` and used through ``fit`` and
        ``predict``.
    data : pandas.DataFrame
        Frame containing the features and a 'log(area)' column.
    """

    def __init__(self, model, data):
        self.model = model
        self.data = data
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            data.drop(['log(area)'], axis = 1), data['log(area)'], random_state = 42)

    def cross_validate_model(self, cv):
        """Cross-validate ``self.model`` on the full frame and print a summary.

        Parameters
        ----------
        cv : int or cross-validation splitter
            Forwarded unchanged to ``cross_val_score``.

        Prints the maximum, mean and minimum of the returned scores. No
        ``scoring`` argument is passed, so the estimator's default scorer is
        used; the printed labels call it R2.
        """
        # Use the estimator stored on the instance, not whatever global happens
        # to be called `model` in the notebook namespace.
        scores = cross_val_score(
            self.model, self.data.drop(columns = ['log(area)']), self.data['log(area)'], cv = cv)
        print(f'max R2 score: {np.max(scores): .6f}')
        print(f'mean R2 score: {np.mean(scores): .6f}')
        print(f'min R2 score: {np.min(scores): .6f}')

    def evaluate_model(self):
        """Fit ``self.model`` on the training split and print train/test R2.

        The estimator is fitted in place, so ``self.model`` is a fitted
        estimator after this call.
        """
        self.model.fit(self.X_train, self.y_train)
        # Predict with the estimator that was just fitted (self.model) rather
        # than a same-named global, so the scores always belong to this instance.
        train_preds = self.model.predict(self.X_train)
        test_preds = self.model.predict(self.X_test)
        print(f'R2 score on training set: {r2_score(self.y_train, train_preds): .6f}')
        print(f'R2 score on  testing set: {r2_score(self.y_test, test_preds): .6f}')

In [4]:
# Candidate regressors: four linear models followed by four tree-based models.
# The tree-based estimators are randomized, so each one gets a fixed seed;
# without it the scores change on every Restart & Run All.
SEED = 42
models = [LinearRegression(), Ridge(), Lasso(), ElasticNet()]
models += [
    DecisionTreeRegressor(random_state = SEED),
    ExtraTreeRegressor(random_state = SEED),
    RandomForestRegressor(random_state = SEED),
    ExtraTreesRegressor(random_state = SEED),
]

# Score every candidate with cross-validation (cv = 10) on `data`.
print('Cross Validating Models\n')
for model in models:
    print(type(model).__name__)
    modeling = Evaluate_Model(model, data)
    modeling.cross_validate_model(10)
    print()

Cross Validating Models

LinearRegression
max R2 score:  0.015761
mean R2 score: -1.453119
min R2 score: -10.373110

Ridge
max R2 score:  0.012892
mean R2 score: -1.415300
min R2 score: -10.209018

Lasso
max R2 score:  0.005888
mean R2 score: -1.178479
min R2 score: -8.452951

ElasticNet
max R2 score:  0.006450
mean R2 score: -1.172052
min R2 score: -8.365306

DecisionTreeRegressor
max R2 score:  0.000000
mean R2 score: -5.249194
min R2 score: -33.624174

ExtraTreeRegressor
max R2 score:  0.000000
mean R2 score: -3.798175
min R2 score: -20.051178

RandomForestRegressor
max R2 score:  0.000000
mean R2 score: -1.769958
min R2 score: -11.589711

ExtraTreesRegressor
max R2 score:  0.000000
mean R2 score: -2.049878
min R2 score: -14.232350



In [5]:
# Hold-out evaluation: fit each candidate on the training split and print its
# R2 on the training and testing sets.
print('Evaluating Models\n')
for model in models:
    estimator_name = model.__class__.__name__
    print(estimator_name)
    Evaluate_Model(model, data).evaluate_model()
    print()

Evaluating Models

LinearRegression
R2 score on training set:  0.026702
R2 score on  testing set:  0.005866

Ridge
R2 score on training set:  0.026502
R2 score on  testing set:  0.004903

Lasso
R2 score on training set:  0.010500
R2 score on  testing set: -0.016328

ElasticNet
R2 score on training set:  0.010590
R2 score on  testing set: -0.017659

DecisionTreeRegressor
R2 score on training set:  0.996112
R2 score on  testing set: -0.996052

ExtraTreeRegressor
R2 score on training set:  0.996112
R2 score on  testing set: -1.001492

RandomForestRegressor
R2 score on training set:  0.839581
R2 score on  testing set: -0.057614

ExtraTreesRegressor
R2 score on training set:  0.996112
R2 score on  testing set: -0.200016



In [6]:
# In this step we use one additional feature: the 'Unnamed: 0' column, i.e. the
# row index saved with the data, which is kept here instead of being dropped.
# The scores in the following cells improve considerably with it.
# NOTE(review): a row index is not a physical predictor; the gain suggests the
# file's row order may be correlated with log(area) (possible target leakage).
# Confirm before relying on these results.
data = pd.read_csv('Data/forestfires_processed.csv')
display(data.head())

Unnamed: 0.1,Unnamed: 0,X,Y,sin(month),sin(day),log(FFMC),DMC,log(DC),log(ISI),temp,log(RH),wind,log(rain),log(area)
0,0,7,5,0.14112,-0.958924,4.468204,26.2,4.55703,1.808289,8.2,3.951244,6.7,0.0,0.0
1,1,7,4,-0.544021,0.909297,4.517431,35.4,6.507427,2.04122,18.0,3.526361,0.9,0.0,0.0
2,2,7,4,-0.544021,-0.279415,4.517431,43.7,6.533643,2.04122,14.6,3.526361,1.3,0.0,0.0
3,3,8,6,0.14112,-0.958924,4.529368,33.3,4.363099,2.302585,8.3,4.584967,4.0,0.182322,0.0
4,4,8,6,0.14112,0.656987,4.503137,51.3,4.636669,2.360854,11.4,4.60517,1.8,0.0,0.0


In [7]:
# Repeat the cross-validation (cv = 10) for every candidate on the current
# `data` frame.
print('Cross Validating Models\n')
for model in models:
    estimator_name = model.__class__.__name__
    print(estimator_name)
    Evaluate_Model(model, data).cross_validate_model(10)
    print()

Cross Validating Models

LinearRegression
max R2 score:  0.073571
mean R2 score: -0.975446
min R2 score: -4.714631

Ridge
max R2 score:  0.067887
mean R2 score: -0.959834
min R2 score: -4.619326

Lasso
max R2 score:  0.000000
mean R2 score: -0.864428
min R2 score: -4.070984

ElasticNet
max R2 score:  0.000000
mean R2 score: -0.866471
min R2 score: -4.077369

DecisionTreeRegressor
max R2 score:  1.000000
mean R2 score: -0.619799
min R2 score: -2.448874

ExtraTreeRegressor
max R2 score:  0.046663
mean R2 score: -2.744140
min R2 score: -7.983624

RandomForestRegressor
max R2 score:  1.000000
mean R2 score: -0.121629
min R2 score: -0.785968

ExtraTreesRegressor
max R2 score:  0.765834
mean R2 score: -0.134656
min R2 score: -0.637278



In [8]:
# Repeat the hold-out evaluation for every candidate on the current `data`
# frame: fit on the training split, then print train and test R2.
print('Evaluating Models\n')
for model in models:
    estimator_name = model.__class__.__name__
    print(estimator_name)
    Evaluate_Model(model, data).evaluate_model()
    print()

Evaluating Models

LinearRegression
R2 score on training set:  0.105297
R2 score on  testing set:  0.143035

Ridge
R2 score on training set:  0.104917
R2 score on  testing set:  0.141892

Lasso
R2 score on training set:  0.080623
R2 score on  testing set:  0.090320

ElasticNet
R2 score on training set:  0.080641
R2 score on  testing set:  0.090384

DecisionTreeRegressor
R2 score on training set:  1.000000
R2 score on  testing set: -0.103551

ExtraTreeRegressor
R2 score on training set:  1.000000
R2 score on  testing set: -0.234173

RandomForestRegressor
R2 score on training set:  0.912210
R2 score on  testing set:  0.502020

ExtraTreesRegressor
R2 score on training set:  1.000000
R2 score on  testing set:  0.499519



# End of Tree Regression