# Forest Fire Damage Prediction

In [1]:
#import libraries
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neural_network import MLPRegressor, MLPClassifier

import warnings
# NOTE(review): with no category or module given, this filter hides every
# warning for the rest of the notebook, including any that the model-fitting
# cells may emit. Consider narrowing it once the notebook is stable.
warnings.filterwarnings(action='ignore')

In [3]:
# Read the forest-fire table from disk and preview its first rows
DATASET_CSV = 'forestfires_updated.csv'
data = pd.read_csv(DATASET_CSV)
data.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [4]:
# Column dtypes and non-null counts: a quick check for missing values and for
# the text columns (month, day) that need encoding before modelling
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       517 non-null    int64  
 1   Y       517 non-null    int64  
 2   month   517 non-null    object 
 3   day     517 non-null    object 
 4   FFMC    517 non-null    float64
 5   DMC     517 non-null    float64
 6   DC      517 non-null    float64
 7   ISI     517 non-null    float64
 8   temp    517 non-null    float64
 9   RH      517 non-null    int64  
 10  wind    517 non-null    float64
 11  rain    517 non-null    float64
 12  area    517 non-null    float64
dtypes: float64(8), int64(3), object(2)
memory usage: 52.6+ KB


# Preprocessing

In [5]:
def ordinal_encode(df, column, ordering):
    """Replace the values of one column with their position in ``ordering``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. It is copied; the caller's frame is left untouched.
    column : str
        Name of the column to encode.
    ordering : sequence
        Category values in the desired order. A value is encoded as the
        index of its first occurrence in this sequence.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which ``column`` holds the integer positions.

    Raises
    ------
    ValueError
        If ``column`` contains a value that does not appear in ``ordering``.
    """
    df = df.copy()

    # Build the lookup once instead of scanning the list for every row.
    # setdefault keeps the first position of a repeated category, which is
    # what list.index would have returned.
    positions = {}
    for position, category in enumerate(ordering):
        positions.setdefault(category, position)

    # Fail loudly, naming the column and the offending values, rather than
    # letting map() silently turn unknown categories into NaN.
    unknown_mask = ~df[column].isin(list(positions))
    if unknown_mask.any():
        unknown = df.loc[unknown_mask, column].unique().tolist()
        raise ValueError(
            "column {!r} contains values missing from ordering: {!r}".format(column, unknown)
        )

    df[column] = df[column].map(positions)
    return df

In [6]:
def preprocess_inputs(df, task='regression', train_size=0.7, random_state=1):
    """Encode, split and standardise the forest-fire table for modelling.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the feature columns together with ``month``,
        ``day`` and ``area``. The caller's frame is not modified.
    task : {'regression', 'classification'}, default 'regression'
        ``'regression'`` uses ``area`` unchanged as the target;
        ``'classification'`` uses 1 where ``area > 0`` and 0 otherwise.
    train_size : float or int, default 0.7
        Forwarded to ``train_test_split``.
    random_state : int, default 1
        Seed forwarded to ``train_test_split`` so the shuffle is repeatable.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Standardised features. The scaler is fitted on the training rows
        only and then applied to both splits.
    y_train, y_test : pandas.Series
        Targets for the corresponding rows.

    Raises
    ------
    ValueError
        If ``task`` is not one of the supported values.
    """
    # Validate up front: an unsupported task used to leave `y` unassigned and
    # only failed further down with an UnboundLocalError.
    supported_tasks = ('regression', 'classification')
    if task not in supported_tasks:
        raise ValueError(
            "task must be one of {!r}, got {!r}".format(supported_tasks, task)
        )

    df = df.copy()

    # Ordinal encode month and day columns (calendar order, week starting on Sunday)
    df = ordinal_encode(
        df,
        column='month',
        ordering=['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                  'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    )
    df = ordinal_encode(
        df,
        column='day',
        ordering=['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat']
    )

    # Split df into X and y
    if task == 'regression':
        y = df['area']
    else:
        # Binary target: did any area burn at all?
        y = (df['area'] > 0).astype('int64')

    X = df.drop('area', axis=1)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, shuffle=True, random_state=random_state
    )

    # Scale feature data (X); statistics come from the training rows only so
    # nothing about the test rows leaks into the fit.
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(X_train)
    scaled_test = scaler.transform(X_test)

    # NOTE: the rebuilt frames get a fresh 0..n-1 index, while y_train / y_test
    # keep the row labels of `df`. Pair X and y by position, not by label.
    X_train = pd.DataFrame(scaled_train, columns=X.columns)
    X_test = pd.DataFrame(scaled_test, columns=X.columns)

    return X_train, X_test, y_train, y_test

# Regression

In [8]:
# Build the regression split: the target is the raw `area` column
X_train, X_test, y_train, y_test = preprocess_inputs(data, task='regression')

# Standardised training features (not a trained model)
X_train

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain
0,0.609497,0.549280,0.653317,-0.970161,0.060985,0.279029,0.572473,-0.406018,0.401972,-0.078036,-0.989687,-0.093528
1,-1.116216,-1.839756,-1.924250,-1.442804,-0.195757,-0.895350,-1.761748,0.137308,-2.243125,0.993349,1.363841,-0.093528
2,-0.253360,0.549280,0.223722,0.447767,-0.179711,2.460689,1.039077,-0.698578,-0.250931,0.678236,-0.473059,-0.093528
3,-0.253360,-0.247066,-1.924250,-0.497519,-0.388314,-1.295139,-1.899971,-1.074727,-0.803388,-0.015014,-0.702671,-0.093528
4,0.178069,-0.247066,0.223722,0.447767,0.686796,1.776676,0.620811,1.035885,0.234561,-0.078036,-0.702671,-0.093528
...,...,...,...,...,...,...,...,...,...,...,...,...
356,-1.116216,0.549280,1.082911,-1.442804,0.333774,-0.970311,0.593645,-0.029870,-0.585754,-0.519195,-1.735927,-0.093528
357,-1.116216,0.549280,0.223722,-0.024876,0.799121,-0.136376,-0.119444,0.889605,0.820500,-0.708263,0.847213,-0.093528
358,0.178069,-0.247066,-1.924250,0.920409,0.189357,-1.176452,-1.860422,0.011925,-0.552272,-1.149422,1.363841,-0.093528
359,1.472353,1.345625,0.223722,-1.442804,0.141217,0.527336,0.232507,0.346279,0.117373,-0.141059,1.076825,-0.093528


In [9]:
# Regression target for the training rows
y_train

13       0.00
61       0.00
453      0.00
39       0.00
373      0.00
        ...  
129      0.00
144      0.77
72       0.00
235    196.48
37       0.00
Name: area, Length: 361, dtype: float64

In [10]:
# Linear-regression baseline fitted on the training split
lin_reg_model = LinearRegression().fit(X_train, y_train)

# Coefficient of determination on the held-out rows
lin_reg_r2 = lin_reg_model.score(X_test, y_test)
print("Linear Regression R^2: {:.5f}".format(lin_reg_r2))

Linear Regression R^2: 0.02156


In [11]:
# Neural-network regressor with hidden_layer_sizes=(16, 16).
# random_state fixes the random initialisation so that Restart & Run All
# prints the same score every time; without it the R^2 changed from run to
# run. Re-run the cell to refresh the saved output.
nn_reg_model = MLPRegressor(hidden_layer_sizes=(16, 16), random_state=1)
nn_reg_model.fit(X_train, y_train)

# R^2 on the held-out rows
print("NN Regression R^2: {:.5f}".format(nn_reg_model.score(X_test, y_test)))

NN Regression R^2: 0.05209


# Classification

In [12]:
# Rebuild the split with a binary target (1 where area > 0, otherwise 0).
# This overwrites the X_train / X_test / y_train / y_test from the regression section.
X_train, X_test, y_train, y_test = preprocess_inputs(data, task='classification')

In [13]:
# Standardised training features for the classification task
X_train

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain
0,0.609497,0.549280,0.653317,-0.970161,0.060985,0.279029,0.572473,-0.406018,0.401972,-0.078036,-0.989687,-0.093528
1,-1.116216,-1.839756,-1.924250,-1.442804,-0.195757,-0.895350,-1.761748,0.137308,-2.243125,0.993349,1.363841,-0.093528
2,-0.253360,0.549280,0.223722,0.447767,-0.179711,2.460689,1.039077,-0.698578,-0.250931,0.678236,-0.473059,-0.093528
3,-0.253360,-0.247066,-1.924250,-0.497519,-0.388314,-1.295139,-1.899971,-1.074727,-0.803388,-0.015014,-0.702671,-0.093528
4,0.178069,-0.247066,0.223722,0.447767,0.686796,1.776676,0.620811,1.035885,0.234561,-0.078036,-0.702671,-0.093528
...,...,...,...,...,...,...,...,...,...,...,...,...
356,-1.116216,0.549280,1.082911,-1.442804,0.333774,-0.970311,0.593645,-0.029870,-0.585754,-0.519195,-1.735927,-0.093528
357,-1.116216,0.549280,0.223722,-0.024876,0.799121,-0.136376,-0.119444,0.889605,0.820500,-0.708263,0.847213,-0.093528
358,0.178069,-0.247066,-1.924250,0.920409,0.189357,-1.176452,-1.860422,0.011925,-0.552272,-1.149422,1.363841,-0.093528
359,1.472353,1.345625,0.223722,-1.442804,0.141217,0.527336,0.232507,0.346279,0.117373,-0.141059,1.076825,-0.093528


In [14]:
# Binary classification target for the training rows
y_train

13     0
61     0
453    0
39     0
373    0
      ..
129    0
144    1
72     0
235    1
37     0
Name: area, Length: 361, dtype: int64

In [15]:
# Logistic-regression baseline fitted on the training split
lin_clf_model = LogisticRegression().fit(X_train, y_train)

# Share of held-out rows classified correctly
lin_clf_accuracy = lin_clf_model.score(X_test, y_test)
print("Linear Classification Accuracy: {:.2f}%".format(lin_clf_accuracy * 100))

Linear Classification Accuracy: 50.64%


In [16]:
# Neural-network classifier with hidden_layer_sizes=(16, 16).
# random_state fixes the random initialisation so that Restart & Run All
# prints the same accuracy every time; without it the figure changed from
# run to run. Re-run the cell to refresh the saved output.
nn_clf_model = MLPClassifier(hidden_layer_sizes=(16, 16), random_state=1)
nn_clf_model.fit(X_train, y_train)

# Accuracy on the held-out rows
print("NN Classification Accuracy: {:.2f}%".format(nn_clf_model.score(X_test, y_test) * 100))

NN Classification Accuracy: 51.28%
