In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns

In [19]:
# Load the cleaned Algerian forest fires CSV (path is relative to the notebook's
# working directory) and print its column / dtype / non-null summary.
df = pd.read_csv('../dataset/Algerian_forest_fires_dataset_clean.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243 entries, 0 to 242
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  243 non-null    float64
 1   RH           243 non-null    float64
 2   Ws           243 non-null    float64
 3   Rain         243 non-null    float64
 4   FFMC         243 non-null    float64
 5   DMC          243 non-null    float64
 6   DC           243 non-null    float64
 7   ISI          243 non-null    float64
 8   BUI          243 non-null    float64
 9   FWI          243 non-null    float64
 10  Classes      243 non-null    int64  
 11  Region       243 non-null    int64  
dtypes: float64(10), int64(2)
memory usage: 22.9 KB


In [20]:
import sklearn 
from sklearn.model_selection import train_test_split


In [21]:
# Features: start from every column except the last three (FWI, Classes, Region
# in the df.info() listing), then append Region and Classes back so that only
# FWI is excluded. The explicit .copy() makes `x` an independent frame, so the
# column assignments below never write into a slice of `df`
# (which is what triggers pandas' SettingWithCopyWarning).
x = df.iloc[:,:-3].copy()
x['Region'] = df['Region']
x['Classes'] = df['Classes']

# Target: the FWI column.
y = df['FWI']
x.shape,y.shape

((243, 11), (243,))

In [22]:
# Hold out 25% of the rows as the test split; random_state is fixed so the
# split is the same on every run.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=42)

In [23]:
from sklearn.preprocessing import StandardScaler
# NOTE(review): this instance is never fitted -- `scaler` is rebound to a fresh
# StandardScaler in the later cell that calls fit_transform, so this cell is
# redundant.
scaler = StandardScaler()

In [24]:
def correlation(dataset,threshold):
    """Return the names of columns that are highly correlated with an earlier column.

    The pairwise correlation matrix of ``dataset`` is computed with
    ``DataFrame.corr()``. A column is flagged when the absolute correlation
    between it and any column positioned before it is strictly greater than
    ``threshold``. The first column of each correlated pair is therefore kept
    and the later one is reported.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame whose columns are compared pairwise.
    threshold : float
        Absolute-correlation cut-off (exclusive).

    Returns
    -------
    set
        Column labels flagged as redundant; empty if none exceed the cut-off.
    """
    abs_corr = dataset.corr().abs()
    flagged = set()
    for position, label in enumerate(abs_corr.columns):
        # Entries left of the diagonal in this row are the correlations with
        # all earlier columns (strictly-lower triangle of the matrix).
        earlier = abs_corr.iloc[position, :position]
        if (earlier > threshold).any():
            flagged.add(label)
    return flagged

In [25]:
# Flag features whose absolute pairwise correlation exceeds 0.85. This is
# computed from the training split only, so the test split plays no part in
# deciding which columns are removed.
corr_features = correlation(x_train,0.85)

In [26]:
# Remove the highly-correlated features from both splits.
# Reassigning the result (instead of drop(..., inplace=True)) avoids mutating
# frames handed back by train_test_split, which pandas may treat as slices of
# `x` (SettingWithCopyWarning). errors="ignore" keeps the cell safe to re-run
# once the columns are already gone, where the in-place version raised KeyError.
x_train = x_train.drop(columns=list(corr_features), errors="ignore")
x_test = x_test.drop(columns=list(corr_features), errors="ignore")
x_train.shape,x_test.shape

((182, 9), (61, 9))

In [27]:
# Display the training features that remain after the correlated columns were dropped.
x_train

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,ISI,Region,Classes
97,29.0,74.0,19.0,0.1,75.8,3.6,2.1,0,0
141,31.0,72.0,14.0,0.2,60.2,3.8,0.8,1,0
192,40.0,31.0,15.0,0.0,94.2,22.5,16.6,1,1
68,32.0,60.0,18.0,0.3,77.1,11.3,2.2,0,0
119,32.0,47.0,14.0,0.7,77.5,7.1,1.8,0,0
...,...,...,...,...,...,...,...,...,...
106,24.0,82.0,15.0,0.4,44.9,0.9,0.2,0,0
14,28.0,80.0,17.0,3.1,49.4,3.0,0.4,0,0
92,25.0,76.0,17.0,7.2,46.0,1.3,0.2,0,0
179,34.0,59.0,16.0,0.0,88.1,19.5,7.4,1,1


In [28]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit the scaling statistics on the training split only, then apply that same
# fitted transform to the test split, so no test-set information is used when
# estimating the scaling parameters.
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

### Linear Regression Model 

In [29]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,r2_score

# Baseline model: plain linear regression trained on the standardized features.
# fit() returns the estimator itself, so construction and training are chained.
linreg = LinearRegression().fit(x_train_scaled, y_train)

# Score the model on the held-out split.
y_pred = linreg.predict(x_test_scaled)
mae, score = (
    mean_absolute_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

print("Mean Absolute error",mae)
print("R2 score",score)

Mean Absolute error 0.5468236465249976
R2 score 0.9847657384266951


### Lasso Regression 

In [30]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error,r2_score

# Lasso regression with the estimator's default settings, trained on the
# standardized training features.
lasso = Lasso().fit(x_train_scaled, y_train)

# Predict on the held-out split and compute both error metrics from that
# single set of predictions.
y_pred = lasso.predict(x_test_scaled)
mae, score = (
    mean_absolute_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

print("Mean Absolute error",mae)
print("R2 score",score)

Mean Absolute error 1.133175994914409
R2 score 0.9492020263112388


In [31]:
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error,r2_score

# ElasticNet regression with the estimator's default settings, trained on the
# standardized training features.
ElasticNetee = ElasticNet().fit(x_train_scaled, y_train)

# Evaluate on the held-out split.
y_pred = ElasticNetee.predict(x_test_scaled)
mae, score = (
    mean_absolute_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

print("Mean Absolute error",mae)
print("R2 score",score)

Mean Absolute error 1.8822353634896
R2 score 0.8753460589519703


In [32]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error,r2_score

# Ridge regression with the estimator's default settings, trained on the
# standardized training features. This is the model that the final cell
# pickles to 'ridge.pkl'.
ridgeee = Ridge().fit(x_train_scaled, y_train)

# Evaluate on the held-out split.
y_pred = ridgeee.predict(x_test_scaled)
mae, score = (
    mean_absolute_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

print("Mean Absolute error",mae)
print("R2 score",score)

Mean Absolute error 0.564230534010569
R2 score 0.9842993364555513


In [33]:
import pickle

# Persist the fitted scaler and the Ridge model to the current working directory.
# Each file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes; passing a bare open(...) to pickle.dump leaves the
# handle open until it happens to be garbage-collected.
with open('scaler.pkl','wb') as scaler_file:
    pickle.dump(scaler,scaler_file)
with open('ridge.pkl','wb') as model_file:
    pickle.dump(ridgeee,model_file)