In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

Data Preparation

In [3]:
# Load the cleaned Algerian forest fires dataset from the notebook's directory.
df = pd.read_csv('./Algerian_forest_fires_cleaned.csv')

In [5]:
# Encode the 'Classes' label as an integer: 0 when the text contains
# "not fire", 1 otherwise.
# The guard makes this cell safe to re-run: once 'Classes' has been encoded it
# is numeric, and calling the .str accessor on it would raise AttributeError.
if not pd.api.types.is_numeric_dtype(df['Classes']):
    df['Classes'] = np.where(df['Classes'].str.contains("not fire"), 0, 1)

In [6]:
# Separate the regression target from the predictors:
#   y -> the 'FWI' column
#   X -> every remaining column of df
y = df['FWI']
X = df.drop(columns=['FWI'])

In [8]:
#Train Test Split
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [9]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply that same fitted
# transform to the test features so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Model Training

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
import joblib



# Use the standardized feature matrices for training and evaluation.
# NOTE: this rebinds X_train / X_test, so after this cell they refer to the
# scaled arrays rather than the raw train/test split.
X_train = X_train_scaled
X_test = X_test_scaled

# Initialize regression models
linear_model = LinearRegression()
lasso_model = Lasso(alpha=1.0)
ridge_model = Ridge(alpha=1.0)
elasticnet_model = ElasticNet(alpha=1.0, l1_ratio=0.5)

# Model Training and Evaluation
models = {
    'Linear Regression': linear_model,
    'Lasso Regression': lasso_model,
    'Ridge Regression': ridge_model,
    'ElasticNet Regression': elasticnet_model,
}

# Test-set MSE per model, filled in as each model is evaluated so the
# comparison step below does not have to predict a second time.
mse_scores = {}

for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mse_scores[model_name] = mse
    print(f"Model: {model_name}")
    print(f"Mean Squared Error: {mse}")
    print(f"R-squared: {r2}")
    print()

# Model Comparison: pick the model name with the lowest test-set MSE.
# NOTE(review): selecting on the test set makes the reported score optimistic
# as a generalization estimate; consider cross-validating on the training set.
best_model = min(mse_scores, key=mse_scores.get)
print(f"The best model is: {best_model} with MSE: {mse_scores[best_model]}")

Model: Linear Regression
Mean Squared Error: 0.47478920218370896
R-squared: 0.9892728563308907

Model: Lasso Regression
Mean Squared Error: 2.0000583653869723
R-squared: 0.9548117073146738

Model: Ridge Regression
Mean Squared Error: 0.5711397138119281
R-squared: 0.9870959622986035

Model: ElasticNet Regression
Mean Squared Error: 4.396625230301124
R-squared: 0.9006649049983613

The best model is: Linear Regression with MSE: 0.47478920218370896


Saving the model parameters

In [15]:
# Persist the best model together with the scaler it depends on.
# The model was fitted on standardized features, so the fitted scaler must be
# saved as well: new raw inputs have to go through scaler.transform() before
# being passed to the reloaded model's predict().
best_model_instance = models[best_model]
joblib.dump(scaler, 'forest_fire_scaler.pkl')
# Kept as the last expression so the cell still displays the model's file list.
joblib.dump(best_model_instance, 'best_forest_fire_model.pkl')

['best_forest_fire_model.pkl']

In [16]:
# Display the selected estimator (looked up from `models` by the `best_model` name).
best_model_instance