In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the cleaned Algerian forest fires CSV (path is relative to the working
# directory) and preview the first rows to confirm it loaded.
df = pd.read_csv("Cleaned_Algerian_forest_fires_dataset.csv")
df.head()

In [None]:
# Count how many rows carry each distinct value of the "Classes" column.
df["Classes"].value_counts()

In [None]:
# Remove the calendar columns; every other column is kept unchanged.
df = df.drop(columns=["day", "month", "year"])

In [None]:
# Preview again to verify that the day/month/year columns are gone.
df.head()

In [None]:
# FWI is the regression target; every remaining column is a predictor.
y = df["FWI"]
x = df.drop(columns=["FWI"])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
## check for multicollinearity
# Annotated heatmap of the pairwise correlations between the predictor columns.
feature_corr = x.corr()
plt.figure(figsize=(10, 7))
sns.heatmap(feature_corr, annot=True)
plt.show()


In [None]:
## make function to detect features with high correlation
def correlation(dataset,threshold):
    """Return the names of columns that are highly correlated with an earlier column.

    The pairwise correlation matrix is obtained from ``dataset.corr()``. A
    column is reported when the absolute correlation between it and any column
    positioned before it is strictly greater than ``threshold``. The earlier
    column of such a pair is not reported on account of that pair.

    Parameters
    ----------
    dataset : object exposing ``corr()`` (e.g. a pandas DataFrame)
        Data whose columns are compared pairwise.
    threshold : number
        Cut-off applied to the absolute correlation value.

    Returns
    -------
    set
        Names of the flagged columns; empty when nothing exceeds the threshold.
    """
    pairwise=dataset.corr()
    names=pairwise.columns
    # Only cells below the diagonal are inspected, so every pair is tested once
    # and a column is never compared with itself.
    return {
        names[row]
        for row in range(len(names))
        if any(abs(pairwise.iloc[row,col])>threshold for col in range(row))
    }

In [None]:
# Flag training-set columns whose absolute correlation with an earlier column exceeds 0.85.
corr_features = correlation(dataset=x_train, threshold=0.85)

In [None]:
# Show which columns were flagged as highly correlated.
print(f"correlated features: {corr_features}")

In [None]:
# Remove the highly correlated columns from both splits.
# The names are rebound instead of using drop(..., inplace=True) so the frames
# returned by train_test_split are not mutated in place, and errors="ignore"
# lets this cell be re-run after the columns are already gone instead of
# raising KeyError. On a first run every flagged column is present, because
# corr_features was derived from x_train's own columns.
redundant_columns = list(corr_features)
x_train = x_train.drop(columns=redundant_columns, errors="ignore")
x_test = x_test.drop(columns=redundant_columns, errors="ignore")
x_train.shape, x_test.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
## box plots showing the effect of scaling on the training features
fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(12, 6))
# Left panel: raw training features
sns.boxplot(data=x_train, ax=ax_before)
ax_before.set_title("Before Scaling")
# Right panel: the same features after StandardScaler
sns.boxplot(data=x_train_scaled, ax=ax_after)
ax_after.set_title("After Scaling")
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score


In [None]:
# Baseline: ordinary least squares on the standardized features.
# LinearRegression and the three metric helpers are imported by the cell
# directly above, so the duplicate import lines are not repeated here.
linreg=LinearRegression()
linreg.fit(x_train_scaled,y_train)
y_pred=linreg.predict(x_test_scaled)
mae=mean_absolute_error(y_test,y_pred)
score=r2_score(y_test,y_pred)
# MSE is reported too, so the baseline shows the same metrics as the other model cells
mse=mean_squared_error(y_test,y_pred)
print("Mean absolute error", mae)
print("R2 Score", score)
print("Mean Squared error", mse)
# Label the axes so the figure can be read without the code next to it
plt.scatter(y_test,y_pred)
plt.xlabel("Actual FWI")
plt.ylabel("Predicted FWI")
plt.title("Linear Regression: actual vs predicted")
plt.show()


In [None]:
from sklearn.linear_model import Lasso

# Lasso regression with the estimator's default settings, fitted on the
# standardized training features and scored on the held-out split.
lasso=Lasso()
lasso.fit(x_train_scaled,y_train)
y_pred=lasso.predict(x_test_scaled)
mae=mean_absolute_error(y_test,y_pred)
score=r2_score(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
print("Mean absolute error", mae)
print("R2 Score", score)
print("Mean Squared error", mse)
# Label the axes so the figure can be read without the code next to it
plt.scatter(y_test,y_pred)
plt.xlabel("Actual FWI")
plt.ylabel("Predicted FWI")
plt.title("Lasso: actual vs predicted")
plt.show()


In [None]:
from sklearn.linear_model import Ridge

# Ridge regression with the estimator's default settings, fitted on the
# standardized training features and scored on the held-out split.
ridge=Ridge()
ridge.fit(x_train_scaled,y_train)
y_pred=ridge.predict(x_test_scaled)
mae=mean_absolute_error(y_test,y_pred)
score=r2_score(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
print("Mean absolute error", mae)
print("R2 Score", score)
print("Mean Squared error", mse)
# Label the axes so the figure can be read without the code next to it
plt.scatter(y_test,y_pred)
plt.xlabel("Actual FWI")
plt.ylabel("Predicted FWI")
plt.title("Ridge: actual vs predicted")
plt.show()

In [None]:
from sklearn.linear_model import ElasticNet

# ElasticNet regression with the estimator's default settings, fitted on the
# standardized training features and scored on the held-out split.
elasticnet=ElasticNet()
elasticnet.fit(x_train_scaled,y_train)
y_pred=elasticnet.predict(x_test_scaled)
mae=mean_absolute_error(y_test,y_pred)
score=r2_score(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
print("Mean absolute error", mae)
print("R2 Score", score)
print("Mean Squared error", mse)
# Label the axes so the figure can be read without the code next to it
plt.scatter(y_test,y_pred)
plt.xlabel("Actual FWI")
plt.ylabel("Predicted FWI")
plt.title("ElasticNet: actual vs predicted")
plt.show()

## Cross Validation

In [None]:
from sklearn.linear_model import LassoCV

# Lasso with the regularization strength chosen by 5-fold cross-validation
# on the training split; the held-out split is used only for scoring.
lassocv=LassoCV(cv=5)
lassocv.fit(x_train_scaled,y_train)
y_pred=lassocv.predict(x_test_scaled)
mae=mean_absolute_error(y_test,y_pred)
score=r2_score(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
print("Mean absolute error", mae)
print("R2 Score", score)
print("Mean Squared error", mse)
# Label the axes so the figure can be read without the code next to it
plt.scatter(y_test,y_pred)
plt.xlabel("Actual FWI")
plt.ylabel("Predicted FWI")
plt.title("LassoCV: actual vs predicted")
plt.show()

In [None]:
# Regularization strength (alpha) that LassoCV selected during cross-validation.
lassocv.alpha_

In [None]:
from sklearn.linear_model import RidgeCV

# Ridge with the regularization strength chosen by 5-fold cross-validation
# on the training split; the held-out split is used only for scoring.
ridgecv=RidgeCV(cv=5)
ridgecv.fit(x_train_scaled,y_train)
y_pred=ridgecv.predict(x_test_scaled)
mae=mean_absolute_error(y_test,y_pred)
score=r2_score(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
print("Mean absolute error", mae)
print("R2 Score", score)
print("Mean Squared error", mse)
# Label the axes so the figure can be read without the code next to it
plt.scatter(y_test,y_pred)
plt.xlabel("Actual FWI")
plt.ylabel("Predicted FWI")
plt.title("RidgeCV: actual vs predicted")
plt.show()

In [None]:
# Regularization strength (alpha) that RidgeCV selected during cross-validation.
ridgecv.alpha_

In [None]:
# Show the parameters the RidgeCV estimator is configured with.
ridgecv.get_params()

In [None]:
## apply elastic net cv algo
from sklearn.linear_model import ElasticNetCV

# ElasticNet with the regularization strength chosen by 10-fold
# cross-validation on the training split; the held-out split is used only
# for scoring.
elasticnetcv=ElasticNetCV(cv=10)
elasticnetcv.fit(x_train_scaled,y_train)
y_pred=elasticnetcv.predict(x_test_scaled)
mae=mean_absolute_error(y_test,y_pred)
score=r2_score(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
print("Mean absolute error", mae)
print("R2 Score", score)
print("Mean Squared error", mse)
# Label the axes so the figure can be read without the code next to it
plt.scatter(y_test,y_pred)
plt.xlabel("Actual FWI")
plt.ylabel("Predicted FWI")
plt.title("ElasticNetCV: actual vs predicted")
plt.show()

In [None]:
# A notebook cell renders only its final expression, so three separate bare
# expressions would display just the last one. Collecting the fitted
# attributes in one labelled mapping shows all of them together.
{
    "alpha_": elasticnetcv.alpha_,        # best alpha value
    "l1_ratio_": elasticnetcv.l1_ratio_,  # selected L1/L2 mixing ratio
    "alphas_": elasticnetcv.alphas_,      # grid of alphas that was searched
}


In [None]:
# Show the parameters the ElasticNetCV estimator is configured with.
elasticnetcv.get_params()

In [None]:
import pickle
## save the model
# Serialize the fitted RidgeCV estimator. The context manager guarantees that
# model.pkl is flushed and closed as soon as the dump finishes, instead of
# leaving the handle open until it is garbage collected.
with open("model.pkl","wb") as model_file:
    pickle.dump(ridgecv,model_file)

In [None]:
# Serialize the fitted StandardScaler so the same scaling can be reapplied
# wherever the saved model is used. The context manager guarantees that
# scaler.pkl is flushed and closed as soon as the dump finishes.
with open("scaler.pkl","wb") as scaler_file:
    pickle.dump(scaler,scaler_file)

In [None]:
# List the column names of df (day/month/year were dropped earlier in this notebook).
df.columns
