# In [None]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import pickle


# 2. Load Dataset
# Read the cleaned Algerian forest fires CSV from the Colab working directory.
df = pd.read_csv("/content/Algerian_forest_fires_cleaned.csv")

# Replace the header positionally with a fixed set of names; pandas raises a
# ValueError here if the file does not have exactly 15 columns.
column_names = [
    'day', 'month', 'year', 'Temperature', 'RH', 'Ws', 'Rain',
    'FFMC', 'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes', 'Region',
]
df.columns = column_names

# 3. Data Cleaning and Preprocessing
# The date parts and the Classes/Region columns are not used as regression
# inputs, so they are removed before modelling.
non_feature_columns = ['day', 'month', 'year', 'Classes', 'Region']
df = df.drop(columns=non_feature_columns)

# Coerce every remaining column to numeric (unparseable cells become NaN),
# then discard any row that contains a missing value.
df = df.apply(pd.to_numeric, errors='coerce').dropna()

# 4. Data Visualization
# Heatmap of the pairwise correlations between all remaining columns,
# with the coefficient printed in each cell.
_, heatmap_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', ax=heatmap_ax)
heatmap_ax.set_title("Correlation Heatmap")
plt.show()

# Scatter/histogram grid for a subset of inputs plus the FWI target.
pairplot_columns = ['Temperature', 'RH', 'Ws', 'Rain', 'FWI']
sns.pairplot(df[pairplot_columns])
# y > 1 lifts the title above the top row of the grid.
plt.suptitle("Pairplot of Key Features", y=1.02)
plt.show()

# 5. Feature and Target Splitting
# FWI is the regression target; every other remaining column is a feature.
y = df['FWI']
X = df.drop(columns='FWI')

# 6. Train-Test Split and Scaling
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same
# transformation to both splits so no test statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 7. Model Training and Evaluation
# Baseline linear models with fixed (untuned) regularisation settings.
models = {
    'LinearRegression': LinearRegression(),
    'Ridge': Ridge(alpha=1.0),
    'Lasso': Lasso(alpha=0.1),
    'ElasticNet': ElasticNet(alpha=0.1, l1_ratio=0.5),
}

# Fit each model on the scaled training split and report R2 and RMSE
# on the scaled test split.
for name, model in models.items():
    preds = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    r2 = r2_score(y_test, preds)
    rmse = np.sqrt(mean_squared_error(y_test, preds))

    print(f"\n{name} Results:")
    print("R2 Score:", r2)
    print("RMSE:", rmse)

# 8. Cross-Validation and Hyperparameter Tuning
# Search Ridge's regularisation strength with 5-fold cross-validation on the
# scaled training split, selecting the alpha with the highest mean R2.
# NOTE(review): the features were standardised before the folds are formed,
# so each validation fold has already influenced the scaling statistics.
ridge_params = {'alpha': [0.01, 0.1, 1, 10, 100]}
ridge_grid = GridSearchCV(
    estimator=Ridge(),
    param_grid=ridge_params,
    scoring='r2',
    cv=5,
)
ridge_grid.fit(X_train_scaled, y_train)

print("\nBest Ridge Alpha:", ridge_grid.best_params_)
print("Best Cross-Validated Score:", ridge_grid.best_score_)

# 9. Save Best Model and Scaler with Pickle
# The Ridge estimator was trained on standardised features, so the fitted
# scaler has to be persisted alongside it; the model pickle on its own cannot
# score raw (unscaled) observations.
with open("best_ridge_model.pkl", "wb") as f:
    pickle.dump(ridge_grid.best_estimator_, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

# 10. Load Model and Scaler, then Predict on Held-Out Data
# NOTE: pickle.load can execute arbitrary code; only load files that this
# script (or another trusted source) produced.
with open("best_ridge_model.pkl", "rb") as f:
    loaded_model = pickle.load(f)

with open("scaler.pkl", "rb") as f:
    loaded_scaler = pickle.load(f)

# Start from the raw test features and apply the reloaded scaler, which is
# the same path genuinely new data would take through the saved artifacts.
preds_loaded = loaded_model.predict(loaded_scaler.transform(X_test))
print("\nLoaded Model R2 Score:", r2_score(y_test, preds_loaded))