In [None]:
1. Load and Explore Data
# Load the dataset from the working directory and print a quick overview:
# first rows, column labels, dtype/non-null summary and descriptive stats.
df = pd.read_csv("amazon.csv")
print(df.head())
print(df.columns)
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit an extra "None" line.
df.info()
print(df.describe())

2. Target Variable: label a fire 'Severe' (1) when area > 0, otherwise 0
# Binary label: 1 where the 'area' value is greater than zero, else 0.
df['Severe'] = df['area'].gt(0).astype(int)

3. Optional EDA
# Histogram of the 'area' column (50 bins) with a KDE curve overlaid.
area_ax = sns.histplot(df['area'], bins=50, kde=True)
area_ax.set_title("Burned Area Distribution")
plt.show()

# Number of rows in each 'Severe' class.
severe_ax = sns.countplot(data=df, x='Severe')
severe_ax.set_title("Severe vs Non-Severe Fires")
plt.show()

4. Preprocessing: Encode categorical (month/day) if present
# One-hot encode the 'month' and 'day' columns when the dataset has them.
# The conditional bodies must be indented to be valid Python.
# drop_first=True omits the first dummy level of each encoded column.
for cat_col in ('month', 'day'):
    if cat_col in df.columns:
        df = pd.get_dummies(df, columns=[cat_col], drop_first=True)

5. Prepare features
# Label vector, then the feature matrix: every column except 'area' and the
# 'Severe' label.
y = df['Severe']
X = df.drop(['area', 'Severe'], axis=1)

6. Train/Test Split & Scaling
# Hold out 20% of the rows for testing; the split is stratified on the label
# and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

7. Baseline Model - Logistic Regression
# Fit the logistic-regression baseline and time the fit.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() follows the system clock and can jump when it is
# adjusted, which would corrupt the elapsed-time figure.
start_lr = time.perf_counter()
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train_scaled, y_train)
end_lr = time.perf_counter()

# Hard class predictions, and the probability column for the second class
# (used later for the ROC curve / AUC).
y_pred_lr = logreg.predict(X_test_scaled)
y_prob_lr = logreg.predict_proba(X_test_scaled)[:, 1]

8. Parallel Model - Random Forest
# Fit a 200-tree random forest (n_jobs=-1 asks scikit-learn to use all
# available processors) and time the fit.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() follows the system clock and can jump when it is
# adjusted, which would corrupt the elapsed-time figure.
start_rf = time.perf_counter()
rf = RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=42)
rf.fit(X_train_scaled, y_train)
end_rf = time.perf_counter()

# Hard class predictions, and the probability column for the second class
# (used later for the ROC curve / AUC).
y_pred_rf = rf.predict(X_test_scaled)
y_prob_rf = rf.predict_proba(X_test_scaled)[:, 1]

9. Evaluation
# Print the per-class precision/recall/F1 report for each model, with a blank
# line between the two reports.
lr_report = classification_report(y_test, y_pred_lr)
rf_report = classification_report(y_test, y_pred_rf)

print("Logistic Regression Report")
print(lr_report)
print()
print("Random Forest Report")
print(rf_report)

10. Confusion Matrices
# Confusion matrix of the logistic-regression model on the test split.
lr_cm = ConfusionMatrixDisplay.from_estimator(logreg, X_test_scaled, y_test)
lr_cm.ax_.set_title("Logistic Regression Confusion Matrix")
plt.show()

# Confusion matrix of the random-forest model on the test split.
rf_cm = ConfusionMatrixDisplay.from_estimator(rf, X_test_scaled, y_test)
rf_cm.ax_.set_title("Random Forest Confusion Matrix")
plt.show()

11. ROC Curves
# Area under the ROC curve per model; also shown in the legend labels.
auc_lr = roc_auc_score(y_test, y_prob_lr)
auc_rf = roc_auc_score(y_test, y_prob_rf)

# False/true positive rates across thresholds (the thresholds are unused).
fpr_lr, tpr_lr, _ = roc_curve(y_test, y_prob_lr)
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_prob_rf)

# Draw both model curves on one figure.
for fpr, tpr, legend_text in (
    (fpr_lr, tpr_lr, f"Logistic Regression (AUC={auc_lr:.2f})"),
    (fpr_rf, tpr_rf, f"Random Forest (AUC={auc_rf:.2f})"),
):
    plt.plot(fpr, tpr, label=legend_text)

# Dashed black diagonal as a visual reference line.
plt.plot([0, 1], [0, 1], 'k--')
plt.title("ROC Curve Comparison")
plt.legend()
plt.show()

12. Feature Importance (Random Forest only)
# Pair each feature name with the importance the fitted forest assigned to it,
# then draw the values as a horizontal bar chart, sorted in descending order.
importances = pd.Series(rf.feature_importances_, index=X.columns)
ranked_importances = importances.sort_values(ascending=False)
ranked_importances.plot.barh(figsize=(10, 6))
plt.title("Random Forest Feature Importances")
plt.show()

13. Model Comparison Table
# One row per model: test-set scores, AUC and the measured fit duration.
# Predictions are listed in the same order as the 'Model' column.
test_predictions = (y_pred_lr, y_pred_rf)

metrics = pd.DataFrame({
    'Model': ['Logistic Regression', 'Random Forest'],
    'Accuracy': [accuracy_score(y_test, pred) for pred in test_predictions],
    'Precision': [precision_score(y_test, pred) for pred in test_predictions],
    'Recall': [recall_score(y_test, pred) for pred in test_predictions],
    'F1 Score': [f1_score(y_test, pred) for pred in test_predictions],
    'AUC': [auc_lr, auc_rf],
    'Training Time (s)': [end_lr - start_lr, end_rf - start_rf],
})

print(metrics)

Next steps:

I’ll now create your report structure aligned with the 6 assessment questions using your actual dataset (amazon.csv).

Do you want me to include a full .ipynb and report template (.docx or .pdf)?

If yes, would you like your name and ID embedded in them?

Let me know and I’ll package everything for download.








You said:
give me the whole code


ChatGPT said:
Here is the complete Python code for your ML-based real-time wildfire severity classification system using your amazon.csv dataset. This includes:

Data loading & preprocessing

Exploratory Data Analysis (EDA)

Feature engineering

Train/test split

Scaling

Logistic Regression (baseline)

Parallel Random Forest

Evaluation (accuracy, precision, recall, F1-score, AUC)

Confusion matrix

ROC curve

Feature importances

Timing comparison

You can run this in Jupyter Notebook (.ipynb) or convert it to a .py script.

🔥 Complete Code: Wildfire Severity Prediction using Parallel ML

Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
accuracy_score, precision_score, recall_score, f1_score,
classification_report, ConfusionMatrixDisplay,
roc_curve, roc_auc_score
)

1. Load Data
# Read the dataset from the working directory, then show the first rows and
# the column labels, each under its own caption.
df = pd.read_csv("amazon.csv")
print("Data Preview:", df.head(), sep="\n")
print("\nColumns:", df.columns, sep="\n")

2. Basic Preprocessing
# Remove, in place, every row that has at least one missing value.
df.dropna(axis=0, how='any', inplace=True)

Create the binary target: Severe = 1 if area > 0, otherwise 0
# Binary label: 1 where the 'area' value is greater than zero, else 0.
df['Severe'] = df['area'].gt(0).astype(int)

Drop 'area' and the derived label from the features ('Severe' is computed from 'area', so keeping 'area' would leak the target)
# Label vector, then the feature matrix: every column except 'area' and the
# 'Severe' label.
y = df['Severe']
X = df.drop(['area', 'Severe'], axis=1)

Encode categorical features if any
# One-hot encode the 'month' and 'day' feature columns when they exist.
# The conditional bodies must be indented to be valid Python.
# drop_first=True omits the first dummy level of each encoded column.
for cat_col in ('month', 'day'):
    if cat_col in X.columns:
        X = pd.get_dummies(X, columns=[cat_col], drop_first=True)

3. EDA Plots
# Class balance: number of rows in each 'Severe' class.
plt.figure(figsize=(6, 4))
severity_ax = sns.countplot(data=df, x='Severe')
severity_ax.set_title("Fire Severity Distribution")
severity_ax.set_xlabel("Severe (1 = Yes, 0 = No)")
severity_ax.set_ylabel("Count")
plt.show()

# Histogram of the 'area' column (50 bins) with a KDE curve overlaid.
plt.figure(figsize=(8, 4))
area_ax = sns.histplot(df['area'], bins=50, kde=True)
area_ax.set_title("Area Burned Distribution")
area_ax.set_xlabel("Area")
area_ax.set_ylabel("Frequency")
plt.show()

4. Train-Test Split
# Hold out 20% of the rows for testing; the split is stratified on the label
# and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

5. Feature Scaling
# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

6. Logistic Regression (Baseline)
# Fit the logistic-regression baseline and time the fit.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() follows the system clock and can jump when it is
# adjusted, which would corrupt the elapsed-time figure.
start_lr = time.perf_counter()
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train_scaled, y_train)
end_lr = time.perf_counter()

# Hard class predictions, and the probability column for the second class
# (used later for the ROC curve / AUC).
y_pred_lr = logreg.predict(X_test_scaled)
y_prob_lr = logreg.predict_proba(X_test_scaled)[:, 1]

7. Parallel Random Forest
# Fit a 200-tree random forest (n_jobs=-1 asks scikit-learn to use all
# available processors) and time the fit.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() follows the system clock and can jump when it is
# adjusted, which would corrupt the elapsed-time figure.
start_rf = time.perf_counter()
rf = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
rf.fit(X_train_scaled, y_train)
end_rf = time.perf_counter()

# Hard class predictions, and the probability column for the second class
# (used later for the ROC curve / AUC).
y_pred_rf = rf.predict(X_test_scaled)
y_prob_rf = rf.predict_proba(X_test_scaled)[:, 1]

8. Evaluation Reports
# Print the per-class precision/recall/F1 report for each model, with a blank
# line between the two reports.
lr_report = classification_report(y_test, y_pred_lr)
rf_report = classification_report(y_test, y_pred_rf)

print("\nLogistic Regression Classification Report:")
print(lr_report)
print()
print("\nRandom Forest Classification Report:")
print(rf_report)

9. Confusion Matrices
# Confusion matrix of the logistic-regression model on the test split.
lr_cm = ConfusionMatrixDisplay.from_estimator(logreg, X_test_scaled, y_test)
lr_cm.ax_.set_title("Logistic Regression - Confusion Matrix")
plt.show()

# Confusion matrix of the random-forest model on the test split.
rf_cm = ConfusionMatrixDisplay.from_estimator(rf, X_test_scaled, y_test)
rf_cm.ax_.set_title("Random Forest - Confusion Matrix")
plt.show()

10. ROC Curve
# Area under the ROC curve per model; also shown in the legend labels.
auc_lr = roc_auc_score(y_test, y_prob_lr)
auc_rf = roc_auc_score(y_test, y_prob_rf)

# False/true positive rates across thresholds (the thresholds are unused).
fpr_lr, tpr_lr, _ = roc_curve(y_test, y_prob_lr)
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_prob_rf)

plt.figure(figsize=(8, 6))
# Draw both model curves on the same figure.
for fpr, tpr, legend_text in (
    (fpr_lr, tpr_lr, f"Logistic Regression AUC = {auc_lr:.2f}"),
    (fpr_rf, tpr_rf, f"Random Forest AUC = {auc_rf:.2f}"),
):
    plt.plot(fpr, tpr, label=legend_text)

# Dashed black diagonal as a visual reference line.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve Comparison")
plt.legend()
plt.grid()
plt.show()

11. Feature Importance (RF)
# Pair each feature name with the importance the fitted forest assigned to it,
# then draw the values as a horizontal bar chart, sorted in descending order.
importances = pd.Series(rf.feature_importances_, index=X.columns)
ranked_importances = importances.sort_values(ascending=False)
ranked_importances.plot.barh(figsize=(10, 6))
plt.title("Random Forest - Feature Importances")
plt.xlabel("Importance")
plt.show()

12. Model Metrics & Training Time
# One row per model: test-set scores, AUC and the measured fit duration.
# Predictions are listed in the same order as the "Model" column.
test_predictions = (y_pred_lr, y_pred_rf)

metrics_df = pd.DataFrame({
    "Model": ["Logistic Regression", "Random Forest"],
    "Accuracy": [accuracy_score(y_test, pred) for pred in test_predictions],
    "Precision": [precision_score(y_test, pred) for pred in test_predictions],
    "Recall": [recall_score(y_test, pred) for pred in test_predictions],
    "F1 Score": [f1_score(y_test, pred) for pred in test_predictions],
    "AUC": [auc_lr, auc_rf],
    "Training Time (s)": [end_lr - start_lr, end_rf - start_rf],
})

print("\nModel Comparison Summary:")
print(metrics_df)