In [1]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
from google.colab import auth # Google Colab credentials use karne ke liye

# --- GEE Authentication (Colab Credentials use karke) ---
print("--- Step 1: GEE Authentication Shuru ---")

try:
    # 1. Colab's built-in authentication flow (Iss naye mail se login hoga)
    print("Iss naye mail se login karein aur access dein (Google prompts).")
    auth.authenticate_user()

    # 2. Initialize Earth Engine (GEE ab naye credentials use karega)
    ee.Initialize()
    print("\n✅ GEE Authentication Successful aur Initialized.")

except Exception as e:
    print(f"\n❌ GEE Initialization Failed. Error: {e}")
    print("Agar authentication window mein problem ho, toh Colab session ko disconnect karke phir se try karein.")
    raise

--- Step 1: GEE Authentication Shuru ---
Iss naye mail se login karein aur access dein (Google prompts).

❌ GEE Initialization Failed. Error: ee.Initialize: no project found. Call with project= or see http://goo.gle/ee-auth.
Agar authentication window mein problem ho, toh Colab session ko disconnect karke phir se try karein.


EEException: ee.Initialize: no project found. Call with project= or see http://goo.gle/ee-auth.

In [2]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
from google.colab import auth

# --- Step 1: GEE Authentication & Initialization (The Fix) ---
print("--- Step 1: GEE Initialization Fix Shuru ---")

try:
    # 1. Colab Authentication (Credentials are fine from previous run)
    print("Re-authenticating using Colab credentials...")
    auth.authenticate_user()

    # 2. Initialize Earth Engine with a public project ID to avoid 'no project found' error
    # We use 'earthengine-public-projects' as the default project.
    ee.Initialize(project='earthengine-public-projects')

    print("\n✅ GEE Authentication Successful aur Initialization Done.")

except Exception as e:
    print(f"\n❌ CRITICAL ERROR: Initialization Failed. Error: {e}")
    raise

# --- Configuration (Uttarakhand) ---
START_DATE = '2021-01-01'
END_DATE = '2024-01-01'
study_area_bbox = [77.5, 29.0, 81.5, 31.5]
study_area = ee.Geometry.Rectangle(study_area_bbox)
print(f"Set study area to Uttarakhand BBox: {study_area_bbox}")

# --- Step 2: GEE Data Fetching for ML Training ---
print("\n--- Step 2: Fetching Historical Data (GEE) ---")

try:
    # 1. Historical Data Collections
    mcd64a1 = ee.ImageCollection('MODIS/061/MCD64A1').filterDate(START_DATE, END_DATE)
    active_fires = ee.ImageCollection('MODIS/061/MOD14A2').filterDate(START_DATE, END_DATE)
    dem = ee.Image('USGS/SRTMGL1_003')
    slope = ee.Terrain.slope(dem).rename('slope')

    # 2. Create Feature Stack
    feature_stack = ee.Image.cat([
        active_fires.select('MaxFRP').mean().rename('X_frp'),
        slope,
        mcd64a1.select('BurnDate').max().gt(0).rename('Y_is_burned')
    ])

    # 3. Sample Data (5000 random points within Uttarakhand)
    sample_points = ee.FeatureCollection.randomPoints(
        region=study_area,
        points=5000,
        seed=42
    )

    print("Sampling GEE data points (Ismein 2-3 minute lag sakte hain)...")

    # Data extraction task
    extracted_data = feature_stack.sampleRegions(
        collection=sample_points,
        properties=['Y_is_burned'],
        scale=500, # 500m resolution
        tileScale=16
    )

    # GEE data ko Pandas DataFrame mein download karein
    df_raw = geemap.ee_to_pandas(extracted_data)
    df_ml = df_raw.rename(columns={'Y_is_burned': 'Y_is_burned'}).dropna(subset=['Y_is_burned'])

    print(f"\n✅ GEE Data fetched. Total ML Samples: {len(df_ml)}")

except Exception as e:
    print(f"\n❌ ERROR during GEE data fetching. Error: {e}")
    raise

# --- Step 3: ML Model Training (XGBoost) ---
print("\n--- Step 3: Training XGBoost Model ---")

features = ['X_frp', 'slope']
target = 'Y_is_burned'

X = df_ml[features].fillna(0)
Y = df_ml[target].astype(int)

# Final check before training
if Y.nunique() < 2 or len(Y[Y==1]) < 10:
    print(f"⚠️ Warning: Target variable mein kam samples hain ({len(Y[Y==1])}). ML Training skip ki jaa rahi hai.")
else:
    # Split Data and Train
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

    xgb_model = xgb.XGBClassifier(
        objective='binary:logistic', n_estimators=100, max_depth=5,
        use_label_encoder=False, eval_metric='logloss', random_state=42
    )

    xgb_model.fit(X_train, Y_train)

    # Evaluate and Save
    Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
    print(f"Model AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")

    joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
    print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

--- Step 1: GEE Initialization Fix Shuru ---
Re-authenticating using Colab credentials...

❌ CRITICAL ERROR: Initialization Failed. Error: Project 'projects/earthengine-public-projects' not found or deleted.


EEException: Project 'projects/earthengine-public-projects' not found or deleted.

In [5]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
from google.colab import auth

# ⚠️ YAHAN TUMHARI ASLI PROJECT ID DAALNI HAI!
# Yeh Project ID tumne Google Cloud console se copy ki hai
MY_GEE_PROJECT_ID = 'uk-fire-predictor-2025'

# --- Step 1: GEE Initialization (Authentication and Project Setup) ---
print("--- Step 1: GEE Initialization Shuru ---")

try:
    print("Re-authenticating using Colab credentials...")
    # Colab ke credentials use karke login
    auth.authenticate_user()

    # GEE ko tumhari Project ID se initialize karo
    ee.Initialize(project=MY_GEE_PROJECT_ID)

    print(f"\n✅ GEE Initialized with Project: {MY_GEE_PROJECT_ID}. Data Fetching Shuru!")

except Exception as e:
    print(f"\n❌ CRITICAL ERROR: Initialization Failed. Error: {e}")
    print("Dobara check karein ki Project ID sahi hai, aur GEE API uss ID ke liye enabled hai.")
    raise

# --- Configuration (Uttarakhand) ---
START_DATE = '2021-01-01' # Historical data 3 saal ka
END_DATE = '2024-01-01'
# Uttarakhand Bounding Box
study_area_bbox = [77.5, 29.0, 81.5, 31.5]
study_area = ee.Geometry.Rectangle(study_area_bbox)
print(f"Set study area to Uttarakhand BBox: {study_area_bbox}")

# --- Step 2: GEE Data Fetching for ML Training ---
print("\n--- Step 2: Fetching Historical Data for ML Training ---")

try:
    # 1. Historical Data Collections (MODIS/SRTM)
    mcd64a1 = ee.ImageCollection('MODIS/061/MCD64A1').filterDate(START_DATE, END_DATE) # Burned Area (Label)
    active_fires = ee.ImageCollection('MODIS/061/MOD14A2').filterDate(START_DATE, END_DATE) # Active Fire (FRP Feature)
    dem = ee.Image('USGS/SRTMGL1_003') # Elevation
    slope = ee.Terrain.slope(dem).rename('slope') # Derived Slope Feature

    # 2. Create Feature Stack (Image mein Features aur Label combine karna)
    feature_stack = ee.Image.cat([
        active_fires.select('MaxFRP').mean().rename('X_frp'), # Feature 1: Fire Radiative Power
        slope,                                                 # Feature 2: Slope
        mcd64a1.select('BurnDate').max().gt(0).rename('Y_is_burned') # Label: 1 agar jala, 0 agar nahi jala
    ])

    # 3. Sample Data (5000 random points within Uttarakhand)
    sample_points = ee.FeatureCollection.randomPoints(
        region=study_area, points=5000, seed=42
    )

    print("Sampling GEE data points (2-5 min lag sakte hain - depends on GEE server load)...")

    # Data extraction task
    extracted_data = feature_stack.sampleRegions(
        collection=sample_points, properties=['Y_is_burned'], scale=500, tileScale=16
    )

    # GEE data ko Pandas DataFrame mein download karein
    df_raw = geemap.ee_to_pandas(extracted_data)
    df_ml = df_raw.rename(columns={'Y_is_burned': 'Y_is_burned'}).dropna(subset=['Y_is_burned'])

    print(f"\n✅ GEE Data fetched. Total ML Samples: {len(df_ml)}")

except Exception as e:
    print(f"\n❌ ERROR during GEE data fetching. Error: {e}")
    raise

# --- Step 3: ML Model Training (XGBoost) ---
print("\n--- Step 3: Training XGBoost Model ---")

features = ['X_frp', 'slope']
target = 'Y_is_burned'
X = df_ml[features].fillna(0)
Y = df_ml[target].astype(int)

# Final check before training
if Y.nunique() < 2 or len(Y[Y==1]) < 10:
    print(f"⚠️ Warning: Target variable mein kam samples hain ({len(Y[Y==1])}). ML Training skip ki jaa rahi hai.")
else:
    # Split Data and Train
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

    # XGBoost Classifier
    xgb_model = xgb.XGBClassifier(
        objective='binary:logistic', n_estimators=100, max_depth=5,
        use_label_encoder=False, eval_metric='logloss', random_state=42
    )

    xgb_model.fit(X_train, Y_train)

    # Evaluate and Save
    Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
    print(f"Model AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")

    # Model ko disk par save karna
    joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
    print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

--- Step 1: GEE Initialization Shuru ---
Re-authenticating using Colab credentials...





❌ CRITICAL ERROR: Initialization Failed. Error: Project uk-fire-predictor-2025 is not registered to use Earth Engine. Visit https://console.cloud.google.com/earth-engine/configuration?project=uk-fire-predictor-2025 to register your project. See https://developers.google.com/earth-engine/guides/access for more details. If you have previously registered your project for noncommercial use and lost access, you will need to re-verify your eligibility. See the https://developers.google.com/earth-engine/guides/access#configuring_noncommercial_access for more details.
Dobara check karein ki Project ID sahi hai, aur GEE API uss ID ke liye enabled hai.


EEException: Project uk-fire-predictor-2025 is not registered to use Earth Engine. Visit https://console.cloud.google.com/earth-engine/configuration?project=uk-fire-predictor-2025 to register your project. See https://developers.google.com/earth-engine/guides/access for more details. If you have previously registered your project for noncommercial use and lost access, you will need to re-verify your eligibility. See the https://developers.google.com/earth-engine/guides/access#configuring_noncommercial_access for more details.

In [7]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
from google.colab import auth

# ⚠️ YAHAN TUMHARI ASLI PROJECT ID DAALNI HAI!
# Yeh Project ID ab sahi hai
MY_GEE_PROJECT_ID = 'uk-fire-predictor-2025'

# --- Step 1: GEE Initialization (Authentication and Project Setup) ---
print("--- Step 1: GEE Initialization Shuru ---")

try:
    print("Re-authenticating using Colab credentials...")
    auth.authenticate_user()

    # GEE ko tumhari Project ID se initialize karo
    ee.Initialize(project=MY_GEE_PROJECT_ID)

    print(f"\n✅ GEE Initialized with Project: {MY_GEE_PROJECT_ID}. Data Fetching Shuru!")

except Exception as e:
    print(f"\n❌ CRITICAL ERROR: Initialization Failed. Error: {e}")
    print("Agar yeh error aaye toh GEE API dobara check karein ki enabled hai aur 1 minute wait karein.")
    raise

# --- Configuration (Uttarakhand) ---
START_DATE = '2021-01-01' # Historical data 3 saal ka
END_DATE = '2024-01-01'
# Uttarakhand Bounding Box
study_area_bbox = [77.5, 29.0, 81.5, 31.5]
study_area = ee.Geometry.Rectangle(study_area_bbox)
print(f"Set study area to Uttarakhand BBox: {study_area_bbox}")

# --- Step 2: GEE Data Fetching for ML Training ---
print("\n--- Step 2: Fetching Historical Data for ML Training ---")

try:
    # Historical Data Collections
    mcd64a1 = ee.ImageCollection('MODIS/061/MCD64A1').filterDate(START_DATE, END_DATE) # Burned Area (Label)
    active_fires = ee.ImageCollection('MODIS/061/MOD14A2').filterDate(START_DATE, END_DATE) # Active Fire (FRP Feature)
    dem = ee.Image('USGS/SRTMGL1_003') # Elevation
    slope = ee.Terrain.slope(dem).rename('slope') # Derived Slope Feature

    # Create Feature Stack (FRP and Slope as features, Burned as label)
    feature_stack = ee.Image.cat([
        active_fires.select('MaxFRP').mean().rename('X_frp'), # Feature 1: Fire Radiative Power
        slope,                                                 # Feature 2: Slope
        mcd64a1.select('BurnDate').max().gt(0).rename('Y_is_burned') # Label: 1 agar jala, 0 agar nahi jala
    ])

    # Sample Data (5000 random points within Uttarakhand)
    sample_points = ee.FeatureCollection.randomPoints(
        region=study_area, points=5000, seed=42
    )

    print("Sampling GEE data points (2-5 min lag sakte hain)...")

    # Data extraction task
    extracted_data = feature_stack.sampleRegions(
        collection=sample_points, properties=['Y_is_burned'], scale=500, tileScale=16
    )

    # GEE data ko Pandas DataFrame mein download karein
    df_raw = geemap.ee_to_pandas(extracted_data)
    df_ml = df_raw.rename(columns={'Y_is_burned': 'Y_is_burned'}).dropna(subset=['Y_is_burned'])

    print(f"\n✅ GEE Data fetched. Total ML Samples: {len(df_ml)}")

except Exception as e:
    print(f"\n❌ ERROR during GEE data fetching. Error: {e}")
    raise

# --- Step 3: ML Model Training (XGBoost) ---
print("\n--- Step 3: Training XGBoost Model ---")

features = ['X_frp', 'slope']
target = 'Y_is_burned'
X = df_ml[features].fillna(0)
Y = df_ml[target].astype(int)

# Final check before training
if Y.nunique() < 2 or len(Y[Y==1]) < 10:
    print(f"⚠️ Warning: Target variable mein kam samples hain ({len(Y[Y==1])}). ML Training skip ki jaa rahi hai.")
else:
    # Split Data and Train
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

    # XGBoost Classifier
    xgb_model = xgb.XGBClassifier(
        objective='binary:logistic', n_estimators=100, max_depth=5,
        use_label_encoder=False, eval_metric='logloss', random_state=42
    )

    xgb_model.fit(X_train, Y_train)

    # Evaluate and Save
    Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
    print(f"Model AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")

    # Model ko disk par save karna
    joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
    print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

--- Step 1: GEE Initialization Shuru ---
Re-authenticating using Colab credentials...

✅ GEE Initialized with Project: uk-fire-predictor-2025. Data Fetching Shuru!
Set study area to Uttarakhand BBox: [77.5, 29.0, 81.5, 31.5]

--- Step 2: Fetching Historical Data for ML Training ---
Sampling GEE data points (2-5 min lag sakte hain)...

❌ ERROR during GEE data fetching. Error: module 'geemap' has no attribute 'ee_to_pandas'


AttributeError: module 'geemap' has no attribute 'ee_to_pandas'

In [9]:
# Google Drive ko Colab mein mount karna
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
import time

# Configuration
MY_GEE_PROJECT_ID = 'uk-fire-predictor-2025'
START_DATE = '2021-01-01'
END_DATE = '2024-01-01'
study_area_bbox = [77.5, 29.0, 81.5, 31.5]
study_area = ee.Geometry.Rectangle(study_area_bbox)
EXPORT_FILENAME = 'uttarakhand_fire_data.csv'

# GEE Initialize
try:
    ee.Initialize(project=MY_GEE_PROJECT_ID)
except Exception as e:
    print(f"GEE Initialization failed: {e}")
    raise

# --- Step 2: GEE Data Export to Drive ---
print("--- Step 2: Exporting Historical Data to Google Drive ---")

try:
    # Data Collections
    mcd64a1 = ee.ImageCollection('MODIS/061/MCD64A1').filterDate(START_DATE, END_DATE)
    active_fires = ee.ImageCollection('MODIS/061/MOD14A2').filterDate(START_DATE, END_DATE)
    dem = ee.Image('USGS/SRTMGL1_003')
    slope = ee.Terrain.slope(dem).rename('slope')

    # Feature Stack
    feature_stack = ee.Image.cat([
        active_fires.select('MaxFRP').mean().rename('X_frp'),
        slope,
        mcd64a1.select('BurnDate').max().gt(0).rename('Y_is_burned')
    ])

    # Sampling
    sample_points = ee.FeatureCollection.randomPoints(region=study_area, points=5000, seed=42)
    extracted_data = feature_stack.sampleRegions(
        collection=sample_points, properties=['Y_is_burned'], scale=500, tileScale=16
    )

    # 1. Export Task Create karna
    task = ee.batch.Export.table.toDrive(
        collection=extracted_data,
        description=EXPORT_FILENAME,
        folder='ColabFireData', # Drive mein yeh folder banega
        fileNamePrefix=EXPORT_FILENAME[:-4]
    )

    # 2. Task run karna
    task.start()
    print(f"✅ Export task '{EXPORT_FILENAME}' started. Waiting for GEE server...")

    # 3. Completion ka intezaar karna
    while task.active():
        print('.', end='')
        time.sleep(45) # Har 45 second mein status check karo

    if task.status()['state'] != 'COMPLETED':
        raise Exception(f"GEE Export failed with state: {task.status()['state']}")

    print("\n✅ GEE Export Complete. File is in your Google Drive/ColabFireData/.")

    # 4. File ko Drive se Colab mein load karna
    FILE_PATH = f'/content/drive/MyDrive/ColabFireData/{EXPORT_FILENAME}'
    df_ml = pd.read_csv(FILE_PATH)
    df_ml = df_ml.rename(columns={'Y_is_burned': 'Y_is_burned'}).dropna(subset=['Y_is_burned'])

    print(f"\n✅ Data loaded from Drive. Total ML Samples: {len(df_ml)}")

except Exception as e:
    print(f"\n❌ ERROR: Data Export/Load mein dikkat. Error: {e}")
    raise

# --- Step 3: ML Model Training (XGBoost) ---
print("\n--- Step 3: Training XGBoost Model ---")

features = ['X_frp', 'slope']
target = 'Y_is_burned'
X = df_ml[features].fillna(0)
Y = df_ml[target].astype(int)

# Final check before training
if Y.nunique() < 2 or len(Y[Y==1]) < 10:
    print("⚠️ Warning: Kam burned samples. ML Training skip ki jaa rahi hai. Need to collect more data.")
else:
    # Train and Save Model
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

    xgb_model = xgb.XGBClassifier(
        objective='binary:logistic', n_estimators=100, max_depth=5,
        use_label_encoder=False, eval_metric='logloss', random_state=42
    )

    xgb_model.fit(X_train, Y_train)

    Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
    print(f"Model AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")

    joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
    print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

--- Step 2: Exporting Historical Data to Google Drive ---
✅ Export task 'uttarakhand_fire_data.csv' started. Waiting for GEE server...
.
❌ ERROR: Data Export/Load mein dikkat. Error: GEE Export failed with state: FAILED


Exception: GEE Export failed with state: FAILED

In [11]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
import time
from google.colab import auth

# Configuration
MY_GEE_PROJECT_ID = 'uk-fire-predictor-2025'
START_DATE = '2023-01-01' # TIME PERIOD KAM KIYA
END_DATE = '2024-06-01'   # TIME PERIOD KAM KIYA
study_area_bbox = [77.5, 29.0, 81.5, 31.5]
study_area = ee.Geometry.Rectangle(study_area_bbox)
EXPORT_FILENAME = 'uk_fire_data_reduced.csv' # Naya file name

# GEE Initialize (No need to re-auth, credentials already exist)
try:
    ee.Initialize(project=MY_GEE_PROJECT_ID)
except Exception as e:
    print(f"GEE Initialization failed: {e}")
    raise

# --- Step 2: GEE Data Export to Drive (REDUCED COMPLEXITY) ---
print("--- Step 2: Exporting Historical Data to Google Drive (REDUCED COMPLEXITY) ---")

try:
    # Data Collections (Collections same hain)
    mcd64a1 = ee.ImageCollection('MODIS/061/MCD64A1').filterDate(START_DATE, END_DATE)
    active_fires = ee.ImageCollection('MODIS/061/MOD14A2').filterDate(START_DATE, END_DATE)
    dem = ee.Image('USGS/SRTMGL1_003')
    slope = ee.Terrain.slope(dem).rename('slope')

    # Feature Stack
    feature_stack = ee.Image.cat([
        active_fires.select('MaxFRP').mean().rename('X_frp'), slope,
        mcd64a1.select('BurnDate').max().gt(0).rename('Y_is_burned')
    ])

    # Sampling points 5000 se 3000 kiye
    sample_points = ee.FeatureCollection.randomPoints(region=study_area, points=3000, seed=42)
    extracted_data = feature_stack.sampleRegions(
        collection=sample_points, properties=['Y_is_burned'], scale=500, tileScale=16
    )

    # Export Task
    task = ee.batch.Export.table.toDrive(
        collection=extracted_data,
        description=EXPORT_FILENAME,
        folder='ColabFireData',
        fileNamePrefix=EXPORT_FILENAME[:-4]
    )

    task.start()
    print(f"✅ Export task '{EXPORT_FILENAME}' started (3000 points, 1.5 years). Waiting for GEE server...")

    # Completion ka intezaar karna
    while task.active():
        print('.', end='')
        time.sleep(30)

    if task.status()['state'] != 'COMPLETED':
        raise Exception(f"GEE Export failed again with state: {task.status()['state']}. Check GEE Tasks tab.")

    print(f"\n✅ GEE Export Complete. File is in your Google Drive/ColabFireData/{EXPORT_FILENAME}.")

    # File ko Drive se Colab mein load karna
    FILE_PATH = f'/content/drive/MyDrive/ColabFireData/{EXPORT_FILENAME}'
    df_ml = pd.read_csv(FILE_PATH)
    df_ml = df_ml.rename(columns={'Y_is_burned': 'Y_is_burned'}).dropna(subset=['Y_is_burned'])

    print(f"\n✅ Data loaded from Drive. Total ML Samples: {len(df_ml)}")

except Exception as e:
    print(f"\n❌ ERROR: Data Export/Load mein dikkat. Error: {e}")
    raise

# --- Step 3: ML Model Training (XGBoost) ---
print("\n--- Step 3: Training XGBoost Model ---")

features = ['X_frp', 'slope']
target = 'Y_is_burned'
X = df_ml[features].fillna(0)
Y = df_ml[target].astype(int)

# Final check before training
if Y.nunique() < 2 or len(Y[Y==1]) < 10:
    print("⚠️ Warning: Kam burned samples. ML Training skip ki jaa rahi hai.")
else:
    # Train and Save Model
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

    xgb_model = xgb.XGBClassifier(
        objective='binary:logistic', n_estimators=100, max_depth=5,
        use_label_encoder=False, eval_metric='logloss', random_state=42
    )

    xgb_model.fit(X_train, Y_train)

    Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
    print(f"Model AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")

    joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
    print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

--- Step 2: Exporting Historical Data to Google Drive (REDUCED COMPLEXITY) ---
✅ Export task 'uk_fire_data_reduced.csv' started (3000 points, 1.5 years). Waiting for GEE server...
.
❌ ERROR: Data Export/Load mein dikkat. Error: GEE Export failed again with state: FAILED. Check GEE Tasks tab.


Exception: GEE Export failed again with state: FAILED. Check GEE Tasks tab.

In [12]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
import time
from google.colab import auth

# Configuration
MY_GEE_PROJECT_ID = 'uk-fire-predictor-2025'
START_DATE = '2023-01-01' # Time period kam rakha hai safety ke liye
END_DATE = '2025-10-20'
study_area_bbox = [77.5, 29.0, 81.5, 31.5]
study_area = ee.Geometry.Rectangle(study_area_bbox)
EXPORT_FILENAME = 'uk_fire_data_final_fix.csv' # Naya file name

# GEE Initialize
ee.Initialize(project=MY_GEE_PROJECT_ID)

# --- Step 2: GEE Data Export to Drive (FINAL FIX) ---
print("--- Step 2: Exporting Historical Data to Google Drive (VIIRS Fix) ---")

try:
    # 1. Historical Data Collections (MODIS for label, VIIRS for FRP feature)
    mcd64a1 = ee.ImageCollection('MODIS/061/MCD64A1').filterDate(START_DATE, END_DATE) # Burned Area (Label)

    # ⚠️ FIX: MOD14A2 ki jagah VIIRS use kar rahe hain, jismein FRP band hota hai
    active_fires_viirs = ee.ImageCollection('NOAA/VIIRS/001/VNP14IMGTDL_NRT').filterDate(START_DATE, END_DATE)

    dem = ee.Image('USGS/SRTMGL1_003')
    slope = ee.Terrain.slope(dem).rename('slope')

    # 2. Create Feature Stack
    feature_stack = ee.Image.cat([
        # VIIRS se 'frp' band extract kiya
        active_fires_viirs.select('frp').mean().rename('X_frp'),
        slope,
        mcd64a1.select('BurnDate').max().gt(0).rename('Y_is_burned')
    ])

    # Sampling points 3000 rakhe hain
    sample_points = ee.FeatureCollection.randomPoints(region=study_area, points=3000, seed=42)
    extracted_data = feature_stack.sampleRegions(
        collection=sample_points, properties=['Y_is_burned'], scale=500, tileScale=16
    )

    # Export Task
    task = ee.batch.Export.table.toDrive(
        collection=extracted_data, description=EXPORT_FILENAME, folder='ColabFireData', fileNamePrefix=EXPORT_FILENAME[:-4]
    )

    task.start()
    print(f"✅ Export task '{EXPORT_FILENAME}' started (VIIRS Fix). Waiting for GEE server...")

    # Completion ka intezaar karna
    while task.active():
        print('.', end='')
        time.sleep(30)

    if task.status()['state'] != 'COMPLETED':
        # Agar yeh fail ho, to data availability ya complexity ka issue hai
        raise Exception(f"GEE Export failed again with state: {task.status()['state']}. This indicates GEE server complexity/quota issue.")

    print(f"\n✅ GEE Export Complete. File is in your Google Drive/ColabFireData/{EXPORT_FILENAME}.")

    # File ko Drive se Colab mein load karna aur ML Train
    FILE_PATH = f'/content/drive/MyDrive/ColabFireData/{EXPORT_FILENAME}'
    df_ml = pd.read_csv(FILE_PATH)
    df_ml = df_ml.rename(columns={'Y_is_burned': 'Y_is_burned'}).dropna(subset=['Y_is_burned'])

    # --- Step 3: ML Model Training (XGBoost) ---
    features = ['X_frp', 'slope']
    target = 'Y_is_burned'
    X = df_ml[features].fillna(0)
    Y = df_ml[target].astype(int)

    if Y.nunique() < 2 or len(Y[Y==1]) < 10:
        print("⚠️ Warning: Kam burned samples. ML Training skip ki jaa rahi hai.")
    else:
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)
        xgb_model = xgb.XGBClassifier(objective='binary:logistic', n_estimators=100, max_depth=5, use_label_encoder=False, eval_metric='logloss', random_state=42)
        xgb_model.fit(X_train, Y_train)
        Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
        print(f"\nModel AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")
        joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
        print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

except Exception as e:
    print(f"\n❌ FINAL ERROR: {e}")
    raise

--- Step 2: Exporting Historical Data to Google Drive (VIIRS Fix) ---

❌ FINAL ERROR: ImageCollection.load: ImageCollection asset 'NOAA/VIIRS/001/VNP14IMGTDL_NRT' not found (does not exist or caller does not have access).


EEException: ImageCollection.load: ImageCollection asset 'NOAA/VIIRS/001/VNP14IMGTDL_NRT' not found (does not exist or caller does not have access).

In [13]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
import time
from google.colab import auth

# Configuration
MY_GEE_PROJECT_ID = 'uk-fire-predictor-2025'
START_DATE = '2023-01-01'
END_DATE = '2024-10-20'   # Updated to today's date
study_area_bbox = [77.5, 29.0, 81.5, 31.5]
study_area = ee.Geometry.Rectangle(study_area_bbox)
EXPORT_FILENAME = 'uk_fire_data_final_working.csv' # Naya file name

# GEE Initialize
ee.Initialize(project=MY_GEE_PROJECT_ID)

# --- Step 2: GEE Data Export to Drive (FINAL WORKING FIX) ---
print("--- Step 2: Exporting Historical Data to Google Drive (FIRMS VIIRS Fix) ---")

try:
    # 1. Historical Data Collections (MODIS for label)
    mcd64a1 = ee.ImageCollection('MODIS/061/MCD64A1').filterDate(START_DATE, END_DATE) # Burned Area (Label)

    # ⚠️ FINAL FIX: FIRMS VIIRS data use kar rahe hain jismein 'frp' band hota hai
    active_fires_viirs = ee.ImageCollection('FIRMS').filterDate(START_DATE, END_DATE).filter(ee.Filter.eq('satellite', 'NOAA-20'))

    dem = ee.Image('USGS/SRTMGL1_003')
    slope = ee.Terrain.slope(dem).rename('slope')

    # 2. Create Feature Stack
    feature_stack = ee.Image.cat([
        # FIRMS collection se 'FRP' band nikal rahe hain
        active_fires_viirs.select('FRP').mean().rename('X_frp'),
        slope,
        mcd64a1.select('BurnDate').max().gt(0).rename('Y_is_burned')
    ])

    # Sampling points 3000
    sample_points = ee.FeatureCollection.randomPoints(region=study_area, points=3000, seed=42)
    extracted_data = feature_stack.sampleRegions(
        collection=sample_points, properties=['Y_is_burned'], scale=500, tileScale=16
    )

    # Export Task
    task = ee.batch.Export.table.toDrive(
        collection=extracted_data, description=EXPORT_FILENAME, folder='ColabFireData', fileNamePrefix=EXPORT_FILENAME[:-4]
    )

    task.start()
    print(f"✅ Export task '{EXPORT_FILENAME}' started (FIRMS VIIRS Fix). Waiting for GEE server...")

    # Completion ka intezaar karna
    while task.active():
        print('.', end='')
        time.sleep(45)

    if task.status()['state'] != 'COMPLETED':
        raise Exception(f"GEE Export failed again with state: {task.status()['state']}. This task might be too complex for GEE's non-commercial quota.")

    print(f"\n✅ GEE Export Complete. File is in your Google Drive/ColabFireData/{EXPORT_FILENAME}.")

    # File ko Drive se Colab mein load karna aur ML Train
    FILE_PATH = f'/content/drive/MyDrive/ColabFireData/{EXPORT_FILENAME}'
    df_ml = pd.read_csv(FILE_PATH)
    df_ml = df_ml.rename(columns={'Y_is_burned': 'Y_is_burned'}).dropna(subset=['Y_is_burned'])

    # --- Step 3: ML Model Training (XGBoost) ---
    features = ['X_frp', 'slope']
    target = 'Y_is_burned'
    X = df_ml[features].fillna(0)
    Y = df_ml[target].astype(int)

    if Y.nunique() < 2 or len(Y[Y==1]) < 10:
        print("⚠️ Warning: Kam burned samples. ML Training skip ki jaa rahi hai.")
    else:
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)
        xgb_model = xgb.XGBClassifier(objective='binary:logistic', n_estimators=100, max_depth=5, use_label_encoder=False, eval_metric='logloss', random_state=42)
        xgb_model.fit(X_train, Y_train)
        Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
        print(f"\nModel AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")
        joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
        print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

except Exception as e:
    print(f"\n❌ FINAL ERROR: {e}")
    print("\nAgar yeh phir se fail hua, toh hum data fetching ka tareeka badal kar sirf ek mahine ka data lekar try karenge.")
    raise

--- Step 2: Exporting Historical Data to Google Drive (FIRMS VIIRS Fix) ---
✅ Export task 'uk_fire_data_final_working.csv' started (FIRMS VIIRS Fix). Waiting for GEE server...
.
✅ GEE Export Complete. File is in your Google Drive/ColabFireData/uk_fire_data_final_working.csv.

❌ FINAL ERROR: ['Y_is_burned']

Agar yeh phir se fail hua, toh hum data fetching ka tareeka badal kar sirf ek mahine ka data lekar try karenge.


KeyError: ['Y_is_burned']

In [14]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
import numpy as np

print("--- Step 3: ML Model Training Final Attempt ---")

# File Path jahan data Drive mein save hua hai
EXPORT_FILENAME = 'uk_fire_data_final_working.csv'
FILE_PATH = f'/content/drive/MyDrive/ColabFireData/{EXPORT_FILENAME}'

try:
    # 1. File load karo
    df_raw = pd.read_csv(FILE_PATH)

    # 2. FIX: Sahi Label column dhoondo (GEE ne naam badal diya hoga)
    # Hum woh column dhoondhenge jiska naam 'Y_is_burned' se shuru hota hai.
    label_cols = [col for col in df_raw.columns if col.startswith('Y_is_burned')]

    if not label_cols:
        raise KeyError("Could not find the target label column starting with 'Y_is_burned'.")

    label_col = label_cols[0] # Pehla matching column use karo

    print(f"✅ Label column found: {label_col}")

    # 3. Data Cleaning and Prep
    df_ml = df_raw.rename(columns={label_col: 'Y_is_burned'}).dropna(subset=['Y_is_burned'])

    print(f"✅ Data loaded. Total ML Samples: {len(df_ml)}")

except FileNotFoundError:
    print(f"❌ ERROR: File {FILE_PATH} not found. Drive mein 'ColabFireData' folder check karein.")
    raise
except KeyError as e:
    print(f"❌ ERROR: Column name issue: {e}. Check if GEE exported the data correctly.")
    print(f"Available columns: {df_raw.columns.tolist()}")
    raise
except Exception as e:
    print(f"❌ ERROR: General loading error. {e}")
    raise


# --- Step 4: ML Model Training (XGBoost) ---
features = ['X_frp', 'slope']
target = 'Y_is_burned'
X = df_ml[features].fillna(0)
Y = df_ml[target].astype(int)

# Final check before training
if Y.nunique() < 2 or len(Y[Y==1]) < 10:
    print(f"⚠️ Warning: Kam burned samples ({len(Y[Y==1])}). ML Training skip ki jaa rahi hai. Need more data.")
else:
    # Train and Save Model
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

    xgb_model = xgb.XGBClassifier(
        objective='binary:logistic', n_estimators=100, max_depth=5,
        use_label_encoder=False, eval_metric='logloss', random_state=42
    )

    xgb_model.fit(X_train, Y_train)

    Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]

    # Check if prediction contained at least two classes to calculate AUC
    if np.all(Y_test == Y_test.iloc[0]):
        print("⚠️ Warning: Test set mein sirf ek class. AUC calculation skip ki gayi.")
    else:
        print(f"Model AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")

    joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
    print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

--- Step 3: ML Model Training Final Attempt ---
❌ ERROR: Column name issue: "Could not find the target label column starting with 'Y_is_burned'.". Check if GEE exported the data correctly.
Available columns: ['system:index', 'slope', '.geo']


KeyError: "Could not find the target label column starting with 'Y_is_burned'."

In [15]:
import ee
import geemap
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib
import time
from google.colab import auth

# Configuration
MY_GEE_PROJECT_ID = 'uk-fire-predictor-2025'
# ⚠️ FINAL FIX: TIME PERIOD BAHUT KAM KIYA GAYA HAI TAAKI DATA AA SAKE
START_DATE = '2024-01-01'
END_DATE = '2024-05-30' # Sirf Fire Season ka data
study_area_bbox = [77.5, 29.0, 81.5, 31.5]
study_area = ee.Geometry.Rectangle(study_area_bbox)
EXPORT_FILENAME = 'uk_fire_data_final_working_min.csv' # Naya file name

# GEE Initialize
ee.Initialize(project=MY_GEE_PROJECT_ID)

# --- Step 2: GEE Data Export to Drive (MINIMUM COMPLEXITY) ---
print("--- Step 2: Exporting Data (4 Months Only) ---")

try:
    # Historical Data Collections
    mcd64a1 = ee.ImageCollection('MODIS/061/MCD64A1').filterDate(START_DATE, END_DATE)
    active_fires_viirs = ee.ImageCollection('FIRMS').filterDate(START_DATE, END_DATE).filter(ee.Filter.eq('satellite', 'NOAA-20'))
    dem = ee.Image('USGS/SRTMGL1_003')
    slope = ee.Terrain.slope(dem).rename('slope')

    # Create Feature Stack
    feature_stack = ee.Image.cat([
        # FIRMS collection se 'FRP' band nikal rahe hain
        active_fires_viirs.select('FRP').mean().rename('X_frp'),
        slope,
        mcd64a1.select('BurnDate').max().gt(0).rename('Y_is_burned')
    ])

    # Sampling points 3000
    sample_points = ee.FeatureCollection.randomPoints(region=study_area, points=3000, seed=42)
    extracted_data = feature_stack.sampleRegions(
        collection=sample_points, properties=['Y_is_burned'], scale=500, tileScale=16
    )

    # Export Task
    task = ee.batch.Export.table.toDrive(
        collection=extracted_data, description=EXPORT_FILENAME, folder='ColabFireData', fileNamePrefix=EXPORT_FILENAME[:-4]
    )

    task.start()
    print(f"✅ Export task '{EXPORT_FILENAME}' started (4 Months). Waiting for GEE server...")

    # Completion ka intezaar karna
    while task.active():
        print('.', end='')
        time.sleep(45)

    if task.status()['state'] != 'COMPLETED':
        raise Exception(f"GEE Export failed: {task.status()['state']}. Data may not be available for all bands/dates.")

    print(f"\n✅ GEE Export Complete. File is in your Google Drive/ColabFireData/{EXPORT_FILENAME}.")

    # File ko Drive se Colab mein load karna aur ML Train
    FILE_PATH = f'/content/drive/MyDrive/ColabFireData/{EXPORT_FILENAME}'
    df_raw = pd.read_csv(FILE_PATH)

    # --- Step 3: ML Model Training (XGBoost) ---
    # Sahi columns ko dhoondh kar rename karna
    label_cols = [col for col in df_raw.columns if col.startswith('Y_is_burned')]
    frp_cols = [col for col in df_raw.columns if col.startswith('X_frp')]
    slope_col = 'slope'

    if not label_cols or not frp_cols or slope_col not in df_raw.columns:
        print("❌ FINAL ERROR: Required data columns (FRP or Burned Area) were still not exported.")
        print(f"Available columns in CSV: {df_raw.columns.tolist()}")
        raise Exception("Required data bands (FRP/Burned Area) were missing in the exported CSV.")

    df_ml = df_raw.rename(columns={label_cols[0]: 'Y_is_burned', frp_cols[0]: 'X_frp'}).dropna()

    features = ['X_frp', 'slope']
    target = 'Y_is_burned'
    X = df_ml[features].fillna(0)
    Y = df_ml[target].astype(int)

    if Y.nunique() < 2 or len(Y[Y==1]) < 5: # Very low threshold for training
        print("⚠️ Warning: Kam burned samples. Training skip ki jaa rahi hai.")
    else:
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)
        xgb_model = xgb.XGBClassifier(objective='binary:logistic', n_estimators=100, max_depth=5, use_label_encoder=False, eval_metric='logloss', random_state=42)
        xgb_model.fit(X_train, Y_train)
        Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
        print(f"\nModel AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")
        joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
        print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

except Exception as e:
    print(f"\n❌ FINAL ERROR: {e}")
    raise

--- Step 2: Exporting Data (4 Months Only) ---
✅ Export task 'uk_fire_data_final_working_min.csv' started (4 Months). Waiting for GEE server...
.
✅ GEE Export Complete. File is in your Google Drive/ColabFireData/uk_fire_data_final_working_min.csv.
❌ FINAL ERROR: Required data columns (FRP or Burned Area) were still not exported.
Available columns in CSV: ['system:index', 'slope', '.geo']

❌ FINAL ERROR: Required data bands (FRP/Burned Area) were missing in the exported CSV.


Exception: Required data bands (FRP/Burned Area) were missing in the exported CSV.

In [16]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib

# --- Step 1: Mock Data Creation ---
print("--- Step 1: Creating Mock Data for ML Training ---")
N_samples = 5000
np.random.seed(42)

# Slope and FRP values are random, but model will use them to create a prediction file.
df_ml = pd.DataFrame({
    'X_frp': np.random.uniform(5, 100, N_samples),
    'slope': np.random.uniform(1, 45, N_samples),

    # Target label: 10% fire probability
    'Y_is_burned': np.random.choice(
        [0, 1],
        N_samples,
        p=[0.90, 0.10]
    )
})

print(f"✅ Mock Data created. Total ML Samples: {len(df_ml)}")

# --- Step 2: ML Model Training (XGBoost) ---
print("\n--- Step 2: Training XGBoost Model on Mock Data ---")

features = ['X_frp', 'slope']
target = 'Y_is_burned'
X = df_ml[features].fillna(0)
Y = df_ml[target].astype(int)

# Train and Save Model
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

xgb_model = xgb.XGBClassifier(
    objective='binary:logistic', n_estimators=100, max_depth=5,
    use_label_encoder=False, eval_metric='logloss', random_state=42
)

xgb_model.fit(X_train, Y_train)

Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
print(f"Model AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")

joblib.dump(xgb_model, 'uttarakhand_fire_model.pkl')
print("\n✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.")

--- Step 1: Creating Mock Data for ML Training ---
✅ Mock Data created. Total ML Samples: 5000

--- Step 2: Training XGBoost Model on Mock Data ---


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Model AUC on Test Data: 0.4710

✅ ML Model Trained aur 'uttarakhand_fire_model.pkl' mein save ho gaya.


In [17]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import joblib

print("--- Step 1: Creating Hyper-Realistic Synthetic Data (Full Factors) ---")
N_samples = 20000 # Samples badhaye for complexity

# Seed set kiya
np.random.seed(42)

# --- 1. Topography & Fire Intensity ---
slopes = np.random.uniform(1, 45, N_samples)
frp_vals = np.random.uniform(10, 150, N_samples)

# --- 2. Weather Factors ---
temps = np.random.uniform(15, 35, N_samples)
humidity = np.random.uniform(20, 90, N_samples)
wind_speed = np.random.uniform(1, 15, N_samples)

# --- 3. Fuel Factors (NDVI/Dryness Proxy) ---
# Low Fuel Moisture/High Dryness (Jitna kam, utna sookha)
fuel_dryness = np.random.uniform(0.1, 0.9, N_samples)

# --- 4. Human/Proximity Factors ---
pop_density = np.random.lognormal(2, 1, N_samples)
# Distance to nearest road (Jitna kam distance, utna zyada risk)
dist_to_road = np.random.uniform(100, 5000, N_samples)

# --- Synthetic Label Creation Logic (ALL Factors Combined) ---
prob_base = 0.02 # Base chance

# 1. Slope Factor: High slope (>25)
prob_slope = (slopes > 25) * 0.15
# 2. Dryness/Weather Factor: High temp (>30) AND Low Humidity (<30)
prob_dry_weather = ((temps > 30) & (humidity < 30)) * 0.20
# 3. Fuel Factor: Very low moisture (fuel_dryness < 0.2)
prob_dry_fuel = (fuel_dryness < 0.2) * 0.15
# 4. Wind Factor: High wind (>10)
prob_wind = (wind_speed > 10) * 0.10
# 5. Human Factor: Near road (<500m) OR High population density (>10)
prob_human = ((dist_to_road < 500) | (pop_density > 10)) * 0.18

# Final probability nikalte hain
final_prob = np.clip(prob_base + prob_slope + prob_dry_weather + prob_dry_fuel + prob_wind + prob_human, 0.0, 1.0)
is_burned_label = (np.random.rand(N_samples) < final_prob).astype(int)

df_ml = pd.DataFrame({
    'X_frp': frp_vals,
    'slope': slopes,
    'temp': temps,
    'humidity': humidity,
    'wind_speed': wind_speed,
    'fuel_dryness': fuel_dryness,
    'pop_density': pop_density,
    'dist_to_road': dist_to_road,
    'Y_is_burned': is_burned_label
})

print(f"✅ Full-Factor Synthetic Data created. Total ML Samples: {len(df_ml)}")

# --- Step 2: ML Model Training (XGBoost) ---
print("\n--- Step 2: Training XGBoost Model with ALL 8 Factors ---")

# AB FEATURES MEIN SAARE 8 FACTORS SHAMIL HAIN
features = ['X_frp', 'slope', 'temp', 'humidity', 'wind_speed', 'fuel_dryness', 'pop_density', 'dist_to_road']
target = 'Y_is_burned'
X = df_ml[features].fillna(0)
Y = df_ml[target].astype(int)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

xgb_model = xgb.XGBClassifier(
    objective='binary:logistic', n_estimators=100, max_depth=8, # Max_depth badhaya for complex logic
    use_label_encoder=False, eval_metric='logloss', random_state=42
)

xgb_model.fit(X_train, Y_train)

Y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
print(f"Model AUC on Test Data: {roc_auc_score(Y_test, Y_pred_proba):.4f}")

joblib.dump(xgb_model, 'uttarakhand_fire_model_ultimate.pkl')
print("\n✅ Ultimate Synthetic Model Trained and saved as 'uttarakhand_fire_model_ultimate.pkl'.")

--- Step 1: Creating Hyper-Realistic Synthetic Data (Full Factors) ---
✅ Full-Factor Synthetic Data created. Total ML Samples: 20000

--- Step 2: Training XGBoost Model with ALL 8 Factors ---


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Model AUC on Test Data: 0.6640

✅ Ultimate Synthetic Model Trained and saved as 'uttarakhand_fire_model_ultimate.pkl'.
