In [1]:
import pandas as pd
import requests
from tqdm import tqdm
import time
import math

# --- STEP 1: Read the raw dataset from Google Drive ---
df = pd.read_csv("/content/drive/MyDrive/paldendataset.csv")

# --- STEP 2: Keep only the calendar date of each acquisition ---
# Take the first whitespace-separated token of acq_date, parse it as
# YYYY-MM-DD (unparseable values become NaT) and drop the rows that failed.
date_tokens = df['acq_date'].astype(str).str.split().str[0]
df['acq_date'] = pd.to_datetime(date_tokens, format='%Y-%m-%d', errors='coerce')
df = df.dropna(subset=['acq_date']).reset_index(drop=True)

# --- STEP 3: Round coordinates to one decimal place before querying the API ---
for coord_col in ('latitude', 'longitude'):
    df[coord_col] = df[coord_col].round(1)

# --- STEP 4: Add empty placeholder columns that the enrichment loop fills ---
for new_col in ('temperature', 'humidity', 'wind_speed',
                'precipitation', 'elevation', 'vpd'):
    df[new_col] = None

# --- STEP 5: NASA POWER daily weather fetch ---
def get_weather(lat, lon, date):
    """Fetch one day of NASA POWER weather for a single point.

    Args:
        lat: Latitude, sent as the ``latitude`` query parameter.
        lon: Longitude, sent as the ``longitude`` query parameter.
        date: Any object with ``strftime`` (the enrichment loop passes the
            row's ``acq_date``); the same day is used as ``start`` and ``end``.

    Returns:
        A dict with keys ``temperature`` (T2M), ``humidity`` (RH2M),
        ``wind_speed`` (WS2M) and ``precipitation`` (PRECTOTCORR). A value is
        ``None`` when the response has no entry for the day or carries the
        API's missing-data sentinel. Returns ``None`` when the request raises,
        the HTTP status is not 200, or the payload is malformed.
    """
    base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
    key = date.strftime('%Y%m%d')
    params = {
        "parameters": "T2M,RH2M,WS2M,PRECTOTCORR",
        "community": "AG",
        "longitude": lon,
        "latitude": lat,
        "start": key,
        "end": key,
        "format": "JSON"
    }

    try:
        response = requests.get(base_url, params=params, timeout=10)
        if response.status_code != 200:
            # Report the failure instead of dropping it silently.
            print(f"Weather fetch error for ({lat},{lon}) on {key}: HTTP {response.status_code}")
            return None

        data = response.json()
        param = data["properties"]["parameter"]

        # POWER marks missing observations with a sentinel instead of omitting
        # them. The sentinel is advertised in header.fill_value and defaults
        # to -999; passing it through would poison downstream maths (e.g. VPD).
        header = data.get("header")
        fill_value = header.get("fill_value", -999.0) if isinstance(header, dict) else -999.0

        def _value_or_none(name):
            value = param[name].get(key)
            if value is None or value == fill_value or value == -999.0:
                return None
            return value

        return {
            "temperature": _value_or_none("T2M"),
            "humidity": _value_or_none("RH2M"),
            "wind_speed": _value_or_none("WS2M"),
            "precipitation": _value_or_none("PRECTOTCORR")
        }
    except Exception as e:
        print("Weather fetch error:", e)
    return None


# --- STEP 6: NASA POWER elevation fetch (metadata endpoint) ---
def get_elevation(lat, lon):
    """Look up the elevation of a point via the NASA POWER point endpoint.

    Args:
        lat: Latitude, sent as the ``latitude`` query parameter.
        lon: Longitude, sent as the ``longitude`` query parameter.

    Returns:
        Whatever the response stores under
        ``features[0].properties.parameter.ELEVATION``, or ``None`` when the
        request raises, the HTTP status is not 200, or the payload does not
        have that shape. Every failure is reported on stdout together with
        the coordinates that caused it.
    """
    # NOTE(review): confirm that this endpoint serves ELEVATION for a point
    # query; a non-200 reply used to be swallowed without any message.
    base_url = "https://power.larc.nasa.gov/api/point"
    params = {
        "parameters": "ELEVATION",
        "community": "AG",
        "longitude": lon,
        "latitude": lat,
        "format": "JSON"
    }

    try:
        response = requests.get(base_url, params=params, timeout=10)
        if response.status_code != 200:
            print(f"Elevation fetch error for ({lat},{lon}): HTTP {response.status_code}")
            return None
        data = response.json()
        return data["features"][0]["properties"]["parameter"]["ELEVATION"]
    except Exception as e:
        print(f"Elevation fetch error for ({lat},{lon}): {e}")
    return None


# --- STEP 7: Calculate VPD ---
def calc_vpd(temp_c, rh):
    """Compute the vapour-pressure deficit from temperature and humidity.

    Args:
        temp_c: Air temperature; used as degrees Celsius in the formula below.
        rh: Relative humidity in percent (divided by 100 before use).

    Returns:
        Saturation minus actual vapour pressure (kPa), rounded to three
        decimals, or ``None`` when either input is missing.
    """
    if not (pd.notna(temp_c) and pd.notna(rh)):
        return None

    exponent = (17.27 * temp_c) / (temp_c + 237.3)
    saturation_kpa = 0.6108 * math.exp(exponent)   # saturation vapour pressure
    actual_kpa = saturation_kpa * (rh / 100.0)     # actual vapour pressure
    deficit = saturation_kpa - actual_kpa
    return round(deficit, 3)


# --- STEP 8: Enrichment loop ---
# Look-ups are memoised so rows that share a rounded coordinate (elevation) or
# a coordinate + day (weather) reuse the earlier answer instead of issuing a
# new request. Previously the elevation guard was true for every fresh row, so
# elevation was fetched once per row rather than once per coordinate.
# Failed look-ups are not cached, so a later row can retry them.
elevation_cache = {}
weather_cache = {}

for idx, row in tqdm(df.iterrows(), total=len(df)):
    lat, lon, date = row['latitude'], row['longitude'], row['acq_date']
    made_request = False

    # Weather (daily)
    weather_key = (lat, lon, date)
    weather = weather_cache.get(weather_key)
    if weather is None:
        weather = get_weather(lat, lon, date)
        made_request = True
        if weather:
            weather_cache[weather_key] = weather
    if weather:
        df.at[idx, 'temperature'] = weather["temperature"]
        df.at[idx, 'humidity'] = weather["humidity"]
        df.at[idx, 'wind_speed'] = weather["wind_speed"]
        df.at[idx, 'precipitation'] = weather["precipitation"]

    # Elevation (only once per coordinate)
    if pd.isna(df.at[idx, 'elevation']):
        coord_key = (lat, lon)
        if coord_key not in elevation_cache:
            elev = get_elevation(lat, lon)
            made_request = True
            if elev is not None:
                elevation_cache[coord_key] = elev
        df.at[idx, 'elevation'] = elevation_cache.get(coord_key)

    # Compute VPD
    if weather and weather["temperature"] is not None and weather["humidity"] is not None:
        df.at[idx, 'vpd'] = calc_vpd(weather["temperature"], weather["humidity"])

    # Respect the API rate limit; no pause is needed when both values came
    # from the caches and nothing was sent over the network.
    if made_request:
        time.sleep(0.4)

# --- STEP 9: Save enriched data ---
df.to_csv("fire_dataset_enriched_weather_final.csv", index=False)
print("✅ Enrichment complete. Saved as 'fire_dataset_enriched_weather_final.csv'")

100%|██████████| 10000/10000 [6:25:07<00:00,  2.31s/it]

✅ Enrichment complete. Saved as 'fire_dataset_enriched_weather_final.csv'





In [2]:
import pandas as pd
import requests
from tqdm import tqdm
import time
import math
from google.colab import drive

# --- Mount Google Drive so the dataset path below is reachable ---
drive.mount('/content/drive')

# --- STEP 1: Read the raw dataset from Google Drive ---
df = pd.read_csv("/content/drive/MyDrive/paldendataset.csv")

# --- STEP 2: Keep only the calendar date of each acquisition ---
# Take the first whitespace-separated token of acq_date, parse it as
# YYYY-MM-DD (unparseable values become NaT) and drop the rows that failed.
date_tokens = df['acq_date'].astype(str).str.split().str[0]
df['acq_date'] = pd.to_datetime(date_tokens, format='%Y-%m-%d', errors='coerce')
df = df.dropna(subset=['acq_date']).reset_index(drop=True)

# --- STEP 3: Round coordinates to one decimal place before querying the API ---
for coord_col in ('latitude', 'longitude'):
    df[coord_col] = df[coord_col].round(1)

# --- STEP 4: Add empty placeholder columns that the enrichment loop fills ---
for new_col in ('temperature', 'humidity', 'wind_speed',
                'precipitation', 'elevation', 'vpd'):
    df[new_col] = None


# --- STEP 5: NASA POWER daily weather fetch ---
def get_weather(lat, lon, date):
    """Fetch one day of NASA POWER weather for a single point.

    Args:
        lat: Latitude, sent as the ``latitude`` query parameter.
        lon: Longitude, sent as the ``longitude`` query parameter.
        date: Any object with ``strftime`` (the enrichment loop passes the
            row's ``acq_date``); the same day is used as ``start`` and ``end``.

    Returns:
        A dict with keys ``temperature`` (T2M), ``humidity`` (RH2M),
        ``wind_speed`` (WS2M) and ``precipitation`` (PRECTOTCORR). A value is
        ``None`` when the response has no entry for the day or carries the
        API's missing-data sentinel. Returns ``None`` when the request raises,
        the HTTP status is not 200, or the payload is malformed.
    """
    base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
    key = date.strftime('%Y%m%d')
    params = {
        "parameters": "T2M,RH2M,WS2M,PRECTOTCORR",
        "community": "AG",
        "longitude": lon,
        "latitude": lat,
        "start": key,
        "end": key,
        "format": "JSON"
    }

    try:
        response = requests.get(base_url, params=params, timeout=10)
        if response.status_code != 200:
            # Report the failure instead of dropping it silently.
            print(f"Weather fetch error for ({lat},{lon}) on {key}: HTTP {response.status_code}")
            return None

        data = response.json()
        param = data["properties"]["parameter"]

        # POWER marks missing observations with a sentinel instead of omitting
        # them. The sentinel is advertised in header.fill_value and defaults
        # to -999; passing it through would poison downstream maths (e.g. VPD).
        header = data.get("header")
        fill_value = header.get("fill_value", -999.0) if isinstance(header, dict) else -999.0

        def _value_or_none(name):
            value = param[name].get(key)
            if value is None or value == fill_value or value == -999.0:
                return None
            return value

        return {
            "temperature": _value_or_none("T2M"),
            "humidity": _value_or_none("RH2M"),
            "wind_speed": _value_or_none("WS2M"),
            "precipitation": _value_or_none("PRECTOTCORR")
        }
    except Exception as e:
        print("Weather fetch error:", e)
    return None


# --- STEP 6: Elevation fetch (Open-Elevation API) ---
def get_elevation(lat, lon):
    """Look up the elevation of a point through the Open-Elevation API.

    Args:
        lat: Latitude placed first in the ``locations`` query value.
        lon: Longitude placed second in the ``locations`` query value.

    Returns:
        The ``elevation`` field of the first entry in the response's
        ``results`` list, or ``None`` if the request, the HTTP status check or
        the payload parsing raises (the error is printed with the coordinates).
    """
    try:
        endpoint = f"https://api.open-elevation.com/api/v1/lookup?locations={lat},{lon}"
        reply = requests.get(endpoint, timeout=10)
        # Turn HTTP error statuses into exceptions so they reach the handler.
        reply.raise_for_status()
        first_hit = reply.json()['results'][0]
        return first_hit['elevation']
    except Exception as e:
        print(f"Elevation fetch error for ({lat},{lon}): {e}")
        return None


# --- STEP 7: Calculate VPD ---
def calc_vpd(temp_c, rh):
    """Compute the vapour-pressure deficit from temperature and humidity.

    Args:
        temp_c: Air temperature; used as degrees Celsius in the formula below.
        rh: Relative humidity in percent (divided by 100 before use).

    Returns:
        Saturation minus actual vapour pressure (kPa), rounded to three
        decimals, or ``None`` when either input is missing.
    """
    if not (pd.notna(temp_c) and pd.notna(rh)):
        return None

    exponent = (17.27 * temp_c) / (temp_c + 237.3)
    saturation_kpa = 0.6108 * math.exp(exponent)   # saturation vapour pressure
    actual_kpa = saturation_kpa * (rh / 100.0)     # actual vapour pressure
    deficit = saturation_kpa - actual_kpa
    return round(deficit, 3)


# --- STEP 8: Enrichment loop ---
# Look-ups are memoised so rows that share a rounded coordinate (elevation) or
# a coordinate + day (weather) reuse the earlier answer instead of issuing a
# new request. Previously the elevation guard was true for every fresh row, so
# elevation was fetched once per row rather than once per coordinate.
# Failed look-ups are not cached, so a later row can retry them.
elevation_cache = {}
weather_cache = {}

for idx, row in tqdm(df.iterrows(), total=len(df), desc="Enriching dataset"):
    lat, lon, date = row['latitude'], row['longitude'], row['acq_date']
    made_request = False

    # Fetch weather (reuse the cached answer for a repeated coordinate + day)
    weather_key = (lat, lon, date)
    weather = weather_cache.get(weather_key)
    if weather is None:
        weather = get_weather(lat, lon, date)
        made_request = True
        if weather:
            weather_cache[weather_key] = weather
    if weather:
        df.at[idx, 'temperature'] = weather["temperature"]
        df.at[idx, 'humidity'] = weather["humidity"]
        df.at[idx, 'wind_speed'] = weather["wind_speed"]
        df.at[idx, 'precipitation'] = weather["precipitation"]

    # Fetch elevation (one request per distinct coordinate)
    if pd.isna(df.at[idx, 'elevation']):
        coord_key = (lat, lon)
        if coord_key not in elevation_cache:
            elev = get_elevation(lat, lon)
            made_request = True
            if elev is not None:
                elevation_cache[coord_key] = elev
        df.at[idx, 'elevation'] = elevation_cache.get(coord_key)

    # Compute VPD
    if weather and weather["temperature"] is not None and weather["humidity"] is not None:
        df.at[idx, 'vpd'] = calc_vpd(weather["temperature"], weather["humidity"])

    # Respect the API rate limit; no pause is needed when both values came
    # from the caches and nothing was sent over the network.
    if made_request:
        time.sleep(0.5)


# --- STEP 9: Save enriched dataset in Google Drive ---
save_path = "/content/drive/MyDrive/fire_dataset_enriched_weather_final.csv"
df.to_csv(save_path, index=False)
print(f"✅ Enrichment complete. Saved to: {save_path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Enriching dataset: 100%|██████████| 10000/10000 [5:52:59<00:00,  2.12s/it]

✅ Enrichment complete. Saved to: /content/drive/MyDrive/fire_dataset_enriched_weather_final.csv



