# Data Sources for Final Project

Data was collected from the following sources:
- Fire detection areas
    - NASA Fire Information for Resource Management System: https://firms.modaps.eosdis.nasa.gov/usfs/active_fire/
    - Automated retrieval of files in the 2015-2025 time range (code is given below for implementation)
- General forest related data
    - Global Forest Watch tree cover and fire alert data downloaded from dashboards: https://www.globalforestwatch.org/
- Weather data
    - Climate Data Store API (code is given below for implementation)

## Fire Detection Areas (FIRMS)

In [None]:
# Import necessary packages
import os
import pandas as pd
import requests
from io import StringIO

os.makedirs("firms_raw", exist_ok=True)

# Bounding box for Colorado (decimal degrees)
lat_min, lat_max = 36.992426, 41.003444
lon_min, lon_max = -109.060253, -102.041524

# Seconds to wait on a stalled connection before giving up on a file
REQUEST_TIMEOUT = 120

all_data = []


def clip_to_colorado(df):
    """Return the rows of *df* that fall inside the Colorado bounding box.

    *df* must have 'latitude' and 'longitude' columns; both box edges are
    inclusive.
    """
    inside = (
        df['latitude'].between(lat_min, lat_max)
        & df['longitude'].between(lon_min, lon_max)
    )
    return df[inside]


def fetch_colorado_detections(url, filename, missing_msg):
    """Download one FIRMS CSV and return its Colorado rows.

    Column names are lower-cased and 'acq_date' (when present) is parsed to
    datetime before clipping. Returns None, after printing the reason, when
    the response is not HTTP 200, is too short to hold data, or the download
    or parsing raises, so a single bad file does not abort the whole run.
    """
    try:
        r = requests.get(url, timeout=REQUEST_TIMEOUT)
        if r.status_code != 200 or len(r.text) <= 100:
            print(missing_msg)
            return None
        df = pd.read_csv(StringIO(r.text))
        df.columns = df.columns.str.lower()
        if 'acq_date' in df.columns:
            df['acq_date'] = pd.to_datetime(df['acq_date'])
        return clip_to_colorado(df)
    except Exception as e:
        # Best-effort download: report which file failed and why, then move on
        print(f"Fetch error {filename}: {e}")
        return None


def combine_detections(frames):
    """Concatenate per-file frames into one table of the relevant columns.

    Keeps only the columns of interest that are actually present, drops rows
    whose confidence is the string 'l', and stores confidence as text so the
    column has a single type when written to Parquet.
    """
    combined = pd.concat(frames, ignore_index=True)
    wanted = ['latitude', 'longitude', 'acq_date', 'frp', 'confidence', 'daynight']
    combined = combined[[c for c in wanted if c in combined.columns]]

    if 'confidence' in combined.columns:
        # NOTE(review): presumably MODIS confidence is numeric while VIIRS uses
        # letter classes, so only VIIRS rows can match 'l' here -- confirm.
        # .copy() makes the filtered frame independent before assigning to it.
        combined = combined[combined['confidence'] != 'l'].copy()
        combined['confidence'] = combined['confidence'].astype(str)
    return combined


# MODIS data for the years before the VIIRS files start in 2018.
# The file name depends only on the year, so each file is fetched exactly once;
# requesting it once per month would append the same records twelve times.
for year in range(2015, 2018):
    filename = f"modis_{year}_United_States.csv"
    url = f"https://firms.modaps.eosdis.nasa.gov/data/country/MODIS_C6_1/USA/{year}/{filename}"
    df_co = fetch_colorado_detections(url, filename, "No file found")
    if df_co is not None:
        all_data.append(df_co)

# VIIRS NOAA-20 2018–2025 (one file per year)
for year in range(2018, 2026):
    filename = f"viirs-jpss1_{year}_United_States.csv"
    url = f"https://firms.modaps.eosdis.nasa.gov/data/country/VIIRS_NOAA20_NRT/USA/{year}/{filename}"
    df_co = fetch_colorado_detections(url, filename, "VIIRS file missing")
    if df_co is not None:
        all_data.append(df_co)

# Combine all
if all_data:
    df_all = combine_detections(all_data)

    # Save to a Parquet
    df_all.to_parquet("firms_colorado_2015_2025.parquet", index=False)
    print("Final Colorado dataset saved:", df_all.shape)
else:
    print("No data found.")




VIIRS file missing
Final Colorado dataset saved: (84110, 6)


## Global Forest Watch
Two CSV files were downloaded from the online dashboard: one for tree cover loss from fires in the period 2015-2025, and one for VIIRS fire alerts during the same period.

## Climate Data Store

In [None]:
#%pip install cdsapi
import cdsapi

# One client is enough; creating two only repeats the login/announcement logs.
client = cdsapi.Client()
c = client  # short alias kept in case later cells refer to `c`

dataset = "reanalysis-era5-land"
request = {
    "variable": [
        "soil_temperature_level_1",
        "snowmelt",
        "volumetric_soil_water_layer_1",
        "surface_net_solar_radiation",
        "leaf_area_index_high_vegetation",
    ],
    "year": "2015",
    "month": "08",
    # Every day of the month ("01".."31") and every hour ("00:00".."23:00")
    "day": [f"{day:02d}" for day in range(1, 32)],
    "time": [f"{hour:02d}:00" for hour in range(24)],
    "data_format": "grib",
    "download_format": "unarchived",
    # Sub-region as [North, West, South, East] in degrees. South must be
    # +36 (36°N); -36 would stretch the request down to 36°S.
    "area": [41, -109, 36, -102],
}

# Submits the request, waits for it to finish and downloads the result;
# as the last expression in the cell, the downloaded file name is displayed.
client.retrieve(dataset, request).download()

2025-09-08 16:27:48,872 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2025-09-08 16:27:49,377 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2025-09-08 16:27:49,847 INFO Request ID is 40470ad6-4e81-409b-b888-1997ecb7bcb4
2025-09-08 16:27:51,011 INFO status has been updated to accepted
2025-09-08 16:36:13,419 INFO status has been updated to successful


cad0674beb219880390648a089128796.grib:   0%|          | 0.00/143M [00:00<?, ?B/s]

'cad0674beb219880390648a089128796.grib'