<a href="https://colab.research.google.com/github/ankshah131/localsolve-open/blob/main/wildfires/la_wildfires/Ingest_FIRMS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
# Set the FIRMS map key that was emailed to you. It should look something like
# 'abcdef1234567890abcdef1234567890'.
# NOTE(review): this looks like a real credential committed to source control;
# consider loading it from an environment variable or secrets store instead.
MAP_KEY = 'd62477789af77695549777c63ccf5c76'

# Query the key-status endpoint to see how many transactions have been used.
import pandas as pd
url = 'https://firms.modaps.eosdis.nasa.gov/mapserver/mapkey_status/?MAP_KEY=' + MAP_KEY
try:
  # Only the network/parse call is guarded, so unrelated failures are not
  # misreported as a query problem.
  df = pd.read_json(url, typ='series')
except Exception as exc:
  # Possible causes: wrong MAP_KEY value (extra quotes, missing letters) or an
  # unreachable server. `Exception` (not a bare except) lets Ctrl-C through.
  print ("There is an issue with the query. \nTry in your browser: %s" % url)
  print("Underlying error: %r" % (exc,))
else:
  display(df)

Unnamed: 0,0
transaction_limit,5000
current_transactions,0
transaction_interval,10 minutes


In [155]:
import datetime
from datetime import date
import logging
import os
import time
from tempfile import mkdtemp
from typing import List, Tuple
import geopandas as gpd
import pandas as pd
import urllib
import shapely
from shapely.geometry import Polygon

# Root logging configuration: timestamped lines with a left-aligned, padded
# level name, emitting INFO and above.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)-8s %(message)s",
)

# Logger named after this module.
logger = logging.getLogger(__name__)


class Settings:
    """Configuration values for the FIRMS ingest script.

    Every value is a class attribute evaluated once, when the class body runs.
    """

    # Directory the output shapefile path is built from.
    temp_dir: str = './Shapefiles/'

    # Date captured when the class is defined; it stamps the output filename.
    today = date.today()
    output_shp_filename = f'FIRMS_{today}.shp'

    # WFS endpoint template; contains a '<country>' placeholder.
    firms_wfs_url_prefix: str = 'https://firms.modaps.eosdis.nasa.gov/mapserver/wfs/<country>'

    # WFS query string template; contains a '<satellite>' placeholder.
    # NOTE(review): COUNT=1000 presumably caps the rows returned per request --
    # confirm against the FIRMS WFS documentation.
    firms_wfs_url_suffix: str = (
        '?SERVICE=WFS'
        '&REQUEST=GetFeature'
        '&VERSION=2.0.0'
        '&TYPENAME=ms:fires_<satellite>_24hrs'
        '&STARTINDEX=0'
        '&COUNT=1000'
        '&SRSNAME=urn:ogc:def:crs:EPSG::4326'
        '&BBOX=-90,-180,90,180,urn:ogc:def:crs:EPSG::4326'
        '&outputformat=csv'
    )

    # Pause, in seconds, between consecutive WFS requests.
    firms_wfs_request_sleep_secs: int = 5

    # NOTE(review): hard-coded API key committed to source -- consider moving
    # it to an environment variable or secrets store.
    firms_api_map_keys: str = 'd62477789af77695549777c63ccf5c76'


# Shared settings instance.
config = Settings()


def ingest(user_country):
    """Run the ingest for one FIRMS region, logging the start and the end.

    Args:
        user_country: Region identifier, passed unchanged to ``get_new_data``.

    Returns:
        None.
    """
    logger.info("Processing new FIRMS VIIRS Merged Ultra Real Time wildfire data for the last 24 hours")
    get_new_data(user_country)
    logger.info("Done!")


def _download_detections(country):
    """Return one DataFrame per satellite with the last 24 hours of detections."""
    # `import urllib` alone does not guarantee the `error` submodule is loaded.
    import urllib.error

    max_attempts = 5
    frames = []
    for satellite in ['snpp', 'noaa20']:
        logger.info(f"Downloading the last 24 hours of detections from {satellite}")
        url = f"{config.firms_wfs_url_prefix.replace('<country>', country)}/{config.firms_api_map_keys}/{config.firms_wfs_url_suffix.replace('<satellite>', satellite)}"
        for attempt in range(1, max_attempts + 1):
            try:
                satellite_df = pd.read_csv(url)
            except (urllib.error.HTTPError, urllib.error.URLError, ConnectionError):
                if attempt == max_attempts:
                    logger.exception("Unable to retrieve newest data from the FIRMS API")
                    raise
                logger.warning(f"Retrying {attempt}/5 after failure to retrieve data from FIRMS API")
                time.sleep(30)
            else:
                frames.append(satellite_df)
                break
        # Pause between satellites before issuing the next request.
        time.sleep(config.firms_wfs_request_sleep_secs)
    return frames


def _normalize_detections(fires_df):
    """Upper-case columns, reformat time/confidence fields and drop unused columns."""
    fires_df.columns = fires_df.columns.str.upper()
    # Zero-pad the acquisition time to four characters (e.g. 45 -> '0045').
    fires_df['ACQ_TIME'] = fires_df['ACQ_TIME'].astype(int).astype(str).str.zfill(4)
    fires_df['ACQ_DATE'] = fires_df['ACQ_DATE'].astype(str)
    # Codes outside this mapping become NaN, as with any Series.map lookup.
    fires_df['CONFIDENCE'] = fires_df['CONFIDENCE'].map({
        'h': 'high',
        'n': 'nominal',
        'l': 'low',
    })
    # Rewrite the timestamp text: '+00' -> 'Z', space -> 'T', '/' -> '-'.
    fires_df['ACQ_DATETIME'] = (
        fires_df['ACQ_DATETIME']
        .apply(str)
        .str.replace(r"\+00", 'Z', regex=True)
        .str.replace(' ', 'T', regex=False)
        .str.replace('/', '-', regex=False)
    )
    fires_df = fires_df.rename(columns={'ACQ_DATETIME': 'ACQ_DT',
                                        'BRIGHTNESS_2': 'BRIGHT_2'})
    # Either column may be absent; ignore the missing ones instead of raising.
    return fires_df.drop(columns=['UNNAMED: 1', 'WKT'], errors='ignore')


def get_new_data(user_country, bbox: "Polygon | None" = None) -> None:
    """Download the last 24 hours of VIIRS detections and write them to a shapefile.

    Detections for the 'snpp' and 'noaa20' satellites are fetched from the
    FIRMS WFS for ``user_country``, concatenated, de-duplicated, normalised,
    filtered to the points lying within ``bbox`` and written to
    ``config.temp_dir`` / ``config.output_shp_filename``.

    Args:
        user_country: Region identifier substituted into the WFS URL.
        bbox: Optional shapely polygon used to filter detections. When omitted,
            the original hard-coded box (longitude -118.951721 to -117.646374,
            latitude 32.75004 to 34.823302) is used.

    Returns:
        None.

    Raises:
        urllib.error.URLError, ConnectionError: Re-raised after five failed
            download attempts for a satellite.
    """
    logger.info("Collecting newest VIIRS detections from the FIRMS WFS")
    new_fires_df = pd.concat(_download_detections(user_country), ignore_index=True)
    logger.info(f"Number of detections pre-deduplication: {len(new_fires_df)}")
    new_fires_df = new_fires_df.drop_duplicates(ignore_index=True)
    logger.info(f"Number of detections post-deduplication: {len(new_fires_df)}")

    logger.info("Processing WFS data and writing to shapefile")
    new_fires_df = _normalize_detections(new_fires_df)
    # Declare the CRS at construction instead of assigning it to a filtered slice.
    new_fires_gdf = gpd.GeoDataFrame(
        new_fires_df,
        geometry=gpd.points_from_xy(new_fires_df.LONGITUDE, new_fires_df.LATITUDE),
        crs='EPSG:4326',
    )

    if bbox is None:
        # Default bounding box polygon as (longitude, latitude) corners.
        bbox = Polygon([(-118.951721, 32.75004), (-118.951721, 34.823302),
                        (-117.646374, 34.823302), (-117.646374, 32.75004)])

    # Keep only the detections that fall within the bounding box.
    points_in_bbox = new_fires_gdf[new_fires_gdf.within(bbox)]

    # The output directory may not exist yet on a fresh checkout.
    os.makedirs(config.temp_dir, exist_ok=True)
    output_file_path = os.path.join(config.temp_dir, config.output_shp_filename)
    points_in_bbox.to_file(filename=output_file_path)
    logger.info(f"Shapefile written to: {output_file_path}")


if __name__ == "__main__":
    # Region identifier handed to ingest() -- this is not a bounding box.
    # Per its name, this one targets the contiguous USA and Hawaii.
    country = "USA_contiguous_and_Hawaii"
    ingest(user_country=country)
