In [10]:
from datetime import datetime, timedelta
from io import StringIO
import geopandas as gpd
import requests
import pandas as pd
from sklearn.cluster import DBSCAN
import simplekml
from shapely.geometry import MultiPoint
from google.cloud import bigquery

def get_firms_data(api_key, bbox, product, days_of_data = 2, date=None):
    '''
    Connect with FIRMS API to access data from a specified date, bbox, product, and range of days
    and return it as a GeoDataFrame. If no date is specified, defaults to today.
    
    :param api_key: str, from NASA email, provided in cron job's request headers
    :param bbox: str, bbox of the region of interest in the format "minLongitude,minLatitude,maxLongitude,maxLatitude", provided in cron job's request headers
    :param date: str, date in '%Y-%m-%d' format. If not provided, defaults to today.
    :return: GeoDataFrame of fire detection data with columns corresponding to the FIRMS API response
    '''
    
    base_url = 'https://firms.modaps.eosdis.nasa.gov/api/area/csv/'

    # # Simplify to get today's worth of data
    # date = datetime.now()  # Get today's date
    # formatted_date = date.strftime('%Y-%m-%d')  # Format date to '%Y-%m-%d'
    # print(formatted_date)
    url = f'{base_url}{api_key}/{product}/{bbox}/{days_of_data}'
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception if the request was unsuccessful
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching data: {e}")
    else:
        data = StringIO(response.text)  # Convert text response to file-like object
        df = pd.read_csv(data)  # Read data into a DataFrame


    # Convert the DataFrame to a GeoDataFrame, setting the geometry from the latitude and longitude columns
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))

    # Drop unnecessary columns
    columns_to_keep = ['latitude', 'longitude', 'confidence', 'geometry', 'acq_date', 'acq_time']
    gdf = gdf[columns_to_keep]

    return gdf

def filter_last_24_hours(gdf):
    """
    Filter the GeoDataFrame to include only rows from the last 24 hours.
    
    :param gdf: GeoDataFrame with 'acq_date' and 'acq_time' columns
    :return: GeoDataFrame with rows from the last 24 hours
    """
    # Convert 'acq_time' to a string and pad it with zeros to ensure it has four digits
    gdf['acq_time'] = gdf['acq_time'].astype(str).str.zfill(4)

    # Extract the hours and minutes from 'acq_time'
    gdf['hour'] = gdf['acq_time'].str[:2]
    gdf['minute'] = gdf['acq_time'].str[2:]

    # Combine 'acq_date', 'hour', and 'minute' into a single datetime column
    gdf['datetime'] = pd.to_datetime(gdf['acq_date'] + ' ' + gdf['hour'] + ':' + gdf['minute'])

    # Sort the GeoDataFrame by 'datetime'
    gdf = gdf.sort_values('datetime')
    print(len(gdf))
    # Get the latest time in the GeoDataFrame
    latest_time = gdf['datetime'].max()
    print(latest_time)
    # Get the time 24 hours before the latest time
    one_day_before_latest = latest_time - pd.Timedelta(days=1)

    # Filter rows from the last 24 hours based on the latest time
    gdf = gdf[gdf['datetime'] >= one_day_before_latest]
    print(len(gdf))
    return gdf


In [19]:

def testing_data_function(csv_file_path):
    '''
    Convert a CSV file to a GeoDataFrame, using the longitude and latitude columns.
    This function is intended for testing data transformations without accessing the FIRMS API.
    
    :param csv_file_path: str, path to the CSV file
    :return: GeoDataFrame of fire detection data with columns corresponding to the FIRMS API response
    '''
    
    try:
        # Read data from the CSV file into a DataFrame
        df = pd.read_csv(csv_file_path)
    except FileNotFoundError:
        print(f"File {csv_file_path} not found.")
        return None
    
    # Convert the DataFrame to a GeoDataFrame, setting the geometry from the latitude and longitude columns
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))
    
    # Drop unnecessary columns
    columns_to_keep = ['latitude', 'longitude', 'confidence', 'geometry', 'acq_date', 'acq_time']
    gdf = gdf[columns_to_keep]
    
    return gdf



def convert_landsat_confidence_column(landsat_gdf):
    """
    Convert the 'confidence' column in a Landsat GeoDataFrame to the same system as VIIRS.
    :param landsat_gdf: GeoDataFrame containing Landsat fire data with a 'confidence' column
    :return: GeoDataFrame with the 'confidence' column converted to categorical values (l='low', n='nominal', h='high')
    """
    def categorize_confidence(value):
        if value == 'H':
            return 'h'
        elif value == 'M':
            return 'n'
        else:  # Assuming any value not 'H' or 'M' is 'L'
            return 'l'

    landsat_gdf['confidence'] = landsat_gdf['confidence'].apply(categorize_confidence)
    return landsat_gdf


def convert_modis_confidence_column(modis_gdf):
    """
    Convert the 'confidence' column in a MODIS GeoDataFrame to categorical values based on confidence level.
    This changes the confidence categorization to match VIIRS
    :param modis_gdf: GeoDataFrame containing MODIS fire data with a 'confidence' column
    :return: GeoDataFrame with the 'confidence' column converted to categorical values ('low', 'nominal', 'high')
    """
    def categorize_confidence(value):
        if value > 50:
            return 'h'
        elif 25 < value <= 50:
            return 'n'
        else:
            return 'l'

    modis_gdf['confidence'] = modis_gdf['confidence'].apply(categorize_confidence)
    return modis_gdf

def cluster_fires(gdf, eps=0.01, min_samples=1):
    """
    Given a GeoDataFrame of fire points, create spatial clusters
    :param gdf: GeoDataFrame of fire points
    :param eps: The maximum distance between two samples for one to be considered as in the neighborhood of the other
    :param min_samples: The number of samples in a neighborhood for a point to be considered as a core point
    :return: GeoDataFrame of fire points with an additional column 'label' indicating the cluster each point belongs to
    """

    # Perform DBSCAN clustering
    coords = gdf[['longitude', 'latitude']].values
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(coords)

    # Add cluster labels to the dataframe
    gdf['label'] = db.labels_

    return gdf

def filter_clusters(gdf, min_cluster_size=20, min_high_confidence=2):
    """
    Filter out clusters that have fewer points, and fewer high confidence points, than the two thresholds
    :param gdf: GeoDataFrame of fire points with 'label' column indicating the cluster each point belongs to
    :param min_cluster_size: Minimum number of points in a cluster for it to be kept
    :param min_high_confidence: Minimum number of high confidence points in a cluster for it to be kept
    :return: GeoDataFrame of fire points in clusters that meet both thresholds
    """

    # Count the number of points in each cluster
    cluster_counts = gdf['label'].value_counts()

    # Count the number of high confidence points in each cluster
    high_confidence_counts = gdf.loc[gdf['confidence'] == 'h']['label'].value_counts()

    # Filter out small clusters and clusters with too few high confidence points
    valid_clusters = cluster_counts[(cluster_counts >= min_cluster_size) & (high_confidence_counts >= min_high_confidence)].index
    gdf = gdf[gdf['label'].isin(valid_clusters)]

    return gdf

def create_cluster_polygons(gdf):
    """
    Given a GeoDataFrame of clustered fire points, create a polygon for each cluster
    :param gdf: GeoDataFrame of fire points with 'label' column indicating the cluster each point belongs to
    :return: Tuple containing the most frequently occurring acquisition date and a GeoJSON string where each feature represents a cluster and the geometry property contains the polygon around the cluster
    """
    # Group the GeoDataFrame by the cluster labels
    grouped = gdf.groupby('label')

    # For each cluster, create a MultiPoint object from the fire points, then create a polygon from the convex hull of the points
    polygons = grouped.apply(lambda df: MultiPoint(df.geometry.tolist()).convex_hull)

    # Create a new GeoDataFrame from the polygons
    polygon_gdf = gpd.GeoDataFrame({'geometry': polygons})

    # Convert the GeoDataFrame to a GeoJSON string
    polygon_geojson = polygon_gdf.to_json()

    # Convert the most frequently occurring acquisition date to datetime
    most_common_acq_date = pd.to_datetime(gdf['acq_date'].mode()[0])

    return most_common_acq_date, polygon_geojson


########KML VERSION?#########

# def create_cluster_polygons(gdf, output_kml_path):
#     """
#     Given a GeoDataFrame of clustered fire points, create a KML file with a polygon for each cluster
#     and include the most common acquisition date in the name of each polygon.
    
#     :param gdf: GeoDataFrame of fire points with 'label' column indicating the cluster each point belongs to
#     :param output_kml_path: str, the path where the KML file will be saved
#     :return: The most frequently occurring acquisition date
#     """
#     # Group the GeoDataFrame by the cluster labels
#     grouped = gdf.groupby('label')

#     # Initialize a simplekml object
#     kml = simplekml.Kml()

#     # Convert the most frequently occurring acquisition date to datetime
#     most_common_acq_date = pd.to_datetime(gdf['acq_date'].mode()[0]).date()

#     # For each cluster, create a MultiPoint object from the fire points, then create a polygon from the convex hull of the points
#     for label, group in grouped:
#         multipoint = MultiPoint(group.geometry.tolist())
#         polygon = multipoint.convex_hull

#         # Create a polygon in the KML file
#         pol = kml.newpolygon(name=f"Cluster {label} - {most_common_acq_date}",
#                              outerboundaryis=[(point.x, point.y) for point in polygon.exterior.coords])

#         # Optional: Style the polygon
#         pol.style.polystyle.color = simplekml.Color.changealphaint(200, simplekml.Color.red)

#     # Save the KML file
#     kml.save(output_kml_path)

#     return most_common_acq_date

def upload_to_bigquery(acq_date, polygon_geojson):
    """
    Uploads the polygon GeoJSON data to BigQuery.

    :param acq_date: The most frequently occurring acquisition date. There will only ever be two dates in the GDF.
    :param polygon_geojson: The GeoJSON string where each feature represents a cluster and the geometry property contains the polygon around the cluster.
    """
    # Initialize a BigQuery client
    client = bigquery.Client()

    # Specify your dataset and table
    dataset_id = 'geojson_predictions'
    table_id = 'combined_firms_mask'

    # Get the table
    table = client.dataset(dataset_id).table(table_id)
    table = client.get_table(table)

    # Convert acq_date to string for bigquery
    acq_date = acq_date.strftime('%Y-%m-%dT%H:%M:%SZ')

    # Prepare the row to be inserted
    row = {
        'prediction_date': acq_date,
        'viirs_mask_geojson': polygon_geojson,
        'datetime_added': datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ'),  # UTC timestamp of the current moment
    }

    # Insert the row
    errors = client.insert_rows_json(table, [row])

    # Check if any errors occurred
    if errors:
        print('Errors:', errors)
    else:
        print('Row inserted successfully.')

def VIIRS_GEOJSON_UPDATE(request):
    # Get the request parameters from the cron job request that is sent to the cloud funtion
    # The GCP cron job is where the API key and bbox are specified
    request_json = request.get_json(silent=True)

    api_key = request_json['api_key']
    bbox = request_json['bbox']
    # Delete request_json as it's no longer needed
    del request_json

    # Get the VIIRS data
    viirs_data = get_viirs_data(api_key, bbox)

    # Filter out points from the last 24 hours
    viirs_data = filter_last_24_hours(viirs_data)

    # Cluster the fire points
    clustered_fires = cluster_fires(viirs_data)
    # Delete viirs_data as it's no longer needed
    del viirs_data

    # Filter out small clusters and clusters with too few high confidence points
    filtered_clusters = filter_clusters(clustered_fires)
    # Delete clustered_fires as it's no longer needed
    del clustered_fires

    # Create a polygon for each cluster
    acq_date, polygon_geojson = create_cluster_polygons(filtered_clusters)
    # Delete filtered_clusters as it's no longer needed
    del filtered_clusters

    # Upload the polygon to BigQuery
    upload_to_bigquery(acq_date, polygon_geojson)
    # Delete acq_date and polygon_geojson as they're no longer needed
    del acq_date, polygon_geojson

    return 'Successfully processed and uploaded data', 200

In [12]:
file_paths = [
    "/Users/adamhunter/Documents/school projs/firenet/data/VIIRS_SNPP_NRT_sample.csv",
    "/Users/adamhunter/Documents/school projs/firenet/data/VIIRS_NOAA21_NRT_sample.csv",
    "/Users/adamhunter/Documents/school projs/firenet/data/VIIRS_NOAA20_NRT_sample.csv",
    "/Users/adamhunter/Documents/school projs/firenet/data/MODIS_NRT_sample.csv",
    "/Users/adamhunter/Documents/school projs/firenet/data/LANDSAT_NRT_sample.csv"
]

gdfs = [testing_data_function(file_path) for file_path in file_paths]


In [35]:
products = ["VIIRS_SNPP_NRT", "VIIRS_NOAA21_NRT", "VIIRS_NOAA20_NRT", "MODIS_NRT"]
gdfs = [get_firms_data(api_key='58ee6e88ea288308039c476b13723cb7', bbox='world', product=product) for product in products]
gdfs = [filter_last_24_hours(gdf) for gdf in gdfs]


Error occurred while fetching data: 403 Client Error: Forbidden for url: https://firms.modaps.eosdis.nasa.gov/api/area/csv/58ee6e88ea288308039c476b13723cb7/MODIS_NRT/world/2


UnboundLocalError: cannot access local variable 'df' where it is not associated with a value

In [None]:
gdfs[4]

Unnamed: 0,latitude,longitude,confidence,geometry,acq_date,acq_time,hour,minute,datetime
1538,58.197173,-116.389199,M,POINT (-116.38920 58.19717),2024-03-27,1852,18,52,2024-03-27 18:52:00
1543,58.197445,-116.389704,H,POINT (-116.38970 58.19745),2024-03-27,1852,18,52,2024-03-27 18:52:00
1542,58.197442,-116.389194,L,POINT (-116.38919 58.19744),2024-03-27,1852,18,52,2024-03-27 18:52:00
1541,58.197435,-116.387663,H,POINT (-116.38766 58.19743),2024-03-27,1852,18,52,2024-03-27 18:52:00
1540,58.197432,-116.387153,H,POINT (-116.38715 58.19743),2024-03-27,1852,18,52,2024-03-27 18:52:00
...,...,...,...,...,...,...,...,...,...
6819,39.149510,-122.251184,H,POINT (-122.25118 39.14951),2024-03-28,1851,18,51,2024-03-28 18:51:00
6818,39.149507,-122.250837,H,POINT (-122.25084 39.14951),2024-03-28,1851,18,51,2024-03-28 18:51:00
6817,39.149505,-122.250490,H,POINT (-122.25049 39.14950),2024-03-28,1851,18,51,2024-03-28 18:51:00
6830,39.149789,-122.252570,H,POINT (-122.25257 39.14979),2024-03-28,1851,18,51,2024-03-28 18:51:00


In [None]:

gdfs[3] = convert_modis_confidence_column(gdfs[3])

In [None]:
combined_gdf = pd.concat(gdfs, ignore_index=True)
# Cluster the combined data points
clustered_combined_gdf = cluster_fires(combined_gdf)
# Filter out small clusters and clusters with too few points or no high confidence point
filtered_combined_clusters = filter_clusters(clustered_combined_gdf, min_high_confidence=10)
# Create a polygon for each cluster
acq_date_combined, polygon_geojson_combined = create_cluster_polygons(filtered_combined_clusters)

polygon_geojson_combined


'{"type": "FeatureCollection", "features": [{"id": "254", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-69.95877, 6.74098], [-69.96222, 6.74152], [-69.96825, 6.74782], [-69.96896, 6.75313], [-69.95418, 6.78744], [-69.95005, 6.79232], [-69.93665, 6.79045], [-69.90966, 6.76074], [-69.91018, 6.75744], [-69.91451, 6.75468], [-69.93123, 6.74735], [-69.95877, 6.74098]]]}}, {"id": "1032", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-68.06816, 9.27557], [-68.07787, 9.27952], [-68.08348, 9.28513], [-68.08278, 9.33421], [-68.08051, 9.34852], [-68.07536, 9.3541], [-68.0705, 9.35469], [-68.06287, 9.31938], [-68.06413, 9.28757], [-68.06816, 9.27557]]]}}, {"id": "1253", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.86449, 18.01477], [-90.86875, 18.01982], [-90.86962, 18.02125], [-90.87404, 18.03249], [-90.87388, 18.05196], [-90.85578, 18.05546], [-90.85478, 18.05461], [-9

In [None]:
filtered_combined_clusters

Unnamed: 0,latitude,longitude,confidence,geometry,acq_date,acq_time,hour,minute,datetime,label
581,6.743820,-69.957660,n,POINT (-69.95766 6.74382),2024-03-27,1836,18,36,2024-03-27 18:36:00,254
582,6.744260,-69.954370,n,POINT (-69.95437 6.74426),2024-03-27,1836,18,36,2024-03-27 18:36:00,254
583,6.744700,-69.951070,n,POINT (-69.95107 6.74470),2024-03-27,1836,18,36,2024-03-27 18:36:00,254
584,6.745140,-69.947760,h,POINT (-69.94776 6.74514),2024-03-27,1836,18,36,2024-03-27 18:36:00,254
585,6.745580,-69.944470,h,POINT (-69.94447 6.74558),2024-03-27,1836,18,36,2024-03-27 18:36:00,254
...,...,...,...,...,...,...,...,...,...,...
156445,39.149512,-122.251531,h,POINT (-122.25153 39.14951),2024-03-28,1851,18,51,2024-03-28 18:51:00,29555
156446,39.149510,-122.251184,h,POINT (-122.25118 39.14951),2024-03-28,1851,18,51,2024-03-28 18:51:00,29555
156447,39.149507,-122.250837,h,POINT (-122.25084 39.14951),2024-03-28,1851,18,51,2024-03-28 18:51:00,29555
156448,39.149505,-122.250490,h,POINT (-122.25049 39.14950),2024-03-28,1851,18,51,2024-03-28 18:51:00,29555


In [None]:
import json
import folium

# Convert the GeoJSON string to a dictionary
polygon_geojson_combined_dict = json.loads(polygon_geojson_combined)

# Create a map centered at an average location
map_center = [filtered_combined_clusters['latitude'].mean(), filtered_combined_clusters['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=5)

# Add polygons to the map
for feature in polygon_geojson_combined_dict['features']:
    folium.GeoJson(feature,
                   style_function=lambda x: {'fillColor': 'orange', 'color': 'orange'}).add_to(m)
m