In [1]:
import os
from datetime import datetime, timedelta
from io import StringIO

import geopandas as gpd
import pandas as pd
import requests
import simplekml
from google.cloud import bigquery
from shapely.geometry import MultiPoint
from sklearn.cluster import DBSCAN

def get_firms_data(api_key, bbox, product, days_of_data = 2, date=None):
    '''
    Download fire detections from the FIRMS "area" CSV endpoint and return them as a GeoDataFrame.

    The request URL is built as ``<base>/<api_key>/<product>/<bbox>/<days_of_data>`` and, when
    ``date`` is given, ``/<date>`` is appended.

    If the request fails, or the response is not a CSV containing the expected columns, the
    problem is printed and an EMPTY GeoDataFrame with the usual columns is returned, so that
    callers can keep processing the other products.

    :param api_key: str, FIRMS MAP_KEY
    :param bbox: str, region of interest in the format "minLongitude,minLatitude,maxLongitude,maxLatitude"
                 (the notebook also passes 'world')
    :param product: str, FIRMS source name, e.g. "VIIRS_SNPP_NRT" or "MODIS_NRT"
    :param days_of_data: int, number of days of data to request
    :param date: str or None, date in '%Y-%m-%d' format. If None, no date segment is sent and the
                 API returns its most recent data.
                 NOTE(review): FIRMS documents this segment as the start of the day range - confirm.
    :return: GeoDataFrame with columns latitude, longitude, confidence, geometry, acq_date,
             acq_time and product
    '''

    base_url = 'https://firms.modaps.eosdis.nasa.gov/api/area/csv/'
    # Columns that must be present in the API response for the rest of the pipeline to work
    source_columns = ['latitude', 'longitude', 'confidence', 'acq_date', 'acq_time']

    url = f'{base_url}{api_key}/{product}/{bbox}/{days_of_data}'
    if date is not None:
        url = f'{url}/{date}'

    try:
        # A timeout prevents a stalled connection from hanging the job forever
        response = requests.get(url, timeout=120)
        response.raise_for_status()  # Raise an exception if the request was unsuccessful
        df = pd.read_csv(StringIO(response.text))  # Parse the CSV text response
    except (requests.exceptions.RequestException,
            pd.errors.EmptyDataError,
            pd.errors.ParserError) as e:
        print(f"Error occurred while fetching data: {e}")
        df = pd.DataFrame(columns=source_columns)

    # The API can answer 200 with a plain-text error message instead of CSV data;
    # in that case the expected columns are absent.
    missing_columns = [column for column in source_columns if column not in df.columns]
    if missing_columns:
        print(f"Unexpected FIRMS response for {product}: missing columns {missing_columns}")
        df = pd.DataFrame(columns=source_columns)

    # Convert the DataFrame to a GeoDataFrame, setting the geometry from the latitude and longitude columns
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))

    # Drop unnecessary columns; copy so the column added below is not written into a slice
    columns_to_keep = ['latitude', 'longitude', 'confidence', 'geometry', 'acq_date', 'acq_time']
    gdf = gdf[columns_to_keep].copy()

    # Add a column indicating the product
    gdf['product'] = product

    return gdf

def filter_last_24_hours(gdf):
    """
    Keep only the rows acquired within 24 hours of the most recent acquisition in the frame.

    The cut-off is relative to the latest timestamp found in the data, not to the current
    wall-clock time. The input frame is left untouched; a new frame is returned with the
    helper columns 'hour', 'minute' and 'datetime' added and 'acq_time' normalised to a
    zero-padded four-character string. Row counts before/after and the latest timestamp
    are printed.

    :param gdf: GeoDataFrame with 'acq_date' (YYYY-MM-DD) and 'acq_time' (HHMM, possibly
                stored as an integer such as 5 for 00:05) columns
    :return: GeoDataFrame sorted by 'datetime' containing only rows from the last 24 hours
    """
    # Work on a copy so the caller's frame (which may itself be a slice) is not mutated
    gdf = gdf.copy()

    # Convert 'acq_time' to a string and pad it with zeros to ensure it has four digits
    gdf['acq_time'] = gdf['acq_time'].astype(str).str.zfill(4)

    # Extract the hours and minutes from 'acq_time'
    gdf['hour'] = gdf['acq_time'].str[:2]
    gdf['minute'] = gdf['acq_time'].str[2:]

    # Combine 'acq_date', 'hour', and 'minute' into a single datetime column
    gdf['datetime'] = pd.to_datetime(
        gdf['acq_date'].astype(str) + ' ' + gdf['hour'] + ':' + gdf['minute']
    )

    # Sort the GeoDataFrame by 'datetime'
    gdf = gdf.sort_values('datetime')
    print(len(gdf))

    # Get the latest time in the GeoDataFrame
    latest_time = gdf['datetime'].max()
    print(latest_time)

    # Get the time 24 hours before the latest time
    one_day_before_latest = latest_time - pd.Timedelta(days=1)

    # Filter rows from the last 24 hours based on the latest time
    gdf = gdf[gdf['datetime'] >= one_day_before_latest]
    print(len(gdf))
    return gdf


In [17]:

def testing_data_function(csv_file_path):
    '''
    Load a local CSV of fire detections as a GeoDataFrame, for testing without the FIRMS API.

    Point geometries are built from the 'longitude' and 'latitude' columns, and only the
    columns latitude, longitude, confidence, geometry, acq_date and acq_time are returned.
    No 'product' column is added.

    :param csv_file_path: str, path to the CSV file
    :return: GeoDataFrame of fire detections, or None (after printing a message) when the
             file does not exist
    '''
    try:
        raw_frame = pd.read_csv(csv_file_path)
    except FileNotFoundError:
        print(f"File {csv_file_path} not found.")
        return None

    # Build one point per row from the coordinate columns
    point_geometry = gpd.points_from_xy(raw_frame.longitude, raw_frame.latitude)
    geo_frame = gpd.GeoDataFrame(raw_frame, geometry=point_geometry)

    # Return only the columns the rest of the pipeline uses
    return geo_frame[['latitude', 'longitude', 'confidence', 'geometry', 'acq_date', 'acq_time']]



def convert_landsat_confidence_column(landsat_gdf):
    """
    Rewrite the Landsat 'confidence' column using the single-letter VIIRS codes.

    'H' becomes 'h', 'M' becomes 'n', and every other value becomes 'l'.
    The column is replaced on the frame that was passed in, and that same frame is returned.

    :param landsat_gdf: GeoDataFrame containing Landsat fire data with a 'confidence' column
    :return: the same GeoDataFrame with 'confidence' holding 'l', 'n' or 'h'
    """
    viirs_code_for = {'H': 'h', 'M': 'n'}

    # Anything that is not 'H' or 'M' is treated as low confidence
    landsat_gdf['confidence'] = landsat_gdf['confidence'].apply(
        lambda raw_value: viirs_code_for.get(raw_value, 'l')
    )
    return landsat_gdf


def convert_modis_confidence_column(modis_gdf):
    """
    Rewrite the numeric MODIS 'confidence' column using the single-letter VIIRS codes.

    Values above 50 become 'h', values above 25 and up to 50 become 'n', and everything
    else becomes 'l'. The column is replaced on the frame that was passed in, and that
    same frame is returned.

    :param modis_gdf: GeoDataFrame containing MODIS fire data with a numeric 'confidence' column
    :return: the same GeoDataFrame with 'confidence' holding 'l', 'n' or 'h'
    """
    def to_viirs_code(score):
        # Checked from the highest band downwards; whatever is left is low confidence
        if score > 50:
            return 'h'
        if score > 25:
            return 'n'
        return 'l'

    modis_gdf['confidence'] = modis_gdf['confidence'].apply(to_viirs_code)
    return modis_gdf

def cluster_fires(gdf, eps=0.01, min_samples=1):
    """
    Label each fire point with a spatial cluster found by DBSCAN.

    Clustering runs on the raw 'longitude' and 'latitude' values, so ``eps`` is expressed
    in the same units as those columns. The 'label' column is written onto the frame that
    was passed in, and that same frame is returned.

    :param gdf: GeoDataFrame of fire points with 'longitude' and 'latitude' columns
    :param eps: The maximum distance between two samples for one to be considered as in the neighborhood of the other
    :param min_samples: The number of samples in a neighborhood for a point to be considered as a core point
    :return: the same GeoDataFrame with an additional 'label' column giving each point's cluster
    """
    lon_lat_pairs = gdf[['longitude', 'latitude']].values

    # Fit DBSCAN and store the per-point cluster labels directly on the frame
    gdf['label'] = DBSCAN(eps=eps, min_samples=min_samples).fit(lon_lat_pairs).labels_
    return gdf

def filter_clusters(gdf, min_cluster_size=20, min_high_confidence=2):
    """
    Keep only the points whose cluster is large enough and has enough high-confidence points.

    A cluster is kept when it has at least ``min_cluster_size`` points AND at least
    ``min_high_confidence`` points whose 'confidence' is 'h'. Clusters without any
    high-confidence point are counted as having zero of them.

    :param gdf: GeoDataFrame of fire points with 'label' and 'confidence' columns
    :param min_cluster_size: Minimum number of points in a cluster for it to be kept
    :param min_high_confidence: Minimum number of high confidence points in a cluster for it to be kept
    :return: GeoDataFrame of fire points in clusters that meet both thresholds
    """
    # Count the number of points in each cluster
    cluster_counts = gdf['label'].value_counts()

    # Count the high confidence points in each cluster. Reindex onto the full set of cluster
    # labels so both Series share one index and clusters with no 'h' point count as 0
    # instead of being missing.
    high_confidence_counts = (
        gdf.loc[gdf['confidence'] == 'h', 'label']
        .value_counts()
        .reindex(cluster_counts.index, fill_value=0)
    )

    # Filter out small clusters and clusters with too few high confidence points
    meets_thresholds = (cluster_counts >= min_cluster_size) & (high_confidence_counts >= min_high_confidence)
    valid_clusters = cluster_counts[meets_thresholds].index

    return gdf[gdf['label'].isin(valid_clusters)]

def filter_clusters_with_product_confidence(gdf, min_cluster_size=40, required_high_confidence_per_product=3):
    """
    Keep only the points whose cluster is large enough and is confirmed by enough products.

    A cluster is kept when it has at least ``min_cluster_size`` points AND its
    high-confidence points ('confidence' == 'h') come from at least
    ``required_high_confidence_per_product`` distinct values of 'product'.

    :param gdf: GeoDataFrame of fire points with 'label', 'confidence' and 'product' columns
    :param min_cluster_size: Minimum number of points in a cluster for it to be kept
    :param required_high_confidence_per_product: Minimum number of distinct products that must each
           contribute at least one high confidence point to a cluster for it to be kept
    :return: GeoDataFrame of fire points in clusters that meet both thresholds
    """
    labels = gdf['label']

    # Clusters that pass the size threshold
    points_per_cluster = labels.value_counts()
    big_enough = set(points_per_cluster[points_per_cluster >= min_cluster_size].index)

    # Clusters whose high confidence points span enough distinct products
    products_with_high = gdf[gdf['confidence'] == 'h'].groupby('label')['product'].nunique()
    enough_products = set(
        products_with_high[products_with_high >= required_high_confidence_per_product].index
    )

    # A cluster has to satisfy both criteria
    return gdf[labels.isin(big_enough & enough_products)]

def create_cluster_polygons(gdf):
    """
    Build one convex-hull geometry per cluster and serialise them as GeoJSON.

    Each feature's id is the cluster label. A cluster's hull is whatever shapely returns for
    its points, so clusters with very few points may yield a point or line rather than a polygon.

    When ``gdf`` has no rows (for example, no cluster survived filtering) the function returns
    ``(pd.NaT, <empty FeatureCollection>)`` instead of raising.

    :param gdf: GeoDataFrame of fire points with 'label', 'geometry' and 'datetime' columns
    :return: Tuple of (most frequently occurring value of the 'datetime' column as a Timestamp,
             GeoJSON string with one feature per cluster)
    """
    if gdf.empty:
        # Nothing to hull and no timestamp to report
        return pd.NaT, '{"type": "FeatureCollection", "features": []}'

    # For each cluster, create a MultiPoint from the fire points and take its convex hull
    hulls_by_label = {
        label: MultiPoint(group.geometry.tolist()).convex_hull
        for label, group in gdf.groupby('label')
    }

    # Index by cluster label so the labels become the GeoJSON feature ids
    polygon_gdf = gpd.GeoDataFrame(
        {'geometry': list(hulls_by_label.values())},
        index=pd.Index(list(hulls_by_label.keys()), name='label'),
    )

    # Convert the GeoDataFrame to a GeoJSON string
    polygon_geojson = polygon_gdf.to_json()

    # Most frequently occurring acquisition timestamp (ties resolve to the first mode returned)
    most_common_acq_date = pd.to_datetime(gdf['datetime'].mode().iloc[0])

    return most_common_acq_date, polygon_geojson


########KML VERSION?#########

def create_cluster_polygons_kml(gdf, output_kml_path):
    """
    Write a KML file containing one convex-hull polygon per cluster.

    Each polygon is named "Cluster <label> - <date>", where <date> is the most frequently
    occurring 'acq_date' across the whole frame. Clusters whose convex hull is not a polygon
    (a single point or points on a line) are skipped because they have no outer ring.

    When ``gdf`` has no rows, an empty KML document is still saved and None is returned.

    :param gdf: GeoDataFrame of fire points with 'label', 'geometry' and 'acq_date' columns
    :param output_kml_path: str, the path where the KML file will be saved
    :return: The most frequently occurring acquisition date (datetime.date), or None for empty input
    """
    # Initialize a simplekml object
    kml = simplekml.Kml()

    if gdf.empty:
        kml.save(output_kml_path)
        return None

    # Convert the most frequently occurring acquisition date to a date
    most_common_acq_date = pd.to_datetime(gdf['acq_date'].mode()[0]).date()

    # For each cluster, create a MultiPoint from the fire points and take its convex hull
    for label, group in gdf.groupby('label'):
        hull = MultiPoint(group.geometry.tolist()).convex_hull

        # Only polygons have an exterior ring; degenerate hulls are Point/LineString
        if hull.geom_type != 'Polygon':
            continue

        # exterior.coords yields plain coordinate tuples, not Point objects
        outer_ring = [(coord[0], coord[1]) for coord in hull.exterior.coords]

        # Create a polygon in the KML file
        pol = kml.newpolygon(name=f"Cluster {label} - {most_common_acq_date}",
                             outerboundaryis=outer_ring)

        # Optional: Style the polygon
        pol.style.polystyle.color = simplekml.Color.changealphaint(200, simplekml.Color.red)

    # Save the KML file
    kml.save(output_kml_path)

    return most_common_acq_date

def upload_to_bigquery(acq_date, polygon_geojson):
    """
    Insert one row holding the cluster GeoJSON into the BigQuery table
    'geojson_predictions.combined_firms_mask'.

    The row contains the acquisition date, the GeoJSON string, and the current UTC time,
    with both timestamps formatted as '%Y-%m-%dT%H:%M:%SZ'. Insert errors are printed,
    not raised.

    :param acq_date: datetime-like object with a ``strftime`` method; stored as 'prediction_date'
    :param polygon_geojson: GeoJSON string where each feature represents a cluster; stored as 'geojson_mask'
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'

    # Resolve the destination table through a BigQuery client
    bq_client = bigquery.Client()
    table_reference = bq_client.dataset('geojson_predictions').table('combined_firms_mask')
    destination_table = bq_client.get_table(table_reference)

    # Single row to stream into the table
    payload = {
        'prediction_date': acq_date.strftime(timestamp_format),
        'geojson_mask': polygon_geojson,
        'datetime_added': datetime.utcnow().strftime(timestamp_format),  # UTC timestamp of the current moment
    }

    insert_errors = bq_client.insert_rows_json(destination_table, [payload])

    # Report the outcome; an empty error list means the insert succeeded
    if not insert_errors:
        print('Row inserted successfully.')
    else:
        print('Errors:', insert_errors)

def FIRMS_GEOJSON_UPDATE(request):
    """
    HTTP entry point: fetch recent FIRMS detections, cluster them, and upload the cluster
    polygons to BigQuery.

    The request's JSON body must be an object with 'api_key' and 'bbox'. Detections from
    the VIIRS and MODIS near-real-time products are fetched, limited to the last 24 hours,
    combined, clustered, filtered, converted to convex-hull polygons and uploaded.

    :param request: object exposing ``get_json(silent=True)`` that returns the parsed JSON body
    :return: Tuple of (message, HTTP status code); 400 when 'api_key' or 'bbox' is missing,
             200 otherwise (including when no cluster met the thresholds and nothing was uploaded)
    """
    # Extract request parameters from the cron job request.
    # get_json(silent=True) yields None when the body is missing or is not valid JSON.
    request_json = request.get_json(silent=True)
    if not isinstance(request_json, dict):
        request_json = {}
    api_key = request_json.get('api_key')
    bbox = request_json.get('bbox')
    if not api_key or not bbox:
        return "Request JSON must include 'api_key' and 'bbox'", 400

    # Retrieve VIIRS and MODIS data using the provided API key and bounding box
    products = ["VIIRS_SNPP_NRT", "VIIRS_NOAA21_NRT", "VIIRS_NOAA20_NRT", "MODIS_NRT"]
    gdfs = []
    for product in products:
        gdf = get_firms_data(api_key=api_key, bbox=bbox, product=product)
        gdf = filter_last_24_hours(gdf)
        # MODIS reports numeric confidence; convert it to the VIIRS letter codes.
        # Selected by product name so reordering the list above cannot break it.
        if product.startswith('MODIS'):
            gdf = convert_modis_confidence_column(gdf)
        gdfs.append(gdf)

    combined_gdf = pd.concat(gdfs, ignore_index=True)
    if combined_gdf.empty:
        return 'No fire detections available; nothing uploaded', 200

    # Cluster the combined data points
    clustered_combined_gdf = cluster_fires(combined_gdf)

    # Keep clusters of at least 50 points whose high confidence points span at least 3 products
    filtered_combined_clusters = filter_clusters_with_product_confidence(
        clustered_combined_gdf,
        min_cluster_size=50,
        required_high_confidence_per_product=3,
    )
    if filtered_combined_clusters.empty:
        return 'No fire clusters met the thresholds; nothing uploaded', 200

    # Create a polygon for each cluster
    acq_date, polygon_geojson = create_cluster_polygons(filtered_combined_clusters)

    # Upload the generated polygon GeoJSON to BigQuery
    upload_to_bigquery(acq_date, polygon_geojson)

    return 'Successfully processed and uploaded data', 200


In [3]:
# Local sample exports, one per FIRMS product, for exercising the pipeline offline.
# NOTE(review): these are absolute paths on the author's machine; adjust before running elsewhere.
file_paths = [
    "/Users/adamhunter/Documents/school projs/firenet/data/VIIRS_SNPP_NRT_sample.csv",
    "/Users/adamhunter/Documents/school projs/firenet/data/VIIRS_NOAA21_NRT_sample.csv",
    "/Users/adamhunter/Documents/school projs/firenet/data/VIIRS_NOAA20_NRT_sample.csv",
    "/Users/adamhunter/Documents/school projs/firenet/data/MODIS_NRT_sample.csv",
    "/Users/adamhunter/Documents/school projs/firenet/data/LANDSAT_NRT_sample.csv",
]

# One GeoDataFrame per sample file, in the same order (None for any file that is missing)
gdfs = list(map(testing_data_function, file_paths))


In [18]:
# Fetch the latest worldwide detections for each VIIRS product, then keep only the
# final 24 hours of each. The FIRMS key is read from the FIRMS_API_KEY environment
# variable so that the secret is not stored in the notebook.
products = ["VIIRS_SNPP_NRT", "VIIRS_NOAA21_NRT", "VIIRS_NOAA20_NRT"]
firms_api_key = os.environ["FIRMS_API_KEY"]
gdfs = [get_firms_data(api_key=firms_api_key, bbox='world', product=product) for product in products]
gdfs = [filter_last_24_hours(gdf) for gdf in gdfs]


78035
2024-04-17 13:48:00
53875
74523
2024-04-17 14:12:00
41790
78714
2024-04-17 14:08:00
55264


In [9]:
# Display the fourth frame in gdfs (the recorded output shows MODIS_NRT rows).
# NOTE(review): the fetch cell above lists only three products, so index 3 exists only if
# gdfs was built from a longer product list in an earlier run - confirm execution order.
gdfs[3]

Unnamed: 0,latitude,longitude,confidence,geometry,acq_date,acq_time,product,hour,minute,datetime
4956,-6.00032,-48.01727,53,POINT (-48.01727 -6.00032),2024-04-16,1321,MODIS_NRT,13,21,2024-04-16 13:21:00
4957,-1.29614,-47.90541,59,POINT (-47.90541 -1.29614),2024-04-16,1321,MODIS_NRT,13,21,2024-04-16 13:21:00
4967,-8.39167,-44.05327,17,POINT (-44.05327 -8.39167),2024-04-16,1323,MODIS_NRT,13,23,2024-04-16 13:23:00
4973,-7.17791,-45.79205,36,POINT (-45.79205 -7.17791),2024-04-16,1323,MODIS_NRT,13,23,2024-04-16 13:23:00
4972,-7.55270,-44.53049,51,POINT (-44.53049 -7.55270),2024-04-16,1323,MODIS_NRT,13,23,2024-04-16 13:23:00
...,...,...,...,...,...,...,...,...,...,...
12610,42.62333,2.97323,66,POINT (2.97323 42.62333),2024-04-17,1312,MODIS_NRT,13,12,2024-04-17 13:12:00
12611,42.72911,2.95527,55,POINT (2.95527 42.72911),2024-04-17,1312,MODIS_NRT,13,12,2024-04-17 13:12:00
12612,51.03413,2.29863,50,POINT (2.29863 51.03413),2024-04-17,1314,MODIS_NRT,13,14,2024-04-17 13:14:00
12613,51.48260,6.73551,50,POINT (6.73551 51.48260),2024-04-17,1314,MODIS_NRT,13,14,2024-04-17 13:14:00


In [10]:

# Convert MODIS numeric confidence to the VIIRS letter codes.
# MODIS frames are found through their 'product' column rather than a fixed position, so the
# cell works whatever products were fetched. Frames whose confidence is no longer numeric are
# skipped, which makes the cell safe to re-run.
gdfs = [
    convert_modis_confidence_column(gdf)
    if (
        'product' in gdf.columns
        and gdf['product'].eq('MODIS_NRT').any()
        and pd.api.types.is_numeric_dtype(gdf['confidence'])
    )
    else gdf
    for gdf in gdfs
]

In [37]:
# Merge every product's detections into a single frame with a fresh 0..n-1 index
combined_gdf = pd.concat(gdfs, ignore_index=True)

# Assign a DBSCAN cluster label to every detection
clustered_combined_gdf = cluster_fires(combined_gdf)

# Keep clusters of at least 15 points in which at least 1 product reports a high confidence point.
# Alternative kept for reference: filter_clusters(clustered_combined_gdf, min_high_confidence=10)
filtered_combined_clusters = filter_clusters_with_product_confidence(
    clustered_combined_gdf,
    min_cluster_size=15,
    required_high_confidence_per_product=1,
)

# Convex hull per surviving cluster, plus the most common detection timestamp
acq_date_combined, polygon_geojson_combined = create_cluster_polygons(filtered_combined_clusters)

polygon_geojson_combined


'{"type": "FeatureCollection", "features": [{"id": "2", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-12.23584, 10.04889], [-12.26246, 10.06411], [-12.26327, 10.06597], [-12.26397, 10.06942], [-12.22784, 10.10338], [-12.22737, 10.09956], [-12.22746, 10.08587], [-12.22895, 10.07608], [-12.22916, 10.07501], [-12.23584, 10.04889]]]}}, {"id": "53", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-13.04076, 10.08877], [-13.04527, 10.09173], [-13.04559, 10.09536], [-13.04585, 10.09899], [-13.0309, 10.10876], [-13.0328, 10.09374], [-13.0366, 10.08945], [-13.04076, 10.08877]]]}}, {"id": "55", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-12.78401, 10.35075], [-12.78531, 10.35388], [-12.78196, 10.35804], [-12.77857, 10.36221], [-12.77447, 10.36287], [-12.77131, 10.35629], [-12.77523, 10.35197], [-12.78401, 10.35075]]]}}, {"id": "216", "type": "Feature", "properties": {}, "g

In [38]:
filtered_combined_clusters

Unnamed: 0,latitude,longitude,confidence,geometry,acq_date,acq_time,product,hour,minute,datetime,label
2,10.05728,-12.24180,l,POINT (-12.24180 10.05728),2024-04-16,1357,VIIRS_SNPP_NRT,13,57,2024-04-16 13:57:00,2
20,10.06541,-12.23526,n,POINT (-12.23526 10.06541),2024-04-16,1357,VIIRS_SNPP_NRT,13,57,2024-04-16 13:57:00,2
21,10.06074,-12.24237,l,POINT (-12.24237 10.06074),2024-04-16,1357,VIIRS_SNPP_NRT,13,57,2024-04-16 13:57:00,2
24,10.06687,-12.23562,l,POINT (-12.23562 10.06687),2024-04-16,1357,VIIRS_SNPP_NRT,13,57,2024-04-16 13:57:00,2
25,10.06660,-12.25937,h,POINT (-12.25937 10.06660),2024-04-16,1357,VIIRS_SNPP_NRT,13,57,2024-04-16 13:57:00,2
...,...,...,...,...,...,...,...,...,...,...,...
150923,15.17688,-11.38487,l,POINT (-11.38487 15.17688),2024-04-17,1408,VIIRS_NOAA20_NRT,14,08,2024-04-17 14:08:00,10752
150924,15.17864,-11.39649,l,POINT (-11.39649 15.17864),2024-04-17,1408,VIIRS_NOAA20_NRT,14,08,2024-04-17 14:08:00,10752
150925,15.17917,-11.39279,l,POINT (-11.39279 15.17917),2024-04-17,1408,VIIRS_NOAA20_NRT,14,08,2024-04-17 14:08:00,10752
150926,15.17971,-11.38909,l,POINT (-11.38909 15.17971),2024-04-17,1408,VIIRS_NOAA20_NRT,14,08,2024-04-17 14:08:00,10752


In [39]:
import simplekml
import json
from google.cloud import storage
from datetime import datetime

def convert_geojson_to_kml_and_upload(geojson_string, bucket_name='popex_active_fire_kmls', acq_date=None):
    """
    Convert the polygons of a GeoJSON FeatureCollection to KML and upload the file to
    Google Cloud Storage.

    Only features whose geometry type is 'Polygon' are written; each polygon's outer ring is
    taken from the first coordinate ring (holes are ignored). The file is named
    "firms_polygons_<acq_date>.kml", saved under /tmp, and uploaded under the same name.

    :param geojson_string: str, GeoJSON FeatureCollection
    :param bucket_name: str, name of the destination Cloud Storage bucket
    :param acq_date: value used in the file name. When omitted, the notebook-level variable
                     ``acq_date_combined`` is used, which must already exist.
    """
    if acq_date is None:
        # Backward-compatible fallback for callers that rely on the notebook global
        acq_date = acq_date_combined

    # Convert the GeoJSON string to a Python dictionary
    geojson = json.loads(geojson_string)

    # Create a new KML object
    kml = simplekml.Kml()

    # Iterate over each feature in the GeoJSON and add the polygons to the KML
    for feature in geojson.get('features', []):
        geometry = feature.get('geometry') or {}  # a feature's geometry may be null
        if geometry.get('type') != 'Polygon':
            continue

        pol = kml.newpolygon(name=feature.get('id', 'No ID'))
        pol.outerboundaryis = geometry['coordinates'][0]  # Assuming no holes

    kml_file_name = f"firms_polygons_{acq_date}.kml"

    # Save the KML to a temporary file
    temp_kml_path = f"/tmp/{kml_file_name}"
    kml.save(temp_kml_path)

    # Upload the KML file to the specified Google Cloud Storage bucket
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(kml_file_name)

    blob.upload_from_filename(temp_kml_path)

    print(f"Uploaded {kml_file_name} to {bucket_name}.")



In [40]:
# convert_geojson_to_kml_and_upload(polygon_geojson_combined)

In [41]:
import json
import folium

# Parse the cluster GeoJSON so its features can be added to the map one by one
polygon_geojson_combined_dict = json.loads(polygon_geojson_combined)

# Centre the map on the mean position of the points that survived filtering
mean_latitude = filtered_combined_clusters['latitude'].mean()
mean_longitude = filtered_combined_clusters['longitude'].mean()
map_center = [mean_latitude, mean_longitude]
m = folium.Map(location=map_center, zoom_start=5)


def _orange_outline(_feature):
    """Style callback: draw every cluster polygon in orange."""
    return {'fillColor': 'orange', 'color': 'orange'}


# Add each cluster polygon to the map as its own GeoJson layer
for cluster_feature in polygon_geojson_combined_dict['features']:
    folium.GeoJson(cluster_feature, style_function=_orange_outline).add_to(m)
m

In [42]:
# class MockRequest:
#     def __init__(self, json_data):
#         self.json_data = json_data

#     def get_json(self, silent=False):
#         return self.json_data

# # Simulate passing JSON arguments to the Google Cloud Function
# request_json = {
#     "bbox": "world",
#     "api_key": "<FIRMS_API_KEY>"  # placeholder - supply the real key at run time, never commit it
# }

# # Create a mock request object with your JSON data
# mock_request = MockRequest(request_json)

# # Now call your function with the mock request
# FIRMS_GEOJSON_UPDATE(mock_request)