In [None]:
 #%conda env update -f env.yml

[![Open In SageMaker Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/SatelliteVu/SatelliteVu-AWS-Disaster-Response-Hackathon/blob/main/dataset_preparation/2_PredictionDataCreation.ipynb)

In [1]:
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
import os
from pathlib import Path
from random import sample
import shutil
import subprocess

import geopandas as gpd
import numpy as np
import pandas as pd
import requests
import s3fs
from shapely.geometry import box
import rasterio
from rasterio.errors import RasterioIOError

In [2]:
from src.constants import FIRMS_API_KEY, DEFAULT_PARAMS
from src.data_sources import (cluster_fires, 
                              create_chip_bounds, 
                              ndvi_from_topleft, 
                              landcover_from_topleft, 
                              atmospheric_from_topleft, 
                              fires_from_topleft,
                              elevation_from_topleft)

In [None]:

if not FIRMS_API_KEY:
    raise ValueError('FIRMS_API_KEY empty, please ensure your environment variable set')

# Input parameters

In [17]:
output_fp = '/home/studio-lab-user/sagemaker-studiolab-notebooks/hackathon_data/predict_test'
# os.environ['AWS_S3_BUCKET'] = 's3://wildfireneu'
# output_s3 = os.environ['AWS_S3_BUCKET'] + '/predict_test_2'

# North America
bbox= [-168.7,24.8,-51.8,74.2]

# dates to search for fires
begin_date = datetime(2021,12,1)
end_date   = datetime(2021,12,15)

In [None]:

fs = s3fs.S3FileSystem(key=os.environ['AWS_ACCESS_KEY_ID'], secret=os.environ['AWS_SECRET_ACCESS_KEY'])
print("done")

done


# Load fire data

In [24]:
def load_fires(begin_date, end_date, bbox, day_range=10):
    """
    Given input parameters, search NASA firms API for fires and return GeoDataFrame of fire points
    :param begin_date: datetime
    :param end_date: datetime
    :param bbox: list of floats; left, bottom, right, top
    :param day_range: int for the number of days to search API for
    :return: gpd.GeoDataFrame of fire points
    """
    if day_range > 10:
        # firms api allows max of 10 day range see https://firms.modaps.eosdis.nasa.gov/api/area/
        raise ValueError('"day_range" must be less than or equal to 10')

    start_dates = (pd.date_range(start=begin_date, end=end_date, freq=f"{day_range}D") + pd.Timedelta(f'{day_range}d')).to_pydatetime().tolist()
    if len(start_dates) == 0:
        raise ValueError('No dates to search for, check "begin_date" and "end_date" are formated correctly')
        
    # get min date of the VIIRS_SNPP_NRT so that we can decide based on the date range which FIRMS product(s) we need
    viirs_nrt = pd.read_csv(f'https://firms.modaps.eosdis.nasa.gov/api/data_availability/csv/{FIRMS_API_KEY}/VIIRS_SNPP_NRT')
    viirs_nrt_start = datetime.strptime(viirs_nrt.iloc[0].min_date, '%Y-%m-%d')

    df_fires = pd.DataFrame()
    for start_date in start_dates:
        mapkey_status = requests.get(f'https://firms.modaps.eosdis.nasa.gov/mapserver/mapkey_status/?MAP_KEY={FIRMS_API_KEY}')
        if mapkey_status.json()['current_transactions'] > 460:
            # TODO: tenacity retry wait_exponential:
            raise ValueError('Not enough free transactions left with FIRMS API for given key')

        # split requests by date for VIIRS_SNPP_SP/VIIRS_SNPP_NRT
        if start_date > viirs_nrt_start:
            nrt_request_url = f'https://firms.modaps.eosdis.nasa.gov/api/area/csv/{FIRMS_API_KEY}/VIIRS_SNPP_NRT/{",".join([str(i) for i in bbox])}/{day_range}/{start_date.strftime("%Y-%m-%d")}'
            df_fires=df_fires.append(pd.read_csv(nrt_request_url),ignore_index=True)
        if (start_date - timedelta(days=day_range)) < viirs_nrt_start:
            sp_request_url = f'https://firms.modaps.eosdis.nasa.gov/api/area/csv/{FIRMS_API_KEY}/VIIRS_SNPP_SP/{",".join([str(i) for i in bbox])}/{day_range}/{start_date.strftime("%Y-%m-%d")}'
            df_fires=df_fires.append(pd.read_csv(sp_request_url),ignore_index=True)

    # drop fires after end_date
    df_fires = df_fires[((df_fires['acq_date'].astype('datetime64[ns]') <= end_date) & (df_fires['acq_date'].astype('datetime64[ns]') >= begin_date))]
    gdf_fires = gpd.GeoDataFrame(df_fires, geometry=gpd.points_from_xy(df_fires.longitude, df_fires.latitude), crs='EPSG:4326')
    return gdf_fires

In [25]:
gdf_fires = load_fires(begin_date, end_date, bbox)
gdf_fires

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): firms.modaps.eosdis.nasa.gov:443
DEBUG:urllib3.connectionpool:https://firms.modaps.eosdis.nasa.gov:443 "GET /mapserver/mapkey_status/?MAP_KEY=8b3579dbaf217e12dc49960662286f22 HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): firms.modaps.eosdis.nasa.gov:443
DEBUG:urllib3.connectionpool:https://firms.modaps.eosdis.nasa.gov:443 "GET /mapserver/mapkey_status/?MAP_KEY=8b3579dbaf217e12dc49960662286f22 HTTP/1.1" 200 None
DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name


Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type,geometry
0,41.61243,-87.32779,318.28,0.41,0.45,2021-12-11,717,N,VIIRS,n,2,284.21,2.49,N,3,POINT (-87.32779 41.61243)
1,41.61656,-87.32710,305.80,0.41,0.45,2021-12-11,717,N,VIIRS,n,2,283.73,1.58,N,3,POINT (-87.32710 41.61656)
2,41.61703,-87.33204,316.36,0.41,0.45,2021-12-11,717,N,VIIRS,n,2,283.63,1.58,N,3,POINT (-87.33204 41.61703)
3,41.62444,-87.14648,300.47,0.40,0.44,2021-12-11,717,N,VIIRS,n,2,283.55,0.73,N,0,POINT (-87.14648 41.62444)
4,41.62857,-87.36546,313.27,0.41,0.45,2021-12-11,717,N,VIIRS,n,2,281.88,1.46,N,3,POINT (-87.36546 41.62857)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3459,45.57653,-118.59241,331.43,0.39,0.36,2021-12-15,2049,N,VIIRS,n,2,274.07,12.20,D,0,POINT (-118.59241 45.57653)
3460,45.57795,-118.58245,327.51,0.39,0.36,2021-12-15,2049,N,VIIRS,n,2,274.66,15.83,D,0,POINT (-118.58245 45.57795)
3461,46.07195,-118.89924,327.61,0.39,0.36,2021-12-15,2049,N,VIIRS,n,2,280.98,2.89,D,0,POINT (-118.89924 46.07195)
3462,46.07525,-118.90021,328.44,0.39,0.36,2021-12-15,2049,N,VIIRS,n,2,280.82,2.89,D,0,POINT (-118.90021 46.07525)


In [26]:
# create clusters
print('Clustering fires')
df_fire_clustered = cluster_fires(gdf_fires)

# create chip bounds
print('Creating chip bounds')
Path(output_fp).mkdir(parents=True, exist_ok=True)
manifest = create_chip_bounds(df_fire_clustered)
manifest

Clustering fires
Creating chip bounds


Unnamed: 0,idx,left,bottom,right,top,epsg,date
0,0,274125.0,3922875.0,306000.0,3954750.0,32614,2021-12-15
1,1,738000.0,3933375.0,769875.0,3965250.0,32613,2021-12-15
2,2,247500.0,4047750.0,279375.0,4079625.0,32614,2021-12-15
3,3,546750.0,4059750.0,578625.0,4091625.0,32614,2021-12-15


# Process Chips

In [None]:
def process_chip(chip, fs, output_fp, output_s3, fires,  training=True):
    """
    Given a chips metadata, load all of the training data and write numpy files, finally upload results to S3
    :param chip: records.csv chip to process data for
    :param fs: s3fs.S3FileSystem
    :param output_fp: local directory to write data to
    :param output_s3: local directory to write data to
    :param fires: gpd.GeoDataFrame or path to vector file containing fire point data
    :param cog_footprints: gpd.GeoDataFrame of the dem footprints
    :param training: bool, if true then will load/write next days fires
    """
    print("start processing")
    
    chip_idx, left, bottom, top, right, epsg, chip_date = chip["idx"], chip["left"], chip["bottom"], chip["top"], chip["right"], chip["epsg"], chip["date"]
    print("chip read completed")
#     if os.path.exists(output_fp + f'/{chip_idx}'):
#         return
    
#     # check not already on s3
#     if len(fs.ls(f'{output_s3}/{chip_idx}')) != 0:
#         return
    
    print(f'Processing chip: {chip_idx}')
    
    # create output dir if it doesnt already exist
    output_dir = Path(output_fp).joinpath(str(chip_idx))
    output_dir.mkdir(parents=True, exist_ok=True)

    # # load modis
    # try:
    #     ndvi = ndvi_from_topleft([top, left], epsg, chip_date)
    #     np.save(output_dir.joinpath('ndvi.npy'), ndvi)
    #     print("ndvi completed")
    # except RasterioIOError:
    #     # modis missing from bucket
    #     shutil.rmtree(output_dir)
    #     print("failed to load ndvi")
    
    
    # Fetch NDVI data
    try:
        print(f"Fetching NDVI data for chip {chip_idx}")
        ndvi = ndvi_from_topleft([top, left], epsg, chip_date,resolution=375)

        if ndvi is not None:
            np.save(output_dir.joinpath('ndvi.npy'), ndvi)
            print("NDVI data completed")
        else:
            print("No NDVI data available for this chip")
    except Exception as e:
        print(f"Error fetching NDVI data: {e}")    
    
    
    # save bbox to geojson
    bounds_utm = rasterio.coords.BoundingBox(left=left, right=right, bottom=bottom, top=top)
    gpd.GeoSeries([box(*bounds_utm)]).set_crs(epsg).to_file(output_dir.joinpath('bbox.geojson'))
    
    # # load fires
    # todays_fires = fires_from_topleft([top, left], epsg, chip_date, fires=fires)
    # np.save(output_dir.joinpath(f'todays_fires.npy'), todays_fires.bool)
    # np.save(output_dir.joinpath(f'todays_frp.npy'), todays_fires.frp)
    # print("today fire completed")
    
    
#     # load fires
#     todays_day_fires = fires_from_topleft([top, left], epsg, chip_date, fires=fires[fire["daynight"] == "D"')
#     np.save(output_dir.joinpath(f'todays_fires.npy'), todays_fires.bool)
#     np.save(output_dir.joinpath(f'todays_frp.npy'), todays_fires.frp)
#     print("today fire completed") 
    
    
#     # load tomorrows fires if training
    
#     if training:
#         tomorrows_date = (datetime.strptime(chip_date, '%Y-%m-%d') + timedelta(days=1)).strftime('%Y-%m-%d')
#         tomorrows_fires = fires_from_topleft([top, left], epsg, tomorrows_date, fires=fires)
#         np.save(output_dir.joinpath(f'tomorrows_fires.npy'), tomorrows_fires.bool)
#         np.save(output_dir.joinpath(f'tomorrows_frp.npy'), tomorrows_fires.frp)
    
    # Load today's day fires
    todays_day_fires = fires_from_topleft([top, left], epsg, chip_date, fires=fires[fires["daynight"] == "D"])
    np.save(output_dir.joinpath(f'todays_day_fires.npy'), todays_day_fires.bool)
    np.save(output_dir.joinpath(f'todays_day_frp.npy'), todays_day_fires.frp)
    print("Today's day fire data completed")

    # Load today's night fires
    todays_night_fires = fires_from_topleft([top, left], epsg, chip_date, fires=fires[fires["daynight"] == "N"])
    np.save(output_dir.joinpath(f'todays_night_fires.npy'), todays_night_fires.bool)
    np.save(output_dir.joinpath(f'todays_night_frp.npy'), todays_night_fires.frp)
    print("Today's night fire data completed")

    # Load tomorrow's fires if training
    if training:
        tomorrows_date = (datetime.strptime(chip_date, '%Y-%m-%d') + timedelta(days=1)).strftime('%Y-%m-%d')

        # Load tomorrow's day fires
        tomorrows_day_fires = fires_from_topleft([top, left], epsg, tomorrows_date, fires=fires[fires["daynight"] == "D"])
        np.save(output_dir.joinpath(f'tomorrows_day_fires.npy'), tomorrows_day_fires.bool)
        np.save(output_dir.joinpath(f'tomorrows_day_frp.npy'), tomorrows_day_fires.frp)
        print("Tomorrow's day fire data completed")

        # Load tomorrow's night fires
        tomorrows_night_fires = fires_from_topleft([top, left], epsg, tomorrows_date, fires=fires[fires["daynight"] == "N"])
        np.save(output_dir.joinpath(f'tomorrows_night_fires.npy'), tomorrows_night_fires.bool)
        np.save(output_dir.joinpath(f'tomorrows_night_frp.npy'), tomorrows_night_fires.frp)
        print("Tomorrow's night fire data completed")

    # dem = elevation_from_topleft([top, left], epsg)
    # np.save(output_dir.joinpath('elevation.npy'), dem)
    # Load DEM
    try:
        print(f"Fetching elevation data for top_left: {[top, left]}, EPSG: {epsg}")
        dem = elevation_from_topleft([top, left], epsg, resolution=30)
        np.save(output_dir.joinpath('elevation.npy'), dem)
        print("DEM data completed")
    except Exception as e:
        print(f"Error loading DEM data: {e}")
      

    
#     # load landcover
#     landcover = landcover_from_topleft([top, left], epsg)
#     np.save(output_dir.joinpath('landcover.npy'), landcover)
#     print("landcover completed")

    
#     # load atmospheric
#     atmos = atmospheric_from_topleft([top, left], epsg, chip_date, DEFAULT_PARAMS)
#     for var in list(atmos.data_vars):
#         data_arr = getattr(atmos, var).values[0]
#         np.save(output_dir.joinpath(f'{var}.npy'), data_arr)
# Load landcover
    try:
        landcover = landcover_from_topleft([top, left], epsg)
        np.save(output_dir.joinpath('landcover.npy'), landcover)
        print("Landcover data completed")
    except Exception as e:
        print(f"Error loading landcover data: {e}")

    # Load atmospheric data
    # try:
    #     atmos = atmospheric_from_topleft([top, left], epsg, chip_date, DEFAULT_PARAMS)
    #     for var in list(atmos.data_vars):
    #         data_arr = getattr(atmos, var).values[0]
    #         np.save(output_dir.joinpath(f'{var}.npy'), data_arr)
    #     print("Atmospheric data completed")
    # except Exception as e:
    #     print(f"Error loading atmospheric data: {e}")
    # Load atmospheric data
#     try:
#         atmos = atmospheric_from_topleft([top, left], epsg, chip_date, DEFAULT_PARAMS, resolution=375)

#         # Assuming atmos is a dictionary, process the data based on keys, not 'data_vars'
#         for param, data in atmos.items():  # Loop through each key-value pair in the dictionary
#             if data:  # Check if there is valid data for the parameter
#                 # Save the data (assuming data is in the correct format for saving)
#                 np.save(output_dir.joinpath(f'{param}.npy'), data)
#                 print(f"{param} data completed")
#             else:
#                 print(f"No data available for {param}")

#     except Exception as e:
#         print(f"Error loading atmospheric data: {e}")

# Load atmospheric data
    try:
        atmos = atmospheric_from_topleft([top, left], epsg, chip_date, DEFAULT_PARAMS, resolution=375)

        # Assuming atmos is a dictionary, process the data based on keys
        for param, data in atmos.items():  # Loop through each key-value pair in the dictionary
            try:
                # Save the data directly
                np.save(output_dir.joinpath(f'{param}.npy'), data)
                print(f"{param} data completed")
            except Exception as e:
                print(f"Error processing parameter {param}: {e}")

    except Exception as e:
        print(f"Error loading atmospheric data: {e}")


    fs.upload(str(output_dir), x
              f'{output_s3}/{chip_idx}/',
             recursive=True)
    shutil.rmtree(output_dir)

In [27]:
def process_chip_to_local(chip, output_fp, fires, training=True):
    """
    Given a chip's metadata, load all of the training data and write numpy files locally.

    :param chip: records.csv chip to process data for
    :param output_fp: local directory to write data to
    :param fires: gpd.GeoDataFrame or path to vector file containing fire point data
    :param training: bool, if true, then will load/write next day's fires
    """
    print("Start processing")

    chip_idx, left, bottom, top, right, epsg, chip_date = chip["idx"], chip["left"], chip["bottom"], chip["top"], chip["right"], chip["epsg"], chip["date"]
    print("Chip read completed")

    print(f'Processing chip: {chip_idx}')
    
    # Create output dir if it doesn't already exist
    output_dir = Path(output_fp).joinpath(str(chip_idx))
    output_dir.mkdir(parents=True, exist_ok=True)

#     # Fetch NDVI data
#     try:
#         print(f"Fetching NDVI data for chip {chip_idx}")
#         ndvi = ndvi_from_topleft([top, left], epsg, chip_date, resolution=375)

#         if ndvi is not None:
#             np.save(output_dir.joinpath('ndvi.npy'), ndvi)
#             print("NDVI data completed")
#         else:
#             print("No NDVI data available for this chip")
#     except Exception as e:
#         print(f"Error fetching NDVI data: {e}")

    # Save bounding box to GeoJSON
    bounds_utm = rasterio.coords.BoundingBox(left=left, right=right, bottom=bottom, top=top)
    gpd.GeoSeries([box(*bounds_utm)]).set_crs(epsg).to_file(output_dir.joinpath('bbox.geojson'))

    # Load today's day fires
    # try:
    #     todays_day_fires = fires_from_topleft([top, left], epsg, chip_date, fires=fires[fires["daynight"] == "D"])
    #     np.save(output_dir.joinpath(f'todays_day_fires.npy'), todays_day_fires.bool)
    #     np.save(output_dir.joinpath(f'todays_day_frp.npy'), todays_day_fires.frp)
    #     print("Today's day fire data completed")
    # except Exception as e:
    #     print(f"Error loading today's day fires: {e}")

    # Load today's night fires
#     try:
#         todays_night_fires = fires_from_topleft([top, left], epsg, chip_date, fires=fires[fires["daynight"] == "N"])
#         np.save(output_dir.joinpath(f'todays_night_fires.npy'), todays_night_fires.bool)
#         np.save(output_dir.joinpath(f'todays_night_frp.npy'), todays_night_fires.frp)
#         print("Today's night fire data completed")
#     except Exception as e:
#         print(f"Error loading today's night fires: {e}")

#     # Load tomorrow's fires if training
#     if training:
#         try:
#             tomorrows_date = (datetime.strptime(chip_date, '%Y-%m-%d') + timedelta(days=1)).strftime('%Y-%m-%d')

#             # Load tomorrow's day fires
#             tomorrows_day_fires = fires_from_topleft([top, left], epsg, tomorrows_date, fires=fires[fires["daynight"] == "D"])
#             np.save(output_dir.joinpath(f'tomorrows_day_fires.npy'), tomorrows_day_fires.bool)
#             np.save(output_dir.joinpath(f'tomorrows_day_frp.npy'), tomorrows_day_fires.frp)
#             print("Tomorrow's day fire data completed")

#             # Load tomorrow's night fires
#             tomorrows_night_fires = fires_from_topleft([top, left], epsg, tomorrows_date, fires=fires[fires["daynight"] == "N"])
#             np.save(output_dir.joinpath(f'tomorrows_night_fires.npy'), tomorrows_night_fires.bool)
#             np.save(output_dir.joinpath(f'tomorrows_night_frp.npy'), tomorrows_night_fires.frp)
#             print("Tomorrow's night fire data completed")
#         except Exception as e:
#             print(f"Error loading tomorrow's fires: {e}")

    #Fetch DEM data
    try:
        print(f"Fetching elevation data for top_left: {[top, left]}, EPSG: {epsg}")
        dem = elevation_from_topleft([top, left], epsg, resolution=30)
        np.save(output_dir.joinpath('elevation.npy'), dem)
        print("DEM data completed")
    except Exception as e:
        print(f"Error loading DEM data: {e}")

    # Fetch landcover data
#     try:
#         #landcover = landcover_from_topleft([top, left], epsg, chip_date,resolution = 375) 
#         landcover = landcover_from_topleft([top, left], epsg)

#         np.save(output_dir.joinpath('landcover.npy'), landcover)
#         print("Landcover data completed")
#     except Exception as e:
#         print(f"Error loading landcover data: {e}")

    # Fetch atmospheric data
    # try:
    #     atmos = atmospheric_from_topleft([top, left], epsg, chip_date, DEFAULT_PARAMS, resolution=375)
    #     for param, data in atmos.items():  # Loop through each key-value pair in the dictionary
    #         try:
    #             np.save(output_dir.joinpath(f'{param}.npy'), data)
    #             print(f"{param} data completed")
    #         except Exception as e:
    #             print(f"Error processing parameter {param}: {e}")
    # except Exception as e:
    #     print(f"Error loading atmospheric data: {e}")

    print(f"Data saved locally at {output_dir}")
output_fp = "sagemaker-studiolab-notebooks/SatelliteVu-AWS-Disaster-Response-Hackathon/dataset_preparation/src/"  # Path to save locally
chips = list(manifest.T.to_dict().values())
for chip in chips:
    process_chip_to_local(chip,  output_fp,  gdf_fires,  training=True)

DEBUG:fiona._env:GDAL_DATA found in environment.
DEBUG:fiona._env:PROJ_LIB found in environment.
DEBUG:fiona.ogrext:Removing GeoJSON file
DEBUG:fiona._env:GDAL_DATA found in environment.
DEBUG:fiona._env:PROJ_LIB found in environment.
DEBUG:fiona._env:GDAL_DATA found in environment.
DEBUG:fiona._env:PROJ_LIB found in environment.
DEBUG:fiona.ogrext:Created layer OgrGeoJSON
DEBUG:fiona.ogrext:Writing started
DEBUG:fiona._env:GDAL_DATA found in environment.
DEBUG:fiona._env:PROJ_LIB found in environment.
DEBUG:fiona.ogrext:Transaction supported: 0
DEBUG:fiona.ogrext:Flushed data source cache
DEBUG:fiona.collection:Flushed buffer
DEBUG:fiona.collection:Stopped session
DEBUG:rasterio.env:Entering env context: <rasterio.env.Env object at 0x7f997e7b33a0>
ERROR 4: `/vsis3/copernicus-dem-90m/Copernicus_DSM_COG_30_N35_00_W102_00_DEM/Copernicus_DSM_COG_30_N35_00_W102_00_DEM.tif' does not exist in the file system, and is not recognized as a supported dataset name.
DEBUG:rasterio.env:Starting oute

Start processing
Chip read completed
Processing chip: 0
Fetching elevation data for top_left: [3954750.0, 274125.0], EPSG: 32614
Error loading DEM data: 486a8d99-e439-41f5-8377-c6176f744931_temp.vrt: No such file or directory
Data saved locally at sagemaker-studiolab-notebooks/SatelliteVu-AWS-Disaster-Response-Hackathon/dataset_preparation/src/0
Start processing
Chip read completed
Processing chip: 1
Fetching elevation data for top_left: [3965250.0, 738000.0], EPSG: 32613
Error loading DEM data: 641cf48d-d6a5-45a2-9920-a2df8767d754_temp.vrt: No such file or directory
Data saved locally at sagemaker-studiolab-notebooks/SatelliteVu-AWS-Disaster-Response-Hackathon/dataset_preparation/src/1
Start processing
Chip read completed
Processing chip: 2
Fetching elevation data for top_left: [4079625.0, 247500.0], EPSG: 32614
Error loading DEM data: 397977a1-ab97-478e-a6a3-bea20bac1773_temp.vrt: No such file or directory
Data saved locally at sagemaker-studiolab-notebooks/SatelliteVu-AWS-Disaster-R

DEBUG:rasterio.env:Entering env context: <rasterio.env.Env object at 0x7f99626a79a0>
ERROR 4: `/vsis3/copernicus-dem-90m/Copernicus_DSM_COG_30_N36_00_W099_00_DEM/Copernicus_DSM_COG_30_N36_00_W099_00_DEM.tif' does not exist in the file system, and is not recognized as a supported dataset name.
DEBUG:rasterio.env:Starting outermost env
DEBUG:rasterio.env:No GDAL environment exists
DEBUG:rasterio.env:New GDAL environment <rasterio._env.GDALEnv object at 0x7f9963240f70> created
DEBUG:rasterio._env:GDAL_DATA found in environment.
DEBUG:rasterio._env:PROJ_LIB found in environment.
DEBUG:rasterio._env:Started GDALEnv: self=<rasterio._env.GDALEnv object at 0x7f9963240f70>.
DEBUG:rasterio.env:Entered env context: <rasterio.env.Env object at 0x7f99626a79a0>
DEBUG:rasterio._base:Sharing flag: 0
DEBUG:rasterio.env:Exiting env context: <rasterio.env.Env object at 0x7f99626a79a0>
DEBUG:rasterio.env:Cleared existing <rasterio._env.GDALEnv object at 0x7f9963240f70> options
DEBUG:rasterio._env:Stopped 

Error loading DEM data: 5d97580b-a05c-4ed0-be0b-59fbed502115_temp.vrt: No such file or directory
Data saved locally at sagemaker-studiolab-notebooks/SatelliteVu-AWS-Disaster-Response-Hackathon/dataset_preparation/src/3


In [None]:
chips = list(manifest.T.to_dict().values())
print(f'Chips total = {len(chips)}')

In [None]:
# query s3 to see which chips already processed and remove from list
processed_chips = fs.ls(output_s3)
processed_chips 

In [None]:
processed_ids = [int(x.split('/')[-1]) for x in processed_chips if x.split('/')[-1].isdigit()]
processed_ids

In [None]:
# query s3 to see which chips already processed and remove from list
processed_chips = fs.ls(output_s3)
processed_ids = [int(x.split('/')[-1]) for x in processed_chips if x.split('/')[-1].isdigit()]
print(f'Processed = {len(processed_ids)}')
to_process = [x for x in chips if x['idx'] not in processed_ids]
print(f'To process = {len(to_process)}')

In [None]:
#!pip install fiona==1.9.6 --force-reinstall


In [None]:
%%time
import math
os.environ['AWS_NO_SIGN_REQUEST'] = 'True'
print("start")

#cog_footprints = gpd.GeoDataFrame.from_file('s3://copernicus-dem-90m/tileList.txt')

# with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
#     future_work = [
#         executor.submit(process_chip, chip, fs, output_fp, output_s3, gdf_fires, cog_footprints, training=False) for chip in to_process
#     ]
with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
    future_work = [
        executor.submit(process_chip, chip, fs, output_fp, output_s3, gdf_fires,  training=True) for chip in to_process
    ]

# Explore some of the processed chips

In [None]:
import matplotlib.pyplot as plt

for chip in [x for x in fs.ls(output_s3) if x.split('/')[-1].isdigit()]:
    print(chip.split('/')[-1])
    
    try:
        tf = np.load(fs.open(chip + '/todays_fires.npy'))
    except FileNotFoundError:
        continue
        
#     fig, (ax1, ax3, ax4, ax5, ax6) = plt.subplots(1, 5, figsize=(20,20))

#     im = ax1.imshow(tf)
#     ax1.title.set_text('todays_fires')
    
#     lc = np.load(fs.open(chip + '/landcover.npy'))
#     im = ax3.imshow(lc)
#     ax3.title.set_text('land cover')

#     el = np.load(fs.open(chip + '/elevation.npy'))
#     im = ax4.imshow(el)
#     ax4.title.set_text('elevation')
    
#     nd = np.load(fs.open(chip + '/ndvi.npy'))
#     im = ax5.imshow(nd)
#     ax5.title.set_text('ndvi')
    
#     sa = np.load(fs.open(chip + '/surface_air_pressure.npy'))
#     im = ax6.imshow(sa)
#     ax6.title.set_text('surface_air_pressure')

    fig, (ax1, ax3, ax4) = plt.subplots(1, 3, figsize=(20,20))

    im = ax1.imshow(tf)
    ax1.title.set_text('todays_fires')
    
    lc = np.load(fs.open(chip + '/landcover.npy'))
    im = ax3.imshow(lc)
    ax3.title.set_text('land cover')

    el = np.load(fs.open(chip + '/elevation.npy'))
    im = ax4.imshow(el)
    ax4.title.set_text('elevation')
    
#     nd = np.load(fs.open(chip + '/ndvi.npy'))
#     im = ax5.imshow(nd)
#     ax5.title.set_text('ndvi')
    
#     sa = np.load(fs.open(chip + '/surface_air_pressure.npy'))
#     im = ax6.imshow(sa)
#     ax6.title.set_text('surface_air_pressure')

    
    plt.show()

In [None]:
import matplotlib.pyplot as plt

for chip in [x for x in fs.ls(output_s3) if x.split('/')[-1].isdigit()]:
    print(f"Chip ID: {chip.split('/')[-1]}")
    
    try:
        # Load fire and additional data
        day_fire = np.load(fs.open(chip + '/todays_day_fires.npy'))
        night_fire = np.load(fs.open(chip + '/todays_night_fires.npy'))
        tmr_day_fire = np.load(fs.open(chip + '/tomorrows_day_fires.npy'))
        tmr_night_fire = np.load(fs.open(chip + '/tomorrows_night_fires.npy'))
        landcover = np.load(fs.open(chip + '/landcover.npy'))
        elevation = np.load(fs.open(chip + '/elevation.npy'))
        temperature = np.load(fs.open(chip + '/wind_v.npy'),allow_pickle=True)
        
    except FileNotFoundError as e:
        print(f"Missing file for chip {chip.split('/')[-1]}: {e}")
        continue
    
    # Create subplots
    fig, axs = plt.subplots(2, 4, figsize=(24, 12))

    # Plot today's fires (day and night)
    axs[0, 0].imshow(day_fire)
    axs[0, 0].set_title("Today's Day Fire")
    axs[0, 1].imshow(night_fire)
    axs[0, 1].set_title("Today's Night Fire")

    # Plot tomorrow's fires (day and night)
    axs[0, 2].imshow(tmr_day_fire)
    axs[0, 2].set_title("Tomorrow's Day Fire")
    axs[0, 3].imshow(tmr_night_fire)
    axs[0, 3].set_title("Tomorrow's Night Fire")

    # Plot additional bands
    axs[1, 1].imshow(landcover)
    axs[1, 1].set_title("Landcover")
    axs[1, 2].imshow(elevation)
    axs[1, 2].set_title("Elevation")
    axs[1, 0].imshow(temperature,cmap='viridis')
    axs[1, 0].set_title("wind_v")

    # Adjust layout
    for ax in axs.flat:
        ax.axis('off')
    
    plt.tight_layout()
    plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

for chip in [x for x in fs.ls(output_s3) if x.split('/')[-1].isdigit()]:
    print(f"Chip ID: {chip.split('/')[-1]}")
    
    try:
        # Load fire and additional data
        day_fire = np.load(fs.open(chip + '/todays_day_fires.npy'))
        night_fire = np.load(fs.open(chip + '/todays_night_fires.npy'))
        tmr_day_fire = np.load(fs.open(chip + '/tomorrows_day_fires.npy'))
        tmr_night_fire = np.load(fs.open(chip + '/tomorrows_night_fires.npy'))
        landcover = np.load(fs.open(chip + '/landcover.npy'))
        elevation = np.load(fs.open(chip + '/elevation.npy'))
        temperature = np.load(fs.open(chip + '/temperature.npy'),allow_pickle=True)
                # Inspect temperature data
        print(f"Temperature dtype: {temperature.dtype}")
        print(f"Temperature shape: {temperature.shape}")
                # Ensure it's numeric and handle missing values
        # If it's a dictionary, extract the actual data
        if isinstance(temperature, dict):
            print(f"Temperature dictionary keys: {temperature.keys()}")
            temperature = temperature.get('desired_key')  # Replace 'desired_key' with the correct key for temperature data
        
        # Ensure it's numeric and handle missing values
        if isinstance(temperature, np.ndarray):
            temperature = np.nan_to_num(temperature, nan=0.0)  # Replace NaNs with 0
            
        specific_humidity = np.load(fs.open(chip + '/specific_humidity.npy'),allow_pickle=True)
        pressure = np.load(fs.open(chip + '/pressure.npy'),allow_pickle=True)
        wind_u = np.load(fs.open(chip + '/wind_u.npy'),allow_pickle=True)
        wind_v = np.load(fs.open(chip + '/wind_v.npy'),allow_pickle=True)
        
    except FileNotFoundError as e:
        print(f"Missing file for chip {chip.split('/')[-1]}: {e}")
        continue
    
    # Create subplots
    fig, axs = plt.subplots(3, 4, figsize=(24, 18))

    # Plot today's fires (day and night)
    axs[0, 0].imshow(day_fire)
    axs[0, 0].set_title("Today's Day Fire")
    axs[0, 1].imshow(night_fire)
    axs[0, 1].set_title("Today's Night Fire")

    # Plot tomorrow's fires (day and night)
    axs[0, 2].imshow(tmr_day_fire)
    axs[0, 2].set_title("Tomorrow's Day Fire")
    axs[0, 3].imshow(tmr_night_fire)
    axs[0, 3].set_title("Tomorrow's Night Fire")

    # Plot additional bands
    axs[1, 0].imshow(landcover)
    axs[1, 0].set_title("Landcover")
    axs[1, 1].imshow(elevation)
    axs[1, 1].set_title("Elevation")
    axs[1, 2].imshow(temperature)
    axs[1, 2].set_title("Temperature")
    axs[1, 3].imshow(specific_humidity)
    axs[1, 3].set_title("Specific Humidity")

    # Plot atmospheric bands
    axs[2, 0].imshow(pressure)
    axs[2, 0].set_title("Pressure")
    axs[2, 1].imshow(wind_u)
    axs[2, 1].set_title("Wind U Component")
    axs[2, 2].imshow(wind_v)
    axs[2, 2].set_title("Wind V Component")

    # Adjust layout
    for ax in axs.flat:
        ax.axis('off')
    
    plt.tight_layout()
    plt.show()


In [None]:
import ee

# Initialize the Earth Engine API
ee.Initialize()

# Test by printing a simple dataset description
print(ee.ImageCollection('ECMWF/ERA5_LAND/HOURLY').first().getInfo())


In [None]:
!pip install earthengine-api --quiet


In [None]:
import ee

# Initialize the Earth Engine API with your project ID
ee.Initialize(project='cultivated-card-441523-g2')

# Test the connection
print(ee.ImageCollection('ECMWF/ERA5_LAND/HOURLY').first().getInfo())


In [None]:
import ee
import numpy as np
import matplotlib.pyplot as plt

# Initialize the Earth Engine API
ee.Initialize(project='cultivated-card-441523-g2')

# Define your AOI (Area of Interest)
# Example coordinates, replace with your own coordinates
top_left = [37.7749, -122.4194]  # Latitude, Longitude
aoi = ee.Geometry.Point([39, 74]).buffer(500)

# Define parameters (e.g., 'temperature_2m')
params = ['pr']

# Function to fetch and plot atmospheric data
def fetch_and_plot_atmospheric_data(start_date, end_date, aoi, params, resolution=500):
    # Filter for the given date range and region
    weather = ee.ImageCollection("IDAHO_EPSCOR/GRIDMET") \
        .filterDate(start_date, end_date) \
        .filterBounds(aoi)

    # Fetch each parameter, e.g., temperature
    for param in params:
        try:
            param_data = weather.select(param).reproject(crs='EPSG:4326', scale=resolution)


        
        except Exception as e:
            print(f"Error fetching {param}: {e}")

# Call the function with your date range and AOI
fetch_and_plot_atmospheric_data("2022-01-01", "2022-01-02", aoi, params)


In [None]:
def fetch_and_plot_atmospheric_data(start_date, end_date, aoi, params, resolution=500):
    # Filter for the given date range and region
    weather = ee.ImageCollection("IDAHO_EPSCOR/GRIDMET") \
        .filterDate(start_date, end_date) \
        .filterBounds(aoi)

    for param in params:
        try:
            # Select the parameter and calculate mean
            param_data = weather.select(param).mean().reproject(crs='EPSG:4326', scale=resolution)

            # Aggregate data using reduceRegion
            data = param_data.reduceRegion(
                reducer=ee.Reducer.mean(),
                geometry=aoi,
                scale=resolution,
                maxPixels=1e9
            ).getInfo()

            # Check if the parameter exists in the data
            if param in data:
                value = data[param]
                print(f"{param} mean value: {value}")
                
                # Plot the mean value as a placeholder visualization
                plt.bar([param], [value], color='blue')
                plt.title(f"{param} Mean Value\n{start_date} to {end_date}")
                plt.ylabel(param)
                plt.show()
            else:
                raise KeyError(f"Parameter '{param}' not found in the data")

        except Exception as e:
            print(f"Error fetching {param}: {e}")


In [None]:
import ee
import numpy as np
import matplotlib.pyplot as plt

# Initialize Earth Engine
ee.Initialize(project='cultivated-card-441523-g2')

# Define AOI and parameters
aoi = ee.Geometry.Point([-97.7431, 30.2672]).buffer(100000)  # Example AOI in Texas
params = ['specific_humidity', 'temperature','wind_u']  # Specific humidity

def fetch_and_plot_heatmap(start_date, end_date, aoi, params, resolution=505):
    weather = ee.ImageCollection("NASA/NLDAS/FORA0125_H002") \
        .filterDate(start_date, end_date) \
        .filterBounds(aoi)

    for param in params:
        try:
            # Select parameter and reproject to 500-meter resolution
            param_data = weather.select(param).mean().reproject(
                crs='EPSG:4326', scale=resolution
            )

            # Extract data as a NumPy array
            data = param_data.sampleRectangle(region=aoi).getInfo()
            if param not in data['properties']:
                print(f"No data found for parameter '{param}'.")
                continue

            # Convert the raster data into a NumPy array
            raster = np.array(data['properties'][param])

            # Plot the heatmap
            plt.imshow(raster, cmap='viridis')
            plt.colorbar(label=param)
            plt.title(f"{param} from {start_date} to {end_date} at {resolution}m resolution")
            plt.show()

        except Exception as e:
            print(f"Error fetching {param}: {e}")

# Call the function with 500-meter resolution
fetch_and_plot_heatmap("2022-01-01", "2022-01-08", aoi, params)


In [None]:
# Retrieve dataset metadata
import ee
ee.Initialize(project='cultivated-card-441523-g2')  # Use your project ID
metadata = ee.ImageCollection("NASA/VIIRS/002/VNP13A1").first().getInfo()
print("Dataset Metadata:", metadata)


In [None]:
import ee
import numpy as np
import matplotlib.pyplot as plt

# Initialize Earth Engine
ee.Initialize(project='cultivated-card-441523-g2')

def fetch_and_plot_vegetation_heatmap(start_date, end_date, aoi, params, resolution=463):
    # Load the vegetation dataset
    vegetation = ee.ImageCollection('NASA/VIIRS/002/VNP13A1') \
        .filterDate(start_date, end_date) \
        .filterBounds(aoi)
    
    for param in params:
        try:
            # Select the vegetation index (e.g., NDVI)
            param_data = vegetation.select(param).mean().reproject(
                crs='EPSG:4326', scale=resolution
            )
            
            # Extract data as a NumPy array
            data = param_data.sampleRectangle(region=aoi).getInfo()
            if param not in data['properties']:
                print(f"No data found for parameter '{param}'.")
                continue

            # Apply scaling factor (if required)
            raster = np.array(data['properties'][param]) * 0.0001  # Adjust scale as needed

            # Plot the heatmap
            plt.imshow(raster, cmap='viridis')
            plt.colorbar(label=f"{param} (scaled)")
            plt.title(f"{param} from {start_date} to {end_date} at {resolution}m resolution")
            plt.show()

        except Exception as e:
            print(f"Error fetching {param}: {e}")

# Example Call
fetch_and_plot_vegetation_heatmap(
    start_date="2022-01-01",
    end_date="2022-01-06",
    aoi = ee.Geometry.Point([-97.7431, 30.2672]).buffer(10000),  # Example AOI in Texas
    params=['NDVI', 'EVI'],  # Example vegetation indices
    resolution=463
)


In [None]:
import rasterio

try:
    with rasterio.open("/vsis3/esa-worldcover/v100/2020/map/ESA_WorldCover_10m_2020_v100_S27E030_Map.tif") as src:
        print("Bounds:", src.bounds)
        print("CRS:", src.crs)
        print("Metadata:", src.meta)
except Exception as e:
    print(f"Failed to read file: {e}")


In [None]:
import logging
from rasterio.env import Env

logging.basicConfig(level=logging.DEBUG)

with Env(aws_unsigned=True):
    try:
        with rasterio.open("/vsis3/esa-worldcover/v100/2020/ESA_WorldCover_10m_2020_v100_Map_AWS.vrt") as src:
            print(src.meta)
    except Exception as e:
        print(f"Error: {e}")
