In [4]:
import pandas as pd
import xarray as xr
import numpy as np
import scipy.interpolate
import cv2

import scipy.ndimage as ndimage

In [None]:
# zip files - clone to remote - access via VM
# drag files into VM?

**Table of Contents**
1. <a href=#burned>MCD64A1 Burned Area</a>
2. <a href=#evapo>MOD16A2: EvapoTranspiration Data</a>
3. <a href=#veg>VNP13 Vegetation Indices</a>
4. <a href=#fire>VIIRS VNP14 Thermal Anomaly / Fire</a>
5. <a href=#fwi>Global Fire Weather Database: Fire Weather Index</a>
6. <a href=#merge>Merging VIIRS Fire and GFWD </a>

<a id=burned></a>
### MCD64A1 Burned Area
Sournce: https://lpdaac.usgs.gov/products/mcd64a1v006/

In [177]:
burned = xr.open_mfdataset('../../finalproj_data/time_slice/MCD64A1.006_500m_aid0001.nc', combine = 'by_coords')
burnt = burned.sel(time = slice('2019-11-01','2019-11-01'))

In [178]:
# burnt['crs']
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [179]:
burnt = burnt.drop_vars(['Burn_Date_Uncertainty', 'First_Day', 'Last_Day','crs', 'QA'])
burnt = burnt.squeeze('time')

In [None]:
# Reduce dimensionality in xarray

In [181]:
burnt = burnt.to_dataframe()
burnt.reset_index(inplace = True)

MemoryError: Unable to allocate array with shape (12623, 16707) and data type float32

In [14]:
burnt.drop('time', axis = 1, inplace = True)

In [16]:
burnt = burnt[burnt['Burn_Date'].isna()], axis = 0, inplace = True)

KeyError: "['lat' 'lon' 'Burn_Date'] not found in axis"

In [60]:
burnt['water'] = burnt['Burn_Date'] == -2.0

In [62]:
burnt['unburned'] = burnt['Burn_Date'] == 0.0

In [None]:
burnt['burned'] = burnt[(burnt['Burn_Date'] != 0) & (burnt['Burn_Date'] != -2.0) & (burnt['Burn_Date'].notnull())]

In [19]:
# burnt.to_parquet('../../finalproj_data/parquet/burnt.parquet')

In [None]:
# Compare lat/lon of burnt to lat/lon of ONE

In [15]:
print(burnt['lat'].max(), ONE['lat'].max())

-48.23124999567919 -48.23124999567919


Unique values:
    array([  0.,  nan,  -2., 305., 307., 322., 327., 320., 323., 326., 328.,
       324., 321., 319., 329., 313., 316., 306., 308., 309., 317., 312.,
       311., 333., 334., 332., 331., 310., 325., 318., 314., 315., 330.] <br>
14.6% of 210,892,461 data points are null values. <br>
0 is unburned, -2 is water, other values are days

<a id=evapo></a>
### MOD16A2: EvapoTranspiration Data 

In [172]:
# Load Data

ONE = xr.open_mfdataset('../../finalproj_data/time_slice/MOD16A2.006_500m_aid0001.nc', combine='by_coords') 
# (lat: 12623, lon: 16707, time: 1)
# 1.687374337 GB
# Source: https://lpdaac.usgs.gov/products/mod16a2v006/
# Probably only going to take Total Evapotranspiration (ET_500m)
# Ignore for now - add if time. 


In [173]:
# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Coordinates:
#     time     object 2019-12-11 00:00:00
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0
ONE = ONE.drop_vars(['crs', 'ET_QC_500m'])
ONE = ONE.squeeze('time')

In [174]:
ONE

In [175]:
mod = ONE.to_dataframe()
mod.reset_index(inplace = True)

MemoryError: Unable to allocate array with shape (210892461,) and data type int64

In [None]:
mod.drop('time', axis = 1, inplace = True)

In [None]:
# Spatially aggregate the data, first by rounding and creating a new column
mod = mod.round({'lat':2, 'lon':4})

In [176]:
# Then by averaging ET values for the same lat, lon points


In [25]:
# ONE.to_parquet('../../finalproj_data/parquet/modis.parquet')

<a id=veg></a>
### VNP13 Vegetation Indices
Source: https://lpdaac.usgs.gov/products/vnp13a2v001/

In [None]:
TWO = xr.open_mfdataset('../../finalproj_data/time_slice/VNP13A2.001_1km_aid0001.nc', combine = 'by_coords') #  (lat: 6312, lon: 8354, time: 1)
# Slice by time dimension so that this dataset is 1 dimension in time. 
TWO = TWO.sel(time = slice('2019-12-11', '2019-12-11'))
#2.953022425

In [26]:
# TWO
# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [25]:
TWO = TWO.drop_vars(['crs'])
TWO = TWO.squeeze('time')

In [26]:
len(TWO['_1_km_16_days_EVI'])

6312

In [28]:
# TWO = TWO.to_dataframe()
# TWO.reset_index(inplace = True)

In [29]:
# TWO.drop('time', axis = 1, inplace = True)

In [30]:
# TWO.to_parquet('../../finalproj_data/parquet/vnp13.parquet')

<a id=fire></a>
### VIIRS VNP14 Thermal Anomaly / Fire

Source: https://lpdaac.usgs.gov/products/vnp14a1v001/

In [30]:
THREE = xr.open_mfdataset('../../finalproj_data/time_slice/VNP14A1.001_1km_aid0001.nc', combine = 'by_coords') # (lat: 6312, lon: 8354, time: 1)
#1.054726297 GB


# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [31]:
THREE = THREE.drop_vars(['crs', 'QA', 'sample'])
THREE = THREE.squeeze('time')
THREE = THREE.drop('time')

In [32]:
# try dropping null values in xarray
viirs = THREE.dropna(dim = 'lon', subset=['FireMask'])
viirs = viirs.dropna(dim = 'lat', subset=['FireMask'])

In [33]:
# clean xarray dataset
viirs

In [34]:
# To pandas dataframe

viirs_df = viirs.to_dataframe()
viirs_df.reset_index(inplace = True)
viirs_df['MaxFRP'].fillna(0, inplace = True)

In [10]:
# # FOR DOWNSCALING: 
# viirs_df = viirs_df.round({'lat': 2, 'lon': 4})
# len(viirs_df.groupby(['lat','lon'])['FireMask'].value_counts())
# # Even when rounded, the length is not the same...

In [29]:
# iterate through index 
# len(viirs_df.groupby(['lat','lon'])['FireMask'].value_counts().index)

In [35]:
# viirs.to_parquet('../../finalproj_data/parquet/vnp14.parquet')

<a id=fwi></a>
### Global Fire Weather Database: Fire Weather Index

In [157]:
# Source: https://data.giss.nasa.gov/impacts/gfwed/
# See data_collection1 for more information. 

# GFWD - FWI --> Dataset labels to identify "high risk of fire" based on FWI calculations.
# Convention to label DataSet in caps and DataArray in lowercase

#Use xarray to open .nc file, combining by coordinates. 
GFWD = xr.open_mfdataset("../../finalproj_data/satellitedata/GFWD/FWI.GEOS-5.Monthly.Default.201912.nc", combine = 'by_coords')

In [158]:
GFWD = GFWD.squeeze('time')
GFWD = GFWD.drop('time')

In [159]:
# Geoslicing based on coordinates for 'burnt'

# lat min : -48.23125 
# lat max: 4.36041667

#lon max = 165.93541665
#lon min = 96.32708332

fwi_ = GFWD.where(GFWD.lat >= -48.23125, drop = True)
fwi_ = fwi_.where(fwi_.lat <= 4.36041667, drop = True)
fwi_ = fwi_.where((fwi_.lon >=96.32708332), drop = True)
fwi = fwi_.where((fwi_.lon <= 165.93541665), drop = True)

In [160]:
# Clean xarray fwi

In [161]:
# To pandas Dataframe

fwi_df = fwi.to_dataframe()
fwi_df.reset_index(inplace = True)

  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv


In [162]:
fwi_df['fire_weather'] = 0

In [163]:
# Create categorical values from FWI numerical
fwi_df.loc[(fwi_df['GEOS-5_FWI'] < 5), 'fire_weather'] = 'fwi_low'
fwi_df.loc[(fwi_df['GEOS-5_FWI'] > 5) & (fwi_df['GEOS-5_FWI'] < 8), 'fire_weather'] = 'fwi_moderate'
fwi_df.loc[(fwi_df['GEOS-5_FWI'] > 8) & (fwi_df['GEOS-5_FWI'] < 16), 'fire_weather'] = 'fwi_high'
fwi_df.loc[(fwi_df['GEOS-5_FWI'] > 16) & (fwi_df['GEOS-5_FWI'] < 29), 'fire_weather'] = 'fwi_veryhigh'
fwi_df.loc[(fwi_df['GEOS-5_FWI'] > 29), 'fire_weather'] = 'fwi_extreme'
fwi_df.drop('GEOS-5_FWI', axis = 1, inplace = True)

<img src='../images/fire_danger.png'>

In [164]:
fwi_cat = fwi_df.to_xarray()

### Upscaling! 

In [165]:
# Resizing
# These dimensions in viirs: # 6312*1899 = 11986488

# These original dims in fwi: 210 x 222 = 46620

#https://corteva.github.io/rioxarray/stable/examples/convert_to_raster.html
# https://rasterio.readthedocs.io/en/latest/topics/resampling.html
# for fwi:
    # longitude in 4 decimal degrees - https://en.wikipedia.org/wiki/Decimal_degrees
    # lat in 2 decimal degrees
# for viirs
    # long and lat both have 8 decimal degrees

In [166]:
# list of all variables that need to be upscaled and appended to viirs xarray
fwi_df_vars = ['GEOS-5_DC', 'GEOS-5_DMC', 'GEOS-5_FFMC', 'GEOS-5_ISI', 'GEOS-5_BUI', 'GEOS-5_FWI', 'GEOS-5_DSR']

In [167]:
# desired output shape for each variable (6312, 1899)
# current shape for each variable (210, 222)
# Scaling Factor
6312/210, 1899/222

(30.057142857142857, 8.554054054054054)

In [168]:
def upscale_variable(variable):
    """This function upscales a given input variable in an xarray Dataset, 
    and outputs the variable as an array with the same lat and lon dimensions as viirs variables"""
    var = fwi_cat[variable].values
    var_ = ndimage.zoom(var, (30.057142857142857, 8.554054054054054), mode = 'nearest', order = 0)
    return var_

In [169]:
upscaled = [] #list of output arrays
for name in fwi_df_vars:
    upscaled.append(upscale_variable(name))

RuntimeError: sequence argument must have length equal to input rank

In [170]:
viirs = viirs.assign(DC= (('lat','lon'),upscaled[0]))
viirs = viirs.assign(DMC= (('lat','lon'),upscaled[1]))
viirs = viirs.assign(FFMC= (('lat','lon'),upscaled[2]))
viirs = viirs.assign(ISI= (('lat','lon'),upscaled[3]))
viirs = viirs.assign(BUI= (('lat','lon'),upscaled[4]))
viirs = viirs.assign(FWI= (('lat','lon'),upscaled[5]))
viirs = viirs.assign(DSR= (('lat','lon'),upscaled[6]))

IndexError: list index out of range

In [171]:
merged = viirs.to_dataframe()
merged.reset_index(inplace = True)

In [172]:
(merged.to_xarray()).to_netcdf('../../finalproj_data/satellitedata/merged.nc')