In [24]:
import pandas as pd
import xarray as xr
import numpy as np
import scipy.interpolate

import scipy.ndimage as ndimage
%run ../pyfiles/data_cleaning.py

**Table of Contents**
1. <a href=#burned>MCD64A1 Burned Area</a>
2. <a href=#evapo>MOD16A2: EvapoTranspiration Data</a>
3. <a href=#veg>VNP13 Vegetation Indices</a>
4. <a href=#fire>VIIRS VNP14 Thermal Anomaly / Fire</a>
5. <a href=#fwi>Global Fire Weather Database: Fire Weather Index</a>
6. <a href=#merge>Merging Final Dataset</a>

<a id=burned></a>
### MCD64A1 Burned Area
Source: https://lpdaac.usgs.gov/products/mcd64a1v006/

In [25]:
burned = xr.open_mfdataset('../../finalproj_data/time_slice/MCD64A1.006_500m_aid0001.nc', combine = 'by_coords')
burnt = burned.sel(time = slice('2019-11-01','2019-11-01'))

In [26]:
# burnt['crs']
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [27]:
burnt = burnt.drop_vars(['Burn_Date_Uncertainty', 'First_Day', 'Last_Day','crs', 'QA'])
burnt = burnt.squeeze('time')
burnt = burnt.drop('time')

Unique values:
    array([  0.,  nan,  -2., 305., 307., 322., 327., 320., 323., 326., 328.,
       324., 321., 319., 329., 313., 316., 306., 308., 309., 317., 312.,
       311., 333., 334., 332., 331., 310., 325., 318., 314., 315., 330.] <br>
14.6% of 210,892,461 data points are null values. <br>
0 is unburned, -2 is water, other values are days

<a id=evapo></a>
### MOD16A2: EvapoTranspiration Data 

In [28]:
# Load Data

EVAP = xr.open_mfdataset('../../finalproj_data/time_slice/MOD16A2.006_500m_aid0001.nc', combine='by_coords') 
# (lat: 12623, lon: 16707, time: 1)
# 1.687374337 GB
# Source: https://lpdaac.usgs.gov/products/mod16a2v006/
# Probably only going to take Total Evapotranspiration (ET_500m)
# Ignore for now - add if time. 


In [29]:
# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Coordinates:
#     time     object 2019-12-11 00:00:00
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0
EVAP = EVAP.drop_vars(['crs', 'ET_QC_500m'])
EVAP = EVAP.squeeze('time')
EVAP = EVAP.drop('time')

In [30]:
EVAP

<a id=veg></a>
### VNP13 Vegetation Indices
Source: https://lpdaac.usgs.gov/products/vnp13a2v001/

In [31]:
VEG = xr.open_mfdataset('../../finalproj_data/time_slice/VNP13A2.001_1km_aid0001.nc', combine = 'by_coords') #  (lat: 6312, lon: 8354, time: 1)
# Slice by time dimension so that this dataset is 1 dimension in time. 
VEG = VEG.sel(time = slice('2019-12-11', '2019-12-11'))
#2.953022425

In [32]:
# TWO
# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [33]:
VEG = VEG.drop_vars(['crs'])
VEG = VEG.squeeze('time')
VEG = VEG.drop('time')

In [34]:
VEG

<a id=fire></a>
### VIIRS VNP14 Thermal Anomaly / Fire

Source: https://lpdaac.usgs.gov/products/vnp14a1v001/

In [35]:
THREE = xr.open_mfdataset('../../finalproj_data/time_slice/VNP14A1.001_1km_aid0001.nc', combine = 'by_coords') # (lat: 6312, lon: 8354, time: 1)
#1.054726297 GB


# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [36]:
THREE = THREE.drop_vars(['crs', 'QA', 'sample'])
THREE = THREE.squeeze('time')
viirs = THREE.drop('time')

In [37]:
# clean xarray dataset
viirs

In [16]:
# # To pandas dataframe

# viirs_df = viirs.to_dataframe()
# viirs_df.reset_index(inplace = True)
# viirs_df['MaxFRP'].fillna(0, inplace = True)

<a id=fwi></a>
### Global Fire Weather Database: Fire Weather Index

In [66]:
# Source: https://data.giss.nasa.gov/impacts/gfwed/
# See data_collection1 for more information. 

# GFWD - FWI --> Dataset labels to identify "high risk of fire" based on FWI calculations.
# Convention to label DataSet in caps and DataArray in lowercase

#Use xarray to open .nc file, combining by coordinates. 
GFWD = xr.open_mfdataset("../../finalproj_data/satellitedata/GFWD/FWI.GEOS-5.Monthly.Default.201912.nc", combine = 'by_coords')

In [67]:
GFWD = GFWD.squeeze('time')
GFWD = GFWD.drop('time')

In [68]:
# Geoslicing based on coordinates for 'burnt'

# lat min : -48.23125 
# lat max: 4.36041667

#lon max = 165.93541665
#lon min = 96.32708332

fwi_ = GFWD.where(GFWD.lat >= -48.23125, drop = True)
fwi_ = fwi_.where(fwi_.lat <= 4.36041667, drop = True)
fwi_ = fwi_.where((fwi_.lon >=96.32708332), drop = True)
fwi = fwi_.where((fwi_.lon <= 165.93541665), drop = True)

<a id=merge></a>

## Merge Datasets

**Downscale MCD64A1 Burned Area (burnt)**

In [41]:
# Reduce dimensionality
# Original: (lat: 12623, lon: 16707)
# Final: (lat: 6312, lon: 8354)
# Scaling Factors (take the inverse)
12623/6312, 16707/8354

(1.9998415716096325, 1.9998802968637779)

In [42]:
burned_area = scale_variable(burnt, 'Burn_Date', (1/1.9998415716096325, 1/1.9998802968637779))

In [16]:
burned_area.shape

(6312, 8354)

**No need to scale: MOD16A2: EvapoTranspiration Data (EVAP)**

In [44]:
# Original (lat: 12623, lon: 16707)
# Final: (lat: 6312, lon: 8354)
# Scaling Factors (take the inverse)
12623/6312, 16707/8354

(1.9998415716096325, 1.9998802968637779)

In [45]:
et_500m = scale_variable(EVAP, 'ET_500m', (1/1.9998415716096325, 1/1.9998802968637779))

**No need to scale: VNP13 Vegetation Indices (VEG)**

In [48]:
# Original (lat: 6312, lon: 8354)
# Final: (lat: 6312, lon: 8354)
# Scaling Factors: (take inverse)
6312/6312, 8354/8354

(1.0, 1.0)

In [49]:
veg_vars = list(VEG.variables)[2:]
veg_vars

['_1_km_16_days_EVI',
 '_1_km_16_days_EVI2',
 '_1_km_16_days_NDVI',
 '_1_km_16_days_NIR_reflectance',
 '_1_km_16_days_SWIR1_reflectance',
 '_1_km_16_days_SWIR2_reflectance',
 '_1_km_16_days_SWIR3_reflectance',
 '_1_km_16_days_blue_reflectance',
 '_1_km_16_days_composite_day_of_the_year',
 '_1_km_16_days_green_reflectance',
 '_1_km_16_days_red_reflectance',
 '_1_km_16_days_VI_Quality',
 '_1_km_16_days_pixel_reliability']

**Upscale Global Fire Weather Database: Fire Weather Index (fwi)**

In [100]:
# Original: (lat: 210, lon: 222)
#Final:  (lat: 6312, lon: 8354)
# Scaling Factor
6312/210, 8354/222

(30.057142857142857, 37.630630630630634)

In [101]:
# list of all variables that need to be upscaled and appended to viirs xarray
fwi_vars = list(fwi.variables)[:-2]

In [102]:
fwi_arrays = [] #list of output arrays
for name in fwi_vars:
    fwi_arrays.append(scale_variable(fwi, name, (30.057142857142857, 37.630630630630634)))

  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv


In [103]:
# need to flip these so they are properly aligned

final_arrays = []
for array in fwi_arrays:
    final_arrays.append(np.flip(array, axis = 0))

In [104]:
final_arrays[0].shape

(6312, 8354)

**Merge All Variables to VIIRS**

In [105]:
viirs = viirs.assign(burned = (('lat','lon'),burned_area))

viirs = viirs.assign(et_500m = (('lat','lon'),et_500m))

viirs = viirs.assign(_1_km_16_days_EVI = (('lat','lon'),VEG['_1_km_16_days_EVI']))
viirs = viirs.assign(_1_km_16_days_EVI2 = (('lat','lon'),VEG['_1_km_16_days_EVI2']))
viirs = viirs.assign(_1_km_16_days_NDVI = (('lat','lon'),VEG['_1_km_16_days_NDVI']))
viirs = viirs.assign(_1_km_16_days_NIR_reflectance = (('lat','lon'),VEG['_1_km_16_days_NIR_reflectance']))
viirs = viirs.assign(_1_km_16_days_SWIR1_reflectance = (('lat','lon'),VEG['_1_km_16_days_SWIR1_reflectance']))
viirs = viirs.assign(_1_km_16_days_SWIR2_reflectance = (('lat','lon'),VEG['_1_km_16_days_SWIR2_reflectance']))
viirs = viirs.assign(_1_km_16_days_SWIR3_reflectance = (('lat','lon'),VEG['_1_km_16_days_SWIR3_reflectance']))
viirs = viirs.assign(_1_km_16_days_blue_reflectance = (('lat','lon'),VEG['_1_km_16_days_blue_reflectance']))
viirs = viirs.assign(_1_km_16_days_composite_day_of_the_year = (('lat','lon'),VEG['_1_km_16_days_composite_day_of_the_year']))
viirs = viirs.assign(_1_km_16_days_green_reflectance = (('lat','lon'),VEG['_1_km_16_days_green_reflectance']))
viirs = viirs.assign(_1_km_16_days_red_reflectance = (('lat','lon'),VEG['_1_km_16_days_red_reflectance']))
viirs = viirs.assign(_1_km_16_days_VI_Quality = (('lat','lon'),VEG['_1_km_16_days_VI_Quality']))
viirs = viirs.assign(_1_km_16_days_pixel_reliability = (('lat','lon'),VEG['_1_km_16_days_pixel_reliability']))

viirs = viirs.assign(_DC = (('lat','lon'),final_arrays[0]))
viirs = viirs.assign(_DMC= (('lat','lon'),final_arrays[1]))
viirs = viirs.assign(_FFMC= (('lat','lon'),final_arrays[2]))
viirs = viirs.assign(_ISI= (('lat','lon'),final_arrays[3]))
viirs = viirs.assign(_BUI= (('lat','lon'),final_arrays[4]))
viirs = viirs.assign(_FWI= (('lat','lon'),final_arrays[5]))
viirs = viirs.assign(_DSR= (('lat','lon'),final_arrays[6]))

In [None]:
viirs

In [106]:
viirs.to_netcdf('../../finalproj_data/final.nc')

### Create a Labels Column in the Cloud (using pandas)

<img src='../images/fire_danger.png'>

In [90]:
merged['fire_weather'] = 0

In [91]:
# Create categorical values from FWI numerical
merged.loc[(merged['FWI'] < 5), 'fire_weather'] = 'fwi_low'
merged.loc[(merged['FWI'] > 5) & (merged['FWI'] < 8), 'fire_weather'] = 'fwi_moderate'
merged.loc[(merged['FWI'] > 8) & (merged['FWI'] < 16), 'fire_weather'] = 'fwi_high'
merged.loc[(merged['FWI'] > 16) & (merged['FWI'] < 29), 'fire_weather'] = 'fwi_veryhigh'
merged.loc[(merged['FWI'] > 29), 'fire_weather'] = 'fwi_extreme'
merged.drop('FWI', axis = 1, inplace = True)

In [93]:
merged['label'] = 0

In [None]:
## You will probably have to run this part in the cloud

In [96]:
for row in range(0, len(merged)):
    if merged.loc[row,'fire_weather'] == 0:
        merged.loc[row,'label'] = merged.loc[row,'FireMask']
    else:
        None

KeyboardInterrupt: 