In [2]:
import pandas as pd
import xarray as xr
import numpy as np
from scipy import stats

**Table of Contents**
1. <a href=#burned>MCD64A1 Burned Area</a>
2. <a href=#evapo>MOD16A2: EvapoTranspiration Data</a>
3. <a href=#veg>VNP13 Vegetation Indices</a>
4. <a href=#fire>VIIRS VNP14 Thermal Anomaly / Fire</a>
5. <a href=#fwi>Global Fire Weather Database: Fire Weather Index</a>
6. <a href=#merge>Merging VIIRS Fire and GFWD </a>

<a id=burned></a>
### MCD64A1 Burned Area
Source: https://lpdaac.usgs.gov/products/mcd64a1v006/

In [177]:
# Open the MCD64A1 burned-area NetCDF file.
burned = xr.open_mfdataset(
    '../../finalproj_data/time_slice/MCD64A1.006_500m_aid0001.nc',
    combine='by_coords',
)
# Keep only the 2019-11-01 time step.
burnt = burned.sel({'time': slice('2019-11-01', '2019-11-01')})

In [178]:
# burnt['crs']
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [179]:
# Discard the uncertainty, first/last-day, crs and QA variables, then collapse
# the 'time' dimension.
burnt = (
    burnt
    .drop_vars(['Burn_Date_Uncertainty', 'First_Day', 'Last_Day', 'crs', 'QA'])
    .squeeze('time')
)

In [None]:
# Reduce dimensionality in xarray

In [181]:
# Flatten the grid into a table and turn the index levels into ordinary columns.
# NOTE: the recorded run of this cell ended in a MemoryError.
burnt = burnt.to_dataframe().reset_index()

MemoryError: Unable to allocate array with shape (12623, 16707) and data type float32

In [14]:
# The 'time' column is no longer needed.
burnt = burnt.drop(columns='time')

In [16]:
# Remove rows whose Burn_Date is null. The previous line was not valid Python
# (unbalanced parenthesis and stray axis/inplace arguments left from a drop() call).
burnt = burnt.dropna(subset=['Burn_Date'])

KeyError: "['lat' 'lon' 'Burn_Date'] not found in axis"

In [60]:
# Burn_Date == -2 marks water pixels.
burnt['water'] = burnt['Burn_Date'].eq(-2.0)

In [62]:
# Burn_Date == 0 marks unburned pixels.
burnt['unburned'] = burnt['Burn_Date'].eq(0.0)

In [None]:
# Boolean flag for pixels that carry an actual burn day: not null, not 0 (unburned)
# and not -2 (water). Assign the mask itself, not the filtered DataFrame, so the
# column lines up with the 'water' and 'unburned' flags.
burnt['burned'] = (
    burnt['Burn_Date'].notnull()
    & (burnt['Burn_Date'] != 0)
    & (burnt['Burn_Date'] != -2.0)
)

In [19]:
# burnt.to_parquet('../../finalproj_data/parquet/burnt.parquet')

In [None]:
# Compare lat/lon of burnt to lat/lon of ONE

In [15]:
# Sanity check: print the maximum latitude of both tables side by side.
# NOTE(review): ONE is only assigned in the MOD16A2 section further down, so this
# cell depends on that cell having been executed first.
print(burnt['lat'].max(), ONE['lat'].max())

-48.23124999567919 -48.23124999567919


Unique values:
    array([  0.,  nan,  -2., 305., 307., 322., 327., 320., 323., 326., 328.,
       324., 321., 319., 329., 313., 316., 306., 308., 309., 317., 312.,
       311., 333., 334., 332., 331., 310., 325., 318., 314., 315., 330.]) <br>
14.6% of 210,892,461 data points are null values. <br>
0 is unburned, -2 is water, other values are days

<a id=evapo></a>
### MOD16A2: EvapoTranspiration Data 

In [172]:
# Load Data

ONE = xr.open_mfdataset(
    '../../finalproj_data/time_slice/MOD16A2.006_500m_aid0001.nc',
    combine='by_coords',
)
# (lat: 12623, lon: 16707, time: 1)
# 1.687374337 GB
# Source: https://lpdaac.usgs.gov/products/mod16a2v006/
# Probably only going to take Total Evapotranspiration (ET_500m)
# Ignore for now - add if time. 


In [173]:
# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Coordinates:
#     time     object 2019-12-11 00:00:00
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0
# Drop the crs and QC variables, then collapse the 'time' dimension.
ONE = ONE.drop_vars(['crs', 'ET_QC_500m']).squeeze('time')

In [174]:
# Display the dataset summary after dropping variables and squeezing 'time'.
ONE

In [175]:
# Flatten the grid into a table with the index levels as ordinary columns.
# NOTE: the recorded run of this cell ended in a MemoryError.
mod = ONE.to_dataframe().reset_index()

MemoryError: Unable to allocate array with shape (210892461,) and data type int64

In [None]:
mod = mod.drop(columns='time')

In [None]:
# Spatially aggregate the data, first by rounding and creating a new column
# lat is rounded to 2 decimals and lon to 4 decimals.
mod = mod.round(decimals={'lat': 2, 'lon': 4})

In [176]:
# Then by averaging ET values for the same lat, lon points


In [25]:
# ONE.to_parquet('../../finalproj_data/parquet/modis.parquet')

<a id=veg></a>
### VNP13 Vegetation Indices
Source: https://lpdaac.usgs.gov/products/vnp13a2v001/

In [None]:
# Open the VNP13A2 file (noted grid: lat 6312, lon 8354, time 1) and keep only the
# 2019-12-11 time step so the dataset has a single entry along 'time'.
TWO = xr.open_mfdataset(
    '../../finalproj_data/time_slice/VNP13A2.001_1km_aid0001.nc',
    combine='by_coords',
).sel({'time': slice('2019-12-11', '2019-12-11')})
#2.953022425

In [26]:
# TWO
# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [25]:
# Drop the crs variable, then collapse the 'time' dimension.
TWO = TWO.drop_vars(['crs']).squeeze('time')

In [26]:
# Size of the EVI array along its first dimension.
TWO['_1_km_16_days_EVI'].shape[0]

6312

In [28]:
# TWO = TWO.to_dataframe()
# TWO.reset_index(inplace = True)

In [29]:
# TWO.drop('time', axis = 1, inplace = True)

In [30]:
# TWO.to_parquet('../../finalproj_data/parquet/vnp13.parquet')

<a id=fire></a>
### VIIRS VNP14 Thermal Anomaly / Fire

Source: https://lpdaac.usgs.gov/products/vnp14a1v001/

In [49]:
# Noted grid: (lat: 6312, lon: 8354, time: 1)
THREE = xr.open_mfdataset(
    '../../finalproj_data/time_slice/VNP14A1.001_1km_aid0001.nc',
    combine='by_coords',
)
#1.054726297 GB


# <xarray.DataArray 'crs' ()>
# array(-127, dtype=int8)
# Attributes:
#     grid_mapping_name:            latitude_longitude
#     _CoordinateAxisTypes:         GeoX GeoY
#     epsg_code:                    4326
#     horizontal_datum_name:        WGS84
#     semi_major_axis:              6378137
#     inverse_flattening:           298.257223563
#     longitude_of_prime_meridian:  0.0

In [50]:
# Drop the crs and QA variables, collapse the 'time' dimension, then remove the
# leftover 'time' coordinate. drop_vars replaces the deprecated Dataset.drop and
# matches the calls used elsewhere in this notebook.
THREE = THREE.drop_vars(['crs', 'QA'])
THREE = THREE.squeeze('time')
THREE = THREE.drop_vars('time')

In [51]:
# try dropping null values in xarray
# Drop null FireMask entries along 'lon' first, then along 'lat'.
viirs = (
    THREE
    .dropna(dim='lon', subset=['FireMask'])
    .dropna(dim='lat', subset=['FireMask'])
)

In [52]:
# Scratch arithmetic: the product (11986488) matches the row count reported for
# the viirs table further down. Presumably lat rows x lon columns remaining after
# dropna -- TODO confirm where 1899 comes from.
6312*1899

11986488

In [53]:
# Display the dataset after the null FireMask entries were dropped.
viirs

In [54]:
# To pandas dataframe

# Flatten to a table with the index levels as ordinary columns.
viirs = viirs.to_dataframe().reset_index()

In [55]:
# Remove the 'sample' column and treat missing MaxFRP as 0.
viirs = viirs.drop(columns='sample')
# Assign the filled column back: an in-place fillna on a selected column is a
# chained operation that is not guaranteed to update the parent DataFrame.
viirs['MaxFRP'] = viirs['MaxFRP'].fillna(0)

In [56]:
# Display the table after dropping 'sample' and filling MaxFRP.
viirs

Unnamed: 0,lat,lon,FireMask,MaxFRP
0,4.362500,150.120833,3.0,0.0
1,4.362500,150.129167,3.0,0.0
2,4.362500,150.137500,3.0,0.0
3,4.362500,150.145833,3.0,0.0
4,4.362500,150.154167,3.0,0.0
...,...,...,...,...
11986483,-48.229167,165.904167,3.0,0.0
11986484,-48.229167,165.912500,3.0,0.0
11986485,-48.229167,165.920833,3.0,0.0
11986486,-48.229167,165.929167,3.0,0.0


In [94]:
# lat is rounded to 2 decimals and lon to 4 decimals.
viirs = viirs.round(decimals={'lat': 2, 'lon': 4})

In [145]:
# Separate coordinate + FireMask and coordinate + MaxFRP tables.
fm = viirs[['lat', 'lon', 'FireMask']]
frp = viirs[['lat', 'lon', 'MaxFRP']]

In [146]:
def take_mode(x):
    """Return the most frequent non-null value of a pandas Series.

    Parameters
    ----------
    x : pandas.Series
        Values to summarise (for example one group of a groupby).

    Returns
    -------
    The value with the highest count according to ``value_counts()``, or NaN
    when ``x`` is empty or contains only nulls.
    """
    counts = x.value_counts()
    # value_counts() skips nulls, so an empty or all-null input has no rows;
    # return NaN instead of raising IndexError on index[0].
    if counts.empty:
        return float('nan')
    return counts.index[0]

In [None]:
# Most frequent FireMask value for each (lat, lon) pair.
test = (
    fm
    .groupby(['lat', 'lon'], as_index=False)['FireMask']
    .apply(take_mode)
)

In [143]:
# Number of distinct (lat, lon) groups after rounding.
fm.groupby(['lat', 'lon']).max().shape[0]

9988740

KeyboardInterrupt: 

Unnamed: 0,lat,lon,FireMask,MaxFRP
0,-48.23,150.1208,4.0,0.0
1,-48.23,150.1292,4.0,0.0
2,-48.23,150.1375,4.0,0.0
3,-48.23,150.1458,4.0,0.0
4,-48.23,150.1542,4.0,0.0
...,...,...,...,...
9988735,4.36,165.9042,3.0,0.0
9988736,4.36,165.9125,3.0,0.0
9988737,4.36,165.9208,3.0,0.0
9988738,4.36,165.9292,3.0,0.0


MemoryError: Unable to allocate array with shape (3, 11986488) and data type float64

In [None]:
# Scratch value: equals 6312 * 8354, the (lat, lon) grid size noted for VNP14A1
# above -- presumably the cell count before nulls were dropped (TODO confirm).
52730448

In [35]:
# THREE.to_parquet('../../finalproj_data/parquet/vnp14.parquet')

<a id=fwi></a>
### Global Fire Weather Database: Fire Weather Index

In [73]:
# Source: https://data.giss.nasa.gov/impacts/gfwed/
# See data_collection1 for more information. 

# GFWD - FWI --> Dataset labels to identify "high risk of fire" based on FWI calculations.
# Convention: label Dataset objects in caps and DataArray objects in lowercase

#Use xarray to open .nc file, combining by coordinates. 
FWI = xr.open_mfdataset(
    "../../finalproj_data/satellitedata/GFWD/FWI.GEOS-5.Monthly.Default.201912.nc",
    combine='by_coords',
)

In [74]:
# Collapse the 'time' dimension, then remove the leftover 'time' coordinate.
# drop_vars replaces the deprecated Dataset.drop and matches the calls used
# elsewhere in this notebook.
FWI = FWI.squeeze('time')
FWI = FWI.drop_vars('time')

In [75]:
# Geoslicing based on coordinates for 'burnt'

# lat min : -48.23125 
# lat max: 4.36041667

#lon max = 165.93541665
#lon min = 96.32708332

# Mask FWI to the lat/lon bounding box listed above: latitude condition first,
# then longitude.
fwi = (
    FWI
    .where((FWI['lat'] >= -48.23125) & (FWI['lat'] <= 4.36041667))
    .where((FWI['lon'] >= 96.32708332) & (FWI['lon'] <= 165.93541665))
)

#Double checked and this slicing in xarray worked

In [76]:
# To pandas Dataframe

fwi = fwi.to_dataframe().reset_index()

  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv


In [77]:
# For FWI['GEOS-5_FWI']

# min: 0.003872012021020055
# max: 96.01458740234375

In [80]:
# Placeholder label; 0 means "no category assigned yet".
fwi = fwi.assign(fire_weather=0)

In [81]:
# Create categorical values from FWI numerical
# The placeholder column holds the integer 0; cast it to object so the string
# labels below can be written without a dtype conflict.
fwi['fire_weather'] = fwi['fire_weather'].astype(object)
# Lower bounds are inclusive so a value exactly on a threshold (5, 8, 16, 29) is
# labelled instead of staying at the 0 placeholder and being dropped later.
# NOTE(review): the boundary is assigned to the higher class -- confirm against
# the fire danger table.
fwi.loc[(fwi['GEOS-5_FWI'] < 5), 'fire_weather'] = 'fwi_low'
fwi.loc[(fwi['GEOS-5_FWI'] >= 5) & (fwi['GEOS-5_FWI'] < 8), 'fire_weather'] = 'fwi_moderate'
fwi.loc[(fwi['GEOS-5_FWI'] >= 8) & (fwi['GEOS-5_FWI'] < 16), 'fire_weather'] = 'fwi_high'
fwi.loc[(fwi['GEOS-5_FWI'] >= 16) & (fwi['GEOS-5_FWI'] < 29), 'fire_weather'] = 'fwi_veryhigh'
fwi.loc[(fwi['GEOS-5_FWI'] >= 29), 'fire_weather'] = 'fwi_extreme'
# Rows with a null FWI match none of the conditions and keep the 0 placeholder.
fwi.drop('GEOS-5_FWI', axis = 1, inplace = True)

In [82]:
# Drop all null values for FWI
# Rows still carrying the 0 placeholder never received a category.
fwi = fwi.loc[fwi['fire_weather'].ne(0)]

In [84]:
# Persist the labelled table.
fwi.to_csv(path_or_buf='../../finalproj_data/csv/fwi.csv')

In [106]:
# Note that lat is 2 decimal points and lon is four decimal points. 

<img src='../images/fire_danger.png'>

In [119]:
# fwi.to_parquet('../../finalproj_data/parquet/labeled_fwi.parquet')

<a id=merge></a>
### TEST MERGE

Given fwi and viirs in xarray

In [85]:
# Outer join on exact 'lat' equality. 'lon' exists in both tables, so it comes
# out as lon_x (fwi) and lon_y (viirs).
test_upsampling = pd.merge(fwi, viirs, how='outer', on='lat')
test_upsampling

Unnamed: 0,lat,lon_x,GEOS-5_DC,GEOS-5_DMC,GEOS-5_FFMC,GEOS-5_ISI,GEOS-5_BUI,GEOS-5_DSR,fire_weather,lon_y,FireMask,MaxFRP
0,-43.500000,146.2500,32.464867,4.450133,54.313038,1.743645,6.464416,0.205657,fwi_low,,,
1,-43.500000,146.5625,33.639297,5.282477,55.677544,1.844144,7.366685,0.243763,fwi_low,,,
2,-43.500000,146.8750,273.527863,15.401765,76.589508,5.178259,26.425797,2.170682,fwi_high,,,
3,-43.500000,147.1875,224.455963,5.666484,71.412170,4.144137,10.467385,0.555368,fwi_low,,,
4,-43.250000,145.9375,30.258886,4.500226,53.325954,1.862201,6.482756,0.252616,fwi_low,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
11998588,-48.229167,,,,,,,,,165.904167,3.0,0.0
11998589,-48.229167,,,,,,,,,165.912500,3.0,0.0
11998590,-48.229167,,,,,,,,,165.920833,3.0,0.0
11998591,-48.229167,,,,,,,,,165.929167,3.0,0.0
