In [1]:
import xarray as xr
import pandas as pd
import numpy as np
from geopandas import GeoDataFrame
from shapely.geometry import Point

import netCDF4
from netCDF4 import Dataset
# Viz tools
import matplotlib.pyplot as plt

### Satellite Data from MODIS on Burned Areas

In [6]:
# Open the MCD64A1 burned-area file; only one path is passed, and
# `combine='by_coords'` tells open_mfdataset how to merge what it finds.
burned = xr.open_mfdataset(
    '../../finalproj_data/time_slice/MCD64A1.006_500m_aid0001.nc',
    combine='by_coords',
)

# Per the original note the file holds two time stamps (Oct 1 and Nov 1).
# A slice that starts and stops on Nov 1 keeps just that step while leaving
# `time` in place as a length-1 dimension.
burnt = burned.sel(time=slice('2019-11-01', '2019-11-01'))

In [9]:
# Pull the Burn_Date variable out as a plain NumPy array to eyeball the raw values.
# NOTE(review): the 0 / -2 / nan entries in the output are presumably product
# codes or fill values - confirm against the MCD64A1 documentation.
burnt['Burn_Date'].values

array([[[ 0.,  0.,  0., ..., -2., -2., -2.],
        [ 0.,  0.,  0., ..., -2., -2., -2.],
        [ 0.,  0.,  0., ..., -2., -2., -2.],
        ...,
        [nan, nan, nan, ..., -2., -2., -2.],
        [nan, nan, nan, ..., -2., -2., -2.],
        [nan, nan, nan, ..., -2., -2., -2.]]], dtype=float32)

### Satellite Data from VIIRS and MODIS: Fire Labels

In [2]:
## Satellite data from VIIRS and MODIS, including fire labels
#
# The datasets differ along the **time** dimension.
# NOTE(review): the original note says they share the same spatial lat/lon
# dimensions, but the shapes recorded below put ONE on a 12623 x 16707 grid
# (500 m file) and the VNP* files on a 6312 x 8354 grid (1 km files) -
# TODO confirm before combining on coordinates.

# MOD16A2, 500 m. Recorded shape: (lat: 12623, lon: 16707, time: 1).
# Recorded size: 1.687374337 GB.
ONE = xr.open_mfdataset(
    '../../finalproj_data/time_slice/MOD16A2.006_500m_aid0001.nc',
    combine='by_coords',
)

# VNP13A2, 1 km. Recorded shape: (lat: 6312, lon: 8354, time: 1).
TWO = xr.open_mfdataset(
    '../../finalproj_data/time_slice/VNP13A2.001_1km_aid0001.nc',
    combine='by_coords',
)
# Keep only the 2019-12-11 step so this dataset has a single entry along time.
# Recorded size: 2.953022425 (presumably GB, like the others).
TWO_ = TWO.sel(time=slice('2019-12-11', '2019-12-11'))

# VNP14A1, 1 km. Recorded shape: (lat: 6312, lon: 8354, time: 1).
# Recorded size: 1.054726297 GB.
THREE = xr.open_mfdataset(
    '../../finalproj_data/time_slice/VNP14A1.001_1km_aid0001.nc',
    combine='by_coords',
)

# VNP43MA3, 1 km. Recorded shape: (lat: 6312, lon: 8354, time: 1).
# Recorded size: 7.593301849 GB.
FOUR = xr.open_mfdataset(
    '../../finalproj_data/time_slice/VNP43MA3.001_1km_aid0001.nc',
    combine='by_coords',
)

In [3]:
# Plan: convert each dataset to a pandas DataFrame separately.
# Each one is at least XX GB (.nbytes / 1e9) once decoded as float.
# When xarray writes a DataFrame, every variable is first expanded to use all dimensions, so roughly 3 x 108 GB in memory.
# pandas probably needs a further in-memory copy to create the DataFrame - another 3 x 108 GB.
# Since FOUR is the largest, try running the first rough model without this data and see what comes out.

In [4]:
# Flatten ONE into a pandas DataFrame and turn its index levels into ordinary
# columns in a single chained expression.
modis = ONE.to_dataframe().reset_index()

In [5]:
# Convert the time-sliced dataset TWO_ to a pandas DataFrame.
# The index is left as produced by to_dataframe() (no reset_index here).
viirs1 = TWO_.to_dataframe()

In [6]:
# Convert dataset THREE to a pandas DataFrame.
# The index is left as produced by to_dataframe() (no reset_index here).
viirs2 = THREE.to_dataframe()

In [18]:
#http://xarray.pydata.org/en/stable/generated/xarray.combine_by_coords.html
# Decided to transform each to a dataframe, and then to geopandas, and then spatially join. 

In [16]:
# List the column labels of the flattened `modis` frame.
modis.columns

Index(['lat', 'lon', 'time', 'crs', 'ET_500m', 'ET_QC_500m'], dtype='object')

In [None]:
# Build one shapely Point per row of `modis`, with x = lon and y = lat.
# NOTE(review): this is a Python-level loop over every row, so it will be slow
# and memory-hungry on a large frame - consider a vectorised constructor such
# as geopandas.points_from_xy if this cell becomes a bottleneck.
geometry = [Point(lon, lat) for lon, lat in zip(modis.lon, modis.lat)]

# The coordinates now live in `geometry`, so drop the plain lon/lat columns.
# This rebinds `modis`; re-running the cell without re-creating `modis` will
# fail because the columns are gone.
modis = modis.drop(columns=['lon', 'lat'])

# Use the authority string rather than the deprecated {'init': 'epsg:4326'}
# dict, which emits a FutureWarning with pyproj 2+ / current GeoPandas.
crs = 'EPSG:4326'
modis_gdf = GeoDataFrame(modis, crs=crs, geometry=geometry)

In [4]:
# .attrs stores metadata
# .dims shows number of dimensions
# .coords coordinates are empty?
# .values similar to a numpy array that you can modify in place

### Fire Weather Index

In [2]:
# GFWD - FWI: dataset labels used to identify "high risk of fire" based on
# FWI calculations.
# Naming convention: Dataset objects in CAPS, DataArray objects in lowercase.

# Open the monthly .nc file (name ends in 201912) with xarray, combining by
# coordinates.
DEC = xr.open_mfdataset(
    "../../finalproj_data/satellitedata/GFWD/FWI.GEOS-5.Monthly.Default.201912.nc",
    combine='by_coords',
)

In [14]:
# Show the dataset-level metadata dictionary attached to DEC.
DEC.attrs

{'History:': 'history',
 'History': '02-Jan-2020 13:10:48',
 'Source:': 'source',
 'Source': 'Robert Field',
 'Title:': 'title',
 'Title': 'Global Fire Weather Database',
 'Center:': 'center',
 'Center': 'NASA GISS / Columbia University',
 'Name:': 'Descriptive name for configuration',
 'Name': 'Default',
 'minLat:': 'Min latitude for analysis',
 'minLat': -58.0,
 'maxLat:': 'Max latitude for analysis',
 'maxLat': 75.0,
 'minLandFrac:': 'Minimum grid cell land fraction for analysis',
 'minLandFrac': 0.1,
 'minT:': 'Mask out anything with mean annual Tsurf less than this',
 'minT': -10.0,
 'minPrec:': 'Mask out anything with mean annual prec less than this',
 'minPrec': 0.25,
 'snoDThresh:': 'Minimum depth (m) for there to be considered snow on ground at any given time',
 'snoDThresh': 0.01,
 'snowCoverDaysCalc:': 'Number of days prior to spring over which to determine if winter had substantial snow cover',
 'snowCoverDaysCalc': 60.0,
 'minWinterSnoD:': 'Minimum mean depth (m) during pa

In [3]:
# Convert the FWI dataset to a pandas DataFrame.
# NOTE(review): the repeated "condition |= data == fv" lines in the cell output
# look like warning residue emitted while decoding fill values - TODO confirm
# and silence or address the underlying warning.
fwi = DEC.to_dataframe()

  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv
  condition |= data == fv


In [9]:
# Display the FWI frame; the rendered output is elided in the middle ("...").
# NOTE(review): every row visible in the output is all-NaN - presumably these
# are masked cells at the edge of the analysis domain; confirm that other cells
# actually carry values before modelling on this frame.
fwi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GEOS-5_DC,GEOS-5_DMC,GEOS-5_FFMC,GEOS-5_ISI,GEOS-5_BUI,GEOS-5_FWI,GEOS-5_DSR
lat,lon,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
-58.0,-180.0000,1.0,,,,,,,
-58.0,-179.6875,1.0,,,,,,,
-58.0,-179.3750,1.0,,,,,,,
-58.0,-179.0625,1.0,,,,,,,
-58.0,-178.7500,1.0,,,,,,,
...,...,...,...,...,...,...,...,...,...
75.0,178.4375,1.0,,,,,,,
75.0,178.7500,1.0,,,,,,,
75.0,179.0625,1.0,,,,,,,
75.0,179.3750,1.0,,,,,,,
