In [None]:
%%capture 
!apt install gdal-bin python-gdal python3-gdal 
%pip install pandas fiona shapely pyproj
!apt install python3-rtree 
!pip install geopandas==0.10.0
!pip install descartes
!pip install scipy 
!pip install statsmodels
!pip install matplotlib
!pip install gdal
!pip install numpy
!pip install scikit-learn
!pip install statsmodels
!pip install pytz 
!pip install xarray

In [None]:
#Here we are importing the packages we need. 
import geopandas as gpd
import pandas as pd 
from geopandas.tools import sjoin
from shapely.geometry import LineString
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import shape
from descartes import PolygonPatch
import time
import math
import scipy.stats as stats
import numpy as np
import os, sys
from pyproj import CRS, Transformer
import fiona

import statsmodels.api as sm
import statsmodels.formula.api as smf

import matplotlib.pyplot as plt
import matplotlib as mpl
from math import floor

from shapely.ops import unary_union

import warnings
warnings.filterwarnings('ignore')

from osgeo import ogr, gdal,osr

from datetime import datetime
from datetime import timedelta
from pytz import timezone
import xarray as xr

  import pandas.util.testing as tm


In [None]:
def extract(path,code_type,lat,lon,dats):
    '''Sample one grid cell of a NetCDF file on each date and summarise the values.

    The cell is located with fixed grid constants: 0.25 degree spacing, first
    column at longitude 0 and first row at latitude 90.125.

    Parameters
    ----------
        path : string
            path to the NetCDF file (opened with xarray, closed before returning)
        code_type : string
            "wind", "temp", "dtemp" or "pcp" read the variables 'u10'/'v10',
            't2m', 'd2m' or 'tp' from the dataset; any other value is used as
            the name of the variable to read (e.g. 'FFMC')
        lat : float
            latitude of the point
        lon : float
            longitude of the point; wrapped into 0-360 before the lookup
        dats : iterable
            dates whose str() has the form 'YYYY-MM-DD HH:MM:SS'; an entry whose
            str() is 4 characters or shorter (e.g. nan) is treated as missing

    Returns
    ----------
        list
            [median, 90th percentile] of the sampled values. Missing values are
            ignored by both statistics; both are NaN when nothing valid was sampled.
    '''
    weather_types = ("pcp", "wind", "temp", "dtemp")

    # Grid constants used to turn a coordinate into array indices.
    x_origin = 0
    y_origin = 90.125
    cell_size = 0.25
    col = int(((lon % 360) - x_origin) / cell_size)
    row = int((y_origin - lat) / cell_size)

    z_vals = []
    # The context manager closes the file handle even if a lookup fails;
    # this function is called once per mesh point, so leaked handles add up.
    with xr.open_dataset(path) as ds_disk:
        dc_vals = ds_disk if code_type in weather_types else ds_disk[code_type]

        for dat in dats:
            date_str = str(dat)
            if len(date_str) <= 4: # nan / missing date
                z_vals.append(np.nan)
                continue

            day_of_year = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S').timetuple().tm_yday
            # NOTE(review): day 60 is 1 March in a non-leap year, so the weather
            # files presumably start on 1 March; leap years shift this by one
            # time step - confirm against the files.
            weather_index = day_of_year-60

            if code_type == "wind":
                u10 = dc_vals.variables['u10'][weather_index,:,:].values
                v10 = dc_vals.variables['v10'][weather_index,:,:].values
                # Magnitude of the two components, scaled by 3.6.
                grid = 3.6 * np.sqrt(np.power(u10,2) + np.power(v10,2))
            elif code_type == "temp":
                grid = dc_vals.variables['t2m'][weather_index,:,:].values
            elif code_type == "dtemp":
                grid = dc_vals.variables['d2m'][weather_index,:,:].values
            elif code_type == "pcp":
                grid = _pcp_grid(dc_vals, date_str)
            else:
                # NOTE(review): indexed with the 1-based day of year directly;
                # confirm which date the first time step of the file holds.
                grid = dc_vals[day_of_year].values

            value = grid[row][col]
            z_vals.append(np.nan if np.isnan(value) else value)

    if not any(not np.isnan(z) for z in z_vals):
        # Nothing valid was sampled (no dates, or every date missing).
        return [np.nan, np.nan]

    med = np.nanmedian(z_vals)
    # nanpercentile keeps p90 consistent with the NaN-ignoring median above.
    p90 = np.nanpercentile(z_vals, 90)

    return [med,p90]


def _pcp_grid(dc_vals, date_str):
    '''Build the precipitation grid for one date from the 'tp' variable.

    NOTE(review): the arithmetic below reproduces the previous behaviour exactly
    and looks unintended. The hour counter never enters the index, so only two
    time steps are read (hours since 1 March for the previous day and for the
    date itself), and because the read loop sits inside the hour loop the result
    is 90 times the first grid plus 210 times the second. Confirm the intended
    accumulation window before relying on "pcp".
    '''
    tz = timezone('EST')
    season_start = datetime(int(date_str[0:4]), 3, 1, 0, 0, tzinfo=tz)
    current = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')

    def hours_since_start(moment):
        elapsed = moment.replace(tzinfo=tz) - season_start
        return int(elapsed.total_seconds() / 60 / 60)

    idx_previous_day = hours_since_start(current - timedelta(1))
    idx_same_day = hours_since_start(current)

    loaded = {}   # each time step is read from the file only once
    indexes = []
    darray = []
    for xd in range(0,24):
        indexes.append(idx_previous_day if xd < 4 else idx_same_day)
        for idx in indexes:
            if idx not in loaded:
                loaded[idx] = dc_vals.variables['tp'][idx,:,:].values
            darray.append(loaded[idx])

    return sum(darray)

In [None]:
def get_netCDF_vals(fire_shapefile, path, code_type, res,factor):
    '''Summarise a NetCDF variable over the points of a regular grid that fall inside a fire.

    A grid of points is laid over the bounding box of the fire geometry, the
    points inside the fire are kept, and extract() is evaluated at each of them
    for the 21 daily dates starting on the fire's REP_DATE. extract() returns a
    [median, 90th percentile] pair per point; this function aggregates those
    pairs over all points.

    Parameters
    ----------
        fire_shapefile : row of a GeoDataFrame
            one fire record; its 'geometry' and 'REP_DATE' fields are read
        path : string
            path to the NetCDF file passed on to extract()
        code_type : string
            variable / code name passed on to extract()
        res : float
            spacing of the point grid, in the units of the geometry coordinates
        factor : int
            multiplier for the number of grid points along each axis

    Returns
    ----------
        tuple of 6 floats
            (mean, median, maximum) over the points of the per-point median,
            followed by (mean, median, maximum) over the points of the per-point
            90th percentile. All six are NaN when no grid point is inside the fire.
    '''
    bounds = gpd.GeoDataFrame(geometry=[fire_shapefile['geometry']]).bounds
    repdat = pd.to_datetime(fire_shapefile['REP_DATE'])
    edate = repdat + pd.DateOffset(days=21)
    # Daily dates from the report date up to (not including) 21 days later.
    all_dat = pd.date_range(repdat,edate-timedelta(days=1),freq='d')

    xmax = np.nanmax(bounds['maxx'])
    xmin = np.nanmin(bounds['minx'])
    ymax = np.nanmax(bounds['maxy'])
    ymin = np.nanmin(bounds['miny'])

    # Number of rows / columns needed to fill the bounding box at that resolution
    num_col = int((xmax - xmin) / res)
    num_row = int((ymax - ymin) / res)

    # Regular mesh of candidate points over the bounding box, in X,Y order
    Yi = np.linspace(ymin, ymax, num_row*factor)
    Xi = np.linspace(xmin, xmax, num_col*factor)
    Xi, Yi = np.meshgrid(Xi, Yi)
    meshPoints = [Point(x, y) for x, y in zip(Xi.flatten().tolist(), Yi.flatten().tolist())]
    gdf = gpd.GeoDataFrame(geometry=meshPoints)

    fire_geom = fire_shapefile['geometry']
    try: # The geometry is a single polygon
        fire_poly = unary_union(Polygon(fire_geom))
        poly_define = gpd.GeoDataFrame(geometry=[fire_poly])
        # Points falling in the fire
        within_fire = gdf[gdf.geometry.within(poly_define['geometry'][0])]
    # Several polygons that are not contiguous make up the fire
    except (NotImplementedError,AttributeError,TypeError):
        fire_parts = [unary_union(Polygon(geom)) for geom in list(fire_geom)]
        poly_define = gpd.GeoDataFrame(geometry=fire_parts)
        # Left spatial join, then drop the points that matched no polygon
        within_fire = sjoin(gdf, poly_define, how='left',op='within')
        within_fire = within_fire[~np.isnan(within_fire['index_right'])]

    # extract() uses a point's coordinates only to pick a grid cell, so every
    # point in the same cell gets the same answer. Cache per cell so the file is
    # not re-read for each mesh point. The cell arithmetic must stay identical
    # to the one inside extract() (0.25 spacing, origins 0 and 90.125).
    cell_results = {}
    inside_fire = []
    for point in within_fire['geometry']:
        mx, my = point.x, point.y
        cell = (int(((mx % 360) - 0) / 0.25), int((90.125 - my) / 0.25))
        if cell not in cell_results:
            cell_results[cell] = extract(path,code_type,my,mx,all_dat)
        inside_fire.append(cell_results[cell])

    if len(inside_fire) == 0:
        return np.nan,np.nan,np.nan,np.nan,np.nan,np.nan

    medians = [vals[0] for vals in inside_fire]  # per-point median over the 21 days
    p90s = [vals[1] for vals in inside_fire]     # per-point 90th percentile over the 21 days

    mean = np.nanmean(medians)
    median = np.nanmedian(medians)
    maximum = np.nanmax(medians)

    p90_mean = np.nanmean(p90s)
    p90_med = np.nanmedian(p90s)
    p90_max = np.nanmax(p90s)

    return mean,median,maximum,p90_mean,p90_med,p90_max

In [None]:
# Mount Google Drive at /content/drive so the data folders are reachable
# from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Navigate to the folder where the data is stored in the drive, so that
# relative data paths resolve against it.
%cd /content/drive/MyDrive/jameslab/

# Absolute path of that same data folder.
dirname = '/content/drive/MyDrive/jameslab/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/jameslab


In [None]:
# Compute FFMC statistics for every selected fire perimeter and save them as CSV.
path_ffmc = 'fire_weather/McElhinny/FFMC/'
sa = gpd.read_file('study_area/BC_AB_shapefile.shp')
shp = gpd.read_file('fire_perimeters/NFDB_poly_20210707_epsg4326.shp')

# Keep fires of at least 10 ha reported by the AB / BC agencies.
shp = shp[shp['SIZE_HA'] >= 10]
shp_prov = shp[shp['SRC_AGENCY'].isin(['AB','BC'])]
# NOTE(review): '== 1985' keeps a single year, which makes the '<= 2015' filter
# below a no-op. If the 1985-2015 range was intended this should be '>= 1985';
# left unchanged because it may be a deliberate one-year test run.
shp_year = shp_prov[shp_prov['YEAR'] == 1985]
shp_year = shp_year[shp_year['YEAR'] <= 2015]
# Explicit copy: new columns are added below, and assigning onto a filtered
# slice of shp_year is ambiguous (view vs copy) in pandas.
shp_type = shp_year[shp_year['CAUSE'] == 'L'].copy()
print(len(shp_type))

# One [mean, median, max, p90 mean, p90 median, p90 max] entry per fire,
# in the row order of shp_type.
ffmc_list = []

for index, fire in shp_type.iterrows():
    print(fire['FIRE_ID'])
    year = int(fire['YEAR'])
    # One NetCDF file per year; join avoids the doubled '/' of plain concatenation.
    ffmc_file = os.path.join(path_ffmc, 'fine_fuel_moisture_code_'+str(year)+'.nc')
    ave,med,max_val,ave90,med90,max_val90 = get_netCDF_vals(fire,ffmc_file,'FFMC',0.001,1)
    print(ave)
    print(ave90)
    ffmc_list.append([ave,med,max_val,ave90,med90,max_val90])

ffmc_columns = ['ffmc_mean', 'ffmc_median', 'ffmc_max',
                'ffmc_p90mean', 'ffmc_p90median', 'ffmc_p90max']
for position, column in enumerate(ffmc_columns):
    shp_type[column] = [stats[position] for stats in ffmc_list]

print(shp_type)

shp_type.to_csv('ffmc_in_cnfdb_fires.txt',sep=',')

110
1985-C20039
87.194405
90.66580963134766
1985-C30017
70.87034
88.16047668457031
1985-C30020
70.71768
88.56130981445312
1985-C30025
70.19395
89.4786605834961
1985-C30027
nan
nan
1985-C50134


KeyboardInterrupt: ignored