In [4]:
# Mount Google Drive so the following cells can read and write files under
# /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Make the project's folder on the mounted Drive the working directory.
%cd /content/drive/MyDrive/weather/

/content/drive/MyDrive/weather


In [1]:
# First-time setup only: uncomment the next line to clone the repository into
# the current working directory.
# !git clone https://github.com/clara-risk/fire_weather_interpolate.git  #only run once

# Enter the existing clone and pull the latest upstream changes.
%cd /content/drive/MyDrive/weather/fire_weather_interpolate
!git pull

/content/drive/MyDrive/weather/fire_weather_interpolate
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 4 (delta 3), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (4/4), done.
From https://github.com/clara-risk/fire_weather_interpolate
   70aef68..fd9f612  master     -> origin/master
Updating 70aef68..fd9f612
Fast-forward
 fire_weather_interpolate/fwi.py | 2 [32m+[m[31m-[m
 1 file changed, 1 insertion(+), 1 deletion(-)


In [None]:
# Install geopandas with the %pip magic.
# NOTE(review): the version is not pinned; consider pinning it so a fresh
# runtime reproduces the same environment.

%pip install geopandas




In [None]:
%pip install numpy
%pip install pyproj
%pip install matplotlib 
%pip install pandas
%pip install scikit-learn
%pip install shapely 



In [2]:
import geopandas as gpd
import numpy as np
import pyproj
import matplotlib.pyplot as plt
import warnings
import os,sys,time
import math,statistics
import json 
import pandas as pd 

In [4]:
import importlib

# Identify the cloned repository and the modules this notebook uses from it.
user = "clara-risk"
repo = "fire_weather_interpolate"
src_dir = "fire_weather_interpolate"
pyfiles = ["get_data", "gpr", "idw", "idew"]

# Relative and absolute locations of the package directory inside the clone.
dirname = '/content/drive/MyDrive/weather/'
path = f"{repo}/{src_dir}"
path2 = f"{dirname}{repo}/{src_dir}/"

# Register the package directory on the module search path (once only) so the
# modules can be imported by their bare names.
if path2 not in sys.path:
    sys.path.insert(1, path2)

# Import every listed module by name; `mymodule` ends up bound to the last one.
loaded_modules = [importlib.import_module(module_name) for module_name in pyfiles]
mymodule = loaded_modules[-1]

# Bind the aliases used throughout the rest of the notebook.
import get_data as GD
import gpr as gpr
import idw as idw
import idew as idew

In [5]:
# Root folder on the mounted Drive; every input path below is built from it.
dirname = '/content/drive/MyDrive/weather/'
file_path_daily = os.path.join(dirname, 'daily_feather/')
file_path_hourly = os.path.join(dirname, 'hourly_feather/')
shapefile = os.path.join(dirname, 'study_area/QC_ON_albers_dissolve.shp')
file_path_elev = os.path.join(dirname, 'lookup_files/elev_csv.csv')

# Index of the 'elev' column in the elevation lookup file.
idx_list = GD.get_col_num_list(file_path_elev, 'elev')


def _read_json(relative_path):
    """Return the parsed contents of a JSON file located under ``dirname``."""
    with open(dirname + relative_path, 'r') as handle:
        return json.load(handle)


# Speed-up dictionary recording which daily stations have data in which months.
date_dictionary = _read_json('json/daily_lookup_file_TEMP.json')

# Lat / lon lookup for the daily stations.
daily_dictionary = _read_json('json/daily_lat_lon_TEMP.json')

# Lat / lon lookup for the hourly stations.
hourly_dictionary = _read_json('json/hourly_lat_lon_TEMP.json')

In [None]:
# Build the elevation array for the study area. idew.IDEW returns it together
# with the interpolated grid, so a single interpolation call is used to get it.
# NOTE(review): the observations are loaded for 1956-07-01 but the date string
# handed to IDEW is 2018-07-01 -- confirm whether this mismatch is intentional.

temperature = GD.get_noon_temp('1956-07-01 13:00', file_path_hourly)
idew_outputs = idew.IDEW(
    hourly_dictionary, temperature, '2018-07-01 13:00', 'temp', shapefile,
    False, file_path_elev, idx_list, 1, False, res=10000,
)
idw1_grid, maxmin, elev_array = idew_outputs

In [None]:
# Build the list of test timestamps: 13:00 on July 1 of every selected year.

# Edit the range bounds to choose the years (the end bound is exclusive).
years = [str(year_number) for year_number in range(1956, 1957)]

overall_dates = [year_label + '-07-01 13:00' for year_label in years]

In [None]:
# Imports for the gpr module
from sklearn.gaussian_process.kernels import (RBF, Matern, RationalQuadratic,
                                              ExpSineSquared, DotProduct,
                                              ConstantKernel)
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn import metrics
from sklearn.model_selection import ShuffleSplit

In [86]:
# Buffered leave-one-out (LOO) cross-validation of GPR on Google Colab.
# For every variable in `variables` and every date in `overall_dates`, the
# station observations are loaded, gpr.buffer_LOO_gpr is run on them, and the
# per-date errors are written to one CSV file per variable.

variables = ['wind']
# Buffer sizes for area where the stations will be deleted during x-validation
buffer_sizes = {'temp': 500, 'rh': 100, 'wind': 20, 'pcp': 20}
# Kernel lookup (pre-fitted)
kernels = {'temp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
           'rh': ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
           'pcp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
           'wind': ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']}

# How to load the observations for each variable on a given date string.
# Precipitation is read from the daily files and only needs the YYYY-MM-DD
# part of the timestamp; the other variables come from the hourly files.
data_loaders = {
    'temp': lambda date: GD.get_noon_temp(date, file_path_hourly),
    'rh': lambda date: GD.get_relative_humidity(date, file_path_hourly),
    'wind': lambda date: GD.get_wind_speed(date, file_path_hourly),
    'pcp': lambda date: GD.get_pcp(date[0:10], file_path_daily, date_dictionary),
}

file_out = '/content/drive/MyDrive/weather/'

for var in variables:

    # Precipitation is validated against the daily station locations, every
    # other variable against the hourly station locations.
    station_dictionary = daily_dictionary if var == 'pcp' else hourly_dictionary

    error_dict = {}

    for input_date in sorted(overall_dates):
        print('Processing %s' % (input_date))
        start = time.time()
        data = data_loaders[var](input_date)
        time_elapsed = time.time() - start
        print('Completed getting dictionary, it took %s seconds..' % (time_elapsed))

        start = time.time()
        # Validate the observations loaded above for this variable and date.
        # The previous version passed the stale `temperature` dictionary from
        # an earlier cell, so every variable was scored on temperature data.
        # NOTE(review): the saved run of this cell ended in a TypeError raised
        # after the data-loading step; confirm the positional argument order
        # against the signature of gpr.buffer_LOO_gpr.
        MAE = gpr.buffer_LOO_gpr(station_dictionary, data, shapefile,
                                 file_path_elev, elev_array, idx_list,
                                 kernels[var], buffer_sizes[var], True)
        # Mean of the values in the returned dictionary, ignoring NaN entries.
        average = np.nanmean(np.array(list(MAE.values())))
        print('MAE for test date: %s' % (round(average, 2)))
        error_dict[input_date] = [MAE, average]
        time_elapsed = time.time() - start
        print('Completed operation, it took %s seconds..' % (time_elapsed))

    # Write one CSV per variable. This used to sit outside the loop, so only
    # the last variable's results were saved when several were listed.
    df = pd.DataFrame(error_dict)
    df = df.transpose()  # one row per date: [MAE dictionary, average]
    df.iloc[:, 0] = df.iloc[:, 0].astype(str).str.strip('[|]')
    df.to_csv(file_out + 'GPR_buffer_' + var + '.csv', header=None, sep=',')

Processing 1956-07-01 13:00
Completed getting dictionary, it took 1.8253636360168457 seconds..


TypeError: ignored