In [1]:
import tsraster.prep as tr
import tsraster.random as random
import tsraster.model as model
import tsraster.calculate as ca
import numpy as np

import tsraster.model  as md
import pandas as pd
import os
import matplotlib.pyplot as plt
%matplotlib inline
from math import ceil
from tsraster.prep import set_df_mindex
import copy

In [2]:
########################### 

In [3]:
# Build the Poisson-disk subsample mask: only the cells picked by the sampler are kept,
# every other cell (including those excluded by the raster mask) is masked out.
'''
Create a raster of selected cells (populated with ones) on a background of zeros.

:param raster_mask: path of the raster mask - supplies the dimensions for the subsample and
        masks unusable areas. The remaining sample area is assumed to be contiguous.
:param outFile: path and name of the output mask, a raster image with value 1 for selected
        pixels and 0 for all other pixels
:param k: number of attempts to select a point around each reference point before that
        reference point is marked as inactive
:param r: minimum distance (in raster cells) between selected points
:return: list that includes an array of all masked & unmasked cells and a dictionary of all
        selected points. The 0/1 raster is also saved to the outFile location.
'''

# `random` here is tsraster.random (imported at the top of the notebook), not the
# standard-library module.
# The paths use forward slashes only: the previous mix of "/" and "\" resolves on Windows
# but not elsewhere, and the same mask raster is already addressed with forward slashes
# in the prediction cells.
# Only element [0] of the returned list is kept (presumably the cell array - confirm order).
rasterMask = random.Poisson_Subsample(
    raster_mask="../Data/Examples/buffer/StatePoly_buf.tif",
    outFile="../Data/Examples/diskTest.tif",
    k=50,
    r=5,
)[0]


In [None]:
# Climate feature extraction for every period of interest
'''
Extract summary statistics (features) from multi-year datasets.
Each distinct (annual or multi-year) time period is written out as its own CSV dataFrame.

:param startYears: list of years on which feature extraction starts
:param featureData_Path: path to the data from which features are extracted
:param feature_params: summary statistics (features) to extract within each window
:param invar_Data: year-invariant data joined to the extracted features on an annual scale
        (not passed in the call below)
:param out_Path: location at which the extracted features are written as csv
:param window_length: length of the window within which features are extracted
:param window_offset: number of years by which the features for each year are offset from that year
:param mask: mask applied to the data prior to feature extraction
:return: nothing; the feature data for each year of interest is saved under out_Path
        with the filename FD_Window_XXXX.csv

NOTE(review): the call below spells the window arguments `length` / `offset`, whereas these
notes call them window_length / window_offset - confirm against tsraster.calculate.
'''

extraction_start_years = list(range(1970, 2016, 5))  # 1970, 1975, ..., 2015
climate_raster_dir = "C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Actual/Climate/BCM HIST Final 1000m_1950_2016/"
summary_features = {"mean": None, "maximum": None}

ca.multiYear_Window_Extraction(
    extraction_start_years,
    climate_raster_dir,
    feature_params=summary_features,
    out_Path='C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Pre_Masked/',
    mask="C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Examples/buffer/StatePoly_buf.tif",
    length=5,
    offset=0,
)

In [None]:
# Assemble every explanatory variable (including the features extracted from climate data)
# into one dataFrame per period of interest.
'''
Merge annually repeating data, the climate features extracted for each period of interest,
and time-invariant data.

One dataFrame is produced per period of interest, holding every explanatory variable that
may be incorporated into the model:
    - features extracted from climate data in preceding years,
    - annually repeating data such as estimated housing density,
    - time-invariant data such as rate of lightning strikes or local elevation.

When a period of interest spans more than a single year, mean values within the period are
calculated from the annually repeating data.

:param startYears: list of years on which feature extraction starts
:param feature_path: path to the feature data
:param dataDict: dictionary mapping the file path of each raster to its desired data column name
        (the values below are 3-item lists - presumably [column name, NoData value,
        replacement value]; confirm against tsraster.prep)
:param other_Data_path: file path (including file name) of an example file for each annually
        repeating parameter to be added - the 4-digit year in each file name is replaced
        with XXXX (i.e. tr_XXXX.csv rather than tr.1981.csv)
:param dataNameList: list of intended data names for the additional data
:param outPath: folder in which the output is placed
:param length: length of the period
:param feature_offset: number of years by which feature data is offset from the period of
        interest (allows use of climate conditions in preceding years)
:param feature_length: length of the period for features - may differ from the period length
        when based on preceding conditions
:return: nothing; each dataFrame is saved as a .csv file in the outPath folder with
        filename CD_XXXX.csv
'''

# Shared prefixes; concatenation reproduces the same absolute path strings as before.
frap_data_root = "C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/"
actual_dir = frap_data_root + "Actual/"
features_dir = frap_data_root + "Extracted_Features/"

dataDict = {
    actual_dir + "CampGrounds/Campgrounds.tif": ["Campground", 32767.0, 0.0],
    actual_dir + "Fire Stations/FireStatDist_Meters.tif": ["FireStation_Dist", -1, 0.0],
    actual_dir + "Fire Stations/AirBaseDist_Meters.tif": ["Airfield_Dist", -1, 0.0],
    actual_dir + "Roads/PrimSecRoads_Dist.tif": ["Road_Dist", -1, 0.0],
    actual_dir + "Electrical/transmissionLines_Dist.tif": ["Elec_Dist", -1, 0.0],
    actual_dir + "Elev_30m_Products/Roughness.tif": ["Roughness", -1, 0.0],
    actual_dir + "Elev_30m_Products/Heterogeneity_m.tif": ["Topo_Heterogeneity", -1, 0.0],
    actual_dir + "Incorporated Cities/2018_Download/Census_Designated_Raster.tif": ["City_Bounds", 255, 0.0],
    actual_dir + "National Parks/nps_boundary_clip_Raster.tif": ["NPS_Bounds", 255, 0.0],
    actual_dir + "Lightning/WIS Data/Light5yrWIS_Clipped.tif": ["Lightning", -9999, "NoData"],
    actual_dir + "elevation srtm/SRTM_GTOPO_u30_mosaic_Clip.tif": ["Elev", -9999, "NoData"],
}

tr.period_Data_Merge(
    startYears=list(range(1970, 2016, 5)),
    feature_path=features_dir + "Pre_Masked/",
    dataDict=dataDict,
    other_Data_path=[actual_dir + "SERGOM_Housing/Interpolated/bhc_XXXX_linreg.tif"],
    dataNameList=["Housing_Density"],
    outPath=features_dir + "Pre_Masked_5Len/",
    length=5,
    feature_offset=0,
    feature_length=5,
)
                    


In [None]:
# Convert the annual fire rasters into one CSV dataFrame per period of interest.
'''
Convert annual fire data rasters into dataFrames corresponding to each period of interest and
export them as .CSV files. Some minor reformatting is also applied to prevent problems with
downstream processing.

:param startYears: list of years on which extraction starts
:param length: length of the extraction period beginning with each start year
        (set to 1 for annual values)
:param file_Path: path to the target data files (fire data)
:param out_Path: folder in which the output is placed
:param output_style: determines the nature of the output
        Count  - number of fires in each pixel within each period
        Mean   - mean number of fires/year over the period
        Binary - 1 if burned during the period, 0 otherwise
:return: nothing; the dataFrames are saved at out_Path using the filename TD_XXXX.csv

NOTE(review): the call below passes this option as `output_type`, not `output_style` -
confirm which spelling the tsraster.prep function accepts.
'''

fire_start_years = list(range(1970, 2015, 5))  # 1970, 1975, ..., 2010
fire_raster_dir = "C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Actual/Fires/Rasters/"
premasked_5len_dir = 'C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Pre_Masked_5Len/'

tr.target_Data_to_csv_multiYear(
    fire_start_years,
    length=5,
    file_Path=fire_raster_dir,
    out_Path=premasked_5len_dir,
    output_type="Mean",
)


In [None]:
# Mask all data files (combined data and target data) with the output of the Poisson disk
# masking step (or any other desired mask).
'''
Mask multiple years of data and export the resulting files annually and as multi-year csvs.

:param startYears: years on which to begin
:param filePath: folder in which the files to be masked are located. Files must be named
        CD_XXXX_YYYY.csv for combined data and
        TD_XXXX_YYYY.csv for fire data, where XXXX is the year on which a period of interest
        begins and YYYY is the last year within that period of interest
:param maskFile: file path of the data file used for masking
:param outPath: folder in which the output is placed
:param length: length of the desired period of interest
:return: masked dataFrames of combined data and target data
'''

# maskFile is written with forward slashes only: the previous mix of "/" and "\" resolves
# on Windows but not elsewhere. It names the diskTest.tif raster under ../Data/Examples.
combined_Data, target_Data = tr.multiYear_Mask(
    list(range(1975, 2015, 5)),  # 1975, 1980, ..., 2010
    filePath='C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Pre_Masked_5Len/',
    maskFile="../Data/Examples/diskTest.tif",
    outPath="C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Masked_5Len/",
    length=5,
)

In [2]:
# Temporary, for testing only: reload the masked combined & target data from disk.
# This rebinds combined_Data / target_Data, replacing whatever the masking cell returned.
# NOTE(review): the recorded preview shows extra "Unnamed: 0", "Unnamed: 0.1" and
# "pixel_id.1" columns - presumably index columns written out with the CSV; confirm.
masked_5len_dir = 'C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Masked_5Len/'

combined_Data = pd.read_csv(masked_5len_dir + 'CD_1975_2010_Masked_5Len.csv')
target_Data = pd.read_csv(masked_5len_dir + 'TD_1975_2010_Masked_5Len.csv')

# Preview the first rows of the explanatory data.
combined_Data.head()

Unnamed: 0.1,pixel_id,Unnamed: 0,pixel_id.1,time,aet__maximum,aet__mean,cwd__maximum,cwd__mean,Campground,FireStation_Dist,...,Road_Dist,Elec_Dist,Roughness,Topo_Heterogeneity,City_Bounds,NPS_Bounds,Lightning,Elev,Housing_Density,year
0,134588,134588,134588,197501_197912,104.05455,38.798958,75.966362,40.99297,0.0,25000.0,...,0.0,0.0,36.04374,274.0,0.0,0.0,0.073291,72.0,0.75,1975
1,134596,134596,134596,197501_197912,155.080505,58.283031,42.300999,20.542719,0.0,26248.809,...,6708.204,0.0,41.40258,436.0,0.0,0.0,0.073291,307.0,0.0,1975
2,135555,135555,135555,197501_197912,104.839996,35.365177,89.362,42.025677,0.0,35383.613,...,12165.525,0.0,45.1243,506.0,0.0,0.0,0.11469,636.0,0.0,1975
3,136483,136483,136483,197501_197912,164.182999,62.888699,34.011501,15.720217,0.0,26419.69,...,8944.271,0.0,57.85297,634.0,0.0,0.0,0.073291,318.0,0.0,1975
4,136523,136523,136523,197501_197912,144.088501,52.848095,54.3055,22.716408,0.0,57775.426,...,13000.0,0.0,44.113197,1119.0,0.0,0.0,0.182597,1207.0,0.0,1975


In [4]:
# Elastic net regularization with two-dimensional (pixel x year) cross-validation
'''
Run elastic net regressions on the data, with k-fold cross-validation conducted independently
across both years and pixels.
Mean model MSE and R2 are reported when predicting fire risk at
  A) locations outside of the training dataset
  B) years outside of the training dataset
  C) locations and years outside of the training dataset

Returns a list of objects:
  0: Combined_Data with testing/training groups labeled
  1: Target Data with testing/training groups labeled
  2: summary dataFrame of MSE and R2 for each model run
     (against holdout data representing novel locations, novel years, or both)
  3: list of elastic net models for use in predicting fires in further locations/years
  4: list of lists of the years not used in model training for each run
  5: parameters selected during hypertuning, to be used in annual predictions

Parameters:
  combined_Data: explanatory factors to be used in predicting fire risk
  target_Data:   observed fire occurrences
  varsToGroupBy: list of (2) column names from combined_Data & target_Data used to create
                 the randomized groups
  groupVars:     list of (2) desired column names for the resulting randomized groups
  testGroups:    number of distinct groups into which the data sets are divided
                 (for each of the two variables)
  DataFields:    column names of the data to be used in modelling
  outPath:       location to which pickle files and summary statistics are exported
  params:        list of parameters to be used in hypertuning (not passed below)
'''

elastic_net_cv_fields = [
    'aet__mean', 'cwd__maximum', 'cwd__mean', 'Campground',
    'FireStation_Dist', 'Airfield_Dist', 'City_Bounds', 'NPS_Bounds',
    'Lightning', 'Elev', 'Housing_Density',
]

CrossVal_Output = model.elasticNet_2dimTest(
    combined_Data,
    target_Data,
    ["pixel_id", "year"],
    ["pixel_group", "year_group"],
    testGroups=[10, 3],
    DataFields=elastic_net_cv_fields,
    outPath='C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/',
)




pixels_Years MSE Overall:  0.0031630360036847788
pixels_Years R2 Overall:  -235.2166171283783


pixels MSE Overall:  0.0046302221730704415
pixels R2 Overall:  -227.81413208178648


years MSE Overall:  0.003662204597641387
years R2 Overall:  -233.76850680149306




In [5]:
# Element 0 of the cross-validation output: Combined_Data with the test/train group labels.
# In the recorded preview the last two columns are pixel_group and year_group.
CrossVal_Output[0].head()

Unnamed: 0.1,pixel_id,Unnamed: 0,pixel_id.1,time,aet__maximum,aet__mean,cwd__maximum,cwd__mean,Campground,FireStation_Dist,...,Roughness,Topo_Heterogeneity,City_Bounds,NPS_Bounds,Lightning,Elev,Housing_Density,year,pixel_group,year_group
0,134588,134588,134588,197501_197912,104.05455,38.798958,75.966362,40.99297,0.0,25000.0,...,36.04374,274.0,0.0,0.0,0.073291,72.0,0.75,1975,9,0
1,134596,134596,134596,197501_197912,155.080505,58.283031,42.300999,20.542719,0.0,26248.809,...,41.40258,436.0,0.0,0.0,0.073291,307.0,0.0,1975,4,0
2,135555,135555,135555,197501_197912,104.839996,35.365177,89.362,42.025677,0.0,35383.613,...,45.1243,506.0,0.0,0.0,0.11469,636.0,0.0,1975,5,0
3,136483,136483,136483,197501_197912,164.182999,62.888699,34.011501,15.720217,0.0,26419.69,...,57.85297,634.0,0.0,0.0,0.073291,318.0,0.0,1975,5,0
4,136523,136523,136523,197501_197912,144.088501,52.848095,54.3055,22.716408,0.0,57775.426,...,44.113197,1119.0,0.0,0.0,0.182597,1207.0,0.0,1975,6,0


In [6]:
# Element 1 of the cross-validation output: target data with the test/train group labels.
# The recorded preview shows columns pixel_id, value, year, pixel_group and year_group.
CrossVal_Output[1].head()

Unnamed: 0,pixel_id,value,year,pixel_group,year_group
0,134588,0.0,1975,9,0
1,134596,0.0,1975,4,0
2,135555,0.0,1975,5,0
3,136483,0.0,1975,5,0
4,136523,0.0,1975,6,0


In [7]:
# Element 2: summary of MSE and R2 for the model runs, scored against spatially novel,
# temporally novel, and completely novel data. .head() previews the first five runs only.
# NOTE(review): the recorded preview contains a negative Pixels_Years_MSE (-0.002605) and
# R2 values in the hundreds below zero; a mean squared error cannot be negative, so the
# scoring inside tsraster.model deserves a check.
CrossVal_Output[2].head()

Unnamed: 0,Pixels_Years_MSE,Pixels_MSE,Years_MSE,Pixels_Years_R2,Pixels_R2,Years_R2
0,0.002549,0.005027,0.006741,-183.852539,-237.839879,-232.131489
1,0.005529,0.002535,0.003687,-304.769247,-192.016342,-324.321465
2,-0.002605,0.006618,0.000964,-117.2706,-157.814309,-141.590831
3,0.008178,0.004954,0.006358,-236.228321,-304.617249,-232.44529
4,0.002715,0.006497,0.003655,-406.673984,-298.379209,-359.692282


In [8]:
# Element 3: list of fitted models. In the recorded output each entry is a tuple of
# (ElasticNet estimator, float, float); the two floats of the first entries match the
# Pixels_Years_MSE and Pixels_Years_R2 columns of the summary table.
# NOTE(review): the estimator reprs show alpha=0.5, l1_ratio=0.7, which differs from the
# hypertuned values displayed for CrossVal_Output[5] - confirm which parameters were used.
CrossVal_Output[3]

[(ElasticNet(alpha=0.5, copy_X=True, fit_intercept=True, l1_ratio=0.7,
        max_iter=1000, normalize=False, positive=False, precompute=False,
        random_state=None, selection='cyclic', tol=0.0001, warm_start=False),
  0.002548767869070101,
  -183.852538559554),
 (ElasticNet(alpha=0.5, copy_X=True, fit_intercept=True, l1_ratio=0.7,
        max_iter=1000, normalize=False, positive=False, precompute=False,
        random_state=None, selection='cyclic', tol=0.0001, warm_start=False),
  0.0055289764486881054,
  -304.7692467859652),
 (ElasticNet(alpha=0.5, copy_X=True, fit_intercept=True, l1_ratio=0.7,
        max_iter=1000, normalize=False, positive=False, precompute=False,
        random_state=None, selection='cyclic', tol=0.0001, warm_start=False),
  -0.0026053775755709996,
  -117.27060044242097),
 (ElasticNet(alpha=0.5, copy_X=True, fit_intercept=True, l1_ratio=0.7,
        max_iter=1000, normalize=False, positive=False, precompute=False,
        random_state=None, selection='cycl

In [9]:
# Element 4: for each model run, the list of years excluded from the training data.
# The recorded output has 30 entries that cycle through three sets of years.
CrossVal_Output[4]

[[2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980],
 [2000, 1975],
 [1995, 2005, 1990],
 [1985, 2010, 1980]]

In [10]:
# Element 5: parameters selected during hypertuning (a dict with 'l1_ratio' and 'alpha'
# in the recorded output). The elastic net prediction cell passes this on as `params`.
CrossVal_Output[5]

{'l1_ratio': 0.48499999999999976, 'alpha': 0.8}

In [11]:
####################

In [12]:
'''Elastic net prediction of fire risk for the period of interest.

The model is trained on combined_Data across all available years except those within the
period of interest; the resulting predictions are saved as csv and as tif at 'outPath'.

:param combined_Data_Training: dataFrame with all desired explanatory factors across all
        locations & years to be used in training the model
:param target_Data_Training: dataFrame with observed fire occurrences across all
        locations & years to be used in training the model
:param preMasked_Data_Path: path to the files used for predicting fire risk
        (note - these files should not have undergone Poisson disk masking)
:param outPath: output location for the predicted fire risk files (csv, pickle, and tif)
:param year_List: list of years for which predictions are desired
:param DataFields: list of explanatory factors to be entered into the model
:param mask: file path of the raster mask used to mask the output predictions, and as the
        example raster for choosing array shape and projection of the .tif output files
:param params: parameters for the elastic net regression (presumably developed from the
        2-dimensional cross-validation)
:return: list of all models, accompanied by a list of the years being predicted
        - note - the return value is equivalent to the data exported as models.pickle

The call below also passes periodLen, which these notes do not describe.
'''

elastic_net_pred_fields = [
    'aet__mean', 'cwd__maximum', 'cwd__mean', 'Campground',
    'FireStation_Dist', 'Airfield_Dist', 'City_Bounds', 'NPS_Bounds',
    'Lightning', 'Elev', 'Housing_Density',
]
elastic_net_pred_years = list(range(2015, 2020, 5))  # evaluates to [2015]
elastic_net_params = CrossVal_Output[5]

q = model.elastic_YearPredictor(
    combined_Data,
    target_Data,
    preMasked_Data_Path="C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Pre_Masked_5Len/",
    outPath="C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Test_Preds/",
    year_List=elastic_net_pred_years,
    periodLen=5,
    DataFields=elastic_net_pred_fields,
    mask="../Data/Examples/buffer/StatePoly_buf.tif",
    params=elastic_net_params,
)

[-0.00419769 -0.0041972  -0.00419661 ... -0.00097092 -0.00100184
 -0.00103253]


In [13]:
'''Random forest regressions with two-dimensional (pixel x year) cross-validation.

k-fold cross-validation is conducted independently across both years and pixels.
Mean model MSE and R2 are reported when predicting fire risk at
  A) locations outside of the training dataset
  B) years outside of the training dataset
  C) locations and years outside of the training dataset

Returns a list of objects:
  0: Combined_Data with testing/training groups labeled
  1: Target Data with testing/training groups labeled
  2: summary dataFrame of MSE and R2 for each model run
     (against holdout data representing novel locations, novel years, or both)
  3: list of random forest models for use in predicting fires in further locations/years
  4: list of lists of the years not used in model training for each run
  5: not described in the original notes, but the random forest prediction cell reads
     element 5 as its `params` - presumably the hypertuned parameters; confirm

Parameters:
  combined_Data: explanatory factors to be used in predicting fire risk
  target_Data:   observed fire occurrences
  varsToGroupBy: list of (2) column names from combined_Data & target_Data used to create
                 the randomized groups
  groupVars:     list of (2) desired column names for the resulting randomized groups
  testGroups:    number of distinct groups into which the data sets are divided
                 (for each of the two variables)
  DataFields:    column names of the data to be used in modelling
  outPath:       location to which pickle files and summary statistics are exported
  params:        list of parameters to be used in hypertuning (not passed below)
'''

# The result gets its own name instead of being assigned to CrossVal_Output as well:
# rebinding CrossVal_Output here replaced the elastic net results that the inspection
# cells and the elastic net predictor read, so re-running them used random forest output.
# `q` stays bound because the random forest prediction cell reads q[5].
rf_CrossVal_Output = q = model.RandomForestReg_2dimTest(
    combined_Data,
    target_Data,
    ["pixel_id", "year"],
    ["pixel_group", "year_group"],
    testGroups=[10, 3],
    DataFields=[
        'aet__mean', 'cwd__maximum', 'cwd__mean', 'Campground',
        'FireStation_Dist', 'Airfield_Dist', 'City_Bounds', 'NPS_Bounds',
        'Lightning', 'Elev', 'Housing_Density',
    ],
    outPath='C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/',
)


pixels_Years MSE Overall:  0.021930265361375604
pixels_Years R2 Overall:  -41.56068062536632


pixels MSE Overall:  0.031332130572446715
pixels R2 Overall:  -38.36478623236376


years MSE Overall:  0.023890239933704382
years R2 Overall:  -39.755598205586914




In [14]:
'''Random forest prediction of fire risk for the period of interest.

The model is trained on combined_Data across all available years except the year of
interest; the resulting predictions are saved as csv and as tif at 'outPath'.

:param combined_Data_Training: dataFrame with all desired explanatory factors across all
        locations & years to be used in training the model
:param target_Data_Training: dataFrame with observed fire occurrences across all
        locations & years to be used in training the model
:param preMasked_Data_Path: path to the files used for predicting fire risk
        (note - these files should not have undergone Poisson disk masking)
:param outPath: output location for the predicted fire risk files (csv, pickle, and tif)
:param year_List: list of years for which predictions are desired
:param DataFields: list of explanatory factors to be entered into the model
:param mask: file path of the raster mask used to mask the output predictions, and as the
        example raster for choosing array shape and projection of the .tif output files
:param params: parameters for the random forest regression (presumably developed from the
        2-dimensional cross-validation)
:return: list of all models, accompanied by a list of the years being predicted
        - note - the return value is equivalent to the data exported as models.pickle

The call below also passes periodLen, which these notes do not describe.
'''

rf_pred_fields = [
    'aet__mean', 'cwd__maximum', 'cwd__mean', 'Campground',
    'FireStation_Dist', 'Airfield_Dist', 'City_Bounds', 'NPS_Bounds',
    'Lightning', 'Elev', 'Housing_Density',
]
rf_pred_years = list(range(2015, 2020, 5))  # evaluates to [2015]

# `q` must hold the random forest cross-validation output when this cell runs.
# The call is left as the last expression so the cell displays the returned value.
model.randomForestReg_YearPredictor(
    combined_Data,
    target_Data,
    preMasked_Data_Path="C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Pre_Masked_5Len/",
    outPath="C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Test_Preds/",
    year_List=rf_pred_years,
    periodLen=5,
    DataFields=rf_pred_fields,
    mask="../Data/Examples/buffer/StatePoly_buf.tif",
    params=q[5],
)

[0.00351779 0.00351779 0.00351779 ... 0.00340962 0.00336616 0.00336822]


([[RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=5,
              max_features='auto', max_leaf_nodes=None,
              min_impurity_decrease=0, min_impurity_split=None,
              min_samples_leaf=10, min_samples_split=2,
              min_weight_fraction_leaf=0, n_estimators=100, n_jobs=None,
              oob_score=False, random_state=None, verbose=0, warm_start=False)]],
 [2015])

In [15]:
 # XGBoost regression with two-dimensional (pixel x year) cross-validation
'''XGBoost regressions on the data, with k-fold cross-validation conducted independently
across both years and pixels.
Mean model MSE and R2 are reported when predicting fire risk at
  A) locations outside of the training dataset
  B) years outside of the training dataset
  C) locations and years outside of the training dataset

Returns a list of objects:
  0: Combined_Data with testing/training groups labeled
  1: Target Data with testing/training groups labeled
  2: summary dataFrame of MSE and R2 for each model run
     (against holdout data representing novel locations, novel years, or both)
  3: list of XGBoost models for use in predicting fires in further locations/years
  4: list of lists of the years not used in model training for each run
  5: not described in the original notes, but the XGBoost prediction cell reads
     element 5 as its `params` - presumably the hypertuned parameters; confirm

Parameters:
  combined_Data: explanatory factors to be used in predicting fire risk
  target_Data:   observed fire occurrences
  varsToGroupBy: list of (2) column names from combined_Data & target_Data used to create
                 the randomized groups
  groupVars:     list of (2) desired column names for the resulting randomized groups
  testGroups:    number of distinct groups into which the data sets are divided
                 (for each of the two variables)
  DataFields:    column names of the data to be used in modelling
  outPath:       location to which pickle files and summary statistics are exported
  params:        list of parameters to be used in hypertuning (not passed below)
'''

# The result gets its own name instead of also being assigned to CrossVal_Output, so the
# cells that read CrossVal_Output are not affected by running this cell.
# `q` stays bound because the XGBoost prediction cell reads q[5].
xgb_CrossVal_Output = q = model.XGBoostReg_2dimTest(
    combined_Data,
    target_Data,
    ["pixel_id", "year"],
    ["pixel_group", "year_group"],
    testGroups=[10, 3],
    DataFields=[
        'aet__mean', 'cwd__maximum', 'cwd__mean', 'Campground',
        'FireStation_Dist', 'Airfield_Dist', 'City_Bounds', 'NPS_Bounds',
        'Lightning', 'Elev', 'Housing_Density',
    ],
    outPath='C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/',
)




pixels_Years MSE Overall:  0.019428827953334355
pixels_Years R2 Overall:  -33.66389849862572


pixels MSE Overall:  0.03360051302278306
pixels R2 Overall:  -32.65973505873693


years MSE Overall:  0.02275388147150962
years R2 Overall:  -33.41626161264868




In [16]:
'''XGBoost prediction of fire risk for the period of interest.

The model is trained on combined_Data across all available years except the year of
interest; the resulting predictions are saved as csv and as tif at 'outPath'.

:param combined_Data_Training: dataFrame with all desired explanatory factors across all
        locations & years to be used in training the model
:param target_Data_Training: dataFrame with observed fire occurrences across all
        locations & years to be used in training the model
:param preMasked_Data_Path: path to the files used for predicting fire risk
        (note - these files should not have undergone Poisson disk masking)
:param outPath: output location for the predicted fire risk files (csv, pickle, and tif)
:param year_List: list of years for which predictions are desired
:param DataFields: list of explanatory factors to be entered into the model
:param mask: file path of the raster mask used to mask the output predictions, and as the
        example raster for choosing array shape and projection of the .tif output files
:param params: parameters for the XGBoost regression (presumably developed from the
        2-dimensional cross-validation)
:return: list of all models, accompanied by a list of the years being predicted
        - note - the return value is equivalent to the data exported as models.pickle

The call below also passes periodLen, which these notes do not describe.
'''

xgb_pred_fields = [
    'aet__mean', 'cwd__maximum', 'cwd__mean', 'Campground',
    'FireStation_Dist', 'Airfield_Dist', 'City_Bounds', 'NPS_Bounds',
    'Lightning', 'Elev', 'Housing_Density',
]
xgb_pred_years = list(range(2015, 2020, 5))  # evaluates to [2015]

# `q` must hold the XGBoost cross-validation output when this cell runs.
A = model.XGBoostReg_YearPredictor(
    combined_Data,
    target_Data,
    preMasked_Data_Path="C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Pre_Masked_5Len/",
    outPath="C:/Users/Python3/Documents/wildfire_FRAP_working/wildfire_FRAP/Data/Extracted_Features/Test_Preds/",
    year_List=xgb_pred_years,
    periodLen=5,
    DataFields=xgb_pred_fields,
    mask="../Data/Examples/buffer/StatePoly_buf.tif",
    params=q[5],
)

[-0.01861781 -0.01861781 -0.01861781 ... -0.02599061 -0.02878857
 -0.02878857]
