# Extractions for random polygons (for classifying forest disturbances)

In [None]:
# import required modules
import numpy as np
import geopandas as gpd
import rasterio as rst
from rasterstats import zonal_stats
import glob
import os
import pandas as pd
import re
import multiprocessing
from multiprocessing import Pool

### Functions

In [None]:
def event_scale(filer):
    """Return the per-polygon mean of one raster, keyed by ``<variable>_<year>``.

    Band 1 of the raster is read (converted to floating point when it is not
    already) and averaged over every polygon of the module-level GeoDataFrame
    ``randp``. ``randp`` is a global rather than a parameter so that the
    function can be mapped over file paths with ``multiprocessing.Pool``; the
    worker processes therefore have to see the same ``randp`` as the parent.

    The variable name and the year are parsed from the file name (the text
    before its first ``.``) using one of four naming schemes:

    * name contains ``AI``: variable is token 0 and year is token 1 of the
      ``_``-separated name (Aridity Index files lack consistent naming);
    * name contains ``pop``: variable is the text before the first ``2``;
    * name contains ``suppressionp`` or ``ignitionp``: variable is token 0;
    * anything else: variable is tokens 0, 1 and 3 joined with ``_``.

    Except for ``AI`` files, the year is the longest run of digits in the
    name (other numbers may be a resolution or a season).

    Parameters
    ----------
    filer : str
        Path of a raster that rasterio can open.

    Returns
    -------
    dict
        ``{"<variable>_<year>": values}`` where ``values`` holds the ``mean``
        statistic of each feature of ``randp``, in order.

    Raises
    ------
    IndexError
        If the file name has fewer ``_``-separated tokens than its naming
        scheme requires.
    ValueError
        If a year has to be parsed from digits but the name contains none.
    """
    # read what is needed and release the file handle straight away
    with rst.open(filer) as src:
        tmpar = src.read(1)
        transform = src.transform
        nodata = src.nodata
    # integer rasters are converted to floating point
    if 'float' not in tmpar.dtype.type.__name__:
        tmpar = tmpar.astype('float')

    fname = os.path.basename(filer)
    stem = fname.split('.')[0]
    tokens = stem.split('_')
    # every number in the name (could be year, resolution or season)
    numbs = re.findall(r'\d+', stem)

    if 'AI' in fname:
        # exception for Aridity Index (lack of consistent naming)
        varname = tokens[0]
        year = tokens[1]
    elif 'pop' in fname:
        # population rasters: the name is everything before the first "2"
        varname = stem.split('2')[0]
        year = max(numbs, key=len)
    elif 'suppressionp' in fname or 'ignitionp' in fname:
        varname = tokens[0]
        year = max(numbs, key=len)
    else:
        varname = tokens[0] + "_" + tokens[1] + "_" + tokens[3]
        year = max(numbs, key=len)
    print(varname)

    # mean of the raster inside each polygon of the global `randp`
    stats = zonal_stats(randp, tmpar, affine=transform, stats=['mean'],
                        all_touched=True, nodata=nodata)
    # flatten the per-feature dictionaries into a plain list of values
    stats1 = [val for dic in stats for val in dic.values()]
    newcolumn = varname + '_' + str(year)
    return {newcolumn: stats1}

## EFFIS data

In [None]:
# read in file containing random polygons
randp = gpd.read_file('/home/marco/Desktop/testbuffers/buffers/random_buffers_effis.shp')
# The static-extraction cells of this section store their columns in
# `randp_effis`, which was never assigned (NameError on a top-to-bottom run).
# Keep it as an independent copy so that `randp` only accumulates the
# event-scale columns.
randp_effis = randp.copy()

### Static datasets

#### PFTs

In [None]:
# Per-polygon sum and pixel count of every PFT raster, stored as the columns
# `<name>` and `<name>_pixels`.
# Expects `randp_effis` (GeoDataFrame of the random polygons) to be defined.
for filer in glob.glob('/mnt/data1tb/rasters/EU/pfts/*.tif'):
    # column name = file name up to its first dot
    varname = os.path.basename(filer).split('.')[0]
    print(varname)
    # the context manager closes the raster once the statistics are computed
    with rst.open(filer) as tmpr:
        tmpar = tmpr.read(1)
        # one pass returns both statistics for each polygon
        stats = zonal_stats(randp_effis, tmpar, affine=tmpr.transform, stats=['sum', 'count'],
                            all_touched=True, nodata=tmpr.nodata)
    # ---- sum of pixels; missing values become 0
    # (reassign instead of a chained in-place fillna, which is not guaranteed
    # to write back into the frame)
    randp_effis[varname] = [dic['sum'] for dic in stats]
    randp_effis[varname] = randp_effis[varname].fillna(0)
    # ---- count of pixels; missing values become 0
    varname1 = varname + '_pixels'
    randp_effis[varname1] = [dic['count'] for dic in stats]
    randp_effis[varname1] = randp_effis[varname1].fillna(0)

#### Continuous variables

In [None]:
# Per-polygon mean of every continuous static raster, one column per raster.
# Expects `randp_effis` (GeoDataFrame of the random polygons) to be defined.
for filer in glob.glob('/mnt/data1tb/rasters/EU/static1/*.tif'):
    # column name = file name up to its first dot
    varname = os.path.basename(filer).split('.')[0]
    print(varname)
    # the context manager closes the raster once the statistics are computed
    with rst.open(filer) as tmpr:
        tmpar = tmpr.read(1)
        # ---- zonal statistics (mean)
        stats = zonal_stats(randp_effis, tmpar, affine=tmpr.transform, stats=['mean'],
                            all_touched=True, nodata=tmpr.nodata)
    # one value per polygon
    stats1 = [dic['mean'] for dic in stats]
    randp_effis[varname] = stats1
    # missing values become 0 (reassign instead of a chained in-place fillna,
    # which is not guaranteed to write back into the frame)
    randp_effis[varname] = randp_effis[varname].fillna(0)

#### EFI tree cover maps

In [None]:
# Per-polygon sum of every EFI tree cover raster, one column per raster.
# Expects `randp_effis` (GeoDataFrame of the random polygons) to be defined.
for filer in glob.glob('/mnt/data1tb/rasters/EU/EFItrees/*.tif'):
    # column name = file name up to its first dot
    varname = os.path.basename(filer).split('.')[0]
    print(varname)
    # the context manager closes the raster once the statistics are computed
    with rst.open(filer) as tmpr:
        tmpar = tmpr.read(1)
        # zonal statistics (sum)
        stats = zonal_stats(randp_effis, tmpar, affine=tmpr.transform, stats=['sum'],
                            all_touched=True, nodata=tmpr.nodata)
    # one value per polygon; missing values are kept as they are
    stats1 = [dic['sum'] for dic in stats]
    randp_effis[varname] = stats1

In [None]:
# write out results for static variables
# (the output path is the first argument of to_file; the frame itself must not
# be passed again)
# NOTE(review): the ESRI Shapefile format limits field names to 10 characters,
# so longer variable names may be truncated — confirm, or export to CSV as the
# other sections do.
randp_effis.to_file('/home/marco/Desktop/testbuffers/extractions/EFFIS_STATIC.shp')

###  Spatio-temporal datasets 

#### Event-scale variables

In [None]:
# Event-scale extraction: `event_scale` is run once per raster path in `files`.
# NOTE(review): in top-to-bottom order `files` is not assigned before this
# cell (its first assignment is in the WIND section) — confirm the raster list
# intended for EFFIS.
# The context manager shuts the workers down even when a worker raises.
with multiprocessing.Pool(7) as pool:
    res = pool.map(event_scale, files)
# discard empty results
res1 = [x for x in res if x]
for result in res1:
    # each result is a one-entry dict: {column name: per-polygon values}
    varname, tmp = next(iter(result.items()))
    randp[varname] = tmp
# export the attributes only (no geometry)
randp1 = randp.drop(columns='geometry')
randp1.to_csv('/home/marco/Desktop/testbuffers/extractions/EFFIS_dynamic.csv', index=False)

### WIND data

In [None]:
# read in file containing random polygons
randp = gpd.read_file('/home/marco/Desktop/testbuffers/buffers/random_buffers_wind.shp')
# The static-extraction cells of this section store their columns in
# `randp_wind`, which was never assigned (NameError on a top-to-bottom run).
# Keep it as an independent copy so that `randp` only accumulates the
# event-scale columns.
randp_wind = randp.copy()
# dynamic rasters whose path contains a year from 1999 until 2017
# (ordered by year; a path that matches several years is listed once per match)
list1 = glob.glob('/mnt/data1tb/rasters/EU/dynamic1_Nov19/*.tif')
files = [x for year in range(1999, 2017 + 1) for x in list1 if re.search(str(year), x)]

#### PFTs

In [None]:
# Per-polygon sum and pixel count of every PFT raster, stored as the columns
# `<name>` and `<name>_pixels`.
# Expects `randp_wind` (GeoDataFrame of the random polygons) to be defined.
for filer in glob.glob('/mnt/data1tb/rasters/EU/pfts/*.tif'):
    # column name = file name up to its first dot
    varname = os.path.basename(filer).split('.')[0]
    print(varname)
    # the context manager closes the raster once the statistics are computed
    with rst.open(filer) as tmpr:
        tmpar = tmpr.read(1)
        # Both statistics come from the frame that receives the columns
        # (the count used to be computed on `randp`), so the number of values
        # always matches the number of rows.
        stats = zonal_stats(randp_wind, tmpar, affine=tmpr.transform, stats=['sum', 'count'],
                            all_touched=True, nodata=tmpr.nodata)
    # ---- sum of pixels; missing values become 0
    # (reassign instead of a chained in-place fillna, which is not guaranteed
    # to write back into the frame)
    randp_wind[varname] = [dic['sum'] for dic in stats]
    randp_wind[varname] = randp_wind[varname].fillna(0)
    # ---- count of pixels; missing values become 0
    varname1 = varname + '_pixels'
    randp_wind[varname1] = [dic['count'] for dic in stats]
    randp_wind[varname1] = randp_wind[varname1].fillna(0)

#### Continuous variables

In [None]:
# Per-polygon mean of every continuous static raster, one column per raster.
# Expects `randp_wind` (GeoDataFrame of the random polygons) to be defined.
for filer in glob.glob('/mnt/data1tb/rasters/EU/static1/*.tif'):
    # column name = file name up to its first dot
    varname = os.path.basename(filer).split('.')[0]
    print(varname)
    # the context manager closes the raster once the statistics are computed
    with rst.open(filer) as tmpr:
        tmpar = tmpr.read(1)
        # ---- zonal statistics (mean), computed on the frame that receives the
        # column (it used to be `randp`) so values and rows always line up
        stats = zonal_stats(randp_wind, tmpar, affine=tmpr.transform, stats=['mean'],
                            all_touched=True, nodata=tmpr.nodata)
    # one value per polygon
    stats1 = [dic['mean'] for dic in stats]
    randp_wind[varname] = stats1
    # missing values become 0 (reassign instead of a chained in-place fillna,
    # which is not guaranteed to write back into the frame)
    randp_wind[varname] = randp_wind[varname].fillna(0)

#### EFI trees

In [None]:
# Per-polygon sum of every EFI tree cover raster, one column per raster.
# Expects `randp_wind` (GeoDataFrame of the random polygons) to be defined.
for filer in glob.glob('/mnt/data1tb/rasters/EU/EFItrees/*.tif'):
    # column name = file name up to its first dot
    varname = os.path.basename(filer).split('.')[0]
    print(varname)
    # the context manager closes the raster once the statistics are computed
    with rst.open(filer) as tmpr:
        tmpar = tmpr.read(1)
        # zonal statistics (sum), computed on the frame that receives the
        # column (it used to be `randp`) so values and rows always line up
        stats = zonal_stats(randp_wind, tmpar, affine=tmpr.transform, stats=['sum'],
                            all_touched=True, nodata=tmpr.nodata)
    # one value per polygon; missing values are kept as they are
    stats1 = [dic['sum'] for dic in stats]
    randp_wind[varname] = stats1

In [None]:
# write out results for static variables
#randp_wind1 = randp.drop(['geometry'], axis=1)
#randp_wind1.to_csv('/home/marco/Desktop/testbuffers/extractions/WIND_P1.csv',index=False)

#### Event scale variables

In [None]:
# Event-scale extraction: `event_scale` is run once per raster path in `files`.
# The context manager shuts the workers down even when a worker raises.
with multiprocessing.Pool(6) as pool:
    res = pool.map(event_scale, files)
# discard empty results
res1 = [x for x in res if x]
for result in res1:
    # each result is a one-entry dict: {column name: per-polygon values}
    varname, tmp = next(iter(result.items()))
    randp[varname] = tmp

In [None]:
# export the event-scale table without its geometry column
randp1 = randp.drop(columns='geometry')
randp1.to_csv(
    path_or_buf='/home/marco/Desktop/testbuffers/extractions/WIND_dynamic.csv',
    index=False,
)

### INSECT data

In [None]:
# read in file containing random polygons
randp = gpd.read_file('/home/marco/Desktop/testbuffers/buffers/random_buffers_insects.shp')
# The static-extraction cells of this section read and fill `randp_insects`,
# which was never assigned (NameError on a top-to-bottom run). Keep it as an
# independent copy so that `randp` only accumulates the event-scale columns.
randp_insects = randp.copy()

#### PFTs

In [None]:
# Per-polygon sum and pixel count of every PFT raster, stored as the columns
# `<name>` and `<name>_pixels`.
# Expects `randp_insects` (GeoDataFrame of the random polygons) to be defined.
for filer in glob.glob('/mnt/data1tb/rasters/NA/pfts/*.tif'):
    # column name = file name up to its first dot
    varname = os.path.basename(filer).split('.')[0]
    print(varname)
    # the context manager closes the raster once the statistics are computed
    with rst.open(filer) as tmpr:
        tmpar = tmpr.read(1)
        # one pass returns both statistics for each polygon
        stats = zonal_stats(randp_insects, tmpar, affine=tmpr.transform, stats=['sum', 'count'],
                            all_touched=True, nodata=tmpr.nodata)
    # ---- sum of pixels; missing values become 0
    # (reassign instead of a chained in-place fillna, which is not guaranteed
    # to write back into the frame)
    randp_insects[varname] = [dic['sum'] for dic in stats]
    randp_insects[varname] = randp_insects[varname].fillna(0)
    # ---- count of pixels; missing values become 0
    varname1 = varname + '_pixels'
    randp_insects[varname1] = [dic['count'] for dic in stats]
    randp_insects[varname1] = randp_insects[varname1].fillna(0)

#### Continuous variables

In [None]:
# Per-polygon mean of every continuous static raster, one column per raster.
# Expects `randp_insects` (GeoDataFrame of the random polygons) to be defined.
for filer in glob.glob('/mnt/data1tb/rasters/NA/static/*.tif'):
    # column name = file name up to its first dot
    varname = os.path.basename(filer).split('.')[0]
    print(varname)
    # the context manager closes the raster once the statistics are computed
    with rst.open(filer) as tmpr:
        tmpar = tmpr.read(1)
        # ---- zonal statistics (mean)
        stats = zonal_stats(randp_insects, tmpar, affine=tmpr.transform, stats=['mean'],
                            all_touched=True, nodata=tmpr.nodata)
    # one value per polygon
    stats1 = [dic['mean'] for dic in stats]
    randp_insects[varname] = stats1
    # missing values become 0 (reassign instead of a chained in-place fillna,
    # which is not guaranteed to write back into the frame)
    randp_insects[varname] = randp_insects[varname].fillna(0)

In [None]:
# export the static variables as a plain table (geometry column removed)
randp_insects1 = randp_insects.drop(columns=['geometry'])
randp_insects1.to_csv(
    path_or_buf='/home/marco/Desktop/testbuffers/extractions/INSECTS_P1.csv',
    index=False,
)

In [None]:
# dynamic rasters whose path contains a year from 1995 until 2017, ordered by
# year (a path that matches several years is listed once per match)
list1 = glob.glob('/mnt/data1tb/rasters/NA/dynamic/*.tif')
files = [
    path
    for year in map(str, range(1995, 2018))
    for path in list1
    if re.search(year, path)
]

In [None]:
# keep only the first six raster paths
# NOTE(review): looks like a leftover test subset — confirm before a full run
files = files[:6]

In [None]:
# Run `event_scale` once per raster path in `files`.
# The pool used to be left open; the context manager shuts the workers down
# as soon as the results are in (or a worker raises).
with multiprocessing.Pool(6) as pool:
    res = pool.map(event_scale, files)

In [None]:
# Event-scale extraction: `event_scale` is run once per raster path in `files`.
# The context manager shuts the workers down even when a worker raises.
with multiprocessing.Pool(6) as pool:
    res = pool.map(event_scale, files)
# discard empty results
res1 = [x for x in res if x]
for result in res1:
    # each result is a one-entry dict: {column name: per-polygon values}
    varname, tmp = next(iter(result.items()))
    randp[varname] = tmp

### Extra variables

In [None]:
# every raster of the "extra" folder ...
files = glob.glob('/mnt/data1tb/rasters/NA/extra/*.tif')
# ... followed by the dynamic rasters whose path contains "tmean_2017"
files += filter(
    lambda fn: 'tmean_2017' in fn,
    glob.glob('/mnt/data1tb/rasters/NA/dynamic/*.tif'),
)

In [None]:
# Event-scale extraction: `event_scale` is run once per raster path in `files`.
# The context manager shuts the workers down even when a worker raises.
with multiprocessing.Pool(7) as pool:
    res = pool.map(event_scale, files)
# discard empty results
res1 = [x for x in res if x]
for result in res1:
    # each result is a one-entry dict: {column name: per-polygon values}
    varname, tmp = next(iter(result.items()))
    randp[varname] = tmp

In [None]:
# attribute table of the polygons, without the geometry column
randp1 = randp.drop(columns='geometry')

In [None]:
# write the table to CSV without the row index
randp1.to_csv(
    path_or_buf='/home/marco/Desktop/testbuffers/extractions/insect_dynamicv1.csv',
    index=False,
)