<a href="https://colab.research.google.com/github/duncansnh/burn-mapping/blob/master/CUU_burn_extent_pixel_box_plots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Script to extract pixel values from a raster for a set of training polygons and then generate box plots
Inputs are

1)  Indices generated from Sentinel-2 imagery (produced by the previous script)

2)  Polygon dataset of training samples. It needs a 'Poly_ID_' field containing the polygon ID (short integer) and a 'Class_labe' field containing the class label.

Main steps:

- extracts pixel values for the polygons
- generates box plots using the seaborn package

Duncan Blake NatureScot 11 September 2020

In [None]:
#This is only required if running in colab notebook to install the libraries
#If running Python code elsewhere need to make sure below libraries are installed
! pip install geopandas
! pip install descartes
! pip install rasterio
! pip install rasterstats

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import geopandas as gpd
import descartes
import rasterio
from rasterio.mask import mask
from rasterio.features import geometry_mask
from shapely.geometry import mapping
from rasterstats import zonal_stats
import seaborn as sns

In [None]:
# Only relevant when running in Google Colab, in which case the input image,
# training polygons and output results need to be in Google Drive.
# The import is guarded so that running all cells outside Colab does not stop here.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available - skipping the Google Drive mount")
else:
    drive.mount('/content/drive/', force_remount=True)

Set working drive, file paths and dataset name parameters


In [None]:

# Working folder and the sub-folders holding the imagery, the training polygons and the output plots
wd = '/content/drive/My Drive'
image_dir, training_dir, plots_dir = (
    os.path.join(wd, folder) for folder in ('Imagery', 'Training_Data', 'Plots')
)

# Alternative parameters for Skye (kept commented out)
# image = 'CUU_burn_skye_indices.tif'
# training_polys = 'Box_plot_polygons_Skye.shp'
# filename_diff = ('Skye_difference_indices_boxplot.png')
# filename = ('Skye_postburn_indices_boxplot.png')

# Parameters for east Cairngorms: input image, training polygons and the two output plot file names
image = 'CUU_burn_cairngorms_indices.tif'
training_polys = 'Box_plot_polygons_Cairngorms.shp'
filename_diff = 'East_cairngorms_difference_indices_boxplot.png'
filename = 'East_cairngorms_postburn_indices_boxplot.png'

### Open input image and polygon dataset

In [None]:
# Open the image; the dataset handle stays open because later cells read pixels from it
s2 = rasterio.open(os.path.join(image_dir, image))
# Print number of bands
B = s2.count
print(B)
# Read the training polygons
TaggedPolys = gpd.read_file(os.path.join(training_dir, training_polys))
# Print the number of polygons; len() is used because `TaggedPolys.count`
# without parentheses is only a reference to the method, not a count
print(len(TaggedPolys))
# Show column headings
for col in TaggedPolys.columns:
    print(col)

## Extract the data for all pixels whose centroid falls within the polygons, adding the polygon ID and text class label as the final two columns






In [None]:

def getPixels(image, poly, indexInput, polygons,  target):
    """Append the pixel values found inside one polygon to ``target``.

    The image is masked to the polygon (pixels outside it become NaN), reshaped
    to one row per pixel and one column per band, and two columns are added:
    the polygon ID and the class label as text. Rows containing NaN are dropped.

    Parameters
    ----------
    image : open rasterio dataset to read the pixel values from.
    poly : polygon geometry, converted to a GeoJSON-like mapping for masking.
    indexInput : index label of this polygon's row in ``polygons`` (used with ``.loc``).
    polygons : table holding the "Poly_ID_" and "Class_labe" attributes.
    target : 2D array with (number of bands + 2) columns that the new rows are appended to.

    Returns
    -------
    numpy.ndarray
        A new array made of ``target`` followed by one row per retained pixel.
    """
    # rasterio's mask expects a list of GeoJSON-like geometries
    shape = [mapping(poly)]

    # reduce imagery to the pixels overlapping the polygon, NaN outside the polygon
    outImage, _ = mask(image, shape, crop=True, nodata=np.nan)

    # the band count is read from the masked array (bands, rows, cols) rather than
    # from a notebook-level global, so the function works for any image passed in
    bandCount = outImage.shape[0]
    # one row per pixel, one column per band
    pixelRows = outImage.reshape((bandCount, -1)).T
    pixelCount = pixelRows.shape[0]

    # attributes of the current polygon
    currentPolyID = polygons.loc[indexInput, "Poly_ID_"]
    currentCategory = str(polygons.loc[indexInput, "Class_labe"])

    # band values followed by the polygon ID (numeric block)
    numericBlock = np.column_stack((pixelRows, np.full(pixelCount, currentPolyID)))
    # class label kept as text in an object column
    labelColumn = np.full((pixelCount, 1), currentCategory, dtype=object)

    # combine into one object array and drop the rows holding noData (NaN) values
    combined = np.concatenate((numericBlock.astype(object), labelColumn), axis=1)
    outList = pd.DataFrame(combined).dropna()
    return np.append(target, outList, axis=0)


def extractAllPolygons(image, featuresgeom, features):
    """Extract the pixel values for every polygon into a single DataFrame.

    Parameters
    ----------
    image : open rasterio dataset to read the pixel values from.
    featuresgeom : iterable of polygon geometries; the position of each geometry
        is passed to ``getPixels`` as the row index into ``features``.
    features : table holding the polygon attributes read by ``getPixels``.

    Returns
    -------
    pandas.DataFrame
        One row per pixel with (number of bands + 2) integer-labelled columns:
        the band values, the polygon ID and the class label. Rows containing
        NaN are dropped. All columns except the last are converted to float.
    """
    # number of columns in the extracted pixel dataset: bands + polygon ID + class label
    finalcolno = image.count + 2
    # empty float array handed to getPixels so that each call returns only its own rows
    empty = np.empty((0, finalcolno), dtype=float)

    # collect the rows of each polygon and join them once, instead of
    # re-copying a growing array on every iteration
    perPolygon = [
        getPixels(image, geom, position, features, empty)
        for position, geom in enumerate(featuresgeom)
    ]
    flatten = np.concatenate(perPolygon, axis=0) if perPolygon else empty

    # NaN never compares equal to itself, so an `== np.nan` mask selects nothing;
    # dropna() is what actually removes any remaining noData rows
    result = pd.DataFrame(flatten).dropna()

    # every column except the trailing class label is numeric; store those as float
    # rather than generic Python objects
    numericColumns = result.columns[:-1]
    return result.astype({column: float for column in numericColumns})

# Run the extraction: inputs are the open image, the array of polygon geometries
# and the GeoDataFrame holding the polygon attributes
totValues = extractAllPolygons(
    image=s2,
    featuresgeom=TaggedPolys.geometry.values,
    features=TaggedPolys,
)
print(totValues.head())

## Generate box plots

In [None]:
# Set the 23 column names. Each of the seven indices contributes three columns named
# pre<index>, post<index> and d<index> (in that order), followed by the polygon ID and
# the class label. Building the names from the pattern keeps them consistent
# (e.g. 'presavi' alongside 'postsavi' and 'dsavi').
indexNames = ['nbr', 'nbr2', 'ndvi', 'nmirbi', 'csi', 'bais2', 'savi']
totValues.columns = (
    [prefix + indexName for indexName in indexNames for prefix in ('pre', 'post', 'd')]
    + ['Poly_ID_', 'Class']
)


### Run loop to create box plots for difference indices which should be run with the change classes only

In [None]:
# The difference indices we want to create box plots for
dList = ['dnbr','dnbr2','dndvi','dnmirbi','dcsi','dbais2','dsavi']

# Subset to contain difference indices only (plus the class label)
df_differences = totValues[dList + ['Class']]
print(df_differences.head())

# next select only the change classes we want to compare
dClassList = ["Burns", "Field to bare field", "Trees to no trees", "Cloud to no cloud","No cloud to cloud","Shadow to no shadow",
              "No shadow to shadow", "Snow to no snow"]

# define class list as the order we want to display the classes in the boxplots.  This will include any categories with no data though.
orderList = dClassList

df_differences_classes = df_differences[df_differences.Class.isin(dClassList)]
print(df_differences_classes.head())

# The style is set before the figure is created so that it applies to these axes
sns.set_style("whitegrid")
fig, axs = plt.subplots(4,2, figsize=(12,15))
panels = axs.flatten()

# One box plot per index. The x axis is ordered and outliers are not drawn.
for position, (ax, indexName) in enumerate(zip(panels, dList)):
    sns.boxplot(x = 'Class', y = indexName, data = df_differences_classes, ax=ax, order=orderList, showfliers=False)
    ax.set_title(indexName)
    # label the panel's own y axis (plt.ylabel would act on the current axes instead)
    ax.set_ylabel(indexName + ' values')
    if position < len(dList) - 2:
        # upper panels: hide the x axis, the class names are shown on the bottom panels only
        ax.get_xaxis().set_visible(False)
    else:
        # the last panel of each column carries the rotated class names
        ax.set_xticklabels(ax.get_xticklabels(), rotation=45, horizontalalignment='right')

# hide the grid cells that have no index to show
for ax in panels[len(dList):]:
    ax.set_visible(False)

# export the figure - needs to happen before displaying - reduce whitespace with the bbox_inches command
os.makedirs(plots_dir, exist_ok=True)  # make sure the output folder exists
fig.savefig(f"{plots_dir}/{filename_diff}", bbox_inches='tight')
# display the figure
plt.show()

### Run loop to create box plots for post burn indices which should be run with the land cover classes only

In [None]:
# The post burn indices we want to create box plots for
dList = ['postnbr','postnbr2','postndvi','postnmirbi','postcsi','postbais2','postsavi']

# Subset to contain post burn indices only (plus the class label)
df_differences = totValues[dList + ['Class']]
print(df_differences.head())

# next select only the land cover classes we want to compare
dClassList = ["Burns", "Bare field", "Bare peat", "Felled", "Rock",
              "Shadow", "Vegetation", "Water"]

# define class list as the order we want to display the classes in the boxplots.  This will include any categories with no data though.
orderList = dClassList
df_differences_classes = df_differences[df_differences.Class.isin(dClassList)]
print(df_differences_classes.head())

# The style is set before the figure is created so that it applies to these axes
sns.set_style("whitegrid")
fig, axs = plt.subplots(4,2, figsize=(12,15))
panels = axs.flatten()

# x axis ordered and outliers are removed because some were quite extreme and the plots were hard to interpret
for position, (ax, indexName) in enumerate(zip(panels, dList)):
    sns.boxplot(x = 'Class', y = indexName, data = df_differences_classes, ax=ax, order=orderList, showfliers=False)
    ax.set_title(indexName)
    # label the panel's own y axis (plt.ylabel would act on the current axes instead)
    ax.set_ylabel(indexName + ' values')
    if position < len(dList) - 2:
        # upper panels: hide the x axis, the class names are shown on the bottom panels only
        ax.get_xaxis().set_visible(False)
    else:
        # the last panel of each column carries the rotated class names
        ax.set_xticklabels(ax.get_xticklabels(), rotation=45, horizontalalignment='right')

# hide the grid cells that have no index to show
for ax in panels[len(dList):]:
    ax.set_visible(False)

# export the figure - needs to happen before displaying - reduce whitespace with the bbox_inches command
os.makedirs(plots_dir, exist_ok=True)  # make sure the output folder exists
fig.savefig(f"{plots_dir}/{filename}", bbox_inches='tight')
# display the figure
plt.show()