In [1]:
import fiona
import rasterio
import rasterio.mask
from rasterio.plot import show
from rasterio.warp import calculate_default_transform, reproject, Resampling

import geopandas as gpd

from osgeo import gdal
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

Burn severity data:
- https://data.fs.usda.gov/geodata/rastergateway/ravg/index.php



Code modified from:

- https://here.isnew.info/how-to-calculate-the-area-of-a-certain-class-in-a-geotiff-file-using-numpy.html
- https://thinkinfi.com/clip-raster-with-a-shape-file-in-python/


In [4]:
def make_projections_the_same(shapefile, rasterfile, year,
                              output_path='output/reprojected_shape/reprojected_sierra.shp'):
    """Reproject a shapefile into the CRS of a raster and write it to disk.

    Parameters
    ----------
    shapefile : str
        Path of the vector file to reproject.
    rasterfile : str
        Path of the raster whose CRS is used as the target projection.
    year : int
        Not used by the function; kept so existing calls keep working.
    output_path : str, optional
        Destination of the reprojected shapefile. Defaults to the path that
        used to be hard-coded.

    Side effects
    ------------
    Prints the raster's CRS and writes the reprojected shapefile to
    ``output_path``.
    """
    # Read shape file using gpd
    shape_file = gpd.read_file(shapefile)

    # Open the raster only long enough to read its CRS, so the file handle
    # is closed again instead of being leaked.
    with rasterio.open(rasterfile) as raster_file:
        raster_crs = raster_file.crs

    print('Raster file Projection: ', raster_crs)

    # Target the raster's own CRS rather than a fixed one. When the raster
    # carries no CRS, fall back to the previously hard-coded ESRI:102039.
    dst_crs = raster_crs.to_string() if raster_crs else 'ESRI:102039'

    shape_file = shape_file.to_crs(dst_crs)
    shape_file.to_file(output_path)

In [5]:
# Reproject the raw Sierra Nevada shapefile to the projection reported for the
# 2021 CBI-4 raster (ESRI:102039, printed below); the result is written to
# output/reprojected_shape/reprojected_sierra.shp.
make_projections_the_same('input/sierra-nevada-raw.shp', 'input/cbi4/ravg_2021_cbi4.tif', 2021)

Raster file Projection:  ESRI:102039


  pd.Int64Index,


In [6]:
def clip_raster_with_shapefile(shapefile, rasterfile, year, region, cat):
    """Crop a raster to the geometries of a shapefile and save it as a GeoTIFF.

    The clipped raster is written to
    ``output/clipped_tif/<year>_<region>_<cat>_clipped.tif``.
    """
    # Gather the geometry of every feature; together they form the clip mask.
    geometries = []
    with fiona.open(shapefile, "r") as features:
        for feature in features:
            geometries.append(feature["geometry"])

    # Mask and crop the raster, keeping its metadata for the output file.
    with rasterio.open(rasterfile) as source:
        clipped, clipped_transform = rasterio.mask.mask(source, geometries, crop=True)
        profile = source.meta

    # The cropped image has a new size and origin, so the metadata must follow.
    profile.update(
        driver="GTiff",
        height=clipped.shape[1],
        width=clipped.shape[2],
        transform=clipped_transform,
    )

    target = "".join(
        ["output/clipped_tif/", str(year), "_", region, "_", cat, "_clipped.tif"]
    )
    with rasterio.open(target, "w", **profile) as dest:
        dest.write(clipped)

In [7]:
def read_clipped_tiff(file):
    """Open a raster with GDAL and read its first band into an array.

    Parameters
    ----------
    file : str
        Path of the raster to open.

    Returns
    -------
    tuple
        ``(arr, df)``: the array read from band 1 and the open GDAL dataset.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``file``.
    """
    df = gdal.Open(file)
    # Unless GDAL exceptions are enabled, gdal.Open reports failure by
    # returning None; fail here with a clear message instead of an
    # AttributeError on the next line.
    if df is None:
        raise RuntimeError("GDAL could not open raster file: " + str(file))
    # Get data from raster with classifications
    band = df.GetRasterBand(1)
    arr = band.ReadAsArray()
    return arr, df

In [8]:
def extract_one_class(df, arr, classname, sel_area_name, year):
    """Build a 0/1 mask for one class value and compute the area it covers.

    Parameters
    ----------
    df : object with ``GetGeoTransform()``
        Dataset the array was read from; only its geotransform is used.
    arr : numpy.ndarray
        Classified raster values.
    classname : scalar
        Cell value to select.
    sel_area_name, year
        Not used by the computation; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(sel_arr, area_size)``: an array that is 1 where ``arr == classname``
        and 0 elsewhere, and the selected area in squared geotransform units.
    """
    # Compute the class mask once and reuse it for both outputs.
    is_selected = arr == classname
    sel_arr = np.where(is_selected, 1, 0)
    # calculate the number of cells in the selected class
    sel_arr_ncells = np.sum(is_selected)

    # based on this tutorial: https://gdal.org/tutorials/geotransforms_tut.html
    gt = df.GetGeoTransform()
    # Area of one cell = |determinant| of the pixel-to-map matrix. For the usual
    # north-up raster (gt[2] == gt[4] == 0, gt[5] < 0) this equals dx * dy, but
    # it also stays positive for south-up rasters and correct for rotated ones.
    cell_area = abs(gt[1] * gt[5] - gt[2] * gt[4])
    area_size = sel_arr_ncells * cell_area
    return sel_arr, area_size

In [9]:
def write_geotiff(file, arr, input_df):
    """Write a 2-D array as a single-band GeoTIFF georeferenced like ``input_df``.

    Parameters
    ----------
    file : str
        Output path.
    arr : numpy.ndarray
        Array to write; ``arr.shape[0]`` rows by ``arr.shape[1]`` columns.
    input_df : GDAL dataset
        Source of the projection and geotransform copied to the output.

    Raises
    ------
    RuntimeError
        If GDAL cannot create ``file``.
    """
    if arr.dtype == np.float32:
        arr_type = gdal.GDT_Float32
    elif np.issubdtype(arr.dtype, np.floating):
        # Other float widths used to fall through to Int32, silently dropping
        # the fractional part of every value.
        arr_type = gdal.GDT_Float64
    else:
        arr_type = gdal.GDT_Int32

    driver = gdal.GetDriverByName("GTiff")
    output_df = driver.Create(file, arr.shape[1], arr.shape[0], 1, arr_type)
    # Unless GDAL exceptions are enabled, Create reports failure by returning None.
    if output_df is None:
        raise RuntimeError("GDAL could not create raster file: " + str(file))

    band = None
    try:
        output_df.SetProjection(input_df.GetProjection())
        output_df.SetGeoTransform(input_df.GetGeoTransform())
        band = output_df.GetRasterBand(1)
        band.WriteArray(arr)
        band.FlushCache()
        band.ComputeStatistics(False)
    finally:
        # GDAL closes and flushes a dataset when its last reference is dropped;
        # do it explicitly so the file is complete even if an error leaves the
        # frame alive in a traceback.
        band = None
        output_df = None

In [10]:
def create_df_to_compare_years(start_year, end_year, clipped_shapefile_path, region, cat, cat_dict):
    """Clip each yearly raster to a shapefile and tabulate the area per class.

    For every year in ``start_year..end_year`` (inclusive) the raster
    ``input/<cat>/ravg_<year>_<cat>.tif`` is clipped, and for every class in
    ``cat_dict`` a 0/1 GeoTIFF is written under ``output/tif_file_by_index/``.

    Parameters
    ----------
    start_year, end_year : int
        First and last year to process (both included).
    clipped_shapefile_path : str
        Shapefile used to clip the rasters.
    region : str
        Short region label used in output file names.
    cat : str
        Dataset label used in input and output paths (e.g. ``'cbi4'``).
    cat_dict : dict
        Maps raster class values to label strings. Keys no longer need to be
        the contiguous integers 1..N; they are processed in sorted order.

    Returns
    -------
    pandas.DataFrame
        One row per (year, class) with columns ``year``, ``ind``,
        ``ind_name`` and ``sqm``.
    """
    final = []
    for year in range(start_year, end_year + 1):
        clip_raster_with_shapefile(clipped_shapefile_path, 'input/'+cat+'/ravg_'+str(year)+'_'+cat+'.tif', year, region, cat)
        clipped_path = "output/clipped_tif/"+str(year)+"_"+ region + "_" + cat +"_clipped.tif"
        arr, df = read_clipped_tiff(clipped_path)

        # Iterate over the classes actually present in cat_dict; sorting keeps
        # the 1..N order the previous index-based loop produced.
        for sel_index, sel_area_name in sorted(cat_dict.items()):
            sel_arr, sel_area_size = extract_one_class(df, arr, sel_index, sel_area_name, year)
            final.append({
                'year': year,
                'ind': sel_index,
                'ind_name': sel_area_name,
                'sqm': sel_area_size
            })
            write_geotiff("output/tif_file_by_index/"+cat+"_"+str(year)+"_"+region+"_clipped_"+sel_area_name+".tif", sel_arr, df)
            print(str(year) + " " + sel_area_name + ' finished!')
    return pd.DataFrame(final)

In [11]:
# CBI-4 class value -> severity label, numbered from 1.
burn_index_dict = dict(
    enumerate(['unchanged', 'low', 'medium', 'high'], start=1)
)

In [12]:
# Clip every yearly CBI-4 raster (2012-2021) to the reprojected Sierra Nevada
# boundary and tabulate the area of each burn-severity class per year.
df_cbi4_sierra = create_df_to_compare_years(2012, 2021, 'output/reprojected_shape/reprojected_sierra.shp', 'sierra', 'cbi4', burn_index_dict)

2012 unchanged finished!
2012 low finished!
2012 medium finished!
2012 high finished!
2013 unchanged finished!
2013 low finished!
2013 medium finished!
2013 high finished!
2014 unchanged finished!
2014 low finished!
2014 medium finished!
2014 high finished!
2015 unchanged finished!
2015 low finished!
2015 medium finished!
2015 high finished!
2016 unchanged finished!
2016 low finished!
2016 medium finished!
2016 high finished!
2017 unchanged finished!
2017 low finished!
2017 medium finished!
2017 high finished!
2018 unchanged finished!
2018 low finished!
2018 medium finished!
2018 high finished!
2019 unchanged finished!
2019 low finished!
2019 medium finished!
2019 high finished!
2020 unchanged finished!
2020 low finished!
2020 medium finished!
2020 high finished!
2021 unchanged finished!
2021 low finished!
2021 medium finished!
2021 high finished!


In [13]:
# Save the per-year, per-class burn-severity area table to CSV.
df_cbi4_sierra.to_csv('output/sum_of_sierra_fire_size_by_burn_index.csv')

# Making sure the numbers are correct

## 1. Using CalFire data (x)
I tried comparing our analysis with Calfire's data, which documents [acres burned by year.](https://www.fire.ca.gov/stats-events/)

The numbers don't match because the CBI-4 data only covers wildland fires reported within the conterminous United States (CONUS) that include at least 1,000 acres of forested National Forest System (NFS) land (500 acres for Regions 8 and 9 as of 2016).

## 2. QGIS Grass r.report (v)
I also tried using QGIS to do the same cleaning and analysis with the help of the GRASS plugin.
https://grass.osgeo.org/grass78/manuals/r.report.html

Clean and analyze the geodata entirely in QGIS, then compare the results. The steps are:
1. Crop the tifs with Sierra Nevada's vector file
2. Use Grass r.report to find out the areas burned by index (in sq. meter)

The final results calculated by the GRASS plugin, stored in the "qgis-grass" folder, are the same as the results produced by my analysis. 

## 3. Areas burned by a single wildfire (~v)

The third way to verify our analysis is to compute the size of a single wildfire with our Python script and compare it with the state's [official data.](https://hub.arcgis.com/maps/CALFIRE-Forestry::california-fire-perimeters/about) 

In the following sections, I calculate the acres burned by two fires in the Sierra Nevada region in 2020: the North Complex Fire and the Creek Fire.

In [12]:
# Compute the area (in sq. meters) per burn-severity class for the North Complex
# Fire in 2020 and save it; the conversion to acres happens in a later cell.
df_cbi4_nc = create_df_to_compare_years(2020, 2020, 'output/reprojected_shape/north_complex_fire.shp', 'nc', 'cbi4', burn_index_dict)
df_cbi4_nc.to_csv('output/sum_of_nc_fire_size_by_burn_index.csv')

2020 unchanged finished!
2020 low finished!
2020 medium finished!
2020 high finished!


In [13]:
# Compute the area (in sq. meters) per burn-severity class for the Creek Fire
# in 2020 and save it; the conversion to acres happens in a later cell.
df_cbi4_crk = create_df_to_compare_years(2020, 2020, 'output/reprojected_shape/creek_fire.shp', 'crk', 'cbi4', burn_index_dict)
df_cbi4_crk.to_csv('output/sum_of_crk_fire_size_by_burn_index.csv')

2020 unchanged finished!
2020 low finished!
2020 medium finished!
2020 high finished!


In [14]:
# Reload the per-class areas (sq. meters) saved for each fire.
nc = pd.read_csv('output/sum_of_nc_fire_size_by_burn_index.csv', index_col=0)
crk = pd.read_csv('output/sum_of_crk_fire_size_by_burn_index.csv', index_col=0)

# sq. meter to acre, applied to the whole column at once
nc['acre'] = nc['sqm'] * 0.000247105
crk['acre'] = crk['sqm'] * 0.000247105

### Numbers from U.S. Forest Service
- North Complex Fire acres burned: [318,935](https://inciweb.nwcg.gov/incident/6997/) 
- Creek Fire acres burned: [379,895](https://inciweb.nwcg.gov/incident/7147)

### Numbers calculated by our script

In [15]:
# North Complex fire size: total acres summed over all burn-severity classes
nc.acre.sum()

316604.14611750003

In [16]:
# Creek fire size: total acres summed over all burn-severity classes
crk.acre.sum()

364200.5722185

In [17]:
# North Complex Fire: relative gap between the reported acreage (318,935)
# and the total computed by this notebook (316,604).
(318935-316604)/316604

0.007362509633485364

In [18]:
# Creek Fire: relative gap between the reported acreage (379,895)
# and the total computed by this notebook (364,200).
(379895-364200)/364200

0.043094453596924764

The difference could be due to areas with an index of 0 or 9, which represent areas outside the perimeter and unmappable areas, respectively.

# Basal area change

Seven-category percent change in basal area (BA): 0 = outside perimeter; 1 = 0% BA loss; 2 = 0% < BA loss < 10%; 3 = 10% <= BA loss < 25%; 4 = 25% <= BA loss < 50%; 5 = 50% <= BA loss < 75%; 6 = 75% <= BA loss < 90%; 7 = BA loss >= 90%; 9 = unmappable.

In [19]:
# BA-7 class value -> basal-area-loss label, numbered from 1.
ba_index_dict = dict(
    enumerate(
        ['0%', '<10%', '10~25%', '25~50%', '50~75%', '75~90%', '>=90%'],
        start=1,
    )
)

# Tabulate the area of each basal-area-loss class per year, 2012-2021.
df_ba_sierra = create_df_to_compare_years(
    2012,
    2021,
    'output/reprojected_shape/reprojected_sierra.shp',
    'sierra',
    'ba7',
    ba_index_dict,
)

2012 0% finished!
2012 <10% finished!
2012 10~25% finished!
2012 25~50% finished!
2012 50~75% finished!
2012 75~90% finished!
2012 >=90% finished!
2013 0% finished!
2013 <10% finished!
2013 10~25% finished!
2013 25~50% finished!
2013 50~75% finished!
2013 75~90% finished!
2013 >=90% finished!
2014 0% finished!
2014 <10% finished!
2014 10~25% finished!
2014 25~50% finished!
2014 50~75% finished!
2014 75~90% finished!
2014 >=90% finished!
2015 0% finished!
2015 <10% finished!
2015 10~25% finished!
2015 25~50% finished!
2015 50~75% finished!
2015 75~90% finished!
2015 >=90% finished!
2016 0% finished!
2016 <10% finished!
2016 10~25% finished!
2016 25~50% finished!
2016 50~75% finished!
2016 75~90% finished!
2016 >=90% finished!
2017 0% finished!
2017 <10% finished!
2017 10~25% finished!
2017 25~50% finished!
2017 50~75% finished!
2017 75~90% finished!
2017 >=90% finished!
2018 0% finished!
2018 <10% finished!
2018 10~25% finished!
2018 25~50% finished!
2018 50~75% finished!
2018 75~90% fi

In [20]:
# Save the per-year, per-class basal-area-loss table to CSV.
df_ba_sierra.to_csv("output/sum_of_sierra_area_by_ba_index.csv")

# Canopy coverage change
Five-category percent change in canopy cover (CC): 0 = outside perimeter; 1 = 0% CC loss; 2 = 0% < CC loss < 25%; 3 = 25% <= CC loss < 50%; 4 = 50% <= CC loss < 75%; 5 = CC loss >= 75%; 9 = unmappable.

In [21]:
# CC-5 class value -> canopy-cover-loss label, numbered from 1.
cc_index_dict = dict(
    enumerate(['0%', '<25%', '25~50%', '50~75%', '>=75%'], start=1)
)

# Tabulate the area of each canopy-cover-loss class per year, 2012-2021.
df_cc_sierra = create_df_to_compare_years(
    2012,
    2021,
    'output/reprojected_shape/reprojected_sierra.shp',
    'sierra',
    'cc5',
    cc_index_dict,
)

2012 0% finished!
2012 <25% finished!
2012 25~50% finished!
2012 50~75% finished!
2012 >=75% finished!
2013 0% finished!
2013 <25% finished!
2013 25~50% finished!
2013 50~75% finished!
2013 >=75% finished!
2014 0% finished!
2014 <25% finished!
2014 25~50% finished!
2014 50~75% finished!
2014 >=75% finished!
2015 0% finished!
2015 <25% finished!
2015 25~50% finished!
2015 50~75% finished!
2015 >=75% finished!
2016 0% finished!
2016 <25% finished!
2016 25~50% finished!
2016 50~75% finished!
2016 >=75% finished!
2017 0% finished!
2017 <25% finished!
2017 25~50% finished!
2017 50~75% finished!
2017 >=75% finished!
2018 0% finished!
2018 <25% finished!
2018 25~50% finished!
2018 50~75% finished!
2018 >=75% finished!
2019 0% finished!
2019 <25% finished!
2019 25~50% finished!
2019 50~75% finished!
2019 >=75% finished!
2020 0% finished!
2020 <25% finished!
2020 25~50% finished!
2020 50~75% finished!
2020 >=75% finished!
2021 0% finished!
2021 <25% finished!
2021 25~50% finished!
2021 50~75% f

In [22]:
# Save the per-year, per-class canopy-cover-loss table to CSV.
df_cc_sierra.to_csv("output/sum_of_sierra_area_by_cc_index.csv")