# Create Individual Deforestation Files From Hansen Dataset

The purpose of this notebook is twofold:
1. Take the 'lossyear' image from the Hansen et al. (2013) dataset and create one GeoTIFF file, `deforestation_<year>.tif`, for each desired year.
2. Crop 'lossyear' and 'treecover2000' so that only pixels within the boundary of the properties active in that year are kept.

# Import the necessary libraries and load the datasets.


In [46]:
import os
from pathlib import Path
import rasterio
from rasterio.merge import merge
from rasterio.mask import mask
from rasterio.transform import from_origin
import geopandas as gpd
import pandas as pd
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
from constants import HANSEN_FILEPATHS, PROPERTIES_SHAPEFILE_PATHS, PROJECT_PATH, SHAPEFILE_PATH

# Extract pixels corresponding to each year (2011-2021).

In [None]:
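# Extract pixels corresponding to each year (2011-2021)
# Binary variant, kept for reference only: the code below is wrapped in a
# string literal, so it does not run.
# It returns binary layers (loss pixels encoded as 1, e.g. unique values for
# year 2011: [0 1]), but we want to keep the encoded year value of each pixel
# (see extract_pixels_by_year below).
# Adding `year_pixels[year_pixels == 0] = np.nan` would turn the zeros into NaN;
# without that line the zeros are kept.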
'''
def extract_pixels_by_year_binary(raster_data, start_year, end_year):
    year_data = {}
    for year in range(start_year, end_year + 1):
        year_pixels = (raster_data == year).astype(int) 
        year_data[year] = year_pixels

        # Print unique values for each year
        unique_values = np.unique(year_pixels)
        print(f"Unique values for year {year + 2000}: {unique_values}") # Add 2000 to the year to get the correct year values
    return year_data

pixels_by_year = extract_pixels_by_year(study_boundary_masked, 11, 21)
'''




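Returns pixels encoded with the value of the corresponding year (11, 12, 13, ...); every other pixel is 0.
A `year_pixels[year_pixels == 0] = np.nan` step would turn the zeros into NaN; that step is not applied in the function below, so the unique values for 2011 are `[0 11]`.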

In [30]:
def extract_pixels_by_year(raster_data, start_year, end_year):
    """Split a year-coded raster into one layer per year code.

    Parameters
    ----------
    raster_data : array-like supporting ``==`` and ``.astype``
        Array whose cell values are year codes (offsets from 2000, as the
        printed summary adds 2000 to each code).
    start_year, end_year : int
        First and last year code to extract, both inclusive.

    Returns
    -------
    dict
        Maps each year code to an integer array shaped like ``raster_data``
        that holds the code where the input equals it and 0 everywhere else.
    """
    layers = {}
    for code in range(start_year, end_year + 1):
        hits = raster_data == code
        # Scale the 0/1 match mask by the code so the layer keeps the year value.
        layers[code] = hits.astype(int) * code

        # Report which values ended up in this year's layer.
        print(f"Unique values for year {code + 2000}: {np.unique(layers[code])}")

    return layers

# Build one year-coded layer for each code 11..21 (reported as 2011-2021).
# NOTE(review): `hansen_array` is not defined in any cell visible in this
# notebook — presumably the Hansen 'lossyear' band loaded as a NumPy array;
# confirm it is created before this cell so Restart & Run All works.
pixels_by_year = extract_pixels_by_year(hansen_array, 11, 21)

Unique values for year 2011: [ 0 11]
Unique values for year 2012: [ 0 12]
Unique values for year 2013: [ 0 13]
Unique values for year 2014: [ 0 14]
Unique values for year 2015: [ 0 15]
Unique values for year 2016: [ 0 16]
Unique values for year 2017: [ 0 17]
Unique values for year 2018: [ 0 18]
Unique values for year 2019: [ 0 19]
Unique values for year 2020: [ 0 20]
Unique values for year 2021: [ 0 21]


# Write raster files for each year 

In [33]:
# Write raster files for each year
def write_year_rasters(year_data, out_transform, out_meta, output_dir):
    """Write each yearly array to its own GeoTIFF, as band 1.

    Parameters
    ----------
    year_data : dict
        Maps a year code to the array written as band 1 of that year's file.
    out_transform : affine transform
        Georeferencing for the output files; it takes precedence over any
        ``transform`` entry already present in ``out_meta``.
    out_meta : mapping
        Keyword arguments forwarded to ``rasterio.open`` in write mode
        (driver, dtype, size, CRS, ...). It is copied, not modified.
    output_dir : str or path-like
        Directory receiving ``deforestation_<year>.tif``. This function does
        not create it.
    """
    # Put the transform into the creation profile so the file is georeferenced
    # when it is created, instead of patching the dataset after the pixel data
    # has already been written.
    profile = {**out_meta, "transform": out_transform}

    for year, data in year_data.items():
        out_filepath = os.path.join(output_dir, f'deforestation_{year}.tif')
        with rasterio.open(out_filepath, 'w', **profile) as dst:
            dst.write(data, 1)

# Destination folder for the per-year GeoTIFFs; created if it does not exist.
output_dir = os.path.join(PROJECT_PATH, 'output', 'deforestation_by_year')
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): `out_transform` and `out_meta` are not defined in any cell
# visible in this notebook — presumably the transform and metadata of the
# raster that `hansen_array` was read from; confirm they exist on a fresh kernel.
write_year_rasters(pixels_by_year, out_transform, out_meta, output_dir)


In [None]:
# Sanity check: re-open the year-11 file written above and list its unique values.
# The path is built from `output_dir` (where write_year_rasters saved the files)
# instead of a hard-coded absolute path that only exists on one machine.
deforestation_11_written = os.path.join(output_dir, 'deforestation_11.tif')
with rasterio.open(deforestation_11_written) as src:
    data = src.read(1)
print("Unique values in uncropped TIF:", np.unique(data))

# Crop Hansen Dataset within Property

In [48]:
# CRS the property boundaries are converted to before cropping.
TARGET_CRS = "epsg:4326"

# Load the property boundaries; the first path in the list should be the 2011 file.
shapefile_2011_path = PROPERTIES_SHAPEFILE_PATHS[0]
properties_2011 = gpd.read_file(shapefile_2011_path)

# Show the CRS the shapefile was stored in.
crs = properties_2011.crs
print("CRS:", crs)

# Reproject only when the stored CRS is not already the target one.
if crs != TARGET_CRS:
    properties_2011 = properties_2011.to_crs(TARGET_CRS)



CRS: PROJCS["WGS 84 / UTM zone 21S",GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-57],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["metre",1],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","32721"]]


In [42]:
 # Define the deforestation_11 and treecover2000 file paths
# Rasters that are cropped to the property boundaries further down.
# NOTE(review): both are absolute, machine-specific paths — consider deriving
# them from a configurable base directory so the notebook runs elsewhere.
deforestation_11_filepath = '/Users/romero61/../../capstone/pyforest/ml_data/hansen/deforestation_by_year/deforestation_11.tif'
treecover2000_filepath = '/Users/romero61/../../capstone/pyforest/ml_data/hansen/clipped_treecover2000.tif'

In [44]:
# Crop and mask the deforestation_11 and treecover2000 files
def crop_and_mask(input_raster_path, output_raster_path, shapefile):
    """Crop a raster to the geometries of a GeoDataFrame and save it as GeoTIFF.

    Parameters
    ----------
    input_raster_path : str or path-like
        Raster to read.
    output_raster_path : str or path-like
        Destination of the cropped raster (written with the GTiff driver).
    shapefile : geopandas.GeoDataFrame
        Its ``geometry`` column provides the shapes used for masking. When its
        CRS differs from the raster's, a reprojected copy is used; the caller's
        object is left untouched.
    """
    with rasterio.open(input_raster_path) as src:
        # rasterio.mask.mask expects the shapes in the raster's own CRS, so
        # align them here instead of relying on the caller having reprojected
        # to the right system. Skipped when either CRS is unknown.
        shapes = shapefile
        if src.crs is not None and shapefile.crs is not None and shapefile.crs != src.crs:
            shapes = shapefile.to_crs(src.crs)

        out_image, out_transform = rasterio.mask.mask(src, shapes.geometry, crop=True)
        # Work on a copy so the update below can never touch the dataset's metadata.
        out_meta = src.meta.copy()

    # out_image is (bands, rows, cols); record the cropped size and new transform.
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})

    with rasterio.open(output_raster_path, "w", **out_meta) as dest:
        dest.write(out_image)


In [49]:


# Folder that receives the property-cropped rasters
cropped_output_dir = Path(output_dir) / 'cropped'
os.makedirs(cropped_output_dir, exist_ok=True)

# Crop and mask the deforestation_11 and treecover2000 files to the 2011 properties
for source_path, cropped_name in (
    (deforestation_11_filepath, 'cropped_deforestation_11.tif'),
    (treecover2000_filepath, 'cropped_treecover2000.tif'),
):
    crop_and_mask(source_path, cropped_output_dir / cropped_name, properties_2011)

In [None]:
# Preview the year-11 (2011) layer: cells hold 11 where the raster matched that
# code and 0 elsewhere.
# `pixels_value_11` was never defined in this notebook (NameError on a fresh
# kernel); the year-11 layer is available as pixels_by_year[11].
# NOTE(review): assumed to be the array the original cell meant — confirm.
fig, ax = plt.subplots()
image = ax.imshow(pixels_by_year[11], cmap='viridis')
fig.colorbar(image, ax=ax)
ax.set_title('Deforestation pixels, 2011')
plt.show()