# **Subset the LULCC data to a shapefile**

By Bridget Bittmann

Date created: March 14, 2022

In [47]:
## IMPORT PACKAGES ##

import numpy as np #basic computation
!pip install geopandas
import geopandas as gpd #geopandas for .shp
import matplotlib.pyplot as plt #to create plots
import pandas as pd #to create dataframes and export .csv
!pip install rasterio
import rasterio as rso #import GeoTiff files
from rasterio.mask import mask #to crop data to a boundary
from rasterio.plot import show #to plot the image
from rasterio.crs import CRS
from shapely.ops import unary_union #creates boundary of shapefile
import json #imports metadata
!pip install rioxarray #to clip rasters to a .shp file
import rioxarray as rxr
from rasterio.warp import calculate_default_transform, reproject, Resampling
!pip install pylandstats
import pylandstats #to perform landscape metrics
from pylandstats import landscape
from pylandstats import SpatioTemporalAnalysis #to calculate landscape metrics through time
import glob
import os



In [2]:
## MOUNT GOOGLE DRIVE ##
from google.colab import drive

# force_remount=True replaces any mount left over from an earlier run of this cell
drive.mount(mountpoint='/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [3]:
## NAVIGATE TO YOUR DIRECTORY ##
%cd gdrive/MyDrive/spatial_colab/datasets/
%ls

/content/gdrive/MyDrive/spatial_colab/datasets
2005a.tif  2010.tif  2016.tif    2021.tif               irrig_lbrb/
2006.tif   2011.tif  2017.tif    Boise_CDL/             IrrMapper/
2007a.tif  2012.tif  2018.tif    CDL_reproject/         LBRB_shp/
2007b.tif  2013.tif  2019.tif    CDL_subsets/           masked/
2008.tif   2014.tif  2020.tif    diversion_timeseries/  subset_test_shp/
2009.tif   2015.tif  2021_clip/  irrigation_companies/


In [49]:
## SUBSET THE CDL RASTERS TO THE SHAPEFILE ##
## --------------------------------------- ##
# Clips every CDL raster to each feature of the shapefile, keeps the clipped
# arrays in `collection`, and saves the clipped raster back to disk.
# WARNING: THE CLIPPED RASTERS OVERWRITE THE ORIGINAL FILES.

shp_file = gpd.read_file('subset_test_shp/subset.shp') #open shapefile
years = np.arange(2008, 2022) #years of CDL data

# glob returns paths in arbitrary order; sort so that files[n] pairs with years[n]
# (assumes the file names differ only by year, e.g. RasExt_CDL_2010_30m.tif -- TODO confirm)
files = sorted(glob.glob('Boise_CDL/CDLs_boisearea/*.tif')) #get all the years of cdl imagery
if not files:
  raise FileNotFoundError('No .tif files found in Boise_CDL/CDLs_boisearea/')

collection = [] #clipped arrays, ordered by shapefile feature and then by year
clipped = {} #raster path -> (clipped array, metadata) waiting to be written
data = [] #open source rasters, one per year
try:
  for path in files[:len(years)]:
    data.append(rso.open(path)) #open cdl image and append to a list
  # reproject the shp file to the rasters' projection
  # (assumes all rasters share the CRS of the first one -- TODO confirm)
  shp = shp_file.to_crs(data[0].crs)
  for i in range(len(shp)):
    # the geometry does not depend on the year, so build it once per feature
    extent = gpd.GeoSeries(shp['geometry'].iloc[i]) #get the geometry from shapefile
    coords = [json.loads(extent.to_json())['features'][0]['geometry']] #gets coordinates for rasterio input
    for path, src in zip(files, data):
      out_img, out_transform = mask(dataset=src, shapes=coords, crop=True, nodata=0) #crop the data to the shapefile
      out_meta = src.meta.copy()
      out_meta.update({"driver": "GTiff",
                       "height": out_img.shape[1],
                       "width": out_img.shape[2],
                       "transform": out_transform})
      # every clip is read from the untouched source raster; if the shapefile has
      # several features, the clip of the last feature is the one saved to disk
      clipped[path] = (out_img, out_meta)
      collection.append(out_img)
finally:
  # close the sources before any of them is overwritten below
  for src in data:
    src.close()

# Save clipped land cover coverage THIS WILL OVERWRITE FILES
for out_filename, (out_img, out_meta) in clipped.items():
  with rso.open(out_filename, 'w', **out_meta) as out_tiff:
    out_tiff.write(np.squeeze(out_img, 0), 1) #drop the band axis and write as band 1

In [58]:
## Import multiple rasters into PyLandStats ##
## ---------------------------------------- ##

years = np.arange(2008, 2022) #one date label per CDL raster
shp_file = gpd.read_file('subset_test_shp/subset.shp') #open shapefile
# NOTE: `data` is left open in case later cells read from it -- close it when no longer needed
data = rso.open('Boise_CDL/CDLs_boisearea/RasExt_CDL_2010_30m.tif') #open cdl image
shp_file = shp_file.to_crs(data.crs) #reproject the shp file to same projection
# files[n] is labelled with years[n], and glob returns paths in arbitrary order,
# so sort to keep each raster attached to its own year
# (assumes the file names differ only by year -- TODO confirm)
files = sorted(glob.glob('Boise_CDL/CDLs_boisearea/*.tif')) #paths of all the CDL rasters
sta = SpatioTemporalAnalysis(files, dates=years, nodata=0) #import all CDL rasters, treating 0 as nodata

In [59]:
# CLASS-LEVEL METRIC: PROPORTION OF THE LANDSCAPE PER CLASS #
# --------------------------------------------------------- #

class_metrics = ['proportion_of_landscape']
df = sta.compute_class_metrics_df(metrics=class_metrics) #one value per class and date

In [60]:
# Show the class-metrics table computed in the previous cell
# (indexed by class value and date, one column per metric).
display(df)

Unnamed: 0_level_0,metric,proportion_of_landscape
class_val,dates,Unnamed: 2_level_1
1,2008,1.670015
1,2009,0.992414
1,2010,3.065691
1,2011,4.197523
1,2012,3.305550
...,...,...
247,2017,0.000000
247,2018,0.000000
247,2019,0.000000
247,2020,0.000000
