# **Subset geospatial data and calculate landscape metrics**

By Bridget Bittmann

Date created: March 14, 2022

Date edited: March 28, 2022

In [None]:
## IMPORT PACKAGES ##

import numpy as np #basic computation
!pip install geopandas
import geopandas as gpd #geopandas for .shp
import pandas as pd #to create dataframes and export .csv
!pip install rasterio
import rasterio as rso #import GeoTiff files
from rasterio.mask import mask #to crop data to a boundary
from rasterio.plot import show #to plot the image
from rasterio.crs import CRS
from shapely.ops import unary_union #creates boundary of shapefile
import json #imports metadata
!pip install rioxarray #to clip rasters to a .shp file
import rioxarray as rxr
from rasterio.warp import calculate_default_transform, reproject, Resampling
!pip install pylandstats
import pylandstats #to perform landscape metrics
from pylandstats import landscape
from pylandstats import SpatioTemporalAnalysis #to calculate landscape metrics through time
import glob
import os

In [None]:
# Mount Google Drive at /content/gdrive so the notebook can read and write
# the datasets stored there (Colab-only; force_remount=True is passed).
from google.colab import drive 
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
## NAVIGATE TO YOUR DIRECTORY ##
%cd gdrive/MyDrive/
%ls

/content/gdrive/MyDrive
[0m[01;34m'BSU Year 1'[0m/                                [01;34m'Grad Logistics'[0m/
 [01;34mColab[0m/                                      [01;34m'Hydro Summer Meet up'[0m/
[01;34m'Colab Notebooks'[0m/                           'PODS for Bridgett.kmz'
 [01;34mDrainage_analysis[0m/                           [01;34mspatial_colab[0m/
 GMT20220318-210220_Recording_1920x1080.mp4  [01;34m'Thesis Papers'[0m/


In [None]:
## ---------------------- ##
## SUBSET GEOSPATIAL DATA ##
## ---------------------- ##
# Clip each yearly LCMAP raster to every drain polygon in the shapefile and
# write one GeoTIFF per (drain, year) to Drainage_analysis/datasets/land_use/.
# THIS WILL OVERWRITE EXISTING FILES WITH THE SAME NAME.

shp_file = gpd.read_file('Drainage_analysis/datasets/drain_delineation/Drains_Merge_07072022.shp') #open shapefile
names = shp_file['Name']
print(names)

# glob returns paths in arbitrary order; sort so every run is deterministic
files = sorted(glob.glob('spatial_colab/datasets/lcmap_files/*.tiff')) #get all the years of LCMAP imagery
years = np.arange(1987, 2021) #years of LCMAP data

# The 4-digit year is read from characters 9-12 of the raster *file name*
# (same characters the directory-dependent slice [44:48] of the full path
# used to pick), so the lookup no longer depends on the folder prefix length.
YEAR_SLICE = slice(9, 13)
raster_by_year = {}
for path in files:
  year_str = os.path.basename(path)[YEAR_SLICE]
  if not year_str.isdigit():
    raise ValueError('Cannot read a 4-digit year from raster file name: ' + path)
  raster_by_year[int(year_str)] = path

# Process exactly the rasters for the requested years, in chronological order,
# instead of "the first len(years) files glob happened to return".
selected = [(int(y), raster_by_year[int(y)]) for y in years if int(y) in raster_by_year]
missing_years = [int(y) for y in years if int(y) not in raster_by_year]
if not selected:
  raise FileNotFoundError('No LCMAP rasters for the requested years in spatial_colab/datasets/lcmap_files/')
if missing_years:
  print('WARNING: no LCMAP raster found for years:', missing_years)

# Reproject the shapefile to the rasters' projection (all rasters are assumed
# to share one CRS, as before); the dataset is closed again right after.
with rso.open(selected[0][1]) as first_raster:
  shp = shp_file.to_crs(first_raster.crs) #reproject the shp file to same projection

collection = []
for pos in range(len(shp)):
  drain_name = names.iloc[pos]
  # One-row GeoSeries -> GeoJSON geometry dict, the shape format passed to mask().
  # Done once per drain because it does not change between years.
  extent = shp['geometry'].iloc[[pos]]
  coords = [json.loads(extent.to_json())['features'][0]['geometry']] #gets coordinates for rasterio input
  for year, path in selected:
    # Context manager guarantees the source raster is closed after clipping
    with rso.open(path) as dataset:
      out_img, out_transform = mask(dataset=dataset, shapes=coords, crop=True, nodata=0) #crop the data to the shapefile
      out_meta = dataset.meta.copy()
    out_meta.update({"driver": "GTiff",
                     "height": out_img.shape[1],
                     "width": out_img.shape[2],
                     "transform": out_transform})
    # Output name is <drain name>_<year>.tif
    out_filename = 'Drainage_analysis/datasets/land_use/'+drain_name+'_'+str(year)+'.tif' #create a file name to export to
    print(out_filename)
    # Save clipped land cover coverage THIS WILL OVERWRITE FILES
    with rso.open(out_filename, 'w', **out_meta) as out_tiff:
      out_tiff.write(np.squeeze(out_img, 0), 1) #drop the band axis and write as band 1
    collection.append(out_img)

0                West Hartley
1              Sand Run Gulch
2                 Eagle Drain
3                 Mason Drain
4                 Mason Creek
5          East Hartley Drain
6                Willow Creek
7                Conway Gulch
8                ThurmanDrain
9             North Middleton
10            South Middleton
11               Indian Creek
12    Drainage District No. 3
13         Fifteen Mile Creek
14                Dixie drain
Name: Name, dtype: object
Drainage_analysis/datasets/land_use/West Hartley_1987.tif
Drainage_analysis/datasets/land_use/West Hartley_1986.tif
Drainage_analysis/datasets/land_use/West Hartley_1989.tif
Drainage_analysis/datasets/land_use/West Hartley_1988.tif
Drainage_analysis/datasets/land_use/West Hartley_1990.tif
Drainage_analysis/datasets/land_use/West Hartley_1992.tif
Drainage_analysis/datasets/land_use/West Hartley_1991.tif
Drainage_analysis/datasets/land_use/West Hartley_1993.tif
Drainage_analysis/datasets/land_use/West Hartley_1994.tif
Dr

In [None]:
## ---------------------------------------- ##
## Import multiple rasters into PyLandStats ##
## ---------------------------------------- ##
# Build one SpatioTemporalAnalysis per drain from its clipped yearly rasters.
years = np.arange(1987,2021)
temporal_group = []
for drain_name in names:
  # Build the path for each year explicitly so that raster k is guaranteed to
  # belong to years[k]; a glob would also pick up any extra clipped raster on
  # disk (e.g. a year outside 1987-2020) and shift or break the date labels.
  files = ['Drainage_analysis/datasets/land_use/'+drain_name+'_'+str(year)+'.tif' for year in years]
  missing = [path for path in files if not os.path.exists(path)]
  if missing:
    raise FileNotFoundError('Missing clipped rasters for ' + drain_name + ': ' + ', '.join(missing))
  sta = SpatioTemporalAnalysis(files, dates=years, nodata=0) #import all clipped rasters; 0 is passed as nodata
  temporal_group.append(sta)

In [None]:
# ------------------------------- #
# CALCULATE THE CLASS PROPORTIONS #
# ------------------------------- #
# For every drain, compute the 'proportion_of_landscape' class metric over
# time, export it to <drain>_prop.csv and keep the frame in `proportions`.

proportions = []

for idx in range(len(names)):
  drain_sta = temporal_group[idx]
  class_df = drain_sta.compute_class_metrics_df(metrics=['proportion_of_landscape'])
  csv_path = 'Drainage_analysis/datasets/land_use/proportions/' + names[idx] + '_prop.csv'
  class_df.to_csv(csv_path)
  proportions.append(class_df)

In [None]:
# ------------------------------- #
# CALCULATE CONFIGURATION METRICS #
# ------------------------------- #
# For every drain, compute the landscape-level 'contagion' and
# 'largest_patch_index' metrics over time, export them to
# <drain>_configuration.csv and keep the frame in `config`.

config = []

for idx in range(len(names)):
  drain_sta = temporal_group[idx]
  landscape_df = drain_sta.compute_landscape_metrics_df(metrics=['contagion', 'largest_patch_index'])
  csv_path = 'Drainage_analysis/datasets/land_use/configuration/' + names[idx] + '_configuration.csv'
  landscape_df.to_csv(csv_path)
  config.append(landscape_df)

In [None]:
# ----------------------------------------------------------------- #
# Put class proportions in the same format as configuration metrics #
# ----------------------------------------------------------------- #

# Import csv files into a list of dataframes

files = sorted(glob.glob('Drainage_analysis/datasets/land_use/proportions/*_prop.csv'))
files_config = sorted(glob.glob('Drainage_analysis/datasets/land_use/configuration/*_configuration.csv'))
names = list(sorted(shp_file['Name']))

proportions = []
for i in files:
  data = pd.read_csv(i)
  proportions.append(data)
config = []
for i in files_config:
  data = pd.read_csv(i)
  config.append(data)

#Create new dataframes in same format as configuration metrics

new_df = []
for i in range(len(proportions)):
  df = pd.DataFrame(years, columns=['dates'])
  prop = proportions[i]
  df['DrainName'] = names[i]
  df['class1_urban'] = prop['proportion_of_landscape'][prop['class_val'] == 1]
  df['class2_crops'] = prop['proportion_of_landscape'][prop['class_val'] == 2].values
  df = df.fillna(0)
  new_df.append(df)


In [None]:
## ------------------------------------------ ##
## CALCULATE CHANGE IN URBAN AREA FOR MAPPING ##
## ------------------------------------------ ##
# Stack all drains and, per drain, take the spread (max - min) of the class 1
# (urban) proportion over the whole period.

prop = pd.concat(new_df)

# Group with DrainName as the index and move it back to a column once with
# reset_index(); mixing as_index=False with reset_index() can add a stray
# 'index' column depending on the pandas version.
change = (
    prop.groupby('DrainName')['class1_urban']
    .agg(['min', 'max'])
    .reset_index()
    .fillna(0)
)
change['urb_change'] = change['max']-change['min']
change.to_csv('Drainage_analysis/datasets/land_use/proportions/change.csv')

In [None]:
## -------------------- ##
## MERGE TWO DATAFRAMES ##
## -------------------- ##

merged = []

for i in range(len(new_df)):
  df = new_df[i]
  con = config[i]
  df_merge = df.merge(con, on='dates', how='left')
  display(df_merge)
  df_merge.to_csv('Drainage_analysis/datasets/output_files/land_cover/'+names[i]+'.csv')
  merged.append(df_merge)