In [1]:
import os
import pylandstats as pls
import geopandas as gpd
import pandas as pd
import time
from IPython.display import clear_output
import fiona
import rasterio
import rasterio.mask

# Working directory at the time this cell runs; the park folder paths built
# in the cells below are joined onto it.
wd = os.getcwd()



In [2]:
# File-name endings of the rasters to process. Kept as a tuple so it can be
# handed directly to str.endswith(), which accepts a tuple of suffixes.
propVars = tuple(
    name + ".tif"
    for name in ("HMM", "disturbance", "fragstats", "nightlights")
)

In [3]:
# Preview cell: list the rasters of the first park folder only, so the file
# naming can be inspected before the full run further down.
d = 'outputs'
parkFolders = [os.path.join(wd, d, o) for o in os.listdir(d)
                    if os.path.isdir(os.path.join(d,o))]
parkFolders = parkFolders[0:1]  # first folder only
hmmDfs = []
files = []

for park in parkFolders:
    # os.path.basename splits on whatever separator the platform uses; the
    # previous rsplit("\\") only worked with Windows-style paths.
    parkName = os.path.basename(park)
    subzone = os.path.join(park, parkName + "_PACE_Subzones.shp")
    ppa_gpe = os.path.join(park, parkName + "_PACE.shp")
    rasterLoc = os.path.join(park, "rasters")
    # keep only the rasters whose name ends with one of the propVars suffixes
    for file in os.listdir(rasterLoc):
        if file.endswith(propVars):
            files.append(file)


In [4]:
# Full paths of the first three rasters collected by the preview loop.
# The variable names are only labels: the values are simply files[0..2],
# whatever kind of raster those happen to be.
dist, frag, hmm = [os.path.join(rasterLoc, files[idx]) for idx in range(3)]
print(dist, "\n", frag, "\n", hmm)

C:\Users\evanmuis.stu\Sync\Masters\Data\outputs\AdamsLakeMarineParkPoplarPointSite\rasters\AdamsLakeMarineParkPoplarPointSite_PACE-1985-disturbance.tif 
 C:\Users\evanmuis.stu\Sync\Masters\Data\outputs\AdamsLakeMarineParkPoplarPointSite\rasters\AdamsLakeMarineParkPoplarPointSite_PACE-1986-disturbance.tif 
 C:\Users\evanmuis.stu\Sync\Masters\Data\outputs\AdamsLakeMarineParkPoplarPointSite\rasters\AdamsLakeMarineParkPoplarPointSite_PACE-1987-disturbance.tif


# function for working with VLCE and disturbance data to get proportions of total area

In [5]:
def propGen(fName, zones, park=None, var=None, year=None):
    """Compute per-zone class proportions and total area for one raster.

    Runs a pylandstats ZonalAnalysis of ``fName`` over the zones in ``zones``
    (indexed by their ``ppa_gpe`` column) and tags every row with the park,
    variable and year the raster belongs to.

    The tags used to be read from notebook-level variables, so a cell that
    forgot to set them silently stamped rows with values left over from an
    earlier cell. They are now derived from the raster path itself, which in
    this notebook has the form ``<park>/rasters/<name>-<year>-<var>.tif``.

    Parameters
    ----------
    fName : str
        Path to the raster file.
    zones : str
        Path to the zone shapefile; must contain a ``ppa_gpe`` column.
    park, var, year : optional
        Explicit tags. Pass them when ``fName`` does not follow the layout
        above (for example a temporary clipped raster).

    Returns
    -------
    pandas.DataFrame
        Class metrics (``proportion_of_landscape``, ``total_area``) with
        added ``park``, ``var`` and ``year`` columns.

    Raises
    ------
    ValueError
        If a tag was not given and cannot be derived from ``fName``.
    """
    if park is None:
        # <park>/rasters/<file>  ->  <park>
        park = os.path.basename(os.path.dirname(os.path.dirname(fName)))
        if not park:
            raise ValueError("cannot derive park name from %r; pass park=" % (fName,))
    if var is None or year is None:
        # same convention the notebook uses elsewhere: text before the first
        # dot, split on "-"; year is the second piece, var the last one
        nameParts = os.path.basename(fName).split(".")[0].split("-")
        if len(nameParts) < 2:
            raise ValueError("cannot derive year/var from %r; pass var= and year=" % (fName,))
        if year is None:
            year = nameParts[1]
        if var is None:
            var = nameParts[-1]

    za = pls.ZonalAnalysis(fName, masks = zones, masks_index_col = 'ppa_gpe')
    # fillna = False keeps missing values as NaN; callers fill them when writing CSVs
    class_metrics_df = za.compute_class_metrics_df(metrics = ['proportion_of_landscape', 'total_area'], fillna = False)
    class_metrics_df["park"] = park
    class_metrics_df["var"] = var
    class_metrics_df["year"] = year
    return class_metrics_df

In [6]:
def dissolve_ppa_gpe(shapefile):
    """Dissolve a shapefile by its ``ppa_gpe`` column and save the result.

    The output is written next to the input with ``_dissolved`` inserted
    before the last four characters of the path (the ``.shp`` extension).

    Parameters
    ----------
    shapefile : str
        Path to the input shapefile; must contain a ``ppa_gpe`` column.

    Returns
    -------
    str
        Path of the dissolved shapefile that was written.
    """
    zones = gpd.read_file(shapefile)
    merged = zones.dissolve(by = "ppa_gpe")
    # dissolve() leaves ppa_gpe in the index; turn it back into a column
    merged = merged.reset_index(level = 0)

    stem, extension = shapefile[:-4], shapefile[-4:]
    out_path = stem + "_dissolved" + extension
    merged.to_file(out_path)
    return out_path

# function for working with reclassified VLCE data to get fragstats

In [7]:
def fragGen(fName, zones, park=None, var=None, year=None):
    """Compute per-subzone class metrics for one raster and attach subzone attributes.

    Runs a pylandstats ZonalAnalysis of ``fName`` over the zones in ``zones``
    (indexed by their ``myFID`` column), tags every row with park, variable
    and year, and merges in the subzone attribute columns from the shapefile.

    The tags used to be read from notebook-level variables, so a cell that
    forgot to set them silently stamped rows with values left over from an
    earlier cell. They are now derived from the raster path itself, which in
    this notebook has the form ``<park>/rasters/<name>-<year>-<var>.tif``.

    Parameters
    ----------
    fName : str
        Path to the raster file.
    zones : str
        Path to the subzone shapefile; must contain the columns ``myFID``,
        ``ppa_gpe``, ``ZONE``, ``SUBZONE``, ``VARIANT``, ``PHASE`` and
        ``NATURAL_DI``.
    park, var, year : optional
        Explicit tags. Pass them when ``fName`` does not follow the layout
        above (for example a temporary clipped raster).

    Returns
    -------
    pandas.DataFrame
        Class metrics (``proportion_of_landscape``, ``total_area``,
        ``number_of_patches``, ``area_mn``) with ``park``, ``var``, ``year``
        and the subzone attribute columns.

    Raises
    ------
    ValueError
        If a tag was not given and cannot be derived from ``fName``.
    """
    if park is None:
        # <park>/rasters/<file>  ->  <park>
        park = os.path.basename(os.path.dirname(os.path.dirname(fName)))
        if not park:
            raise ValueError("cannot derive park name from %r; pass park=" % (fName,))
    if var is None or year is None:
        # same convention the notebook uses elsewhere: text before the first
        # dot, split on "-"; year is the second piece, var the last one
        nameParts = os.path.basename(fName).split(".")[0].split("-")
        if len(nameParts) < 2:
            raise ValueError("cannot derive year/var from %r; pass var= and year=" % (fName,))
        if year is None:
            year = nameParts[1]
        if var is None:
            var = nameParts[-1]

    #create df with subzone information
    df = gpd.read_file(zones)
    newDf = df[["myFID", "ppa_gpe", "ZONE", "SUBZONE", "VARIANT", "PHASE", "NATURAL_DI"]]

    #run pylandstats, adding park, variable, year, and subzone information
    za = pls.ZonalAnalysis(fName, masks = zones, masks_index_col = 'myFID')
    # fillna = False keeps missing values as NaN; callers fill them when writing CSVs
    class_metrics_df = za.compute_class_metrics_df(metrics = ['proportion_of_landscape', "total_area", "number_of_patches", "area_mn"], fillna = False)
    class_metrics_df["park"] = park
    class_metrics_df["var"] = var
    class_metrics_df["year"] = year
    # move the index levels into columns so they can take part in the merge
    class_metrics_df = class_metrics_df.reset_index()
    # no `on=` given: pandas joins on every column name the two frames share
    # (presumably just myFID - verify against the pylandstats index names)
    class_metrics_df = pd.merge(class_metrics_df, newDf)
    return class_metrics_df

# creating csv files

In [43]:
# Index of the next park folder to process; the processing loop slices the
# folder list with it so an interrupted run can be resumed.
num_done = 0

# One list of result frames per output table.
hmmDfs, distDfs, fragDfs, nightDfs = [], [], [], []
fragSubzoneDfs, distDfSubzones = [], []

# Paths of HMM rasters whose processing raised an error.
broken_hmm = []

In [51]:
# Manual resume step: bump the counter so the processing loop, which iterates
# parkFolders[num_done:], starts one folder further along (presumably run by
# hand to step over a park that failed).
num_done += 1

In [53]:
d = 'outputs'
parkFolders = [os.path.join(wd, d, o) for o in os.listdir(d)
                    if os.path.isdir(os.path.join(d,o))]

# parkFolders[num_done:] lets a re-run of this cell pick up at the park where
# the previous run stopped; num_done is advanced once per finished folder.
for park in parkFolders[num_done:]:
    rasterLoc = os.path.join(park, "rasters")
    if os.path.isdir(rasterLoc):
        # os.path.basename splits on the platform's separator; the previous
        # rsplit("\\") only worked with Windows-style paths
        parkName = os.path.basename(park)
        subzone = os.path.join(park, parkName + "_PACE_Subzones.shp")
        ppa_gpe = os.path.join(park, parkName + "_PACE.shp")
        ppa_gpe = dissolve_ppa_gpe(ppa_gpe)

        tifs = [name for name in os.listdir(rasterLoc) if name.endswith(propVars)]
        for j, file in enumerate(tifs, start = 1):
            clear_output(wait = True)
            print(str(num_done + 1), "/", len(parkFolders), "folders", str(j), "/", len(tifs), "files:", file)

            fileLoc = os.path.join(rasterLoc, file)
            print(fileLoc)
            print(ppa_gpe)

            # Set year/var for THIS raster. They were never assigned in this
            # cell, so helpers that stamp rows from the notebook-level
            # `year`/`var` used whatever another cell had left behind (or
            # failed with NameError on a fresh kernel).
            # File names look like <park>_PACE-<year>-<var>.tif
            nameParts = file.split(".")[0].split("-")
            year = nameParts[1]
            var = nameParts[-1]

            if file.endswith(propVars[0]): #vlce data
                try:
                    hmmDf = propGen(fileLoc, ppa_gpe)
                    hmmDfs.append(hmmDf)
                except Exception:
                    # best effort: remember the raster and carry on. Catching
                    # Exception (not a bare except) keeps Ctrl-C / kernel
                    # interrupt working.
                    broken_hmm.append(fileLoc)
            if file.endswith(propVars[1]): #disturbance data
                distDf = propGen(fileLoc, ppa_gpe)
                distDfs.append(distDf)

                distDfSubzone = fragGen(fileLoc, subzone)
                distDfSubzones.append(distDfSubzone)

            if file.endswith(propVars[2]): #fragstats
                fragDf = propGen(fileLoc, ppa_gpe)
                fragDfs.append(fragDf)
                fragSubzoneDf = fragGen(fileLoc, subzone)
                fragSubzoneDfs.append(fragSubzoneDf)

            if file.endswith(propVars[3]): #nightlights
                nightDf = propGen(fileLoc, ppa_gpe)
                nightDfs.append(nightDf)

    num_done += 1

# Write one CSV per result table. pd.concat raises on an empty list, so an
# empty table is reported and skipped instead of aborting the other writes.
os.makedirs("outputCsvs", exist_ok = True)
csvOutputs = [
    (hmmDfs, "outputCsvs/vlce.csv"),
    (distDfs, "outputCsvs/disturbance.csv"),
    (distDfSubzones, "outputCsvs/disturbance_subzones.csv"),
    (fragSubzoneDfs, "outputCsvs/fragSubzones.csv"),
    (nightDfs, "outputCsvs/nightlights.csv"),
    (fragDfs, "outputCsvs/fragPpaGpe.csv"),
]
for frames, csvPath in csvOutputs:
    if frames:
        pd.concat(frames).fillna(0).to_csv(csvPath)
    else:
        print("no results for", csvPath, "- not written")

if broken_hmm:
    print(len(broken_hmm), "HMM rasters failed; paths are in broken_hmm")
print("done")

979 / 979 folders 37 / 37 files: ZumtelaBayConservancy_PACE-2018-disturbance.tif
C:\Users\evanmuis.stu\Sync\Masters\Data\outputs\ZumtelaBayConservancy\rasters\ZumtelaBayConservancy_PACE-2018-disturbance.tif
C:\Users\evanmuis.stu\Sync\Masters\Data\outputs\ZumtelaBayConservancy\ZumtelaBayConservancy_PACE_dissolved.shp
done


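# Parks that broke during the main run
- elephanthill: Marti is fixing it
- RM: size issues
- Spats: size issues
- Tweeds: size issues

The three parks with size issues are processed separately in the cells below.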

In [13]:
def subzone_split(pace_subzone):
    """Split a subzone shapefile into separate PPA and GPE shapefiles.

    Rows are selected by the value of the ``ppa_gpe`` column, reduced to a
    fixed set of columns (chosen by position in the input file) and written
    to ``scratch/``.

    Parameters
    ----------
    pace_subzone : str
        Path to the subzone shapefile; must contain a ``ppa_gpe`` column and
        at least 34 columns.

    Returns
    -------
    tuple of str
        ``(ppa_path, gpe_path)`` - the two shapefiles that were written.
    """
    zones = gpd.read_file(pace_subzone)

    # columns to keep, picked by their position in the input file
    wanted = zones.columns[[0, 1, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 26, 32, 33]]

    targets = (
        ("PPA", "scratch/ppa_subzones.shp"),
        ("GPE", "scratch/gpe_subzones.shp"),
    )
    written_paths = []
    for label, out_path in targets:
        subset = zones[zones.ppa_gpe == label]
        subset = subset[subset.columns.intersection(wanted)]
        subset.to_file(out_path)
        written_paths.append(out_path)

    return tuple(written_paths)

In [18]:
def clip_subzones(shapefile_path, raster_path):
    """Clip a raster to the geometries of a shapefile and save it under scratch/.

    The output name depends on the shapefile path: a path containing ``"ppa"``
    is written to ``scratch/ppa_raster.tif`` and one containing ``"gpe"`` to
    ``scratch/gpe_raster.tif`` (if both occur, ``"gpe"`` wins).

    Parameters
    ----------
    shapefile_path : str
        Path to the shapefile whose geometries define the clip area.
    raster_path : str
        Path to the raster to clip.

    Returns
    -------
    str
        Path of the clipped raster that was written.

    Raises
    ------
    ValueError
        If ``shapefile_path`` contains neither ``"ppa"`` nor ``"gpe"``.
    """
    # Choose the output path first: previously an unrecognised shapefile name
    # only surfaced as an UnboundLocalError after the expensive clip had run.
    save_location = None
    if "ppa" in shapefile_path:
        save_location = "scratch/ppa_raster.tif"
    if "gpe" in shapefile_path:
        save_location = "scratch/gpe_raster.tif"
    if save_location is None:
        raise ValueError(
            "shapefile_path must contain 'ppa' or 'gpe' to choose an output name: %r"
            % (shapefile_path,)
        )

    with fiona.open(shapefile_path) as shapefile:
        shapes = [feature["geometry"] for feature in shapefile]

    with rasterio.open(raster_path) as src:
        # crop=True shrinks the output to the extent of the shapes
        out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True)
        out_meta = src.meta

    # the clipped image has a new size and georeferencing; out_image is
    # indexed (band, row, col)
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})

    with rasterio.open(save_location, "w", **out_meta) as dest:
        dest.write(out_image)

    return save_location

In [10]:
# Parks that could not be handled by the main run and are processed on their own.
broken_parks = ["RM", "Spats", "Tweeds"]
# Folder of each park, and the "rasters" folder inside it.
park_paths = [os.path.join("outputs", name) for name in broken_parks]
raster_paths = [os.path.join("outputs", name, "rasters") for name in broken_parks]

In [11]:
# One list of result frames per output table for the large parks.
large_hmmDfs, large_distDfs, large_fragDfs, large_nightDfs = [], [], [], []
large_distDfsSubzone, large_fragDfsSubzone = [], []

# Resume counters for the large-park loop: parks already finished, and
# rasters already finished within the current park.
num_done, j = 0, 0

In [31]:
# Process the large parks one at a time. Both counters support resuming after
# an interruption: num_done skips finished parks, j skips finished rasters of
# the park that was in progress.
for parkName in broken_parks[num_done:]:

    #parkName = "Spats"
    park_path = os.path.join("outputs", parkName)
    raster_location = os.path.join(park_path, "rasters")

    ppa_gpe = os.path.join(park_path, parkName + "_PACE.shp")
    subzone = os.path.join(park_path, parkName + "_PACE_Subzones.shp")
    ppa_gpe = dissolve_ppa_gpe(ppa_gpe)

    #split_names = subzone_split(subzone)
    #ppa_subzones = split_names[0]
    #gpe_subzones = split_names[1]

    tifs = []
    for file in os.listdir(raster_location):
        if file.endswith(propVars):
            tifs.append(os.path.join(raster_location, file))

    for file in tifs[j:]:

        # File names look like <park>_PACE-<year>-<var>.tif.
        # os.path.basename splits on the platform's separator; the previous
        # split("\\") only worked with Windows-style paths.
        nameParts = os.path.basename(file).split(".")[0].split("-")
        year = nameParts[1]
        var = nameParts[-1]

        clear_output(wait = True)
        print(str(num_done + 1), "/", len(broken_parks), "folders", str(j + 1), "/", len(tifs), "files:", file)
        print(parkName, year, var)
        print(ppa_gpe)
        print(subzone)
        print(file)


        if file.endswith(propVars[0]): #vlce
            hmmDf = propGen(file, ppa_gpe)
            large_hmmDfs.append(hmmDf)

        if file.endswith(propVars[1]): #disturbance
            distDf = propGen(file, ppa_gpe)
            large_distDfs.append(distDf)
            print("done disturbance all")

            # Subzone-level processing is disabled for the large parks; the
            # steps are kept here for reference.
            #ppa_raster = clip_subzones(ppa_subzones, file)

            #print("working on subzone ppa")
            #distDfSubzone = fragGen(ppa_raster, ppa_subzones)
            #large_distDfsSubzone.append(distDfSubzone)
            #print("done disturbance ppa")

            #gpe_raster = clip_subzones(gpe_subzones, file)

            #print("working on subzone gpe")
            #distDfSubzone = fragGen(gpe_raster, gpe_subzones)
            #large_distDfsSubzone.append(distDfSubzone)
            #print("done disturbance gpe")

        if file.endswith(propVars[2]): #fragstats
            fragDf = propGen(file, ppa_gpe)
            large_fragDfs.append(fragDf)
            print("done frags all")

            # Disabled, as above. The append lines previously named
            # distDfSubzone, which would have stored disturbance results in
            # the fragstats list; corrected to fragDfSubzone.
            #ppa_raster = clip_subzones(ppa_subzones, file)
            #print("working on subzone ppa")
            #fragDfSubzone = fragGen(ppa_raster, ppa_subzones)
            #large_fragDfsSubzone.append(fragDfSubzone)
            #print("done frags ppa gpe")

            #gpe_raster = clip_subzones(gpe_subzones, file)
            #print("working on subzone")
            #fragDfSubzone = fragGen(gpe_raster, gpe_subzones)
            #large_fragDfsSubzone.append(fragDfSubzone)
            #print("done frags gpe")

        if file.endswith(propVars[3]): #nightlights
            nightDf = propGen(file, ppa_gpe)
            large_nightDfs.append(nightDf)
        j += 1
    num_done += 1
    # next park starts from its first raster again
    j = 0
    clear_output(wait = True)

print("done")

3 / 3 folders 37 / 37 files: outputs\Tweeds\rasters\Tweeds_PACE-2018-disturbance.tif
Tweeds 2018 disturbance
outputs\Tweeds\Tweeds_PACE_dissolved.shp
outputs\Tweeds\Tweeds_PACE_Subzones.shp
outputs\Tweeds\rasters\Tweeds_PACE-2018-disturbance.tif
done disturbance all


In [32]:
# Write one CSV per result table for the large parks. pd.concat raises on an
# empty list, so an empty table is reported and skipped instead of aborting
# the remaining writes.
os.makedirs("outputCsvs", exist_ok = True)
largeCsvOutputs = [
    (large_hmmDfs, "outputCsvs/large_vlce.csv"),
    (large_distDfs, "outputCsvs/large_disturbance.csv"),
    # subzone tables are not produced for the large parks
    #(large_distDfsSubzone, "outputCsvs/large_disturbance_subzones.csv"),
    #(large_fragDfsSubzone, "outputCsvs/large_fragSubzones.csv"),
    (large_nightDfs, "outputCsvs/large_nightlights.csv"),
    (large_fragDfs, "outputCsvs/large_fragPpaGpe.csv"),
]
for largeFrames, largeCsvPath in largeCsvOutputs:
    if largeFrames:
        pd.concat(largeFrames).fillna(0).to_csv(largeCsvPath)
    else:
        print("no results for", largeCsvPath, "- not written")