In [137]:
import geopandas as gpd
import os
import glob
import shutil
import shapely

# Functions

Functions that automate certain steps in checking and updating the data inventory.

In [49]:
### Function to check whether each data file exists and, if so, copy it into a new subdirectory and mark it as downloaded in the spreadsheet
### This WILL NOT automatically save the updated dataframe to a csv!!
### This WILL update the in-memory df with codes in the `Downloaded` column
    ### `Y` : downloaded    `N` : not downloaded
def check_for_files(submodel_df, codes, axiom_dir, submodel_dir):
    """Copy each code's GeoPackage files into ``submodel_dir`` and flag the inventory.

    For every entry of ``codes`` the function looks in ``axiom_dir`` for
    ``<code>_raw.gpkg`` (raw data) and ``<code>.gpkg`` (processed data), copies
    whichever of them exist into ``submodel_dir``, and records the outcome in
    the ``Downloaded`` column of the rows whose ``UNIQUE_ID`` equals the code:

    * ``"Y"`` - the raw file was found (and copied)
    * ``"N"`` - the raw file was not found, or the code is ``"NA"``

    Only the raw file decides the flag; a processed file is copied when
    present but never changes ``Downloaded``.

    Parameters
    ----------
    submodel_df : pandas.DataFrame
        Inventory table with a ``UNIQUE_ID`` column. It is modified in place
        and is NOT written back to disk.
    codes : iterable of str
        Codes to look up, e.g. ``submodel_df["UNIQUE_ID"].unique()``.
    axiom_dir : str
        Directory holding the source ``.gpkg`` files. A trailing slash is
        optional.
    submodel_dir : str
        Directory the files are copied into. It is created on first copy if
        it does not exist yet.

    Returns
    -------
    None
    """
    for code in codes:
        rows = submodel_df["UNIQUE_ID"] == code

        # Rows without a real code have nothing to look up.
        if code == "NA":
            submodel_df.loc[rows, "Downloaded"] = "N"
            continue

        # Build exact paths instead of glob patterns: os.path.join does not
        # depend on a trailing slash, and an exact-path test cannot be fooled
        # by glob metacharacters ("[", "*", "?") inside a code.
        raw_path = os.path.join(axiom_dir, f"{code}_raw.gpkg")
        processed_path = os.path.join(axiom_dir, f"{code}.gpkg")

        has_raw = os.path.isfile(raw_path)
        for src, present in ((raw_path, has_raw),
                             (processed_path, os.path.isfile(processed_path))):
            if present:
                # Without an existing directory shutil.copy would either fail
                # or write a single file named like the directory.
                os.makedirs(submodel_dir, exist_ok=True)
                shutil.copy(src, submodel_dir)

        submodel_df.loc[rows, "Downloaded"] = "Y" if has_raw else "N"

In [147]:
### Function to check whether the downloaded data is empty and, if it is not empty, where it overlaps the study areas at both the regional and study-area scale ###
### This WILL NOT automatically save the updated dataframe to a csv!!
### This WILL update the `Downloaded` column with marker `E` if the data was marked as downloaded but is empty, and update the `study_region_overlap` and `study_area_overlap` columns
    ### Output in each column is a comma-separated string of the study areas or regions where there was overlap
    ### `None` : no overlap
def check_coverage(submodel_df, study_areas, codes, submodel_dir, crs=None):
    """Record where each downloaded data layer overlaps the study areas.

    For every code in ``codes`` that is not ``"NA"`` and has at least one row
    flagged ``Downloaded == "Y"``, the file ``<code>_raw.gpkg`` is read from
    ``submodel_dir``, reprojected to ``crs`` and intersected with
    ``study_areas``. The rows of ``submodel_df`` whose ``UNIQUE_ID`` equals
    the code are then updated:

    * the layer has no features: ``Downloaded`` becomes ``"E"`` and both
      overlap columns become ``"None"``
    * no intersection (or no polygon features): both overlap columns
      become ``"None"``
    * otherwise ``study_area_overlap`` / ``study_region_overlap`` receive the
      comma-joined unique ``portName`` / ``region`` values of the intersection

    Parameters
    ----------
    submodel_df : pandas.DataFrame
        Inventory table with ``UNIQUE_ID`` and ``Downloaded`` columns.
        Modified in place; NOT written back to disk.
    study_areas : geopandas.GeoDataFrame
        Study-area polygons with ``portName`` and ``region`` columns. It is
        not reprojected here, so it should already be in ``crs``.
    codes : iterable of str
        Codes to process, e.g. ``submodel_df["UNIQUE_ID"].unique()``.
    submodel_dir : str
        Directory holding the ``<code>_raw.gpkg`` files. A trailing slash is
        optional.
    crs : optional
        CRS the data layer is reprojected to before the overlay. Defaults to
        ``study_areas.crs``.

    Returns
    -------
    None
    """
    if crs is None:
        crs = study_areas.crs

    for code in codes:
        if code == "NA":
            continue

        # Mask built from the frame that was passed in (not a notebook global);
        # .any() keeps the test well-defined when a code sits on several rows.
        rows = submodel_df["UNIQUE_ID"] == code
        if not submodel_df.loc[rows, "Downloaded"].eq("Y").any():
            continue

        # Raw data, projected to the same CRS as the study-area polygons.
        layer_path = os.path.join(submodel_dir, f"{code}_raw.gpkg")
        data_layer = gpd.read_file(layer_path).to_crs(crs)

        if len(data_layer) == 0:
            # A download was attempted but the GeoPackage holds no features.
            submodel_df.loc[rows, "Downloaded"] = "E"
            submodel_df.loc[rows, "study_area_overlap"] = "None"
            submodel_df.loc[rows, "study_region_overlap"] = "None"
            continue

        # overlay() needs a single geometry family, so stray lines/points are
        # dropped. MultiPolygon belongs to the polygon family and is kept;
        # discarding it would under-report overlap.
        is_polygonal = data_layer.geometry.geom_type.isin(["Polygon", "MultiPolygon"])
        data_layer = data_layer.loc[is_polygonal]

        if len(data_layer) == 0:
            # Nothing polygonal left to intersect with the study areas.
            submodel_df.loc[rows, "study_area_overlap"] = "None"
            submodel_df.loc[rows, "study_region_overlap"] = "None"
            continue

        # NOTE(review): if the data layer has its own `portName` or `region`
        # column, overlay() suffixes the clashing names and the lookups below
        # raise KeyError.
        overlay = study_areas.overlay(data_layer, how='intersection')
        study_area_coverage = overlay["portName"].unique()    # study-site names
        study_region_coverage = overlay["region"].unique()    # region names

        if len(study_area_coverage) == 0:
            submodel_df.loc[rows, "study_area_overlap"] = "None"
            submodel_df.loc[rows, "study_region_overlap"] = "None"
        else:
            submodel_df.loc[rows, "study_area_overlap"] = ",".join(map(str, study_area_coverage))
            submodel_df.loc[rows, "study_region_overlap"] = ",".join(map(str, study_region_coverage))


                


# Examples

Examples of how to use the functions above

In [112]:
# --- Paths and settings used by the examples below ---

# Directory where the original data lives
axiom_dir = "C:/Alaska/Axiom_data/tables/"
# New subdirectory that the data should be copied into
submodel_dir = "C:/Alaska/Fisheries/"
# CRS of the study-area file
crs = "EPSG:4269"

# --- Study areas (shapefile) ---
study_areas_shp = "C:/Alaska/study_areas"
study_areas = gpd.read_file(study_areas_shp)

# --- Submodel inventory ---
# CSV version of the inventory spreadsheet. Convert the sheet to csv and make sure
# every column heading uses `_` between words; example:
# https://docs.google.com/spreadsheets/d/1F_F7vWr95jUbVkLp3WgpGT1VImV4VhzNRt2VZLnVhqM/edit?usp=sharing
fisheries_csv = "C:/Alaska/Fisheries/fisheries_spreadsheet.csv"
fisheries = gpd.read_file(fisheries_csv)  # read the csv
codes = fisheries["UNIQUE_ID"].unique()   # all unique ID values

#### Example 1: check_for_files()

Function that checks whether each file exists according to its `UNIQUE_ID` and
populates the `Downloaded` column in the df depending on whether the data exists.

In [None]:
# Run check_for_files on the fisheries submodel
check_for_files(
    submodel_df=fisheries,
    codes=codes,
    axiom_dir=axiom_dir,
    submodel_dir=submodel_dir,
)

# Bottom of the fisheries df: shows whether the `Downloaded` column was updated
fisheries.tail()

#### Example 2: check if file had data, and if data area overlaps with study areas

In [None]:
# Run check_coverage on the fisheries submodel. `crs` must be passed: the data
# layers are reprojected to it before being intersected with the study areas.
check_coverage(fisheries, study_areas, codes, submodel_dir, crs)

# Bottom of the fisheries df: shows whether `study_region_overlap` and
# `study_area_overlap` were updated
fisheries.tail()

#### Download final df as a csv file

In [11]:
# Export the df as a csv to preserve the changes. index=False keeps the pandas
# row index out of the file, so no extra unnamed column is added to the
# inventory; os.path.join works with or without a trailing slash on submodel_dir.
fisheries.to_csv(os.path.join(submodel_dir, "fisheries_test.csv"), index=False)