In [None]:
#Assessing the 10-20-30 rule using CUFI and management polygons for San Jose's
#urban forest

#04/21/2022

#Cami Pawlak, Cam Gonsalves, Kylee Neilson, Olivia Ross, Tricia Nguyen

##First, make sure you have geopandas installed, and are in an environment with it
import geopandas as gpd
import fiona
import pandas as pd
import numpy as np

#show every column when a DataFrame is displayed (None removes the column limit)
pd.options.display.max_columns = None


In [None]:
## Set the path to the polygon layer for the scale you want to assess and the path to CUFI
## (the polygon layer is expected to be in EPSG 4326 / GCS_WGS_1984)
path_to_CUFI = "YOUR PATH HERE/ SCC_trees_v3.shp"
path_to_polygon = "YOUR PATH HERE/ YOUR FILENAME HERE"

## Load your polygon
poly_gdf = gpd.read_file(path_to_polygon, driver='shapefile')

## Load the CUFI
CUFI = gpd.read_file(path_to_CUFI, driver='shapefile')

# The spatial join compares raw coordinates, so both layers must share a CRS.
# Reproject the tree points (rather than the polygons) so the polygon layer that is
# exported later keeps the CRS it was loaded with. Layers without a CRS are left
# untouched; geopandas will warn about the mismatch during the join.
if CUFI.crs is not None and poly_gdf.crs is not None and CUFI.crs != poly_gdf.crs:
    CUFI = CUFI.to_crs(poly_gdf.crs)

# Spatially join the points to the polygons.
# how='inner' keeps only the trees that fall within a polygon, so no row is left
# with a missing polygon identifier.
# `predicate` is the current name of the keyword formerly called `op`
# (NOTE: requires geopandas >= 0.10).
pointInPoly = gpd.sjoin(CUFI, poly_gdf, how='inner', predicate='within')

In [None]:
#preview the first row of the joined table
pointInPoly.head(n=1)

In [None]:
#important step: copy the identifier of your polygon (e.g. zip_code or a county name column)
#into a column named 'identifier' on both the joined points and the polygon layer
for frame in (pointInPoly, poly_gdf):
    frame['identifier'] = frame['zip_code']

In [None]:
#count the non-null 'V1' entries in each polygon; transform broadcasts the group count
#back onto every row so it can be stored as a column
pointInPoly['TotalTrees'] = pointInPoly.groupby('identifier')['V1'].transform('count')

#repeat the count within each polygon for every taxonomic rank (species, genus, family)
rank_to_count_column = [
    ('species', 'CountSpecies'),
    ('genus', 'CountGenus'),
    ('family', 'CountFamilies'),
]
for rank_column, count_column in rank_to_count_column:
    grouped_by_rank = pointInPoly.groupby(['identifier', rank_column])
    pointInPoly[count_column] = grouped_by_rank['V1'].transform('count')

In [None]:
#preview the first row, now including the count columns
pointInPoly.head(n=1)

In [None]:
#keep the polygon identifier, the taxonomy columns and the count columns
summary_columns = [
    'identifier',
    'species', 'genus', 'family',
    'TotalTrees', 'CountSpecies', 'CountGenus', 'CountFamilies',
]
subset = pointInPoly[summary_columns]
#collapse repeated rows so each distinct combination of the columns above appears once
summary_df = subset.drop_duplicates()

In [None]:
#preview the first row of the de-duplicated summary
summary_df.head(n=1)

In [None]:
#Calculate the percent of TotalTrees that each row's species, genus, and family count makes up.
#.assign returns a new DataFrame instead of writing columns into a frame that was derived
#from a slice, so pandas does not raise SettingWithCopyWarning here.
summary_df = summary_df.assign(
    perc_sp=(summary_df['CountSpecies'] / summary_df['TotalTrees']) * 100,
    perc_gen=(summary_df['CountGenus'] / summary_df['TotalTrees']) * 100,
    perc_fam=(summary_df['CountFamilies'] / summary_df['TotalTrees']) * 100,
)

In [None]:
#preview the first five rows (head() defaults to 5) with the percentage columns
summary_df.head()

In [None]:
#Create definitions for violating the 10, 20 and 30 rule and all three
def sp_violate(row):
    """Flag a row whose species share breaks the 10% rule.

    Reads row['perc_sp'] and returns the string '1' when it is above 10,
    the string '0' when it is 10 or below, and None when neither comparison
    holds (e.g. the value is NaN).
    """
    species_share = row['perc_sp']
    if species_share > 10:
        return '1'
    if species_share <= 10:
        return '0'
    return None
    
def gen_violate(row):
    """Flag a row whose genus share breaks the 20% rule.

    Reads row['perc_gen'] (the genus percentage, not the species percentage)
    and returns the string '1' when it is above 20, the string '0' when it is
    20 or below, and None when neither comparison holds (e.g. the value is NaN).
    """
    genus_share = row['perc_gen']
    if genus_share > 20:
        return '1'
    elif genus_share <= 20:
        return '0'
    # NaN compares False both ways; keep returning None so the caller's
    # pd.to_numeric turns it into a missing value.
    return None
    
def fam_violate(row):
    """Flag a row whose family share breaks the 30% rule.

    Reads row['perc_fam'] (the family percentage, not the species percentage)
    and returns the string '1' when it is above 30, the string '0' when it is
    30 or below, and None when neither comparison holds (e.g. the value is NaN).
    """
    family_share = row['perc_fam']
    if family_share > 30:
        return '1'
    elif family_share <= 30:
        return '0'
    # NaN compares False both ways; keep returning None so the caller's
    # pd.to_numeric turns it into a missing value.
    return None

In [None]:
#Add one flag column per rule: 1 means the rule is broken on that row, 0 means it is not.
#Each rule function is applied row by row and its string result is converted to a number.
rule_checks = (
    ('sp_violate', sp_violate),
    ('gen_violate', gen_violate),
    ('fam_violate', fam_violate),
)
for flag_column, rule_check in rule_checks:
    summary_df[flag_column] = summary_df.apply(rule_check, axis=1)
    summary_df[flag_column] = pd.to_numeric(summary_df[flag_column])

In [None]:
#flag rows on which all three rules are broken at once
def tentwentythirty_violate(row):
    """Return 1 when sp_violate, gen_violate and fam_violate all equal 1 on the row, else 0.

    The flags are read lazily in that order, so the check stops at the first
    flag that is not equal to 1.
    """
    flag_columns = ('sp_violate', 'gen_violate', 'fam_violate')
    every_rule_broken = all(row[column] == 1 for column in flag_columns)
    return 1 if every_rule_broken else 0
    
#evaluate the combined rule on every row and store the result as a numeric column
combined_flags = summary_df.apply(tentwentythirty_violate, axis=1)
summary_df['102030_violate'] = combined_flags
summary_df['102030_violate'] = pd.to_numeric(summary_df['102030_violate'])

In [None]:
#Optionally write the summary table to disk so you can check later which rows violate which rule
#(the DataFrame index is written too, since index=False is not passed)
csv_output_path = 'YOUR PATH HERE/ YOUR FILENAME HERE.csv'
summary_df.to_csv(csv_output_path)

In [None]:
#total each flag column per polygon; every result is a one-column DataFrame indexed by 'identifier'
violations_by_polygon = summary_df.groupby('identifier')
df1 = violations_by_polygon['sp_violate'].sum().to_frame()
df2 = violations_by_polygon['gen_violate'].sum().to_frame()
df3 = violations_by_polygon['fam_violate'].sum().to_frame()
df4 = violations_by_polygon['102030_violate'].sum().to_frame()

In [None]:
#gather the four per-polygon totals into one table, left-joining each onto the species totals
df5 = df1
for violation_totals in (df2, df3, df4):
    df5 = df5.join(violation_totals, on='identifier', how='left')

In [None]:
#attach the per-polygon violation totals to the polygon layer by matching the polygons'
#'identifier' column against the index of df5; the inner join drops polygons with no match
polygon_with_102030 = poly_gdf.join(
    other=df5,
    on='identifier',
    how='inner',
)

In [None]:
#write the polygons together with their violation totals to a shapefile
#NOTE(review): the shapefile format limits field names to 10 characters, so longer column
#names (e.g. '102030_violate') will presumably be truncated on write - confirm the output fields
shapefile_output_path = 'YOUR PATH HERE/ YOUR FILENAME HERE.shp'
polygon_with_102030.to_file(shapefile_output_path)