In [2]:
#Assessing the 10-20-30 Rules using CUFI and mangement polygons for San Jose's
#urban forest

#04/21/2022

#Cami Pawlak, Cam Gonsalves, Kylee Neilson, Olivia Ross, Tricia Nguyen

##First, make sure you have geopandas installed, and are in an environment with it
import geopandas as gpd
import fiona
import pandas as pd
import numpy as np

#change the pandas options to display all columns
pd.set_option('display.max_columns', None)


In [3]:
##Load the path to the polygon of the scale you want to assess and your path to CUFI
## make sure the polygon is EPSG 4326 (GCS_WGS_1984)
path_to_CUFI = "YOUR PATH HERE/ SCC_trees_v3.shp"
path_to_polygon = "YOUR PATH HERE/ YOUR FILENAME HERE"

## Load your polygon
poly_gdf = gpd.read_file(path_to_polygon, driver='shapefile')

## Load the CUFI
CUFI = gpd.read_file(path_to_CUFI, driver='shapefile')

#make sure that crs is the same for each file or the next step will not work

#Spatially join the polygon and points #keep only points that intersect polygon
pointInPoly = gpd.sjoin(CUFI, poly_gdf, how='left',op='within') 

In [4]:
#look at the data
pointInPoly.head(1)

Unnamed: 0,Field1,V1,id,latitude,longitude,species,family,genus,specific_e,geometry,index_right,zip_code,city,county,state_fips,state_code,state_name,fips_class,mtfcc_feat,functional,area_land_,area_water,internal_p,internal_1,internal_2
0,1,13,223887,37.37321,-122.064418,Liriodendron tulipifera,Magnoliaceae,Liriodendron,tulipifera,POINT (-122.06442 37.37321),1556,94040,"Mountain View city, Los Altos city, Palo Alto ...",Santa Clara County,6,CA,California,B5,G6350,S,9550444.0,0,37.380568,-122.085297,POINT(-122.0852971 37.3805678)


In [100]:
#important step: rename the indentifier of you polygon like zip_code or county name to a column named identifier
pointInPoly['identifier'] = pointInPoly['zip_code']
poly_gdf['identifier'] = poly_gdf['zip_code']

In [87]:
#count the total number of trees in each zip code and use transform to add as a column to the dataframe
pointInPoly['TotalTrees'] = pointInPoly.groupby('identifier')['V1'].transform('count')
#count the number of trees of each species in each zipcode and use transform to add as a column to the dataframe
pointInPoly['CountSpecies'] = pointInPoly.groupby(['identifier','species'])['V1'].transform('count')
#count the number of trees of each genera in each zipcode and use transform to add as a column to the dataframe
pointInPoly['CountGenus'] = pointInPoly.groupby(['identifier','genus'])['V1'].transform('count')
#count the number of trees of each family in each zipcode and use transform to add as a column to the dataframe
pointInPoly['CountFamilies'] = pointInPoly.groupby(['identifier','family'])['V1'].transform('count')

In [88]:
#check out the data
pointInPoly.head(1)

Unnamed: 0,Field1,V1,id,latitude,longitude,species,family,genus,specific_e,geometry,index_right,zip_code,city,county,state_fips,state_code,state_name,fips_class,mtfcc_feat,functional,area_land_,area_water,internal_p,internal_1,internal_2,TotalTrees,CountSpecies,CountGenus,CountFamilies,identifier
0,1,13,223887,37.37321,-122.064418,Liriodendron tulipifera,Magnoliaceae,Liriodendron,tulipifera,POINT (-122.06442 37.37321),1556,94040,"Mountain View city, Los Altos city, Palo Alto ...",Santa Clara County,6,CA,California,B5,G6350,S,9550444.0,0,37.380568,-122.085297,POINT(-122.0852971 37.3805678),3330,23,23,140,94040


In [89]:
#get only relevant columns
subset = pointInPoly[['identifier', 'species', 'genus', 'family', 'TotalTrees', 'CountSpecies', 'CountGenus', 'CountFamilies']]
#drop duplicates from the table
summary_df = subset.drop_duplicates()

In [90]:
#Look at the data
summary_df.head(1)

Unnamed: 0,identifier,species,genus,family,TotalTrees,CountSpecies,CountGenus,CountFamilies
0,94040,Liriodendron tulipifera,Liriodendron,Magnoliaceae,3330,23,23,140


In [91]:
#Calculate the percent that each species, genus, and family makes up of the data set
#There will be warnings when you run this but it is okay- this has been tested 
summary_df['perc_sp'] = (summary_df['CountSpecies']/summary_df['TotalTrees'])*100
summary_df['perc_gen'] =(summary_df['CountGenus']/summary_df['TotalTrees'])*100
summary_df['perc_fam'] = (summary_df['CountFamilies']/summary_df['TotalTrees'])*100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['perc_sp'] = (summary_df['CountSpecies']/summary_df['TotalTrees'])*100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['perc_gen'] =(summary_df['CountGenus']/summary_df['TotalTrees'])*100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['perc_fam'] = (summary_df['Count

In [92]:
#Look at the data
summary_df.head(5)

Unnamed: 0,identifier,species,genus,family,TotalTrees,CountSpecies,CountGenus,CountFamilies,perc_sp,perc_gen,perc_fam
0,94040,Liriodendron tulipifera,Liriodendron,Magnoliaceae,3330,23,23,140,0.690691,0.690691,4.204204
1,95014,Platanus occidentalis,Platanus,Platanaceae,17937,57,937,937,0.317779,5.223839,5.223839
2,95051,Tristaniopsis laurina,Tristaniopsis,Myrtaceae,7707,95,95,596,1.232646,1.232646,7.73323
3,95020,Pinus pinea,Pinus,Pinaceae,21445,24,404,591,0.111914,1.883889,2.755887
4,95037,Ligustrum,Ligustrum,Oleaceae,15058,50,239,886,0.332049,1.587196,5.883916


In [93]:
#Create definitions for violating the 10, 20 and 30 rule and all three
def sp_violate(row):  
    if row['perc_sp'] > 10:
        return '1'
    elif row['perc_sp'] <= 10:
        return '0'
    
def gen_violate(row):  
    if row['perc_sp'] > 20:
        return '1'
    elif row['perc_sp'] <= 20:
        return '0'
    
def fam_violate(row):  
    if row['perc_sp'] > 30:
        return '1'
    elif row['perc_sp'] <= 30:
        return '0'

In [94]:
#Create columns where 1 means the rule is being broken and 0 means it is not based on rules above
summary_df['sp_violate'] = summary_df.apply(lambda row: sp_violate(row), axis=1)
summary_df['sp_violate'] = pd.to_numeric(summary_df['sp_violate'])
summary_df['gen_violate'] = summary_df.apply(lambda row: gen_violate(row), axis=1)
summary_df['gen_violate'] = pd.to_numeric(summary_df['gen_violate'])
summary_df['fam_violate'] = summary_df.apply(lambda row: fam_violate(row), axis=1)
summary_df['fam_violate'] = pd.to_numeric(summary_df['fam_violate'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['sp_violate'] = summary_df.apply(lambda row: sp_violate(row), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['sp_violate'] = pd.to_numeric(summary_df['sp_violate'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['gen_violate'] = summary_df.apply(lambda row: 

In [95]:
#define if all three are being broken
def tentwentythirty_violate(row):  
    if row['sp_violate'] == 1 and row['gen_violate'] == 1 and row['fam_violate'] == 1:
        return 1
    return 0
    
#and apply it

summary_df['102030_violate'] = summary_df.apply(lambda row: tentwentythirty_violate(row), axis=1)
summary_df['102030_violate'] = pd.to_numeric(summary_df['102030_violate'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['102030_violate'] = summary_df.apply(lambda row: tentwentythirty_violate(row), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['102030_violate'] = pd.to_numeric(summary_df['102030_violate'])


In [118]:
#Save the csv if you want to so you can see what species violate what later
summary_df.to_csv('YOUR PATH HERE/ YOUR FILENAME HERE.csv')

In [108]:
df1 = pd.DataFrame(summary_df.groupby('identifier')['sp_violate'].sum())
df2 = pd.DataFrame(summary_df.groupby('identifier')['gen_violate'].sum())
df3 = pd.DataFrame(summary_df.groupby('identifier')['fam_violate'].sum())
df4 = pd.DataFrame(summary_df.groupby('identifier')['102030_violate'].sum())

In [109]:
df5 = df1.join(df2, on='identifier', how='left')
df5 = df5.join(df3, on='identifier', how='left')
df5 = df5.join(df4, on='identifier', how='left')      

In [113]:
polygon_with_102030 = poly_gdf.join(df5, on='identifier', how='inner')

In [116]:
#save the shapefile with attribute data
polygon_with_102030.to_file('YOUR PATH HERE/ YOUR FILENAME HERE.shp')  

  polygon_with_102030.to_file('C:/Users/camip/OneDrive - Cal Poly/Documents/school/SPRING2022/GEOG441/UrbanForests_GroupProject/zip_codes/zip_code_102030.shp')
