In [1]:
#Assessing the 10-20-30 Rules using CUFI and mangement polygons for San Jose's
#urban forest

#04/21/2022

#Cami Pawlak, Cam Gonsalves, Kylee Neilson, Olivia Ross, Tricia Nguyen

##First, make sure you have geopandas installed, and are in an environment with it
import geopandas as gpd
import fiona
import pandas as pd
import numpy as np

#change the pandas options to display all columns
pd.set_option('display.max_columns', None)


In [14]:
##Load the path to the polygon of the scale you want to assess and your path to CUFI
## make sure the polygon is EPSG 4326 (GCS_WGS_1984)
path_to_CUFI = "PATH TO THE TREE SHAPEFILE/SCC_trees_v3.shp"
path_to_polygon = "PATH TO POLYGON SHAPE"

## Load your polygon
poly_gdf = gpd.read_file(path_to_polygon, driver='shapefile')

## Load the CUFI
CUFI = gpd.read_file(path_to_CUFI, driver='shapefile')

#make sure that crs is the same for each file or the next step will not work

#Spatially join the polygon and points #keep only points that intersect polygon
pointInPoly = gpd.sjoin(CUFI, poly_gdf, how='left',op='within') 

In [36]:
#look at the data
pointInPoly.head(1)

Unnamed: 0,Field1,V1,id,latitude,longitude,species,family,genus,specific_e,geometry,index_right,zip_code,city,county,state_fips,state_code,state_name,fips_class,mtfcc_feat,functional,area_land_,area_water,internal_p,internal_1,internal_2,TotalTrees,CountSpecies,CountGenus,CountFamilies
0,1,13,223887,37.37321,-122.064418,Liriodendron tulipifera,Magnoliaceae,Liriodendron,tulipifera,POINT (-122.06442 37.37321),1556,94040,"Mountain View city, Los Altos city, Palo Alto ...",Santa Clara County,6,CA,California,B5,G6350,S,9550444.0,0,37.380568,-122.085297,POINT(-122.0852971 37.3805678),3330,23,23,140


In [57]:
#count the total number of trees in each zip code and use transform to add as a column to the dataframe
pointInPoly['TotalTrees'] = pointInPoly.groupby('zip_code')['V1'].transform('count')
#count the number of trees of each species in each zipcode and use transform to add as a column to the dataframe
pointInPoly['CountSpecies'] = pointInPoly.groupby(["zip_code","species"])["V1"].transform('count')
#count the number of trees of each genera in each zipcode and use transform to add as a column to the dataframe
pointInPoly['CountGenus'] = pointInPoly.groupby(['zip_code','genus'])['V1'].transform('count')
#count the number of trees of each family in each zipcode and use transform to add as a column to the dataframe
pointInPoly['CountFamilies'] = pointInPoly.groupby(['zip_code','family'])['V1'].transform('count')

In [65]:
#check out the data
pointInPoly.head(1)

Unnamed: 0,Field1,V1,id,latitude,longitude,species,family,genus,specific_e,geometry,index_right,zip_code,city,county,state_fips,state_code,state_name,fips_class,mtfcc_feat,functional,area_land_,area_water,internal_p,internal_1,internal_2,TotalTrees,CountSpecies,CountGenus,CountFamilies
0,1,13,223887,37.37321,-122.064418,Liriodendron tulipifera,Magnoliaceae,Liriodendron,tulipifera,POINT (-122.06442 37.37321),1556,94040,"Mountain View city, Los Altos city, Palo Alto ...",Santa Clara County,6,CA,California,B5,G6350,S,9550444.0,0,37.380568,-122.085297,POINT(-122.0852971 37.3805678),3330,23,23,140


In [59]:
#get only relevant columns
subset = pointInPoly[['zip_code', 'species', 'genus', 'family', 'TotalTrees', 'CountSpecies', 'CountGenus', 'CountFamilies']]
#drop duplicates from the table
summary_df = subset.drop_duplicates()

In [68]:
summary_df.head(1)

Unnamed: 0,zip_code,species,genus,family,TotalTrees,CountSpecies,CountGenus,CountFamilies,perc_sp,perc_gen,perc_fam
0,94040,Liriodendron tulipifera,Liriodendron,Magnoliaceae,3330,23,23,140,0.690691,0.690691,4.204204


In [61]:
summary_df['perc_sp'] = (summary_df['CountSpecies']/summary_df['TotalTrees'])*100
summary_df['perc_gen'] =(summary_df['CountGenus']/summary_df['TotalTrees'])*100
summary_df['perc_fam'] = (summary_df['CountFamilies']/summary_df['TotalTrees'])*100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['perc_sp'] = (summary_df['CountSpecies']/summary_df['TotalTrees'])*100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['perc_gen'] =(summary_df['CountGenus']/summary_df['TotalTrees'])*100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df['perc_fam'] = (summary_df['Count

In [62]:
summary_df

Unnamed: 0,zip_code,species,genus,family,TotalTrees,CountSpecies,CountGenus,CountFamilies,perc_sp,perc_gen,perc_fam
0,94040,Liriodendron tulipifera,Liriodendron,Magnoliaceae,3330,23,23,140,0.690691,0.690691,4.204204
1,95014,Platanus occidentalis,Platanus,Platanaceae,17937,57,937,937,0.317779,5.223839,5.223839
2,95051,Tristaniopsis laurina,Tristaniopsis,Myrtaceae,7707,95,95,596,1.232646,1.232646,7.733230
3,95020,Pinus pinea,Pinus,Pinaceae,21445,24,404,591,0.111914,1.883889,2.755887
4,95037,Ligustrum,Ligustrum,Oleaceae,15058,50,239,886,0.332049,1.587196,5.883916
...,...,...,...,...,...,...,...,...,...,...,...
558538,94301,Abutilon hybridum,Abutilon,Malvaceae,13588,2,2,381,0.014719,0.014719,2.803945
558637,94306,Magnolia stellata,Magnolia,Magnoliaceae,13168,2,839,935,0.015188,6.371507,7.100547
558710,94306,Ilex aquifolium,Ilex,Aquifoliaceae,13168,1,5,5,0.007594,0.037971,0.037971
558823,94303,Tetradium daniellii,Tetradium,Rutaceae,9633,1,1,24,0.010381,0.010381,0.249144


In [63]:
summary_df.to_csv('PATH TO WHERE YOU WANT TO OUTPUT THE CSV')