In [1]:
################################################################
#### CALCULATING CLUSTERING STATS FOR POLLUTION_YEAR/grams #####
################################################################

# To do the analysis for *Capital_Amenity* change variable at 3 places where shown in code 
# and column names as to not overwrite existing data

# Steps: 
# 1. Read in attributes as CSV 
# 2. Read in Camden boundaries as GEOJSON 
# 3. Transform attributes to GEOJSON 
# 4. Calculate KNN weights (same results if you use boundaries or attributes)


# References:

# Spatial analysis: 
# http://darribas.org/gds_scipy16/ipynb_md/04_esda.html
# https://methods.sagepub.com/dataset/howtoguide/local-morans-i-berlin-districts-2018-python

# Function to transform csv to geojson:
# https://gis.stackexchange.com/questions/220997/pandas-to-geojson-multiples-points-features-with-python

# KNN justification 
# https://towardsdatascience.com/a-simple-introduction-to-k-nearest-neighbors-algorithm-b3519ed98e

# About Local Moran 
# https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-spatial-autocorrelation-moran-s-i-spatial-st.htm



import geojson
import pandas 
import geopandas
import pysal

# Reading in attributes data (Camden Tree data)
attributes_csv = pandas.read_csv('cleaned.csv')

# Aggregating by ward 
attributes_agg = attributes_csv.groupby(['Ward_Name'], as_index=False).mean()

# Function that will transform the csv pandas df to a geojson 
def data2geojson(df):
    features = []
    insert_features = lambda X: features.append(
            geojson.Feature(geometry=geojson.Point((X["Lat"],
                                                    X["Lon"])),
                            properties=dict(Ward=X["Ward_Name"],
                                            Amenity_Value=X["Amenity_Value"],
                                            Pollution_Year_grams=X["Pollution_Year_grams"])))
    df.apply(insert_features, axis=1)
    with open('attributes.geojson', 'w', encoding='utf8') as fp:
        geojson.dump(geojson.FeatureCollection(features), fp, sort_keys=True, ensure_ascii=False)

# Applying function 
data2geojson(attributes_agg)

# Reading the attributes data as a geojson 
attributes_geojson = geopandas.read_file('attributes.geojson').to_crs(epsg = 27700)
attributes_geojson

# Reading in ward boundaries as a geojson 
neighbourhoods = geopandas.read_file('Camden Ward Boundary.geojson').to_crs(epsg = 27700) # geometry is a point 

# Calculating weights 'w' using the Camden Ward Boundary (NB: used with which has the centroid coords of each ward)
w_pollution = pysal.weights.KNN.from_dataframe(neighbourhoods, k=5)

# Calculating local moran statistics of proximity of *Pollution_Year_grams* using the weights 
local_morans = pysal.esda.moran.Moran_Local(attributes_geojson.Pollution_Year_grams, w_pollution, permutations=9999) 

# Checking what is significant 
Lag_pol = pysal.lag_spatial(w_pollution, attributes_geojson.Pollution_Year_grams) #*Change variable here*
polperyr = attributes_geojson.Pollution_Year_grams.values #*Change variable here*

sigs = polperyr[local_morans.p_sim <= .001]
W_sigs = Lag_pol[local_morans.p_sim <= .001]
insigs = polperyr[local_morans.p_sim > .001]
W_insigs = Lag_pol[local_morans.p_sim > .001]

# Calculating hotspots and coldspots 
sig = local_morans.p_sim < 0.05
hotspots = local_morans.q==1 * sig
hotspots.sum()
coldspots = local_morans.q==3 * sig
coldspots.sum()

# Assigning local moran values, coldspots and hotspots to: 
# 1. the attributes df 
# 2. the Camden geojson boundary 

# 1.
attributes_agg2 = attributes_agg.assign(pol_cl_value =local_morans.Is)
attributes_agg2 = attributes_agg2.assign(pol_hotspots = local_morans.q==1 * sig)
attributes_agg2 = attributes_agg2.assign(pol_coldspots = local_morans.q==3 * sig)

# 2. 
attributes_geojson2 = attributes_geojson.assign(pol_cl_value =local_morans.Is)
attributes_geojson2 = attributes_geojson2.assign(pol_hotspots = local_morans.q==1 * sig)
attributes_geojson2 = attributes_geojson2.assign(pol_coldspots = local_morans.q==3 * sig)

# Another statistic that can be used as the variable to be measured: z_sim
# z_sim = standardized Is based on permutations
# Assigning it to the two objects:
# Breaks for mapping this variable: 2.58 - 1.96 - 1.65 - -1.65 - -1.96 - -2.58; 

attributes_agg2 = attributes_agg2.assign(pol_z_sim = local_morans.z_sim)
attributes_geojson2 = attributes_geojson2.assign(pol_z_sim =local_morans.z_sim)

# THE END: there are two objects with the same data: 
# attributes_agg2 - attributes data and clustering stats in pandas df format.
# attributes_geojson2 - attributes data and clustering stats in GeoJSON.  

# WHAT SHOULD BE MAPPED: 
# Either: coldspots and hotspots (but there are no coldspots, so the map might be ugly, only red and greyish)
# Or pol_z_sim with breaks mentioned above, like this map can have a gradient (like the one made in R) 
# though I am not sure what the breaks mean, (they are from the GIS prac), I think they refer to the 
# 'intensity' of the clustering. 

You can install them with  `pip install urbanaccess pandana` or `conda install -c udst pandana urbanaccess`
  "You need pandana and urbanaccess to work with segregation's network module\n"


RuntimeError: b'no arguments in initialization list'

In [2]:
#attributes_agg2["SD"] = attributes_agg2["pol_cl_value"]
#attributes_agg2.assign(SD = attributes_agg2.pol_cl_value
for i in range(len(attributes_agg2)) : 
    attributes_agg2.loc[i, "SD"] = np.std(attributes_agg2.loc[i, "pol_cl_value"])
attributes_agg2

#for i in range(len(attributes_agg2)) : 
#    attributes_agg2.loc.assign([i, "SD"] = np.std(attributes_agg2.loc[i, "pol_cl_value"])
#attributes_agg2

#for index, row in df.iterrows(): 
  #  print (row["Name"], row["Age"]) 

NameError: name 'attributes_agg2' is not defined

In [38]:
for row in attributes_agg2['pol_cl_value']: 
    attributes_agg2['SD']= np.std(attributes_agg2.loc['pol_cl_value']]
    
attributes_agg2

KeyError: "None of [Float64Index([-0.016414652113974946,    1.3654101276378772,\n                0.28386030757587655,    0.1130117232134158,\n                0.40029482787573295,  -0.18841769817914888,\n                0.09062003918895907,  0.008758347095537734,\n                0.11952241124521208,    0.1030701266057009,\n                  1.562957699674429,    0.4822398473280599,\n                0.26605076665007954,    1.1768477189620332,\n                 0.4181212973529868,   0.08510295508113544,\n                0.02982360643258467,    0.5439037107975143],\n             dtype='float64')] are in the [index]"

In [35]:
#attributes_agg2
local_morans.Is
#print(arr1.std)

import numpy as np
#np.std(local_morans.Is)

#df.loc[df[‘column name’]
       
for row in attributes_agg2.loc[attributes_agg2['pol_cl_value']]: 
    attributes_agg2['SD']= np.std(attributes_agg2['pol_cl_value'])
    
attributes_agg2

KeyError: "None of [Float64Index([-0.016414652113974946,    1.3654101276378772,\n                0.28386030757587655,    0.1130117232134158,\n                0.40029482787573295,  -0.18841769817914888,\n                0.09062003918895907,  0.008758347095537734,\n                0.11952241124521208,    0.1030701266057009,\n                  1.562957699674429,    0.4822398473280599,\n                0.26605076665007954,    1.1768477189620332,\n                 0.4181212973529868,   0.08510295508113544,\n                0.02982360643258467,    0.5439037107975143],\n             dtype='float64')] are in the [index]"

In [30]:
for index, row in attributes_agg2.iterrows():
    attributes_agg2['SD'] = np.std(attributes_agg2['pol_cl_value'])

    
#attributes_agg2['SD'] = np.std(attributes_agg2['pol_cl_value'])
#attributes_agg2
attributes_agg2

Unnamed: 0.1,Ward_Name,Unnamed: 0,id,No_Trees,Height_M,Spread_M,Diameter_CM,Removed_Soon,Newly_Planted,Amenity_Value,Carbon_StorageKG,Carbon_Sequest_YR,Pollution_Year_grams,Lon,Lat,pol_cl_value,pol_hotspots,pol_coldspots,pol_z_sim,SD
0,Belsize,11637.923214,11790.203571,1.001786,11.933036,6.726786,39.260714,0.0,0.001786,17146.586875,481.965,9.334821,227.710893,-0.166808,51.546101,-0.016415,False,False,-0.850376,0.482701
1,Bloomsbury,11763.616129,11917.078495,1.03871,13.356452,7.510753,39.684946,0.0,0.004301,21988.597441,691.863011,10.211935,345.056237,-0.130059,51.522427,1.36541,True,False,2.537454,0.482701
2,Camden Town with Primrose Hill,11854.271654,12009.132546,1.015748,9.679396,5.653543,32.42126,0.0,0.006562,13479.822533,440.372047,8.243307,176.854462,-0.14868,51.541269,0.28386,False,False,1.222908,0.482701
3,Cantelowes,11905.414747,12060.702304,1.041475,10.549309,5.828313,32.653456,0.0,0.00553,11907.74071,366.626359,7.857419,180.085622,-0.13338,51.544978,0.113012,False,False,0.572506,0.482701
4,Fortune Green,11533.971884,11685.262418,1.0806,9.467666,5.649297,33.025305,0.0,0.005623,12832.37746,385.458013,8.122212,166.444892,-0.202191,51.552642,0.400295,False,False,1.446921,0.482701
5,Frognal and Fitzjohns,11606.703499,11758.720074,1.061694,11.795488,6.99337,43.133517,0.0,0.000921,22295.85663,740.777532,10.961878,247.250829,-0.184929,51.55621,-0.188418,False,False,-1.4766,0.482701
6,Gospel Oak,11283.803731,11432.128954,1.056772,10.445093,6.347526,33.404704,0.0,0.020276,12606.95854,398.960746,8.702028,211.884509,-0.155695,51.552691,0.09062,False,False,1.414967,0.482701
7,Hampstead Town,11956.649436,12112.352444,1.015038,10.777162,6.027726,38.207707,0.00188,0.010338,16282.592679,464.951504,8.840038,185.833459,-0.171663,51.55595,0.008758,False,False,0.131734,0.482701
8,Haverstock,11612.253333,11764.327059,1.068235,10.81749,6.376471,33.201176,0.003922,0.010196,12691.184918,380.70549,8.319765,208.152941,-0.152306,51.546689,0.119522,False,False,1.44243,0.482701
9,Highgate,11539.30567,11690.467954,1.12613,11.145275,6.521569,36.112983,0.003698,0.004519,15747.99636,462.036154,9.039811,211.059778,-0.146024,51.563849,0.10307,False,False,1.512682,0.482701


In [4]:
import pandas
attributes_csv = pandas.read_csv('cleaned.csv')
attributes_csv.count() 

Unnamed: 0              19919
id                      19919
No_Trees                19919
Site_Name               19919
Sci_Name                19919
Com_Name                19919
Date                    19919
Height_M                19919
Spread_M                19919
Diameter_CM             19919
Maturity                19919
Condition               19919
Removed_Soon            19919
Newly_Planted           19919
Amenity_Value           19919
Carbon_StorageKG        19919
Carbon_Sequest_YR       19919
Pollution_Year_grams    19919
Ward_Code               19919
Ward_Name               19919
Lon                     19919
Lat                     19919
Loc                     19919
Uploaded                19919
dtype: int64

In [5]:
attributes_csv

Unnamed: 0.1,Unnamed: 0,id,No_Trees,Site_Name,Sci_Name,Com_Name,Date,Height_M,Spread_M,Diameter_CM,...,Amenity_Value,Carbon_StorageKG,Carbon_Sequest_YR,Pollution_Year_grams,Ward_Code,Ward_Name,Lon,Lat,Loc,Uploaded
0,2,4,1,DYNHAM ROAD,Betula jacquemontii,Birch - Himalayan,2018-09-14 00:00:00,8.0,3.0,15.0,...,1456.39,33.1,3.0,27.3,E05000140,Kilburn,-0.193201,51.543997,"(51.543997, -0.193201)",2020-04-05 20:31:00
1,3,5,1,Denton Estate,Tilia europaea,Lime - Common,2020-03-19 00:00:00,16.0,8.0,42.0,...,14272.60,269.7,7.6,265.0,E05000136,Haverstock,-0.151179,51.545382,"(51.545382, -0.151179)",2020-04-05 20:31:00
2,4,6,1,KINGSGATE ROAD,Prunus padus,Cherry - Bird,2018-09-20 00:00:00,13.0,5.0,37.0,...,6892.13,215.6,7.9,128.8,E05000140,Kilburn,-0.195794,51.543000,"(51.543, -0.195794)",2020-04-05 20:31:00
3,7,10,1,SKARDU ROAD,Betula papyrifera,Paper birch,2018-08-29 00:00:00,8.0,7.0,26.0,...,4375.64,142.1,6.6,114.8,E05000132,Fortune Green,-0.210279,51.554557,"(51.554557, -0.210279)",2020-04-05 20:31:00
4,11,16,1,Wendling Estate,Sorbus,Rowan,2020-03-17 00:00:00,6.0,3.0,29.0,...,7258.21,126.0,5.3,20.6,E05000134,Gospel Oak,-0.155297,51.551437,"(51.551437, -0.155297)",2020-04-05 20:31:00
5,13,18,1,GRAFTON ROAD,Amelanchier alnifolia obelisk,June berry - Upright,2017-04-08 00:00:00,4.0,2.0,6.0,...,258.91,1.4,0.5,2.6,E05000139,Kentish Town,-0.145707,51.548033,"(51.548033, -0.145707)",2020-04-05 20:31:00
6,17,22,1,Broadfield Estate 1,Sorbus torminalis,Rowan,2018-04-26 00:00:00,5.0,2.0,8.0,...,552.35,3.7,0.8,11.6,E05000144,Swiss Cottage,-0.184755,51.547048,"(51.547048, -0.184755)",2020-04-05 20:31:00
7,18,23,1,"BRUNSWICK SQUARE, GARDENS (LS)",Fagus sylvatica,Beech - Common,2018-03-21 00:00:00,21.0,11.0,56.0,...,22554.23,909.3,19.2,492.3,E05000141,King's Cross,-0.122026,51.524719,"(51.524719, -0.122026)",2020-04-05 20:31:00
8,19,24,1,Dunboyne Road Estate,Robinia pseudoacacia,False Acacia,2020-06-03 00:00:00,15.0,10.0,51.0,...,19953.58,670.3,15.9,348.9,E05000134,Gospel Oak,-0.158430,51.552332,"(51.552332, -0.15843)",2020-04-05 20:31:00
9,24,33,1,Estate 1-158 Dorney (cons),Liriodendron tulipifera,Tulip Tree,2018-04-17 00:00:00,3.0,1.0,5.0,...,239.74,2.1,0.5,5.0,E05000128,Belsize,-0.164566,51.543455,"(51.543455, -0.164566)",2020-04-05 20:31:00
