# In [None]:
# Steps: 
# 1. Read in attributes as CSV 
# 2. Read in Camden boundaries as GEOJSON 
# 3. Transform attributes to GEOJSON 
# 4. Calculate KNN weights (same results if you use boundaries or attributes)


# References:

# Spatial analysis: 
# http://darribas.org/gds_scipy16/ipynb_md/04_esda.html
# https://methods.sagepub.com/dataset/howtoguide/local-morans-i-berlin-districts-2018-python

# Function to transform csv to geojson:
# https://gis.stackexchange.com/questions/220997/pandas-to-geojson-multiples-points-features-with-python

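# *** This analysis is for *Pollution_Year_grams* ***
# To run the analysis for the amenity value instead (*Capital_Amenity*; the column used
# in this code is *Amenity_Value*), change the variable wherever the code is marked
# *Change variable here*.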


import geojson
import pandas 
import geopandas
import pysal

# Reading in attributes data (Camden Tree data)
attributes_csv = pandas.read_csv('cleaned.csv')

# Aggregating by ward: one row per Ward_Name holding the mean of every numeric
# column. numeric_only=True keeps text columns out of the mean; recent pandas
# raises a TypeError for them, whereas older versions silently dropped them.
# NOTE(review): if Lat/Lon are numeric columns they are averaged too, so each
# ward's point would be the mean position of its rows - confirm this is intended.
attributes_agg = attributes_csv.groupby(['Ward_Name'], as_index=False).mean(numeric_only=True)

# Function that will transform the csv pandas df to a geojson
def data2geojson(df, path='attributes.geojson'):
    """Write one GeoJSON point feature per row of *df* to *path*.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Lat``, ``Lon``, ``Ward_Name``,
        ``Amenity_Value`` and ``Pollution_Year_grams``.
    path : str, optional
        File the FeatureCollection is written to. Defaults to
        ``'attributes.geojson'``, the name this function always used.

    Returns
    -------
    None
        The result is the file written to *path*.

    Raises
    ------
    KeyError
        If one of the required columns is missing from *df*.
    """
    features = [
        geojson.Feature(
            # GeoJSON positions are ordered (longitude, latitude); passing
            # (Lat, Lon) would transpose every point once the file is read back.
            geometry=geojson.Point((row["Lon"], row["Lat"])),
            properties=dict(Ward=row["Ward_Name"],
                            Amenity_Value=row["Amenity_Value"],
                            Pollution_Year_grams=row["Pollution_Year_grams"]),
        )
        for _, row in df.iterrows()
    ]
    with open(path, 'w', encoding='utf8') as fp:
        geojson.dump(geojson.FeatureCollection(features), fp, sort_keys=True, ensure_ascii=False)

# Write the ward-level attributes to 'attributes.geojson'
data2geojson(attributes_agg)

# Load that file back as a GeoDataFrame, then reproject it to EPSG:27700
# (British National Grid)
attributes_geojson = geopandas.read_file('attributes.geojson')
attributes_geojson = attributes_geojson.to_crs(epsg=27700)
attributes_geojson

# Load the Camden ward boundaries and reproject them to the same CRS.
# NOTE(review): the original comment here said "geometry is a point"; a boundary
# layer presumably holds polygons - confirm what this file contains.
neighbourhoods = geopandas.read_file('Camden Ward Boundary.geojson')
neighbourhoods = neighbourhoods.to_crs(epsg=27700)

# K-nearest-neighbour spatial weights 'w' (k=5) built from the boundary layer.
# NOTE(review): these weights are later combined with values taken from
# attributes_geojson, which presumably requires both frames to list the wards
# in the same row order - confirm against the boundary file.
w_pollution = pysal.weights.KNN.from_dataframe(neighbourhoods, k=5)

# Column of attributes_geojson to analyse. #*Change variable here*
# This is the only place that needs editing to analyse another column.
analysis_variable = 'Pollution_Year_grams'
analysis_values = attributes_geojson[analysis_variable]

# Calculating local moran statistics
# NOTE(review): the i-th value is paired with the i-th observation of
# w_pollution, so both presumably need the same ward order - confirm.
local_morans = pysal.esda.moran.Moran_Local(analysis_values, w_pollution)

# Checking what is significant
Lag_pol = pysal.lag_spatial(w_pollution, analysis_values)  # spatial lag of the variable
polperyr = analysis_values.values

# Split values and their spatial lags at p_sim <= .001. Each mask is computed
# once and reused. NOTE(review): this cut-off is much stricter than the 0.05
# used for the hot/cold spots below - confirm the difference is intended.
strongly_significant = local_morans.p_sim <= .001
not_strongly_significant = local_morans.p_sim > .001
sigs = polperyr[strongly_significant]
W_sigs = Lag_pol[strongly_significant]
insigs = polperyr[not_strongly_significant]
W_insigs = Lag_pol[not_strongly_significant]

# Calculating hotspots and coldspots
sig = local_morans.p_sim < 0.05
# Parentheses matter: `q==1 * sig` parses as `q == (1 * sig)` and only gave the
# intended mask because q is never 0. Combine the two conditions explicitly.
hotspots = (local_morans.q == 1) & sig   # quadrant 1, significant
coldspots = (local_morans.q == 3) & sig  # quadrant 3, significant

# Print the counts: as bare expressions they were discarded everywhere except
# on the last line of a notebook cell.
print('hotspots:', hotspots.sum())
print('coldspots:', coldspots.sum())  # no coldspots in the original run
print(attributes_geojson[analysis_variable][hotspots])

# p_sim comes from random permutations, so the selection can vary between runs.
attributes_geojson[hotspots] # 3 hotspots in the original run: Bloomsbury, Holborn and Covent Garden, King's Cross
#attributes_geojson[coldspots] # remove hashtag if there are coldspots to see the name of the coldspot.