<b>Spatial Analysis for London Gentrification Project</b>

In [137]:
#Load packages
import pandas as pd
import numpy as np
import fiona #for reading shapefiles

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors
from matplotlib.colors import Normalize

from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon 
from shapely.prepared import prep #for processing shapefiles to make operations quicker

from itertools import chain

from mpl_toolkits.basemap import Basemap #for mapping

In [138]:
#Set working directory
#The project folder can be overridden with the LONDON_GENTRIFICATION_DIR
#environment variable; when it is unset the original location is used.
import os
PROJECT_DIR = os.environ.get("LONDON_GENTRIFICATION_DIR",
                             "C:/Users/Claire/Google Drive/LondonGentrification")
os.chdir(PROJECT_DIR)

In [139]:
#CHECK PROJECTION
#Import OGR module
from osgeo import ogr
#Activate OGR Shapefile driver
shp_driver = ogr.GetDriverByName('ESRI Shapefile')
#Open shapefile with OGR
shp_dataset = shp_driver.Open(r'Data/ESRI/London_ward_CityMerged.shp')
#Open() can hand back None instead of raising, so stop here with a clear
#message rather than failing on the next line with an AttributeError
if shp_dataset is None:
    raise IOError("Could not open shapefile 'Data/ESRI/London_ward_CityMerged.shp'")
#Access layer information
shp_layer = shp_dataset.GetLayer()
#Get coordinate information using GetSpatialRef() function
shp_srs = shp_layer.GetSpatialRef()
#Print spatial reference system on screen
print(shp_srs)

PROJCS["British_National_Grid",
    GEOGCS["GCS_OSGB_1936",
        DATUM["OSGB_1936",
            SPHEROID["Airy_1830",6377563.396,299.3249646]],
        PRIMEM["Greenwich",0],
        UNIT["Degree",0.0174532925199433]],
    PROJECTION["Transverse_Mercator"],
    PARAMETER["False_Easting",400000],
    PARAMETER["False_Northing",-100000],
    PARAMETER["Central_Meridian",-2],
    PARAMETER["Scale_Factor",0.999601272],
    PARAMETER["Latitude_Of_Origin",49],
    UNIT["Meter",1]]


In [140]:
#CREATE PANDAS DATAFRAME TO WRITE FEATURES FROM SHAPEFILE

ward_names = []
ward_code = []

#Rewind the layer first so that re-running this cell starts again from the
#first feature instead of continuing from wherever the last read stopped
shp_layer.ResetReading()
for feature in shp_layer:
    ward_names.append(feature.GetField("NAME"))
    ward_code.append(feature.GetField("GSS_Code"))

#One row per feature: GSS code as the index, ward name as the only column
ward_variables = pd.DataFrame(index=ward_code, columns = ['Ward Name'], data=ward_names)

In [141]:
#IMPORT THE .CSV WITH INCOME INFORMATION
#The 'Code' column becomes the index of the resulting frame
ward_income = pd.read_csv(
    "Data/modelled-household-income-estimates-wards.csv",
    index_col='Code',
)

In [142]:
#JOIN INCOME INFORMATION TO THE WARD TABLE
#Left join on the index of both frames, then give the joined column a
#more descriptive name
ward_variables = (
    ward_variables
    .join(ward_income['Median 2012_13'], how='left')
    .rename(columns={'Median 2012_13': 'Median Income 2012_13'})
)

In [143]:
#CATEGORISE AS HIGH LOW (HIGHER OR LOWER THAN MEAN)

#Calculate the mean of the ward median incomes
mean_income = ward_variables['Median Income 2012_13'].mean()

#Define function for categories
def income_category (row):
    """Label a row 'High' or 'Low' relative to the module-level mean_income.

    row must support lookup by the key 'Median Income 2012_13'.
    Returns 'High' when that value is >= mean_income, 'Low' when it is
    < mean_income, and 'Other' when neither comparison is true.
    """
    income = row['Median Income 2012_13']
    if income >= mean_income:
        label = 'High'
    elif income < mean_income:
        label = 'Low'
    else:
        label = 'Other'
    return label

#Apply function row by row to create the new dataframe column
ward_variables['Income Category'] = ward_variables.apply(income_category, axis=1)

In [144]:
#CREATE A SPECTRUM OF COLOURS
N = 7 #Number of intervals
#Floor division keeps the step an integer on both Python 2 and Python 3;
#the '%02x' formatting below needs integer channel values
increment = 255 // (N + 1)
#Red and green step down together from 230 while blue stays at 255
RGB_tuples = [((230-(x*increment)), ((230-(x*increment))), 255) for x in range(N)]

#Convert to hex color format for mapping
hex_colour = ['#%02x%02x%02x' % rgb for rgb in RGB_tuples]

print(RGB_tuples)
print(hex_colour)

[(230, 230, 255), (199, 199, 255), (168, 168, 255), (137, 137, 255), (106, 106, 255), (75, 75, 255), (44, 44, 255)]
['#e6e6ff', '#c7c7ff', '#a8a8ff', '#8989ff', '#6a6aff', '#4b4bff', '#2c2cff']


In [150]:
#VISUALISE COLOURS
#Figure with a single strip-shaped axes to hold the colour bar
fig = plt.figure(figsize=(8, 3))
ax1 = fig.add_axes([0.05, 0.80, 0.9, 0.15])

#Discrete colour map with N entries taken from the hex colour list
cmap = matplotlib.colors.ListedColormap(hex_colour, name='from_list', N=N)

#Draw the colour map as a horizontal colour bar on that axes
cb1 = matplotlib.colorbar.ColorbarBase(ax1, cmap=cmap, orientation='horizontal')

plt.show()

In [146]:
#FUNCTION TO ASSIGN COLOURS (EQUAL INTERVAL)
def color_assign (row, data, var, colours=None):
    """Return the colour of the equal-width interval that row[var] falls in.

    The range from data.min() to data.max() is divided into len(colours)
    equal-width intervals. The value row[var] is given the colour of the
    first interval whose upper edge is greater than or equal to it, so a
    value lying exactly on an edge goes to the lower interval and anything
    at or below the first upper edge gets the first colour.

    Parameters:
        row: object supporting row[var] lookup (e.g. a DataFrame row).
        data: object providing .min() and .max() (e.g. a pandas Series).
        var: key used to read the value to classify from row.
        colours: optional sequence of colours, lowest interval first.
            When omitted, the module-level hex_colour list is used.

    Returns:
        The matching entry of colours, or None when the value matches no
        interval (it is NaN, or greater than data.max()).
    """
    if colours is None:
        colours = hex_colour
    n_classes = len(colours)
    # n_classes + 1 evenly spaced edges; the first edge (the minimum) is
    # never tested, so only the upper edges are kept.
    upper_edges = np.linspace(data.min(), data.max(), num=n_classes + 1)[1:]
    value = row[var]
    for colour, upper in zip(colours, upper_edges):
        if value <= upper:
            return colour
    # NaN compares False against every edge, and a value above the maximum
    # exceeds every edge: make the "no class" result explicit.
    return None

In [147]:
#APPLY FUNCTION TO DATAFRAME
#Each row is passed as the first argument; the income column and its name
#are forwarded as the remaining positional arguments
ward_variables['Income Colour'] = ward_variables.apply(
    color_assign,
    axis=1,
    args=(ward_variables['Median Income 2012_13'], 'Median Income 2012_13'),
)

In [148]:
#Preview the first rows only rather than rendering the whole table
ward_variables.head(10)

Unnamed: 0,Ward Name,Median Income 2012_13,Income Category,Income Colour
E05000405,Chessington South,38310,Low,#c7c7ff
E05000414,Tolworth and Hook Rise,37840,Low,#c7c7ff
E05000401,Berrylands,42330,High,#c7c7ff
E05000400,Alexandra,41390,High,#c7c7ff
E05000402,Beverley,40700,High,#c7c7ff
E05000406,Coombe Hill,45650,High,#a8a8ff
E05000404,Chessington North and Hook,37230,Low,#c7c7ff
E05000413,Surbiton Hill,43160,High,#a8a8ff
E05000410,Old Malden,41760,High,#c7c7ff
E05000412,St. Mark's,44930,High,#a8a8ff


In [149]:
#EXPORT CSV
#The index is written as the first column under the header 'Code'
ward_variables.to_csv(
    'Data/claire_ward_variables.csv',
    index_label='Code',
)
