# Import required packages

Make sure all necessary packages have been installed correctly

In [78]:
from lxml import etree #for processing XML documents
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors
from matplotlib.colors import Normalize
from matplotlib.collections import PatchCollection
from mpl_toolkits.basemap import Basemap #for mapping
from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon 
from shapely.prepared import prep #for processing shapefiles to make operations quicker
from pysal.esda.mapclassify import Natural_Breaks as nb
from descartes import PolygonPatch #to map polygons using matplotlib
import fiona #for reading shapefiles
from itertools import chain
import pyproj #for converting coordinate systems

# Postcodes

<b>Ordance Survey Postcode Files Information</b>

- Several separate csv files split by postcode district
- Inner London postcode districts include: 
    - EC, WC, E, N, NW, SE, SW, W
- Outer London postcode districts include: 
    - BR: Bromley, CR: Croydon, DA: Dartford, EN: Enfield, HA: Harrow
        IG: Ilford, KT: Kingston, RM: Romford, SM: Sutton, TW: Twickenham
        UB: Uxbridge, WD: Watford
- The file contains the following fields:
 - <i>PC Postcode
 - PQ Positional_quality_indicator	
 - EA Eastings
 - NO Northings
 - CY Country_code
 - RH NHS_regional_HA_code	
 - LH NHS_HA_code	
 - CC Admin_county_code
 - DC Admin_district_code	
 - WC Admin_ward_code</i>

In [79]:
#Clear Column Headers for Postcode Files
pc_headers = ['Postcode', 'Positional_quality_indicator', 'E', 
              'N', 'Country_code', 'NHS_regional_HA_code',
              'NHS_HA_code', 'Admin_county_code', 'Admin_district_code', 
              'Admin_ward_code']

In [80]:
#Code to import and join all postcode tables together for London

#List of postcode districts that apply to London
Lon_pc_dis_lst = ['ec', 'wc', 'e', 'n', 'nw', 'se', 'sw', 'w', 'br', 'cr', 
              'da', 'en', 'ha', 'ig', 'kt', 'rm', 'sm', 'tw', 'ub', 'wd']

#Generate list of file directories for concatenation
files_lst = []
direct = "London_Postcodes_Data\\"

for x in Lon_pc_dis_lst:
    files_lst.append( direct + x +'.csv')
    
Lon_pc_df = pd.concat([pd.read_csv(f, header=None, names=pc_headers) for f in files], axis = 0)

<b>Import dataframe </b>

In [81]:
#Clear Column headers for imported dataset (make sure to channge to suit the current dataset! use 'Postcode' for postcode field)
pp_headers = ['Transaction_ID', 'Price', 'Transfer_Date', 'Postcode', 
              'Property_Type', 'Old_New', 'Duration','PAON', 'SAON', 
              'Street', 'Locality', 'Town_City','District','County', 'PPD_Category_Type', 'Record_Status']

In [82]:
#Import datadrame (make sure to change the file name!)
df=pd.read_csv('Data\Complete_PP_2015.csv', header=0, names=pp_headers)  

<b>Dataset specific cleaning operations (remove if not needed)</b>

In [83]:
#Convert date column to date format
df['Transfer_Date'] = pd.to_datetime(df['Transfer_Date'])

<b>Join postcode information (which include coordinates) using the dataframe's 'Postcode' field</b>

In [84]:
#Join easting and northing values to the existing dataframe 'df'
df_pc = df.merge(Lon_pc_df, on='Postcode', how='left')

<b>Subset the dataframe to include only those values which the London postcode table was joined to</b>

In [85]:
#Add boolean field - True if a London postcode was identified
df_pc['London'] = df_pc.E.notnull()

#Create new dataframe with only London records
df_pc_London = df_pc.loc[df_pc.London == True, :]

#Make into csv
df_pc_London[['Transaction_ID','E','N']].to_csv('Data\BNG.csv')

<b> Convert eastings and northings to latitide and lonitude for mapping </b>

In [86]:
#Convert BNG Eastings and Northings to WGS84 Lat and lon

wgs84=pyproj.Proj("+init=EPSG:4326") # LatLon with WGS84 datum used by GPS units and Google Earth
osgb36=pyproj.Proj("+init=EPSG:27700") # UK Ordnance Survey, 1936 datum

#Convert easting and norththing colloums to an array to allor pyproj.transform operation to work
E_N_array = df_pc_London.as_matrix(columns=['E', 'N'])

x, y = WGS84_x, WGS84_y = pyproj.transform(osgb36, wgs84, E_N_array[:,0], E_N_array[:,1])

#Put back into dataframe

df_pc_London['lon'] = x

df_pc_London['lat'] = y


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


# Mapping Points

Code following tutorial at: http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html#.Vwon1fkrKUk


<b>Open shapefile with Fiona and get some data out of it in order to set up the basemap</b>

In [87]:
#Extract map boundaries
#Calculated the extent, width and height of our basemap

shp = fiona.open('data/london_wards.shp')
bds = shp.bounds
shp.close()
extra = 0.01
ll = (bds[0], bds[1])
ur = (bds[2], bds[3])
coords = list(chain(ll, ur))
w, h = coords[2] - coords[0], coords[3] - coords[1]

<b>Set up the basemap</b> 

In [88]:
#Create a basemap instance to plot maps on

m = Basemap(
    projection = 'tmerc',
    lon_0 =-2,
    lat_0=49.,
    ellps = 'WGS84',
    llcrnrlon=coords[0] - extra * w,
    llcrnrlat=coords[1] - extra + 0.01 * h,
    urcrnrlon=coords[2] + extra * w,
    urcrnrlat=coords[3] + extra +0.01* h,
    lat_ts=0,
    resolution='i',
    suppress_ticks = True)

m.readshapefile(
    'data/london_wards',
    'london',
    color = 'none',
    zorder=2)

(649,
 5,
 [-0.5103750689005356, 51.28676016315085, 0.0, 0.0],
 [0.3340155643740321, 51.691874116909894, 0.0, 0.0],
 <matplotlib.collections.LineCollection at 0x43edfc18>)

<b>Set up a map dataframe</b>

map_points weries was creates by passing latitude and longitude values to Basemap instance
this converts coordinates from lon and lat degrees to map projection coordinates
df_map dataframe now contains columns holding:

     a polygon for each ward in shapefile
     its description
     its area in square metres
     its area in square kilometres
    
Also prepared a geometry object from combined wards polygons to speed up membership checking operation

In [89]:
df_map = pd.DataFrame({
        'poly':[Polygon(xy) for xy in m.london],
        'ward_name':[ward['NAME'] for ward in m.london_info]})

df_map['area_m'] = df_map['poly'].map(lambda x: x.area)

df_map['area_km'] = df_map['area_m']/100000

#Create point objects in map coordinates from dataframe lon and lat values

map_points = pd.Series(
    [Point(m(mapped_x, mapped_y)) for mapped_x, mapped_y in zip(df_pc_London['lon'], df_pc_London['lat'])])
plaque_points = MultiPoint(list(map_points.values))
wards_polygon = prep(MultiPolygon(list(df_map['poly'].values)))

# Calculate points that fall within the London boundary
ldn_points = filter(wards_polygon.contains, plaque_points)



<b> Pre-prepared functions to generate map color ramps easily </b>

In [90]:
def colorbar_index(ncolors, cmap, labels=None, **kwargs):
    """
    This is a convenience function to stop you making off-by-one errors
    Takes a standard colour ramp, and discretizes it,
    then draws a colour bar with correctly aligned labels
    """
    cmap = cmap_discretize(cmap, ncolors)
    mappable =  cm.ScalarMappable(cmap=cmap)
    mappable.set_array([])
    mappable.set_clim(-0.5, ncolors+0.5)
    colorbar = plt.colorbar(mappable, **kwargs)
    colorbar.set_ticks(np.linspace(0, ncolors, ncolors))
    colorbar.set_ticklabels(range(ncolors))
    if labels:
        colorbar.set_ticklabels(labels)
    return colorbar

def cmap_discretize(cmap, N):
    """
    Return a discrete colormap from the continuous colormap cmap.

        cmap: colormap instance, eg. cm.jet. 
        N: number of colors.

    Example
        x = resize(arange(100), (5,100))
        djet = cmap_discretize(cm.jet, 5)
        imshow(x, cmap=djet)

    """
    if type (cmap) == str:
        cmap = get_cmap(cmap)
    colors_i = np.concatenate((np.linspace(0,1.,N), (0.,0.,0.,0.)))
    colors_rgba = cmap (colors_i)
    indices = np.linspace(0,1.,N + 1)
    cdict = {}
    for ki, key in enumerate(('red','green','blue')):
        cdict[key] = [(indices[i], colors_rgba[i-1,ki], colors_rgba[i,ki]) for i in xrange(N+1)]
    return matplotlib.colors.LinearSegmentedColormap(cmap.name + "_%d" % N, cdict, 1024)

## Dot Density Map / Scatter Plot

<b> Code to make a dot-density map (matplotlib scatter plot on the ward polygons which are converted to 'patches') </b>

Change titles to whatever is most appropriate for your own data

In [92]:
#Making a scatter plot
df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch(x,
                                                             fc='#555555',
                                                             ec='#787878', lw=.25, alpha=.9,
                                                             zorder=4))
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='w', frame_on=False)

# we don't need to pass points to m() because we calculated using map_points and shapefile polygons
dev = m.scatter(
    [geom.x for geom in ldn_points],
    [geom.y for geom in ldn_points],
    5, marker='o', lw=.25,
    facecolor='#33ccff', edgecolor='w',
    alpha=0.9, antialiased=True,
    label='Price Paid Points', zorder=3)

# plot boroughs by adding the PatchCollection to the axes instance
ax.add_collection(PatchCollection(df_map['patches'].values, match_original=True))

#Add copyright and source data info

smallprint = ax.text(
    1.03, 0,
    'Data From: %s\nContains Ordnance Survey data\n$\copyright$ Crown copyright and database right etc',
    ha='right', va='bottom',
    size=4,
    color='#555555',
    transform=ax.transAxes)

# Draw a map scale

m.drawmapscale(
    coords[0] + 0.08, coords[1] + 0.015,
    coords[0], coords[1],
    10.,
    barstyle = 'fancy', labelstyle='simple',
    fillcolor1='w', fillcolor2 = '#555555',
    fontcolor='#555555',
    zorder=5)

plt.title('Dot Density Map')
plt.tight_layout
# this will set the image width to 722px at 100dp
fig.set_size_inches(7.22, 5.25)
plt.savefig('Data/dot_density_map.png', dpi=100, alphe=True)
plt.show()

## Chloropleth Map - Density of Points by Ward

Change titles to whatever is most appropriate for your own data

In [None]:
#create a chloropleth map normalised by ward area
#Add fields for denity into map dataframe

df_map['count'] = df_map['poly'].map(lambda x: int(len(filter(prep(x).contains, ldn_points))))
df_map['density_m'] = df_map['count']/df_map['area_m']
df_map['density_km'] = df_map['count']/df_map['area_km']

# it's easier to work with NaN values when classifying
df_map.replace(to_replace={'density_m': {0: np.nan}, 'density_km': {0: np.nan}}, inplace=True)

In [None]:
#divide wards into classes
breaks = nb(
    df_map[df_map['density_km'].notnull()].density_km.values,
    initial=300,
    k=5)

#The notnull method lets us match indices when joining
jb = pd.DataFrame({'jenks_bins':breaks.yb}, index=df_map[df_map['density_km'].notnull()].index)
df_map = df_map.join(jb)
df_map.jenks_bins.fillna(-1, inplace=True)

In [None]:
#Labels for colour classes
jenks_labels = ["<=%0.1f/km$^2$(%s wards)" % (b,c) for b, c in zip(
    breaks.bins, breaks.counts)]
jenks_labels.insert(0, 'No plaques (%s wards)' % len (df_map[df_map['density_km'].isnull()]))

In [None]:
#cloropleth
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='w', frame_on=False)

# use a blue colour ramp - we'll be converting it to a map using cmap()
cmap = plt.get_cmap('Blues')
# draw wards with grey outlines
df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4))
pc = PatchCollection(df_map['patches'], match_original=True)
# impose our colour map onto the patch collection
norm = Normalize()
pc.set_facecolor(cmap(norm(df_map['jenks_bins'].values)))
ax.add_collection(pc)

# Add a colour bar
cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5, labels=jenks_labels)
cb.ax.tick_params(labelsize=6)

# Show highest densities, in descending order
highest = '\n'.join(
    value[1] for _, value in df_map[(df_map['jenks_bins'] == 4)][:10].sort().iterrows())
highest = 'Most Dense Wards:\n\n' + highest
# Subtraction is necessary for precise y coordinate alignment
details = cb.ax.text(
    -1., 0 - 0.007,
    highest,
    ha='right', va='bottom',
    size=5,
    color='#555555')

# Bin method, copyright and source data info
smallprint = ax.text(
    1.03, 0,
    'Classification method: natural breaks etc',
    ha='right', va='bottom',
    size=4,
    color='#555555',
    transform=ax.transAxes)

# Draw a map scale
m.drawmapscale(
    coords[0] + 0.08, coords[1] + 0.015,
    coords[0], coords[1],
    10.,
    barstyle='fancy', labelstyle='simple',
    fillcolor1='w', fillcolor2='#555555',
    fontcolor='#555555',
    zorder=5)

# this will set the image width to 722px at 100dpi
plt.tight_layout()
fig.set_size_inches(7.22, 5.25)
plt.savefig('data/choloropleth.png', dpi=100, alpha=True)
plt.show()