# Spatial exploration of Newcastle's [Urban Observatory](http://uoweb1.ncl.ac.uk)  sensors

Author: Adelson Araújo Jr (adelsondias@live.com)

* To view the maps in this notebook, open it with [NBViewer](http://nbviewer.jupyter.org/github/adaj/air-quality/blob/master/eda/spatial_exploration.ipynb).

In [1]:
import time
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import fiona

import folium
from folium import plugins
from folium.plugins import HeatMap
import shapely.geometry

In [16]:
# utils
def choropleth(grid, data, color_scale, location, zoom):
    """Draw a choropleth of `data` over the polygons of `grid` with folium.

    Parameters
    ----------
    grid : handed to folium as ``geo_data``; features are matched to `data`
        through ``"feature.id"``.
    data : handed to folium as ``data`` (the values to colour by).
    color_scale : handed to folium as ``threshold_scale``.
    location : map centre given to ``folium.Map``.
    zoom : initial zoom level (``zoom_start``).

    Returns
    -------
    The ``folium.Map`` holding the choropleth, attached to a 970x300 Figure.
    """
    figure = folium.Figure(width=970, height=300)
    base_map = folium.Map(location=location, tiles='Stamen Terrain', zoom_start=zoom)
    base_map.add_to(figure)
    base_map.choropleth(
        geo_data=grid,
        data=data,
        threshold_scale=color_scale,
        key_on="feature.id",
        fill_color='Spectral_r',
        line_weight=2,
    )
    return base_map

def kGrid(points_df, k):
    """Build a grid whose cells are the convex hulls of k-means clusters.

    Parameters
    ----------
    points_df : frame with 'lat', 'lon' and 'geometry' columns (one point per row).
    k : number of clusters requested from KMeans.

    Returns
    -------
    GeoDataFrame indexed by cluster label ('k') with one polygon per cluster.
    Clusters whose hull is not a Polygon, or whose area is not above 1e-6
    (in the units of the geometry coordinates), are left out, so the result
    may have fewer than `k` rows.
    """
    from sklearn.cluster import KMeans
    # Use the `k` argument (the original referenced an undefined name `K`).
    kmeans = KMeans(n_clusters=k, random_state=1).fit(points_df[['lat','lon']])
    pdf_copy = points_df.copy()
    pdf_copy['k'] = kmeans.labels_
    # Wrap each point in a list so that summing per cluster concatenates the points.
    pdf_copy['geometry'] = points_df['geometry'].apply(lambda x: [x])
    kgrid = gpd.GeoDataFrame(pdf_copy.groupby('k').agg({'geometry':'sum'}))
    kgrid['geometry'] = [shapely.geometry.MultiPoint(x).convex_hull for x in kgrid.geometry]
    # Hulls of one or two points degenerate to Point / LineString: drop them.
    kgrid = kgrid.loc[[type(x)==shapely.geometry.Polygon for x in kgrid.geometry]]
    # Drop near-degenerate slivers as well.
    kgrid = kgrid.loc[kgrid.area>1e-6]
    kgrid = gpd.GeoDataFrame(kgrid, geometry=kgrid['geometry'])
    kgrid.crs = {'init': 'epsg:4326'}
    kgrid = kgrid.to_crs(fiona.crs.from_epsg(4326))
    return kgrid

def rectangular_grid(xcells, ycells, points_df):
    """Tile the bounding box of the points with xcells x ycells rectangles.

    Parameters
    ----------
    xcells : number of columns (cells along longitude).
    ycells : number of rows (cells along latitude).
    points_df : frame with 'lon' and 'lat' columns; their min/max define the box.

    Returns
    -------
    GeoDataFrame (EPSG:4326) with one rectangle per row, ordered row by row
    from the top-left corner, and a string index ('0', '1', ...).
    """
    # linspace includes both endpoints, so the outer cell edges coincide with
    # the min/max coordinates. Accumulating `x0 += window` instead can drift
    # and leave the extreme points just outside the grid.
    x_edges = np.linspace(points_df['lon'].min(), points_df['lon'].max(), xcells + 1)
    y_edges = np.linspace(points_df['lat'].max(), points_df['lat'].min(), ycells + 1)

    pols = [
        shapely.geometry.Polygon([(left, top), (right, top), (right, bottom), (left, bottom)])
        for top, bottom in zip(y_edges[:-1], y_edges[1:])      # rows, north to south
        for left, right in zip(x_edges[:-1], x_edges[1:])      # columns, west to east
    ]
    rg = gpd.GeoDataFrame(geometry=pols, crs={'init': 'epsg:4326'}).to_crs(fiona.crs.from_epsg(4326))
    # String ids, so the index can serve as the GeoJSON feature id.
    rg.index = rg.index.map(str)
    return rg

def cscale(data):
    """Derive five colour thresholds from the quantiles of `data`.

    The first threshold is 0 when the minimum of `data` is 0, otherwise the
    minimum minus 1. The remaining four are the 50%, 80%, 95% and 100%
    quantiles, each padded by +1.

    Parameters
    ----------
    data : object exposing ``quantile(q)`` (e.g. a pandas Series).

    Returns
    -------
    list of five thresholds.
    """
    lowest = data.quantile(0)
    thresholds = [0 if lowest == 0 else lowest - 1]
    thresholds.extend(data.quantile(q) + 1 for q in (0.50, 0.80, 0.95, 1))
    return thresholds

def create_join_hash(meta, grid):
    """Spatially join `meta` with `grid` and index the result by sensor name.

    Rows of `meta` are matched to the `grid` geometries they intersect;
    columns coming from `grid` that clash get the 'grid' suffix (the grid
    index therefore appears as 'index_grid'). The result is indexed by the
    'name' column.
    """
    joined = gpd.sjoin(meta, grid, op='intersects', rsuffix='grid')
    return joined.set_index('name')

def grid_medians(var, freq, sensors, join_hash):
    """Median of one variable per grid cell and time bin.

    Readings are first reduced to a median per (cell, sensor, time bin), and
    then to a median across the sensors of each (cell, time bin).

    Parameters
    ----------
    var : value of the 'Variable' column to keep.
    freq : pandas offset alias for the time bins (passed to ``pd.Grouper``).
    sensors : DataFrame with at least the columns 'Variable', 'Sensor Name'
        and 'Timestamp', plus the numeric column(s) to aggregate.
    join_hash : DataFrame indexed by sensor name with an 'index_grid' column
        (as returned by ``create_join_hash``).

    Returns
    -------
    DataFrame with columns 'index_grid', 'Timestamp' and the aggregated
    numeric column(s).
    """
    svalues = sensors.loc[sensors['Variable'] == var]
    # Attach the grid cell of every sensor; rows without a cell (or with any
    # other missing value) are dropped.
    svalues = svalues.join(join_hash['index_grid'], on='Sensor Name').dropna()
    svalues = svalues.assign(Timestamp=pd.to_datetime(svalues['Timestamp']))

    # numeric_only=True keeps text columns such as 'Variable' out of the
    # median, which raises on recent pandas versions otherwise.
    per_sensor = svalues.groupby(
        ['index_grid', 'Sensor Name', pd.Grouper(key='Timestamp', freq=freq)]
    ).median(numeric_only=True)
    per_cell = per_sensor.groupby(level=['index_grid', 'Timestamp']).median()
    return per_cell.reset_index()

## Datasets

In [3]:
# Input locations. These are absolute paths under /home/adelsondias; point them at
# your own copies of the MSOA shapefile folder and the sensor data folder before running.
SHAPE_FOLDER = '/home/adelsondias/Repos/newcastle/air-quality/shape/Middle_Layer_Super_Output_Areas_December_2011_Full_Extent_Boundaries_in_England_and_Wales'
DATA_FOLDER = '/home/adelsondias/Repos/newcastle/air-quality/data_allsensors_8days'

In [4]:
# data.csv is used below through its 'Variable', 'Sensor Name' and 'Timestamp' columns;
# sensors.csv is the per-sensor table used below through 'name', 'type', 'lat' and 'lon'.
sensors = pd.read_csv(DATA_FOLDER+'/data.csv')
meta = pd.read_csv(DATA_FOLDER+'/sensors.csv')

Spatial parsing

In [5]:
# Build one shapely Point per sensor from its (lon, lat) pair, then wrap `meta` in a
# GeoDataFrame declared as EPSG:4326 (the to_crs call targets the same EPSG code).
geometry = [shapely.geometry.Point(xy) for xy in zip(meta['lon'], meta['lat'])]
meta = gpd.GeoDataFrame(meta, geometry=geometry, crs={'init': 'epsg:4326'}).to_crs(fiona.crs.from_epsg(4326))

In [6]:
# Overview map of every sensor in `meta`, grouped into clusters by MarkerCluster.
f = folium.Figure(width=970, height=300)
m = folium.Map(
    location=[54.95 ,-1.560279],
    tiles='Stamen Terrain',
    zoom_start=9
).add_to(f)

# MarkerCluster receives the (lat, lon) pairs of all sensors.
folium.plugins.MarkerCluster(meta[['lat','lon']].values).add_to(m)
# The bare expression displays the map as the cell output.
m

## 1. District's grid

Shapefile of Middle Layer Super Output Areas (MLSOA), filtered by **Newcastle Upon Tyne zones**

In [6]:
# Read the MSOA boundaries shapefile from SHAPE_FOLDER.
gdf = gpd.read_file(SHAPE_FOLDER+'/Middle_Layer_Super_Output_Areas_December_2011_Full_Extent_Boundaries_in_England_and_Wales.shp')

# Keep only the areas whose name contains 'Newcastle upon Tyne', reproject them to
# EPSG:4326 and then overwrite the crs attribute with an equivalent EPSG:4326 dict.
gdf = gdf[gdf['msoa11nm'].str.contains('Newcastle upon Tyne')]
gdf = gdf.to_crs(fiona.crs.from_epsg(4326))
gdf.crs = {'init': 'epsg:4326', 'no_defs': True}

In [9]:
# Map of the Newcastle MSOA polygons.
f = folium.Figure(width=970, height=300)
m = folium.Map(
    location=[55.02 ,-1.560279],
    tiles='Stamen Terrain',
    zoom_start=10
).add_to(f)

# Only the first 23 rows of gdf are drawn.
# NOTE(review): the reason for the 23-row limit is not stated here - confirm.
folium.GeoJson(gdf[:23]).add_to(m)
m

Spatial join between Newcastle's MLSOA and sensors

In [7]:
# Tag every sensor with the MSOA polygon it intersects, then keep only the columns
# used afterwards.
# NOTE: this cell overwrites `meta`, so it is not idempotent - re-run the cells that
# build `meta` before running it again.
meta = gpd.sjoin(meta, gdf, rsuffix='grid' ,op='intersects')
meta = meta[['name','type','lat','lon','geometry','msoa11nm']]
meta.head()

Unnamed: 0,name,type,lat,lon,geometry,msoa11nm
0,new_new_emote_1172,Air Quality,54.986412,-1.595367,POINT (-1.595367 54.986412),Newcastle upon Tyne 017
9,new_new_emote_1171,Air Quality,54.987092,-1.594598,POINT (-1.594598 54.987092),Newcastle upon Tyne 017
76,CAJT_NCA1058_SR2_SR3,Traffic,54.988204,-1.59043,POINT (-1.59043006099675 54.98820376997431),Newcastle upon Tyne 017
121,CAJT_NCA1058_SR3_SR2,Traffic,54.988118,-1.590259,POINT (-1.59025900067755 54.9881178222662),Newcastle upon Tyne 017
306,aq_monitor915,High Precision Air Monitor,54.987092,-1.594598,POINT (-1.594598 54.987092),Newcastle upon Tyne 017


In [11]:
# MSOA polygons indexed by area name, and the number of sensors (all types) per area.
grid = gpd.GeoDataFrame(gdf.set_index('msoa11nm')['geometry'], crs={'init': 'epsg:4326'})
data = meta['msoa11nm'].value_counts()

# Keep only the areas that contain at least one sensor; the colour scale is hand-picked.
grid = grid.loc[data.index]
choropleth(grid, data, color_scale=[1,10,20,130], location=[55.0 ,-1.560279], zoom=11)

In [12]:
# Number of sensors per sensor type (meta now holds only sensors matched to a Newcastle MSOA).
meta['type'].value_counts()

Traffic                       110
Air Quality                    80
Environmental                  43
Weather                        18
Bee Hive                        7
High Precision Air Monitor      2
Name: type, dtype: int64

Filtering by **Air Quality** sensors

In [13]:
# Same choropleth as before, restricted to sensors of type 'Air Quality'.
grid = gpd.GeoDataFrame(gdf.set_index('msoa11nm')['geometry'], crs={'init': 'epsg:4326'})
data = meta[meta['type']=='Air Quality']['msoa11nm'].value_counts()
# Only areas with more than one Air Quality sensor are drawn; `data` itself still
# lists every area (see the printed counts).
grid = grid.loc[data[data>1].index]

print(data)
choropleth(grid, data, color_scale=[1,10,20,60], location=[55.0 ,-1.560279], zoom=11)

Newcastle upon Tyne 024    59
Newcastle upon Tyne 006     8
Newcastle upon Tyne 005     3
Newcastle upon Tyne 017     3
Newcastle upon Tyne 023     2
Newcastle upon Tyne 018     2
Newcastle upon Tyne 002     2
Newcastle upon Tyne 012     1
Name: msoa11nm, dtype: int64


Micro-level analysis of **Newcastle upon Tyne 024** (the area with the highest sensor count)

In [14]:
# Zoomed-in map of the sensors located in MSOA 'Newcastle upon Tyne 024'.
f = folium.Figure(width=970, height=300)
m = folium.Map(
    location=[54.98,-1.61],
    tiles='Stamen Terrain',
    zoom_start=13
).add_to(f)

# `meta` is not filtered by type here, so sensors of every type in the area are shown.
nut24 = meta.loc[meta['msoa11nm']=='Newcastle upon Tyne 024']
folium.plugins.MarkerCluster(nut24[['lat','lon']].values).add_to(m)
m

## 2. Rectangular grid

As an alternative to the district grid, a *rectangular grid* seems to be an effective way to spatially manipulate sensor data, because its **level of granularity is adjustable**.

In [17]:
# grid creation: 20 x 20 rectangles covering the bounding box of the sensors in `meta`
xcells, ycells = 20, 20
grid = rectangular_grid(xcells, ycells, meta)

# spatial join: sensors indexed by name, with the id of their cell in 'index_grid'
join = create_join_hash(meta, grid)

# map with all sensors: number of sensors per cell, thresholds derived from the counts
data = join['index_grid'].value_counts()
choropleth(grid=grid, 
           data=data, 
           color_scale=cscale(data), location=[55.00 ,-1.50], zoom=11)

In [26]:
# map for NO2 sensors
# Names of the sensors that report at least one NO2 reading.
no2sensors = sensors.loc[sensors['Variable']=='NO2','Sensor Name'].unique()
# Select the matching rows with a boolean mask: the result stays a GeoDataFrame
# (geometry and CRS intact), as required by the spatial join below, and no row is
# dropped because of an unrelated missing value.
no2meta = meta[meta['name'].isin(no2sensors)]

# Number of NO2 sensors per cell of the rectangular grid built above.
join = create_join_hash(no2meta, grid)
data = join['index_grid'].value_counts()

choropleth(grid=grid,
           data=data,
           color_scale=cscale(data), location=[55.00 ,-1.50], zoom=11)

