In [1]:
%matplotlib inline

In [2]:
from lxml import etree

In [3]:
from datetime import datetime, timedelta

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.collections import PatchCollection

In [5]:
from mpl_toolkits.basemap import Basemap
from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon
from shapely.prepared import prep
from pysal.esda.mapclassify import Natural_Breaks as nb

In [6]:
from descartes import PolygonPatch
import fiona
from itertools import chain

In [7]:
# Read the bounding box of the shapefile; it drives the map extent used when
# the Basemap instance is built.
# The context manager closes the collection even if reading the bounds fails.
with fiona.open('data/new.shp') as shp:
    bds = shp.bounds
extra = 0.333  # fraction of the bounding-box width/height used as a margin
ll = (bds[0], bds[1])  # first corner of the bounding box
ur = (bds[2], bds[3])  # opposite corner of the bounding box
coords = list(chain(ll, ur))
# Bounding-box width and height, in the shapefile's own units.
w, h = coords[2] - coords[0], coords[3] - coords[1]

In [8]:
coords

[29.1680027, 59.5560006, 31.1729986, 60.3449994]

In [9]:
# Transverse Mercator map with projection origin lon_0=30.5, lat_0=60.
# Every corner is pulled inwards by `extra` times the bounding-box size, so the
# map shows the central part of the shapefile extent rather than all of it.
lon_margin = extra * w
lat_margin = extra * h
west, south, east, north = coords
m = Basemap(
    projection='tmerc',
    ellps='WGS84',
    lon_0=30.5,
    lat_0=60.,
    lat_ts=0,
    llcrnrlon=west + lon_margin,
    llcrnrlat=south + lat_margin,
    urcrnrlon=east - lon_margin,
    urcrnrlat=north - lat_margin,
    resolution='i',
    suppress_ticks=True)

In [10]:
# Load the shapefile into the map object under the attribute name 'london'
# (read back later as m.london) without drawing the boundaries.
m.readshapefile(shapefile='data/new', name='london', drawbounds=False)

(254753,
 5,
 [29.1680027, 59.5560006, 0.0, 0.0],
 [31.1729986, 60.3449994, 0.0, 0.0])

In [11]:
# Set up a map dataframe: one row per polygon loaded by m.readshapefile(),
# expressed in the map's projected coordinates.
df_map = pd.DataFrame({
    'poly': [Polygon(ward_coords) for ward_coords in m.london]})
# Area in projected units squared (square metres, assuming the projection
# works in metres as the column name implies).
df_map['area_m'] = df_map['poly'].map(lambda x: x.area)
# 1 km^2 == 1,000,000 m^2; the previous divisor of 100000 was off by a
# factor of ten.
df_map['area_km'] = df_map['area_m'] / 1000000

In [12]:
# Build one matplotlib patch per polygon; every patch shares the same grey
# fill/edge styling and sits at zorder 4.
patch_style = dict(fc='#555555', ec='#787878', lw=0.25, alpha=.9, zorder=4)
df_map['patches'] = [
    PolygonPatch(poly, **patch_style) for poly in df_map['poly']]

In [80]:
# Cell report table, later indexed by its 'Cell ID' column.
report_df = pd.read_csv(filepath_or_buffer='report.csv')

# Event log; only the first 50000 rows are loaded.
df = pd.read_csv(filepath_or_buffer='petrovsky_11.csv', nrows=50000)


def index_of_min(values):
    """Return the position of the smallest element of ``values``.

    When the minimum occurs more than once the first position wins. An empty
    sequence makes ``min`` raise ``ValueError``.
    """
    positions = range(len(values))
    return min(positions, key=lambda pos: values[pos])


def get_time(x):
    """Parse a ``DD.MM.YYYY HH.MM.SS`` timestamp string into a ``datetime``."""
    timestamp_format = "%d.%m.%Y %H.%M.%S"
    parsed = datetime.strptime(x, timestamp_format)
    return parsed


# Start and end of the analysis window, parsed with get_time().
from_time, to_time = (
    get_time(stamp)
    for stamp in ("24.11.2015 11.00.00", "24.11.2015 15.00.00"))


def create_aligned_path(data, start_time, end_time, interval):
    """Sample a row's cell sequence on a regular time grid.

    For every grid instant from ``start_time`` up to and including
    ``end_time``, in steps of ``interval`` minutes, pick the cell whose event
    timestamp is closest to that instant (the earliest such event on ties).

    Args:
        data: row-like object whose ``"cellid"`` entry is a sequence of cell
            ids and whose ``"time"`` entry is the parallel sequence of
            timestamp strings understood by ``get_time``.
        start_time: first grid instant (``datetime``).
        end_time: last instant that may be sampled (``datetime``).
        interval: grid step in minutes; must be positive.

    Returns:
        List with one cell id per grid instant.

    Raises:
        ValueError: if ``interval`` is not positive, or (raised by ``min``)
            if there are no events to sample from.
    """
    if interval <= 0:
        # A non-positive step would never move past end_time.
        raise ValueError("interval must be a positive number of minutes")
    lacs, times = data["cellid"], data["time"]
    # Parse the timestamps once; doing it inside the loop repeated the same
    # work for every grid instant.
    event_times = [get_time(some_time) for some_time in times]
    step = timedelta(minutes=interval)
    current_time = start_time
    acc = []
    while current_time <= end_time:
        time_differences = [abs(current_time - event_time) for event_time in event_times]
        index = index_of_min(time_differences)
        acc.append(lacs[index])
        current_time += step
    return acc


def between_time(x):
    """Tell whether timestamp string ``x`` lies strictly between the
    module-level ``from_time`` and ``to_time`` (both bounds excluded)."""
    moment = get_time(x)
    return from_time < moment < to_time

# Keep only the events that fall inside the analysis window and that carry a
# cell id.
in_window = df['time'].apply(between_time)
df1 = df[in_window]
df2 = df1.dropna(subset=['cellid'])

# For every subscriber ('subsid') collect the list of cell ids and, in the
# same order, the list of event timestamps.
events_by_subscriber = df2.groupby('subsid')
lacs = events_by_subscriber['cellid'].apply(lambda group: group.tolist())
times = events_by_subscriber['time'].apply(lambda group: group.tolist())

# Put both series side by side: one row per subscriber.
data = pd.concat([lacs, times], axis=1).reset_index()

# Add the time-aligned cell sequence (one sample every 10 minutes).
data['path'] = data.apply(
    lambda row: create_aligned_path(row, from_time, to_time, 10), axis=1)

# Index the report by cell id: {cell id: [values of the remaining columns]}.
# Duplicate cell ids are dropped first, keeping the first occurrence.
report_df.drop_duplicates(subset=['Cell ID'], inplace=True)
report_by_cell = report_df.set_index('Cell ID')
report_dict = report_by_cell.T.to_dict('list')

In [87]:
# Project every report entry to map coordinates. Fields 1 and 2 of each
# report_dict value are handed to m() as x and y
# (presumably longitude and latitude -- TODO confirm against report.csv).
projected = [Point(m(row[1], row[2])) for row in report_dict.values()]
map_points = pd.Series(projected)
plaque_points = MultiPoint(list(map_points.values))

In [88]:
# Materialise the members of the MultiPoint into a plain Python list.
listt = list(plaque_points)

In [89]:
# Sanity check: how many projected points were collected.
len(listt)

34832

In [None]:
import matplotlib.colors as colors
from random import randint

plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='w', frame_on=False)

# The members of plaque_points were already projected with m() when
# map_points was built, so their x/y are passed to scatter as-is.
projected_points = list(plaque_points)
point_xs = [geom.x for geom in projected_points]
point_ys = [geom.y for geom in projected_points]
dev = m.scatter(
    point_xs, point_ys, 5,
    marker='.', lw=.25,
    facecolor='#33ccff', edgecolor='w',
    alpha=0.9, antialiased=True,
    zorder=3, label='Blue Plaque Locations')

# Draw a slice (positions 50..74) of the subscriber paths that visit at least
# three distinct cells, each path as one polyline in its own named colour.
# The colour names get their own variable so the `colors` module imported at
# the top of this cell is not overwritten.
color_names = list(colors.cnames.keys())
paths = [p for p in data['path'] if len(set(p)) >= 3][50:75]
for path_index, path in enumerate(paths):
    # Cycle through the colour names if there are more paths than colours.
    cur_color = color_names[path_index % len(color_names)]
    # Project every cell of the path exactly once (fields 1 and 2 of the
    # report entry are the coordinates handed to m()).
    projected = [m(report_dict[cell][1], report_dict[cell][2]) for cell in path]
    if len(projected) < 2:
        # Nothing to connect.
        continue
    xs, ys = zip(*projected)
    ax.plot(xs, ys, linestyle='-', color=cur_color)

print('Plotting done')

# Add the shapefile polygons to the axes as a single collection;
# match_original=True keeps the colours set on the individual patches.
ax.add_collection(PatchCollection(df_map['patches'].values, match_original=True))

fig.set_size_inches(7.22 * 3, 5.25 * 3)
# `alpha` is not a savefig() option: older matplotlib releases ignored unknown
# keywords and newer ones may reject them, so it is no longer passed.
fig.savefig('data/london_plaques.png', dpi=400)