In [1]:
import functools
from collections import defaultdict
import numpy as np
import matplotlib
import matplotlib.cm as cm
from my_gmplot import gmplot
import pandas as pd
from multiprocessing import Pool
from glob import glob

In [2]:
# Two sample place ids kept around for ad-hoc experiments.
EXAMPLE_PLACE_ID_1 = 'ChIJZaR1M1hSUkYRxP0WkwYYy_k'
EXAMPLE_PLACE_ID_2 = 'ChIJy35LngZTUkYRrnSgEJgzuWk'
# Directory that holds one '<place_id>.csv' file per place.
DATA_PATH = '/mnt/sdc1/inon/norrecampus/data/by_place_5min/'
# Place ids are the CSV file names found under DATA_PATH with the '.csv' suffix cut off.
ALL_PLACE_IDS = [
    csv_path.rsplit('/', 1)[-1][:-len('.csv')]
    for csv_path in glob(DATA_PATH + '/*.csv')
]

In [3]:
# Place details restricted to the ids listed in ALL_PLACE_IDS (merge on the
# 'place_id' column), then indexed by place_id.
LAT_LNG = (
    pd.read_csv('/mnt/sdc1/inon/norrecampus/place_details.csv')
    .merge(pd.DataFrame().assign(place_id=ALL_PLACE_IDS), on='place_id')
    .set_index('place_id')
)
LAT_LNG.head()

Unnamed: 0_level_0,name,vicinity,lat,lng,type
place_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ChIJG8e8q1ZSUkYRL5Yp4xYTOtc,Jagtvej 155-153,København N,55.700785,12.555977,street_address
ChIJydKmJa1TUkYRofNij-GPBXo,Nørrebrogade 148-152,København N,55.694877,12.547986,street_address
ChIJfbnu3qxTUkYRkUsGjRdHZGE,Jagtvej,København N,55.694427,12.548895,route
ChIJre8_-PpSUkYRX-l8RrXs9lk,Østerbrogade 96,København Ø,55.701722,12.577434,street_address
ChIJT9erZFVSUkYRNiSDTV-sJzE,Tagensvej,København N,55.696247,12.561515,route


In [4]:
def rgb(vmin, vmax, value):
    """Return the '#RRGGBB' colour of `value` on matplotlib's `bwr` colormap.

    `value` is normalized with `Normalize(vmin, vmax)` before the colormap
    lookup. The alpha channel of the looked-up colour is discarded and each
    remaining channel is scaled by 255 and truncated with `int()`.
    """
    normalizer = matplotlib.colors.Normalize(vmin, vmax)
    mapper = cm.ScalarMappable(norm=normalizer, cmap=cm.bwr)
    # Drop the trailing alpha component, keep red/green/blue.
    channels = [int(255 * component) for component in mapper.to_rgba(value)[:-1]]
    return '#{:02X}{:02X}{:02X}'.format(*channels)

def rgb_a(vmin, vmax, value):
    """Return `value`'s `bwr` colour as a 4-tuple `(r, g, b, 1)`.

    `value` is normalized with `Normalize(vmin, vmax)`; the three colour
    channels are scaled by 255 and truncated with `int()`. The fourth element
    is always the constant 1 (presumably full opacity -- confirm with the
    consumer of this tuple).
    """
    normalizer = matplotlib.colors.Normalize(vmin, vmax)
    rgba = cm.ScalarMappable(norm=normalizer, cmap=cm.bwr).to_rgba(value)
    # Replace the colormap's own alpha with the fixed trailing 1.
    channels = [int(255 * component) for component in rgba[:-1]]
    return (*channels, 1)

In [5]:
def plot_duplicate_place_ids():
    """Plot every distinct coordinate of LAT_LNG and report the shared ones.

    Rows of LAT_LNG are grouped by their exact (lat, lng) pair. One circle is
    drawn per coordinate: coloured `rgb(0, 1, 1)` when more than one place
    sits on that coordinate and `rgb(0, 1, 0.5)` otherwise, with `ew=2` when
    the first place at the coordinate has type 'street_address' and `ew=6`
    otherwise. The map is written to 'duplicate_place_ids.html'.

    Returns:
        A flat list of `(place_id, type)` tuples for every place that shares
        its coordinate with at least one other place. The list is empty when
        no coordinate is shared.
    """
    places_by_coords = defaultdict(list)
    # The index of LAT_LNG is the place_id, so iterrows yields (place_id, row).
    for place_id, place in LAT_LNG.iterrows():
        places_by_coords[(place['lat'], place['lng'])].append((place_id, place['type']))

    norrecampus_center = 55.697731, 12.558122
    gmap = gmplot.GoogleMapPlotter(*norrecampus_center, 14)
    # The two colours do not depend on the coordinate, so compute them once.
    shared_color = rgb(0, 1, 1)
    unique_color = rgb(0, 1, 0.5)
    for (lat, lng), places in places_by_coords.items():
        gmap.circle(lat, lng, 5, shared_color if len(places) > 1 else unique_color,
                    ew=2 if places[0][1] == 'street_address' else 6)
    gmap.draw('duplicate_place_ids.html')

    # Flatten with a comprehension: unlike reduce() without an initial value,
    # this yields [] instead of raising TypeError when nothing is duplicated,
    # and it avoids building a new list for every concatenation.
    return [place
            for places in places_by_coords.values() if len(places) > 1
            for place in places]

# Draw the duplicates map and keep the (place_id, type) pairs it returns.
duplicate_place_ids = plot_duplicate_place_ids()
# Preview the first six pairs.
duplicate_place_ids[:6]

[('ChIJ3fM6s1JSUkYRupTpc_OITAk', 'street_address'),
 ('ChIJ3fM6s1JSUkYRu5Tpc_OITAk', 'street_address'),
 ('ChIJaUW-ZxtTUkYRwnLO9ykWeYc', 'route'),
 ('ChIJaUW-ZxtTUkYRw3LO9ykWeYc', 'street_address'),
 ('ChIJZxF44KxTUkYR1bYg7KPbzyY', 'route'),
 ('ChIJZxF44KxTUkYR1LYg7KPbzyY', 'route')]

In [15]:
# Drop every place whose id appears in duplicate_place_ids (all places on a
# shared coordinate are removed, none of them is kept).
LAT_LNG_NO_DUPS = LAT_LNG.loc[
    ~LAT_LNG.index.isin([place_id for place_id, _place_type in duplicate_place_ids])
]
LAT_LNG_NO_DUPS.to_csv('places_no_duplicates.csv')
# Sanity check: exactly one row was removed per listed duplicate.
assert len(LAT_LNG) - len(duplicate_place_ids) == len(LAT_LNG_NO_DUPS)

In [7]:
def one_place_since_2015(place_id):
    """Aggregate one place's `flow_bucket` and `speed_mean` from 2015 onwards.

    Reads `DATA_PATH + place_id + '.csv'`, keeps the rows whose
    `start_interval_s` compares `>= '2015-01-01'`, and groups the remaining
    `flow_bucket` / `speed_mean` values by `place_id`.

    NOTE(review): the date filter compares against a string, so it presumably
    relies on `start_interval_s` holding ISO-formatted text -- confirm.

    Returns:
        A DataFrame indexed by place_id with two-level columns: the measure
        (`flow_bucket`, `speed_mean`) and the statistic (`mean`, `std`).
    """
    readings = pd.read_csv(DATA_PATH + place_id + '.csv')
    since_2015 = readings[readings.start_interval_s >= '2015-01-01']
    measures = since_2015[['place_id', 'flow_bucket', 'speed_mean']]
    return measures.groupby('place_id').agg(['mean', 'std'])

# Aggregate every place in parallel and stack the per-place results.
# The pool is used as a context manager so its worker processes are shut down
# instead of being left alive for the rest of the kernel session. pd.concat
# drains the lazy imap_unordered iterator before the `with` block exits, so
# all results are collected before the pool is terminated.
with Pool(maxtasksperchild=1) as pool:
    avgs = pd.concat(pool.imap_unordered(one_place_since_2015, ALL_PLACE_IDS))
avgs.head()

Unnamed: 0_level_0,flow_bucket,flow_bucket,speed_mean,speed_mean
Unnamed: 0_level_1,mean,std,mean,std
place_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
ChIJRUH8NeNSUkYRK8w_lMvMqss,7.313944,1.801072,26.056915,8.421768
ChIJybFuBgRTUkYRTHGNpPM_g8c,6.776696,1.969826,24.804983,12.069287
ChIJHyMG7FFSUkYRYP3z8AjbOho,6.855222,1.770906,25.387028,8.615648
ChIJ49API01SUkYRXzPneRsFRJc,3.38353,1.934931,13.344248,17.232735
ChIJ0Tt28QNTUkYRczntLRAtIBQ,6.866719,2.143373,28.670842,11.316649


In [8]:
# Preview the per-place mean of speed_mean.
avgs['speed_mean']['mean'].head()

place_id
ChIJRUH8NeNSUkYRK8w_lMvMqss    26.056915
ChIJybFuBgRTUkYRTHGNpPM_g8c    24.804983
ChIJHyMG7FFSUkYRYP3z8AjbOho    25.387028
ChIJ49API01SUkYRXzPneRsFRJc    13.344248
ChIJ0Tt28QNTUkYRczntLRAtIBQ    28.670842
Name: mean, dtype: float64

In [9]:
# Join the per-place mean speed with the de-duplicated place details on the
# shared place_id index and preview the result.
(
    avgs.speed_mean['mean']
    .to_frame()
    .merge(LAT_LNG_NO_DUPS, left_index=True, right_index=True, how='inner')
    .head()
)

Unnamed: 0_level_0,mean,name,vicinity,lat,lng,type
place_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ChIJRUH8NeNSUkYRK8w_lMvMqss,26.056915,Østerbrogade,København Ø,55.696621,12.579546,route
ChIJybFuBgRTUkYRTHGNpPM_g8c,24.804983,Øster Søgade,København K,55.687756,12.567258,route
ChIJHyMG7FFSUkYRYP3z8AjbOho,25.387028,Tagensvej 88,København N,55.70221,12.549674,street_address
ChIJ49API01SUkYRXzPneRsFRJc,13.344248,Hillerødgade 4-14,København N,55.697444,12.542425,street_address
ChIJ0Tt28QNTUkYRczntLRAtIBQ,28.670842,Øster Søgade,København K,55.688482,12.56823,route


In [10]:
# Column-wise minimum of the mean speed joined with the place details.
# NOTE(review): .min() is applied to every merged column, text ones included --
# confirm that only the 'mean' column is of interest here.
(
    avgs.speed_mean['mean']
    .to_frame()
    .merge(LAT_LNG_NO_DUPS, left_index=True, right_index=True, how='inner')
    .min()
)

mean                   9.18585
name    Dag Hammarskjölds Allé
lat                    55.6832
lng                    12.5424
type                     route
dtype: object

In [11]:
def draw_on_map(ser, html_basename):
    """Draw one colour-coded circle per place for the values in `ser`.

    Args:
        ser: Series whose index is matched against the index of
            LAT_LNG_NO_DUPS (inner merge); `ser.name` is used to read the
            values back from the merged table.
        html_basename: Passed unchanged to `gmap.draw` as the output target.

    Values are keyed by their (lat, lng) pair, so when several merged rows
    share a coordinate the last one wins. Colours come from `rgb`, scaled
    between the smallest and largest value being drawn.
    """
    norrecampus_center = 55.697731, 12.558122
    gmap = gmplot.GoogleMapPlotter(*norrecampus_center, 14)
    unified = pd.merge(pd.DataFrame(ser), LAT_LNG_NO_DUPS, left_index=True, right_index=True, how='inner')
    ser_unfolded = {}
    for _, place in unified.iterrows():
        ser_unfolded[(place['lat'], place['lng'])] = place[ser.name]
    # BEWARE: don't use heatmap, it sums up a lot of points around intersections, thus unreliably showing high values in intersections.
    # MISLEADING: gmap.weighted_heatmap(ser_unfolded)  # gradient=[(0,0,0,0)] + [rgb_a(0, 1, e) for e in np.arange(0, 1.01, 0.1)]
    if ser_unfolded:
        # The colour scale is the same for every circle: compute its bounds
        # once instead of rescanning all values on each iteration. The guard
        # keeps the empty case working, where min()/max() would raise.
        lowest = min(ser_unfolded.values())
        highest = max(ser_unfolded.values())
        for (lat, lng), val in ser_unfolded.items():
            gmap.circle(lat, lng, 5, rgb(lowest, highest, val), ew=2)
    gmap.draw(html_basename)
    
# Render one map per aggregated statistic (mean and std of flow and speed).
for _stat_series, _html_name in (
    (avgs.flow_bucket['mean'], 'flow_mean.html'),
    (avgs.flow_bucket['std'], 'flow_std.html'),
    (avgs.speed_mean['mean'], 'speed5min_mean.html'),
    (avgs.speed_mean['std'], 'speed5min_std.html'),
):
    draw_on_map(_stat_series, _html_name)

In [12]:
# TODO: Add legends

In [13]:
def indicate_filipes_places():
    """Mark a fixed list of place ids on a map saved as 'filipes_places.html'.

    Only ids that are present in the index of LAT_LNG_NO_DUPS are drawn; each
    one gets a circle coloured `rgb(0, 1, 0.5)`.
    """
    filipes_places = [
        'ChIJ4QuVTlZSUkYRRDRPcHdYULQ',
        'ChIJBTt5RlZSUkYR_SyA8BgiwaM',
        'ChIJj1RhMlhSUkYRxwx00g4P0QE',
        'ChIJP6TdhFdSUkYRdrsWKXZMAs8',
        'ChIJZaR1M1hSUkYRxP0WkwYYy_k',
        'ChIJbcDEbFZSUkYRcnQFsIj5j5U',
        'ChIJf9Y9sFdSUkYRmaDEJhCweGc',
        'ChIJozaGTFZSUkYRNtWl2AGUPkI',
        'ChIJuYkcKlhSUkYRFPCipW5rTvU',
    ]
    norrecampus_center = 55.697731, 12.558122
    gmap = gmplot.GoogleMapPlotter(*norrecampus_center, 14)
    # Restrict the place table to the ids of interest before drawing.
    is_selected = LAT_LNG_NO_DUPS.index.isin(filipes_places)
    for _place_id, place in LAT_LNG_NO_DUPS[is_selected].iterrows():
        gmap.circle(place.lat, place.lng, 5, rgb(0, 1, 0.5), ew=2)
    gmap.draw('filipes_places.html')
    
# Write the 'filipes_places.html' map.
indicate_filipes_places()