In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
import folium
import re

In [3]:
df = pd.read_csv('data/Cholera.csv')
df.head()

Unnamed: 0,count,geometry
0,3,"<Point><coordinates>-0.13793,51.513418</coordi..."
1,2,"<Point><coordinates>-0.137883,51.513361</coord..."
2,1,"<Point><coordinates>-0.137853,51.513317</coord..."
3,1,"<Point><coordinates>-0.137812,51.513262</coord..."
4,4,"<Point><coordinates>-0.137767,51.513204</coord..."


In [4]:
def coords_grabber(str):
    coords = re.search('<Point><coordinates>(.+)</coordinates></Point>', str).group(1)
    coords = coords.split(',')
    lat = float(coords[1])
    long = float(coords[0])
    return (lat, long)
def lat(str):
    return coords_grabber(str)[0]
def lon(str):
    return coords_grabber(str)[1]

In [5]:
lats = df['geometry'].apply(lat)

In [6]:
df['lat'] = lats

In [7]:
df['lon'] = df['geometry'].apply(lon)

In [8]:
df = df.drop('geometry', axis=1)

In [9]:
df_deaths = df[df['count'] >= 0]
df_wells = df[df['count'] == -999]

In [10]:
df_deaths.tail()

Unnamed: 0,count,lat,lon
245,3,51.514526,-0.137108
246,2,51.514706,-0.137065
247,1,51.512311,-0.138474
248,1,51.511998,-0.138123
249,1,51.511856,-0.137762


In [11]:
df_wells

Unnamed: 0,count,lat,lon
250,-999,51.513341,-0.136668
251,-999,51.513876,-0.139586
252,-999,51.514906,-0.139671
253,-999,51.512354,-0.13163
254,-999,51.512139,-0.133594
255,-999,51.511542,-0.135919
256,-999,51.510019,-0.133962
257,-999,51.511295,-0.138199


In [12]:
first_map = folium.Map(location = [51.513, -0.135], zoom_start=16.25, tiles='Stamen Watercolor')

In [13]:
for idx in df_deaths.index:
    lat = df_deaths.loc[idx]['lat']
    lon = df_deaths.loc[idx]['lon']
    size = 2 * df_deaths.loc[idx]['count']
    folium.CircleMarker(location = [lat, lon], color='rgba(171,52,40, 0.35)', radius=size).add_to(first_map)

In [14]:
for idx in df_wells.index:
    lat = df_wells.loc[idx]['lat']
    lon = df_wells.loc[idx]['lon']
    folium.CircleMarker(location = [lat, lon], fill_color='rgba(17,249,199, 1)', radius=10).add_to(first_map)

In [15]:
first_map

In [16]:
def random_color():
    r = np.random.randint(1, 256)
    g = np.random.randint(1, 256)
    b = np.random.randint(1, 256)
    return 'rgba({},{},{}, 1)'.format(r, g, b)

In [17]:
colors = pd.Series([random_color() for i in df_wells.index], index=df_wells.index)

In [18]:
colors

250      rgba(81,64,165, 1)
251    rgba(115,124,242, 1)
252      rgba(183,239,2, 1)
253    rgba(148,101,216, 1)
254       rgba(3,173,24, 1)
255      rgba(118,89,72, 1)
256       rgba(182,59,8, 1)
257     rgba(58,188,161, 1)
dtype: object

In [19]:
df_wells['color'] = colors

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [20]:
df_wells

Unnamed: 0,count,lat,lon,color
250,-999,51.513341,-0.136668,"rgba(81,64,165, 1)"
251,-999,51.513876,-0.139586,"rgba(115,124,242, 1)"
252,-999,51.514906,-0.139671,"rgba(183,239,2, 1)"
253,-999,51.512354,-0.13163,"rgba(148,101,216, 1)"
254,-999,51.512139,-0.133594,"rgba(3,173,24, 1)"
255,-999,51.511542,-0.135919,"rgba(118,89,72, 1)"
256,-999,51.510019,-0.133962,"rgba(182,59,8, 1)"
257,-999,51.511295,-0.138199,"rgba(58,188,161, 1)"


In [21]:
for idx in df_wells.index:
    lat = df_wells.loc[idx]['lat']
    lon = df_wells.loc[idx]['lon']
    color = df_wells.loc[idx]['color']
    folium.CircleMarker(location = [lat, lon], color='red', fill_color=color, radius=10).add_to(first_map)

In [22]:
first_map

In [23]:
def distance_to_well(death_row, well_row):
    lat_dist = death_row['lat'] - well_row['lat']
    lon_dist = death_row['lon'] - well_row['lon']
    return (lat_dist**2 + lon_dist**2)**.5
def nearest_well(death_row):
    min_dist = 5000
    closest_well = None
    for idx in df_wells.index:
        dist = distance_to_well(death_row, df_wells.loc[idx])
        if dist < min_dist:
            min_dist = dist
            closest_well = idx
    return closest_well

In [24]:
closest_well = df_deaths.apply(nearest_well, axis=1)

In [25]:
df_deaths['closest_well'] = closest_well

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [26]:
df_deaths.head()

Unnamed: 0,count,lat,lon,closest_well
0,3,51.513418,-0.13793,250
1,2,51.513361,-0.137883,250
2,1,51.513317,-0.137853,250
3,1,51.513262,-0.137812,250
4,4,51.513204,-0.137767,250


In [27]:
deaths_and_wells = df_deaths.merge(df_wells, how='left', left_on='closest_well', right_index=True)

In [28]:
deaths_and_wells.tail()

Unnamed: 0,count_x,lat_x,lon_x,closest_well,count_y,lat_y,lon_y,color
245,3,51.514526,-0.137108,250,-999,51.513341,-0.136668,"rgba(81,64,165, 1)"
246,2,51.514706,-0.137065,250,-999,51.513341,-0.136668,"rgba(81,64,165, 1)"
247,1,51.512311,-0.138474,257,-999,51.511295,-0.138199,"rgba(58,188,161, 1)"
248,1,51.511998,-0.138123,257,-999,51.511295,-0.138199,"rgba(58,188,161, 1)"
249,1,51.511856,-0.137762,257,-999,51.511295,-0.138199,"rgba(58,188,161, 1)"


In [29]:
df_deaths.tail()

Unnamed: 0,count,lat,lon,closest_well
245,3,51.514526,-0.137108,250
246,2,51.514706,-0.137065,250
247,1,51.512311,-0.138474,257
248,1,51.511998,-0.138123,257
249,1,51.511856,-0.137762,257


In [30]:
second_map = folium.Map(location = [51.513, -0.135], zoom_start=16.25, tiles='Stamen Watercolor')
for idx in deaths_and_wells.index:
    lat = deaths_and_wells.loc[idx]['lat_x']
    lon = deaths_and_wells.loc[idx]['lon_x']
    size = 2 * deaths_and_wells.loc[idx]['count_x']
    color = deaths_and_wells.loc[idx]['color']
    folium.CircleMarker(location = [lat, lon], color=color, fill_color=color, radius=size).add_to(second_map)
for idx in df_wells.index:
    lat = df_wells.loc[idx]['lat']
    lon = df_wells.loc[idx]['lon']
    color = df_wells.loc[idx]['color']
    folium.CircleMarker(location = [lat, lon], color=color, fill_color='black', radius=10).add_to(second_map)
second_map

In [31]:
well_toll = deaths_and_wells.groupby('closest_well')['count_x'].sum()

In [32]:
well_toll

closest_well
250    276
251     71
252      3
254     78
255     46
257     15
Name: count_x, dtype: int64

In [33]:
type(well_toll)

pandas.core.series.Series

In [34]:
df_wells['well_toll'] = well_toll

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [35]:
df_wells

Unnamed: 0,count,lat,lon,color,well_toll
250,-999,51.513341,-0.136668,"rgba(81,64,165, 1)",276.0
251,-999,51.513876,-0.139586,"rgba(115,124,242, 1)",71.0
252,-999,51.514906,-0.139671,"rgba(183,239,2, 1)",3.0
253,-999,51.512354,-0.13163,"rgba(148,101,216, 1)",
254,-999,51.512139,-0.133594,"rgba(3,173,24, 1)",78.0
255,-999,51.511542,-0.135919,"rgba(118,89,72, 1)",46.0
256,-999,51.510019,-0.133962,"rgba(182,59,8, 1)",
257,-999,51.511295,-0.138199,"rgba(58,188,161, 1)",15.0


In [36]:
killer_wells = df_wells.dropna()

In [37]:
killer_wells

Unnamed: 0,count,lat,lon,color,well_toll
250,-999,51.513341,-0.136668,"rgba(81,64,165, 1)",276.0
251,-999,51.513876,-0.139586,"rgba(115,124,242, 1)",71.0
252,-999,51.514906,-0.139671,"rgba(183,239,2, 1)",3.0
254,-999,51.512139,-0.133594,"rgba(3,173,24, 1)",78.0
255,-999,51.511542,-0.135919,"rgba(118,89,72, 1)",46.0
257,-999,51.511295,-0.138199,"rgba(58,188,161, 1)",15.0


In [38]:
third_map = folium.Map(location = [51.513, -0.135], zoom_start=16.25, tiles='Stamen Watercolor')
for idx in killer_wells.index:
    lat = killer_wells.loc[idx]['lat']
    lon = killer_wells.loc[idx]['lon']
    color = killer_wells.loc[idx]['color']
    size = np.sqrt(killer_wells.loc[idx]['well_toll']) * 6
    folium.CircleMarker(location = [lat, lon], color=color, fill_color=color, radius=size).add_to(third_map)
third_map