In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import folium
from geopandas.tools import geocode
from folium import Marker
from folium.plugins import MarkerCluster

You'll use the `embed_map()` function from the previous exercise to visualize your maps.

In [2]:
def embed_map(m, file_name):
    from IPython.display import IFrame
    m.save(file_name)
    return IFrame(file_name, width='100%', height='500px')

In [3]:
uni = pd.read_csv('top_universities.csv')
uni.head()

Unnamed: 0,Name
0,University of Oxford
1,University of Cambridge
2,Imperial College London
3,ETH Zurich
4,UCL


Adding some African Universities

In [5]:
GH ='University of Ghana'
SA = 'University of Pretoria'
UG = 'Makerere Univeristy'
NG = 'University of Ibadan'
KE = 'University of Nairobi'

In [6]:
top_uni = uni.append(pd.DataFrame([GH,SA,UG,NG,KE], columns=uni.columns), ignore_index=True)
top_uni.head()

Unnamed: 0,Name
0,University of Oxford
1,University of Cambridge
2,Imperial College London
3,ETH Zurich
4,UCL


In [7]:
def coder(row):
    try:
        point = geocode(row, provider='nominatim').geometry.iloc[0]
        return pd.Series({'Lat':point.y, 'Long':point.x,'geometry':point})
    except:
        None
top_uni[['Latitude','Longitude','Geometry']]=top_uni.apply(lambda x: coder(x['Name']), axis=1)
top_uni.head()

Unnamed: 0,Name,Latitude,Longitude,Geometry
0,University of Oxford,51.758708,-1.255668,POINT (-1.255668482609204 51.75870755)
1,University of Cambridge,52.199852,0.119739,POINT (0.1197386574107438 52.1998523)
2,Imperial College London,51.498871,-0.175608,POINT (-0.175607955839404 51.49887085)
3,ETH Zurich,47.377327,8.547509,POINT (8.5475089 47.3773269)
4,UCL,51.524126,-0.13293,POINT (-0.1329302373595478 51.52412645)


In [8]:
#what percentage of universities were successfully loaded
print("{}% of universities were loaded".format(
    (1-sum(np.isnan(top_uni['Latitude']))/len(top_uni))*100)
)

90.47619047619048% of universities were loaded


In [9]:
#Drop all unloaded universities
top_uni=top_uni.loc[~np.isnan(top_uni['Latitude'])]
top_uni.head()

Unnamed: 0,Name,Latitude,Longitude,Geometry
0,University of Oxford,51.758708,-1.255668,POINT (-1.255668482609204 51.75870755)
1,University of Cambridge,52.199852,0.119739,POINT (0.1197386574107438 52.1998523)
2,Imperial College London,51.498871,-0.175608,POINT (-0.175607955839404 51.49887085)
3,ETH Zurich,47.377327,8.547509,POINT (8.5475089 47.3773269)
4,UCL,51.524126,-0.13293,POINT (-0.1329302373595478 51.52412645)


In [10]:
#convert to GeoDataFrame
univ = gpd.GeoDataFrame(top_uni, geometry=top_uni.Geometry)
univ.crs={'init':'4326'}
univ.head()

Unnamed: 0,Name,Latitude,Longitude,Geometry,geometry
0,University of Oxford,51.758708,-1.255668,POINT (-1.255668482609204 51.75870755),POINT (-1.25567 51.75871)
1,University of Cambridge,52.199852,0.119739,POINT (0.1197386574107438 52.1998523),POINT (0.11974 52.19985)
2,Imperial College London,51.498871,-0.175608,POINT (-0.175607955839404 51.49887085),POINT (-0.17561 51.49887)
3,ETH Zurich,47.377327,8.547509,POINT (8.5475089 47.3773269),POINT (8.54751 47.37733)
4,UCL,51.524126,-0.13293,POINT (-0.1329302373595478 51.52412645),POINT (-0.13293 51.52413)


In [11]:
#Map Europe
map=folium.Map(location=[54,15], tiles='cartodbpositron',zoom_start=5)

#Add universities
for idx, row in univ.iterrows():
    Marker([row['Latitude'], row['Longitude']], popup=row['Name']).add_to(map)
map

# Exercise

## Introduction

You are a Starbucks big data analyst ([that’s a real job!](https://www.forbes.com/sites/bernardmarr/2018/05/28/starbucks-using-big-data-analytics-and-artificial-intelligence-to-boost-performance/#130c7d765cdc)) looking to find the next store into a [Starbucks Reserve Roastery](https://www.businessinsider.com/starbucks-reserve-roastery-compared-regular-starbucks-2018-12#also-on-the-first-floor-was-the-main-coffee-bar-five-hourglass-like-units-hold-the-freshly-roasted-coffee-beans-that-are-used-in-each-order-the-selection-rotates-seasonally-5).  These roasteries are much larger than a typical Starbucks store and have several additional features, including various food and wine options, along with upscale lounge areas.  You'll investigate the demographics of various counties in the state of California, to determine potentially suitable locations.

<center>
<img src="https://i.imgur.com/BIyE6kR.png" width="450"><br/><br/>
</center>

Before you get started, set everything up.

All the libraries and dependencies have already been imported so I will get go ahead and import the data

In [12]:
starbucks =pd.read_csv('starbucks_locations.csv')
starbucks.head()

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
0,10429-100710,Palmdale & Hwy 395,14136 US Hwy 395 Adelanto CA,Adelanto,-117.4,34.51
1,635-352,Kanan & Thousand Oaks,5827 Kanan Road Agoura CA,Agoura,-118.76,34.16
2,74510-27669,Vons-Agoura Hills #2001,5671 Kanan Rd. Agoura Hills CA,Agoura Hills,-118.76,34.15
3,29839-255026,Target Anaheim T-0677,8148 E SANTA ANA CANYON ROAD AHAHEIM CA,AHAHEIM,-117.75,33.87
4,23463-230284,Safeway - Alameda 3281,2600 5th Street Alameda CA,Alameda,-122.28,37.79


Check with stores have missing values: Latitude and Longitude especially

In [13]:
print(starbucks.isnull().sum())

Store Number    0
Store Name      0
Address         0
City            0
Longitude       5
Latitude        5
dtype: int64


So there are 5 cities that do not have longitude and Latitude coordinates

In [15]:
rows_with_missing= starbucks[starbucks['Longitude'].isnull()]
rows_with_missing.head()

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,,
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,,
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,,
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,,
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,,


The 5 locations without Latitude and Longitude coordinates are all in Berkeley. 

We want to fill all the missing Longitude and Latitude values

In [16]:
def coder2(row):
    try:
        points = geocode(row, provider='nominatim').geometry.iloc[0]
        return pd.Series({'Longitude':points.x, 'Latitude':points.y})
    except:
        return None
rows_with_missing[['Latitude','Longitude']]= rows_with_missing.apply(lambda x: coder2(x['Address']), axis=1)
rows_with_missing

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,37.868839,-122.26823
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,37.891471,-122.280009
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,37.881177,-122.269869
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,37.855947,-122.25937
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,37.87041,-122.266079


In [34]:
starbucks = starbucks.append(rows_with_missing)
starbucks

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
0,10429-100710,Palmdale & Hwy 395,14136 US Hwy 395 Adelanto CA,Adelanto,-117.400000,34.510000
1,635-352,Kanan & Thousand Oaks,5827 Kanan Road Agoura CA,Agoura,-118.760000,34.160000
2,74510-27669,Vons-Agoura Hills #2001,5671 Kanan Rd. Agoura Hills CA,Agoura Hills,-118.760000,34.150000
3,29839-255026,Target Anaheim T-0677,8148 E SANTA ANA CANYON ROAD AHAHEIM CA,AHAHEIM,-117.750000,33.870000
4,23463-230284,Safeway - Alameda 3281,2600 5th Street Alameda CA,Alameda,-122.280000,37.790000
...,...,...,...,...,...,...
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,37.868839,-122.268230
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,37.891471,-122.280009
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,37.881177,-122.269869
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,37.855947,-122.259370


In [35]:
starbucks[starbucks['Latitude'].isnull()]

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,,
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,,
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,,
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,,
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,,
