# Where should Amazon build its next fulfillment center?

## Assumptions made:
* One main purpose for the fulfillment center is to enable Amazon’s core value: fast delivery
* Look outside the US to uncover new global market opportunities
* Select from countries where Amazon already operates to leverage existing infrastructure and regulatory compliance
* Use Amazon US as a benchmark for success 

In [1]:
# Import libraries
import pandas as pd
from sklearn.cluster import KMeans
import plotly.graph_objects as go

In [2]:
# Read the world-cities dataset (source: https://simplemaps.com/data/world-cities)
# and preview its first five rows.
data = pd.read_csv(filepath_or_buffer = 'Data/worldcities.csv')
data.head(n = 5)

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
0,Tokyo,Tokyo,35.685,139.7514,Japan,JP,JPN,Tōkyō,primary,35676000.0,1392685764
1,New York,New York,40.6943,-73.9249,United States,US,USA,New York,,19354922.0,1840034016
2,Mexico City,Mexico City,19.4424,-99.131,Mexico,MX,MEX,Ciudad de México,primary,19028000.0,1484247881
3,Mumbai,Mumbai,19.017,72.857,India,IN,IND,Mahārāshtra,admin,18978000.0,1356226629
4,São Paulo,Sao Paulo,-23.5587,-46.625,Brazil,BR,BRA,São Paulo,admin,18845000.0,1076532519


In [3]:
# Count the non-null entries in each column for rows whose country is Brazil
data[data['country'] == 'Brazil'].count()

city          387
city_ascii    387
lat           387
lng           387
country       387
iso2          387
iso3          387
admin_name    387
capital        27
population    387
id            387
dtype: int64

In [9]:
# Keep only the Brazilian rows, then report the subset's shape and preview it
brazil = data.loc[data['country'].isin(['Brazil'])]
print(brazil.shape)
brazil.head(n = 5)

(387, 11)


Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
4,São Paulo,Sao Paulo,-23.5587,-46.625,Brazil,BR,BRA,São Paulo,admin,18845000.0,1076532519
13,Rio de Janeiro,Rio de Janeiro,-22.925,-43.225,Brazil,BR,BRA,Rio de Janeiro,admin,11748000.0,1076887657
45,Belo Horizonte,Belo Horizonte,-19.915,-43.915,Brazil,BR,BRA,Minas Gerais,admin,5575000.0,1076967355
75,Porto Alegre,Porto Alegre,-30.05,-51.2,Brazil,BR,BRA,Rio Grande do Sul,admin,3917000.0,1076658221
79,Brasília,Brasilia,-15.7833,-47.9161,Brazil,BR,BRA,Distrito Federal,primary,3716996.0,1076144436


In [10]:
# Tally missing values per column in the Brazil subset
brazil.isna().sum()

city            0
city_ascii      0
lat             0
lng             0
country         0
iso2            0
iso3            0
admin_name      0
capital       360
population      0
id              0
dtype: int64

In [27]:
# Use k-means to group Brazilian cities into 25 clusters by location.
# NOTE(review): k-means measures Euclidean distance on raw (lat, lng) degrees,
# which only approximates ground distance — confirm this is acceptable here.
brazil2 = brazil[['lat', 'lng']]

# random_state is fixed so labels and centroids are identical on every
# Restart & Run All; n_init is pinned explicitly because scikit-learn's
# default for it differs between library versions.
kmeans = KMeans(n_clusters = 25, n_init = 10, random_state = 42)
kmeans.fit(brazil2)

y_means = kmeans.predict(brazil2)    # cluster label for each city row
centers = kmeans.cluster_centers_    # one row per cluster; columns follow brazil2: [lat, lng]

In [28]:
# Plot the cities (coloured by cluster label) and the cluster centroids on a map

# City markers: one point per row of brazil2, coloured by its k-means label.
fig = go.Figure(data = go.Scattergeo(
    name = 'Cities',
    lon = brazil2['lng'],
    lat = brazil2['lat'],
    mode = 'markers',
    marker = dict(
        size = 7,
        opacity = 0.8,
        reversescale = True,
        autocolorscale = True,
        symbol = 'circle',
        line = dict(
            width = 1,
            # A single colour string applies the outline to every marker.
            # (The previous one-element list with an alpha-less 'rgba(...)'
            # string was read as per-point colours and was malformed.)
            color = 'rgb(102, 102, 102)'
        ),
        cmin = 0,
        color = y_means,
        colorbar_title = "City Clusters"
    )
))

# Centroid markers: centers columns are [lat, lng], matching brazil2's column order.
fig.add_trace(go.Scattergeo(
    name = 'Centroids',
    lon = centers[:,1],
    lat = centers[:,0],
    mode = 'markers',
    marker = dict(
        size = 10,
        line = dict(
            width = 1,
            color = 'orange'
        ),
        color = 'orange',
    )
))

fig.update_layout(
    title = "Brazil City Clusters and Centroids ",
    geo = dict(
        scope = "south america",
        showland = True,
    )
)

# Print to new page: fig.write_html('first_figure.html', auto_open=True)
fig