# IBM Data Science Professional Certificate Capstone

This notebook contains the neighborhood analysis project for the data science capstone course on Coursera.

## Introduction
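This project looks for promising locations for a new bakery in Gainesville. The city is divided into a grid of districts, venue data for each district is collected from the Foursquare API, the districts are clustered by their mix of venue categories with k-means, and finally the districts that belong to the cluster containing the most bakeries, but that do not yet have a bakery of their own, are identified.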

## Part 1 - Identifying our districts

In [1]:
# Our needed imports.
!conda install -c conda-forge folium --yes
!conda install -c conda-forge geopy --yes
import folium
import ibm_boto3
import json
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
import types
from botocore.client import Config
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from IPython.display import Image 
from sklearn.cluster import KMeans

Solving environment: done

# All requested packages already installed.

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.50-py_0           conda-forge
    geopy:         1.22.0-pyh9f0ad1d_0 conda-forge


Downloading and Extracting Packages
geopy-1.22.0         | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ##################################### | 100% 
Preparing t

In [2]:
# Bounding box of Gainesville: latitudes of the north/south edges and
# longitudes of the west/east edges.
gainesville_north, gainesville_south = 29.711381, 29.596737
gainesville_west, gainesville_east = -82.453961, -82.262119

In [3]:
# Grid resolution: how many rows and columns of districts to create.
DISTRICT_ROWS, DISTRICT_COLUMNS = 11, 16

# Point used as the center of the folium maps.
GAINESVILLE_LATITUDE, GAINESVILLE_LONGITUDE = 29.662737, -82.370212

In [4]:
# Work out the height and width of a single district.
lat_diff = gainesville_north - gainesville_south
# West minus east yields a negative span, so long_segment is negative too;
# subtracting it from a western boundary therefore steps eastward.
long_diff = gainesville_west - gainesville_east
lat_segment, long_segment = lat_diff / DISTRICT_ROWS, long_diff / DISTRICT_COLUMNS

In [5]:
# Generate the center point of every district in the grid.
# Rows run north to south and columns run west to east; each district is
# named '<row>-<column>'. The records are collected in a plain list and the
# DataFrame is built once at the end, rather than calling DataFrame.append for
# every district (quadratic copying, and removed in pandas 2.0).
district_records = []
north_boundary = gainesville_north
for row in range(DISTRICT_ROWS):
    south_boundary = north_boundary - lat_segment
    row_center = (north_boundary + south_boundary) / 2
    west_boundary = gainesville_west
    for column in range(DISTRICT_COLUMNS):
        # long_segment is negative (west minus east), so subtracting it moves east.
        east_boundary = west_boundary - long_segment
        column_center = (east_boundary + west_boundary) / 2
        west_boundary = east_boundary
        district_records.append(('{}-{}'.format(row, column), row_center, column_center))
    north_boundary = south_boundary

gainesville_districts = pd.DataFrame(district_records, columns=['District', 'Lat', 'Long'])
gainesville_districts.head()

Unnamed: 0,District,Lat,Long
0,0-0,29.70617,-82.447966
1,0-1,29.70617,-82.435976
2,0-2,29.70617,-82.423986
3,0-3,29.70617,-82.411996
4,0-4,29.70617,-82.400005


In [6]:
# Build a map centered on Gainesville so the district grid can be inspected.
general_map = folium.Map(location=[GAINESVILLE_LATITUDE, GAINESVILLE_LONGITUDE], zoom_start=12)

# Add one circle marker at the center of each district.
for district_lat, district_long in zip(gainesville_districts['Lat'], gainesville_districts['Long']):
    marker = folium.CircleMarker(
        [district_lat, district_long],
        radius=17,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(general_map)

general_map

## Part 2 - Getting businesses for each district

In [7]:
# The code was removed by Watson Studio for sharing.

In [8]:
# Empty frame that will hold one row per venue found in a district.
district_venues = pd.DataFrame(columns=[
    'District', 'Lat', 'Long',
    'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category',
])

In [9]:
# Function for getting all venues in an area.
def get_venues(lat, long, radius, limit, district=None):
    """Fetch venues around a point from the Foursquare venues/explore endpoint.

    Parameters
    ----------
    lat, long : coordinates sent as the `ll` query parameter.
    radius : value sent as the `radius` query parameter.
    limit : value sent as the `limit` query parameter.
    district : optional label used only in the diagnostic message printed
        when no venues can be extracted; the coordinates are used when omitted.

    Returns
    -------
    list
        The `items` of the first group in the response, or an empty list when
        the response does not contain that structure.

    Notes
    -----
    Reads CLIENT_ID, CLIENT_SECRET and VERSION from the notebook's globals.
    """
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, long),
        'radius': radius,
        'limit': limit,
    }

    # A timeout keeps one stalled request from hanging the whole district loop.
    r = requests.get('https://api.foursquare.com/v2/venues/explore', params=params, timeout=30)
    results = r.json()

    try:
        return results['response']['groups'][0]['items']
    except (KeyError, IndexError):
        # Report using our own arguments rather than a loop variable that may
        # or may not exist in the caller's global scope, and fall back to the
        # whole payload when there is no 'response' key to show.
        where = district if district is not None else '({}, {})'.format(lat, long)
        print('Trouble finding venues for {}. Returned response was:'.format(where), results.get('response', results))
        return []
        

In [10]:
# Look up the venues around every district center and record one row per venue.
# Rows are accumulated in a list and the DataFrame is built once at the end:
# DataFrame.append inside a loop copies the whole frame on every call (and was
# removed in pandas 2.0), and rebuilding the frame from scratch means re-running
# this cell no longer duplicates rows.
district_count = len(gainesville_districts)
venue_records = []
for index, row in gainesville_districts.iterrows():
    # Lightweight progress indicator, printed every tenth district.
    if index % 10 == 0:
        print('District {} of {}...'.format(index, district_count))
    venues = get_venues(row['Lat'], row['Long'], 500, 100)
    # Add each venue to our records, in the column order of district_venues.
    for venue in venues:
        venue_records.append([
            row['District'],
            row['Lat'],
            row['Long'],
            venue['venue']['name'],
            venue['venue']['location']['lat'],
            venue['venue']['location']['lng'],
            venue['venue']['categories'][0]['name'],
        ])

district_venues = pd.DataFrame(venue_records, columns=district_venues.columns)

District 0 of 176...
District 10 of 176...
District 20 of 176...
District 30 of 176...
District 40 of 176...
District 50 of 176...
District 60 of 176...
District 70 of 176...
District 80 of 176...
District 90 of 176...
District 100 of 176...
District 110 of 176...
District 120 of 176...
District 130 of 176...
District 140 of 176...
District 150 of 176...
District 160 of 176...
District 170 of 176...


In [11]:
# Quick preview of our venues: print the (rows, columns) shape, then display
# the first few rows as the cell's output.
print(district_venues.shape)
district_venues.head()

(771, 7)


Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0-0,29.70617,-82.447966,The Hammock Lake,29.707468,-82.44342,Lake
1,0-1,29.70617,-82.435976,"Agile Sports Analytics, LLC",29.704679,-82.431546,Sports Club
2,0-2,29.70617,-82.423986,Flying Ten Airport-OJ8,29.702759,-82.425678,Airport Terminal
3,0-4,29.70617,-82.400005,Db tennis club,29.704029,-82.396687,Tennis Court
4,0-5,29.70617,-82.388015,Starbucks,29.704383,-82.389313,Coffee Shop


## Part 3 - Get just restaurants for each district

In [12]:
# Foursquare category ID sent as `categoryId` when searching for restaurants
# (presumably Foursquare's top-level "Food" category; verify against the API docs).
food_category = "4d4b7105d754a06374d81259"

In [39]:
# Empty frame that will hold one row per restaurant found in a district.
district_restaurants = pd.DataFrame(columns=[
    'District', 'Lat', 'Long',
    'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category',
])

In [40]:
# Function for getting all food venues in an area.
def get_restaurants(lat, long, radius, limit, district=None):
    """Fetch food-category venues around a point from Foursquare venues/explore.

    Identical to a plain venue lookup except that the request is restricted
    with `categoryId=food_category`.

    Parameters
    ----------
    lat, long : coordinates sent as the `ll` query parameter.
    radius : value sent as the `radius` query parameter.
    limit : value sent as the `limit` query parameter.
    district : optional label used only in the diagnostic message printed
        when no venues can be extracted; the coordinates are used when omitted.

    Returns
    -------
    list
        The `items` of the first group in the response, or an empty list when
        the response does not contain that structure.

    Notes
    -----
    Reads CLIENT_ID, CLIENT_SECRET, VERSION and food_category from the
    notebook's globals.
    """
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'categoryId': food_category,
        'll': '{},{}'.format(lat, long),
        'radius': radius,
        'limit': limit,
    }

    # A timeout keeps one stalled request from hanging the whole district loop.
    r = requests.get('https://api.foursquare.com/v2/venues/explore', params=params, timeout=30)
    results = r.json()

    try:
        return results['response']['groups'][0]['items']
    except (KeyError, IndexError):
        # Report using our own arguments rather than a loop variable that may
        # or may not exist in the caller's global scope, and fall back to the
        # whole payload when there is no 'response' key to show.
        where = district if district is not None else '({}, {})'.format(lat, long)
        print('Trouble finding venues for {}. Returned response was:'.format(where), results.get('response', results))
        return []
        

In [41]:
# Look up the restaurants around every district center and record one row each.
# Rows are accumulated in a list and the DataFrame is built once at the end:
# DataFrame.append inside a loop copies the whole frame on every call (and was
# removed in pandas 2.0), and rebuilding the frame from scratch means re-running
# this cell no longer duplicates rows.
district_count = len(gainesville_districts)
restaurant_records = []
for index, row in gainesville_districts.iterrows():
    # Lightweight progress indicator, printed every tenth district.
    if index % 10 == 0:
        print('District {} of {}...'.format(index, district_count))
    venues = get_restaurants(row['Lat'], row['Long'], 500, 100)
    # Add each venue to our records, in the column order of district_restaurants.
    for venue in venues:
        restaurant_records.append([
            row['District'],
            row['Lat'],
            row['Long'],
            venue['venue']['name'],
            venue['venue']['location']['lat'],
            venue['venue']['location']['lng'],
            venue['venue']['categories'][0]['name'],
        ])

district_restaurants = pd.DataFrame(restaurant_records, columns=district_restaurants.columns)

District 0 of 176...
District 10 of 176...
District 20 of 176...
District 30 of 176...
District 40 of 176...
District 50 of 176...
District 60 of 176...
District 70 of 176...
District 80 of 176...
District 90 of 176...
District 100 of 176...
District 110 of 176...
District 120 of 176...
District 130 of 176...
District 140 of 176...
District 150 of 176...
District 160 of 176...
District 170 of 176...


In [42]:
# Quick preview of our restaurants: print the (rows, columns) shape, then
# display the first few rows as the cell's output.
print(district_restaurants.shape)
district_restaurants.head()

(352, 7)


Unnamed: 0,District,Lat,Long,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0-5,29.70617,-82.388015,China Bowl,29.702655,-82.390303,Chinese Restaurant
1,0-5,29.70617,-82.388015,Cedar River Seafood,29.701723,-82.387995,Seafood Restaurant
2,0-5,29.70617,-82.388015,SUBWAY,29.702775,-82.390566,Sandwich Place
3,0-5,29.70617,-82.388015,Flowers Bakery,29.702798,-82.387151,Bakery
4,0-5,29.70617,-82.388015,Volcanic Sushi + Sake,29.702961,-82.390351,Sushi Restaurant


In [17]:
# Count the distinct districts that appear at least once in the restaurant results.
print('Total districts with at least one restaurant: {}'.format(district_restaurants['District'].nunique()))

Total districts with at least one restaurant: 78


## Part 4 - Determining which districts are best for a new bakery

### First, cluster our districts using k-means

In [18]:
# One-hot encode the venue categories (one indicator column per category).
venue_dummified = pd.get_dummies(district_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the district each venue belongs to.
venue_dummified['District'] = district_venues['District']

# Put 'District' first, followed by the category columns in their existing order.
# Thanks to https://stackoverflow.com/a/56479671 😅
venue_dummified = venue_dummified[['District', *venue_dummified.columns.drop('District')]]

In [19]:
# Review our dataframe: print its (rows, columns) shape, then display the
# first few rows as the cell's output.
print('Shape:', venue_dummified.shape)
venue_dummified.head()

Shape: (771, 206)


Unnamed: 0,District,ATM,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Art Gallery,...,Tourist Information Center,Toy / Game Store,Trail,Tree,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store
0,0-0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0-1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0-2,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0-4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0-5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# Average the category indicators per district, giving each category's share
# of that district's venues. 'District' stays a regular column.
venue_groups = venue_dummified.groupby('District', as_index=False).mean()
print('Shape:',venue_groups.shape)
venue_groups.head()

Shape: (126, 206)


Unnamed: 0,District,ATM,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Art Gallery,...,Tourist Information Center,Toy / Game Store,Trail,Tree,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store
0,0-0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0-2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
num_top_venues = 10


def _ordinal(n):
    """Return n with its English ordinal suffix, e.g. 1 -> '1st', 11 -> '11th', 22 -> '22nd'."""
    # 11, 12 and 13 (and 111, 112, ...) take 'th' despite ending in 1, 2, 3.
    if 11 <= n % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# Create columns according to number of top venues:
# 'District', '1st Most Common Venue', '2nd Most Common Venue', ...
columns = ['District'] + [
    '{} Most Common Venue'.format(_ordinal(rank)) for rank in range(1, num_top_venues + 1)
]

# Create a new empty dataframe with our new columns and add in our districts.
district_venues_sorted = pd.DataFrame(columns=columns)
district_venues_sorted['District'] = venue_groups['District']

# Cycle over district groups...
for index, row in venue_groups.iterrows():
    # And add in num_top_venues of the top venue categories to each district.
    # `index` is used positionally with iloc, which matches venue_groups' 0..n-1 index.
    # NOTE(review): when a district has fewer than num_top_venues distinct
    # categories, the remaining slots are filled with categories whose mean is 0.
    district_venues_sorted.iloc[index, 1:] = row.iloc[1:].sort_values(ascending=False).index.values[0:num_top_venues]

district_venues_sorted.head()

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0-0,Lake,Women's Store,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court
1,0-1,Sports Club,Women's Store,Garden,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court,Food
2,0-10,Electronics Store,Women's Store,Fast Food Restaurant,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court,Food
3,0-14,Pet Store,Convenience Store,Women's Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court,Food
4,0-2,Airport Terminal,Women's Store,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court


In [28]:
# Our number of clusters.
kclusters = 10

# Calculate our KMeans on the per-district category means.
# The label column is dropped with an explicit axis keyword: passing the axis
# positionally (`drop('District', 1)`) is deprecated and rejected by pandas 2.0.
district_groups_clustering = venue_groups.drop('District', axis=1)
# random_state is fixed so the cluster assignment is reproducible between runs.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(district_groups_clustering)

In [29]:
# Add our clustering labels as the first column of our dataframe.
# DataFrame.insert raises ValueError if the column already exists, so any
# 'Cluster Labels' column left over from a previous run of this cell is
# dropped first; this keeps the cell safe to re-run.
if 'Cluster Labels' in district_venues_sorted.columns:
    district_venues_sorted = district_venues_sorted.drop('Cluster Labels', axis=1)
district_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [30]:
# Build our final dataframe in one chain:
#   1. start from every district and attach its cluster label and top venues,
#   2. drop districts that had no venues (their joined columns are NaN),
#   3. store the cluster labels as int32 for our calculations.
district_df_final = (
    gainesville_districts
    .join(district_venues_sorted.set_index('District'), on='District')
    .dropna()
    .astype({'Cluster Labels': 'int32'})
)

district_df_final

Unnamed: 0,District,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0-0,29.706170,-82.447966,1,Lake,Women's Store,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court
1,0-1,29.706170,-82.435976,7,Sports Club,Women's Store,Garden,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court,Food
2,0-2,29.706170,-82.423986,1,Airport Terminal,Women's Store,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court
4,0-4,29.706170,-82.400005,5,Tennis Court,Women's Store,Garden,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court,Food
5,0-5,29.706170,-82.388015,1,Pharmacy,Business Service,Sandwich Place,Skate Park,Electronics Store,Seafood Restaurant,Coffee Shop,Bank,Chinese Restaurant,Sushi Restaurant
6,0-6,29.706170,-82.376025,9,Accessories Store,Women's Store,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court
7,0-7,29.706170,-82.364035,1,Breakfast Spot,Cosmetics Shop,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court
8,0-8,29.706170,-82.352045,1,Farmers Market,Park,Sandwich Place,Golf Course,Liquor Store,Construction & Landscaping,Convenience Store,Moving Target,Video Game Store,Video Store
9,0-9,29.706170,-82.340055,1,Business Service,American Restaurant,Concert Hall,Women's Store,Fish & Chips Shop,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service
10,0-10,29.706170,-82.328065,1,Electronics Store,Women's Store,Fast Food Restaurant,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court,Food


In [31]:
# Create our map.
map_clusters = folium.Map(location=[GAINESVILLE_LATITUDE, GAINESVILLE_LONGITUDE], zoom_start=10)

# Set up a different color for each cluster by sampling the rainbow colormap
# at kclusters evenly spaced points.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add each district as a marker on the map, colored by its cluster.
for lat, lon, poi, cluster in zip(district_df_final['Lat'], district_df_final['Long'], district_df_final['District'], district_df_final['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Index cluster-1 sends label 0 to the last palette entry (index -1); every
    # cluster still gets its own color.
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Next, determine which clusters contain the most bakeries

We'll use this to determine which type of district works best for a bakery.

In [33]:
# Keep only the restaurant rows categorised as 'Bakery' (one row per bakery,
# so a district can appear more than once).
bakery_districts = district_restaurants.loc[district_restaurants['Venue Category'].eq('Bakery')]
print('Total districts with bakeries: {}'.format(bakery_districts['District'].nunique()))
bakery_districts.head()

Total districts with bakeries: 9


Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
3,0-5,29.70617,-82.388015,Flowers Bakery,29.702798,-82.387151,Bakery
8,0-8,29.70617,-82.352045,Walmart Bakery,29.70644,-82.35684,Bakery
29,2-11,29.685326,-82.316075,Sunbeam Bakery-Wholesale,29.687512,-82.319959,Bakery
30,2-13,29.685326,-82.292094,Country Hearth Bakery,29.681937,-82.291785,Bakery
38,3-5,29.674903,-82.388015,Uppercrust,29.674301,-82.387022,Bakery


In [35]:
# Determine which cluster has most bakeries in it (one count per bakery row).
# The merge key is stated explicitly: without `on`, pandas joins on every
# shared column, which here includes the floating-point 'Lat'/'Long' columns.
# Only the label column is brought across so no duplicate columns are created.
bakery_districts.merge(district_df_final[['District', 'Cluster Labels']], on='District')['Cluster Labels'].value_counts()

1    9
2    1
Name: Cluster Labels, dtype: int64

### Finally, determine which districts in that cluster do not have any bakery

In [60]:
# Keep the clustered districts that show up at least once in the restaurant results.
districts_with_restaurants = district_df_final.loc[
    district_df_final['District'].isin(district_restaurants['District'])
]
print(districts_with_restaurants.shape)
districts_with_restaurants.head()

(76, 14)


Unnamed: 0,District,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,0-5,29.70617,-82.388015,1,Pharmacy,Business Service,Sandwich Place,Skate Park,Electronics Store,Seafood Restaurant,Coffee Shop,Bank,Chinese Restaurant,Sushi Restaurant
7,0-7,29.70617,-82.364035,1,Breakfast Spot,Cosmetics Shop,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court
8,0-8,29.70617,-82.352045,1,Farmers Market,Park,Sandwich Place,Golf Course,Liquor Store,Construction & Landscaping,Convenience Store,Moving Target,Video Game Store,Video Store
9,0-9,29.70617,-82.340055,1,Business Service,American Restaurant,Concert Hall,Women's Store,Fish & Chips Shop,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service
25,1-9,29.695748,-82.340055,1,Furniture / Home Store,Boutique,Automotive Shop,Business Service,Park,Coffee Shop,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck


In [61]:
# Narrow the restaurant districts down to cluster 1 (the one with most bakeries).
cluster_one_restaurant_districts = districts_with_restaurants.loc[
    districts_with_restaurants['Cluster Labels'].eq(1)
]
print(cluster_one_restaurant_districts.shape)
cluster_one_restaurant_districts.head()

(67, 14)


Unnamed: 0,District,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,0-5,29.70617,-82.388015,1,Pharmacy,Business Service,Sandwich Place,Skate Park,Electronics Store,Seafood Restaurant,Coffee Shop,Bank,Chinese Restaurant,Sushi Restaurant
7,0-7,29.70617,-82.364035,1,Breakfast Spot,Cosmetics Shop,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court
8,0-8,29.70617,-82.352045,1,Farmers Market,Park,Sandwich Place,Golf Course,Liquor Store,Construction & Landscaping,Convenience Store,Moving Target,Video Game Store,Video Store
9,0-9,29.70617,-82.340055,1,Business Service,American Restaurant,Concert Hall,Women's Store,Fish & Chips Shop,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service
25,1-9,29.695748,-82.340055,1,Furniture / Home Store,Boutique,Automotive Shop,Business Service,Park,Coffee Shop,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck


In [62]:
# Get all districts with at least one restaurant within cluster 1 that do not have a bakery in it.
# `~mask` is the idiomatic boolean negation of a pandas mask (instead of `False == mask`).
has_bakery = cluster_one_restaurant_districts['District'].isin(bakery_districts['District'])
potential_districts = cluster_one_restaurant_districts[~has_bakery]
print(potential_districts.shape)
potential_districts.head()

(59, 14)


Unnamed: 0,District,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,0-7,29.70617,-82.364035,1,Breakfast Spot,Cosmetics Shop,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service,Food Court
9,0-9,29.70617,-82.340055,1,Business Service,American Restaurant,Concert Hall,Women's Store,Fish & Chips Shop,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service
25,1-9,29.695748,-82.340055,1,Furniture / Home Store,Boutique,Automotive Shop,Business Service,Park,Coffee Shop,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck
30,1-14,29.695748,-82.280104,1,Airport,Airport Lounge,Airport Service,Food,Women's Store,Fish & Chips Shop,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant
32,2-0,29.685326,-82.447966,1,Bookstore,Ice Cream Shop,Donut Shop,Golf Course,Mexican Restaurant,Seafood Restaurant,Women's Store,Fish Market,Fried Chicken Joint,French Restaurant


In [64]:
# Create our map of the candidate districts.
map_clusters = folium.Map(location=[GAINESVILLE_LATITUDE, GAINESVILLE_LONGITUDE], zoom_start=12)

# Set up a different color for each cluster by sampling the rainbow colormap
# at kclusters evenly spaced points.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add each candidate district as a marker on the map, colored by its cluster.
for lat, lon, poi, cluster in zip(potential_districts['Lat'], potential_districts['Long'], potential_districts['District'], potential_districts['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Index cluster-1 sends label 0 to the last palette entry (index -1); every
    # cluster still gets its own color.
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters