In [68]:
##################################################################################################################
#                                                                                                                #
# Project Title: Find a place to open a restaurant                                                               #
#                                                                                                                #
# Description:                                                                                                   #
# A customer is looking for a place to open a restaurant in Baltimore County, Maryland, USA. In order            #
# to select a place with a high population but fewer restaurants (or at least not much fast food), use data      #
# from Foursquare to do the analysis and recommend a couple of locations to the customer.                        #
#                                                                                                                #
# Method:                                                                                                        #
# Use geographic information from the Internet (source: unitedstateszipcodes.org) and merge it with the venue    #
# information from Foursquare to find potential places to open a fast food/American-style restaurant in          #
# Baltimore County, Maryland, USA.                                                                               #
#                                                                                                                #
##################################################################################################################

In [69]:
from traitlets.config import Config
import nbformat as nbf
from nbconvert.exporters import HTMLExporter

# Empty traitlets configuration object; presumably consumed by an nbconvert exporter later on.
c = Config()

# Ask TagRemovePreprocessor to remove every cell that carries the "remove_cell" tag.
# NOTE(review): nothing in this cell enables the preprocessor or builds an exporter from `c` —
# confirm that this happens wherever `c` is actually used.
c.TagRemovePreprocessor.remove_cell_tags = ("remove_cell",)

In [70]:
# Import the libraries we need in the project

import json
import random  # Library for random number generation

import numpy as np  # Library to handle data in a vectorized manner
import pandas as pd  # Library for data analysis
import requests  # Library to handle HTTP requests

# json_normalize lives at the top level of pandas since 1.0; the old
# `pandas.io.json` location was deprecated there and no longer works on pandas 2.x.
# Fall back to the old location only for pandas versions older than 1.0.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0
import folium

# Libraries for displaying images and HTML
# (imported from IPython.display; the IPython.core.display path is deprecated)
from IPython.display import HTML, Image

print('Libraries imported.')


Libraries imported.


In [71]:
# Load the ZIP code table from the CSV file with pandas read_csv,
# then preview the first five rows.
postalData = pd.read_csv(filepath_or_buffer='zip_code_database.csv')
postalData.head(5)

Unnamed: 0,zip,type,decommissioned,primary_city,acceptable_cities,unacceptable_cities,state,county,timezone,area_codes,world_region,country,latitude,longitude,irs_estimated_population_2015
0,501,UNIQUE,0,Holtsville,,I R S Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,562
1,544,UNIQUE,0,Holtsville,,Irs Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,0
2,601,STANDARD,0,Adjuntas,,"Colinas Del Gigante, Jard De Adjuntas, Urb San...",PR,Adjuntas Municipio,America/Puerto_Rico,787939,,US,18.16,-66.72,0
3,602,STANDARD,0,Aguada,,"Alts De Aguada, Bo Guaniquilla, Comunidad Las ...",PR,Aguada Municipio,America/Puerto_Rico,787939,,US,18.38,-67.18,0
4,603,STANDARD,0,Aguadilla,Ramey,"Bda Caban, Bda Esteves, Bo Borinquen, Bo Ceiba...",PR,Aguadilla Municipio,America/Puerto_Rico,787,,US,18.43,-67.15,0


In [72]:
# Keep only the columns needed for the analysis (target area: Baltimore County, Maryland, USA).
# The last line only displays the rows ordered by population, largest first; the sorted result
# is not stored, so MDData keeps its original row order.
MDData = postalData.drop(['decommissioned', 'unacceptable_cities', 'timezone', 'world_region'], axis=1)
MDData.sort_values('irs_estimated_population_2015', ascending=False)

Unnamed: 0,zip,type,primary_city,acceptable_cities,state,county,area_codes,country,latitude,longitude,irs_estimated_population_2015
26692,60629,STANDARD,Chicago,Bedford Park,IL,Cook County,312773872,US,41.78,-87.71,114420
4118,11220,STANDARD,Brooklyn,,NY,Kings County,718,US,40.64,-74.02,111430
34023,77449,STANDARD,Katy,Park Row,TX,Harris County,281346832,US,29.84,-95.73,109280
3135,8701,STANDARD,Lakewood,,NJ,Ocean County,732848908,US,40.09,-74.21,105330
34065,77494,STANDARD,Katy,Park Row,TX,Fort Bend County,281832,US,29.74,-95.83,104450
...,...,...,...,...,...,...,...,...,...,...,...
10029,24042,UNIQUE,Roanoke,,VA,Roanoke City,540,US,37.27,-79.95,0
10030,24043,UNIQUE,Roanoke,,VA,Roanoke City,540,US,37.27,-79.95,0
10031,24044,UNIQUE,Roanoke,,VA,Roanoke City,540,US,37.27,-79.94,0
10032,24045,UNIQUE,Roanoke,,VA,Roanoke City,540,US,37.27,-79.94,0


In [73]:
# Restrict the table to UNIQUE/STANDARD ZIP codes in Baltimore County, Maryland, USA, whose
# estimated 2015 population is above 20,000, ordered from most to least populous.
#
# The filter and the sort are chained so that a new frame is assigned in one step. The previous
# version sorted a filtered slice with inplace=True (which can raise SettingWithCopyWarning) and
# called groupby(['primary_city']) without using the result, so that call had no effect.
MDData = (
    MDData[
        MDData['type'].isin(['UNIQUE', 'STANDARD'])
        & (MDData['state'] == 'MD')
        & (MDData['county'] == 'Baltimore County')
        & (MDData['irs_estimated_population_2015'] > 20000)
    ]
    .sort_values(by='irs_estimated_population_2015', ascending=False)
)
MDData

Unnamed: 0,zip,type,primary_city,acceptable_cities,state,county,area_codes,country,latitude,longitude,irs_estimated_population_2015
8929,21234,STANDARD,Parkville,Baltimore,MD,Baltimore County,410443,US,39.38,-76.55,62620
8871,21117,STANDARD,Owings Mills,Garrison,MD,Baltimore County,410,US,39.41,-76.79,52350
8918,21222,STANDARD,Dundalk,Baltimore,MD,Baltimore County,410443,US,39.26,-76.49,50150
8924,21228,STANDARD,Catonsville,Baltimore,MD,Baltimore County,410443,US,39.26,-76.74,44840
8903,21207,STANDARD,Gwynn Oak,"Baltimore, Woodlawn",MD,Baltimore County,667410443,US,39.32,-76.72,42570
8917,21221,STANDARD,Essex,Baltimore,MD,Baltimore County,410443,US,39.3,-76.44,38480
8916,21220,STANDARD,Middle River,Baltimore,MD,Baltimore County,410443,US,39.33,-76.43,38290
8931,21236,STANDARD,Nottingham,Baltimore,MD,Baltimore County,410,US,39.39,-76.48,37560
8860,21093,STANDARD,Lutherville Timonium,"Lutherville, Luthvle Timon, Timonium",MD,Baltimore County,410,US,39.43,-76.64,36780
8936,21244,STANDARD,Windsor Mill,Baltimore,MD,Baltimore County,410,US,39.33,-76.78,34200


In [74]:
# Display the (rows, columns) dimensions of the filtered table.
MDData.shape

(17, 11)

In [75]:
tags:["remove_cell"]
#Test this 