# Dallas Cultural Centers Capstone Project
###### First, I will download all the packages and data needed for this analysis

In [2]:
import numpy as np

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0           conda-forge
    geopy:          

#### Downloading Data from Dallas Open Data ('Dallas Cultural Centers') and cleaning it.

In [31]:
# Fetch the 'Dallas Cultural Centers' resource from Dallas Open Data and save it
# locally as dallas_data.json (-q silences wget, -O names the output file).
!wget -q -O 'dallas_data.json' https://www.dallasopendata.com/resource/6cte-99qc.json
print('Data downloaded!')

Data downloaded!


In [32]:
# Parse the downloaded file; the result is a list with one dict per facility.
with open('dallas_data.json') as fh:
    dallas_data = json.load(fh)

# Echo the parsed records so their structure can be inspected.
dallas_data
    

[{'facility_name': 'Latino Cultural Center',
  'address': '2600 Live Oak St',
  'city': 'Dallas',
  'state': 'TX',
  'zip_code': '75204',
  'phone_number': '214.671.0045',
  'latitude': '32.787858700000001',
  'longitude': '-96.792098600000003',
  'hours': 'The Latino Cultural Center is open Tuesday through Saturday from 10 am to 5 pm, and is closed on Sunday and Monday.',
  'description': 'The Latino Cultural Center (LCC), was designed by world-renowned architect Ricardo Legorreta and dedicated on September 16, 2003. A multidisciplinary arts center that presents Latino artists in the visual and performing arts, film, and literature, the LCC also supports local artists and arts organizations by developing and celebrating Latino art and culture.',
  'geolocation': {'latitude': '32.787859',
   'longitude': '-96.792099',
   'human_address': '{"address": "2600 Live Oak St", "city": "Dallas", "state": "TX", "zip": "75204"}'},
  ':@computed_region_28rh_izyk': '3162',
  ':@computed_region_3qu

In [33]:
# define the dataframe columns
column_names = ['FACILITY NAME', 'ADDRESS', 'CITY', 'STATE', 'ZIP CODE','PHONE NUMBER', 'LATITUDE', 'LONGITUDE', 'HOURS', 'DESCRIPTION', 'GEOLOCATION'] 

# instantiate the dataframe
centers = pd.DataFrame(columns=column_names)

In [34]:
# Collect one record per facility and build the frame in a single call.
# Appending to a DataFrame inside a loop copies the whole frame on every
# iteration, and DataFrame.append no longer exists in pandas 2.x.
facility_records = []
for data in dallas_data:
    facility_records.append({
        'FACILITY NAME': data['facility_name'],
        'ADDRESS': data['address'],
        'CITY': data['city'],
        'STATE': data['state'],
        'ZIP CODE': data['zip_code'],
        'PHONE NUMBER': data['phone_number'],
        'LATITUDE': data['latitude'],
        'LONGITUDE': data['longitude'],
        'DESCRIPTION': data['description'],
        'GEOLOCATION': data['geolocation'],
    })

# column_names fixes the column order; 'HOURS' is not supplied by any record,
# so that column is present but left empty (NaN).
centers = pd.DataFrame(facility_records, columns=column_names)

In [35]:
# Display the assembled facilities table.
centers

Unnamed: 0,FACILITY NAME,ADDRESS,CITY,STATE,ZIP CODE,PHONE NUMBER,LATITUDE,LONGITUDE,HOURS,DESCRIPTION,GEOLOCATION
0,Latino Cultural Center,2600 Live Oak St,Dallas,TX,75204,214.671.0045,32.7878587,-96.7920986,,"The Latino Cultural Center (LCC), was designed...","{'latitude': '32.787859', 'longitude': '-96.79..."
1,Bath House Cultural Center,521 E. Lawther Drive,Dallas,TX,75218,214.670.8749,32.8407873,-96.7175475,,Located on the shores of beautiful White Rock ...,"{'latitude': '32.840787', 'longitude': '-96.71..."
2,Oak Cliff Cultural Center,223 W. Jefferson Blvd.,Dallas,TX,75208,214.670.3777,32.7434762,-96.8279692,,The Oak Cliff Cultural Center provides communi...,"{'latitude': '32.743476', 'longitude': '-96.82..."
3,Dallas City Performance Hall,2520 Flora St.,Dallas,TX,75201,214.671.1450,32.7906865,-96.7979007,,The Dallas City Performance Hall is a multi-di...,"{'latitude': '32.790687', 'longitude': '-96.79..."
4,Majestic Theatre,1925 Elm Street,Dallas,TX,75201,214.670.3687,32.7821214,-96.7967078,,One of the most elegant and historic performin...,"{'latitude': '32.782121', 'longitude': '-96.79..."
5,Morton H. Meyerson Symphony Center,2301 Flora,Dallas,TX,75201,214.670.3600,32.7898987,-96.8007843,,"The Morton H. Meyerson Symphony Center, design...","{'latitude': '32.789899', 'longitude': '-96.80..."
6,South Dallas Cultural Center,3400 South Fitzhugh,Dallas,TX,75210,214.939.2787,32.7715501,-96.7596742,,The South Dallas Cultural Center became a real...,"{'latitude': '32.77155', 'longitude': '-96.759..."


In [36]:
# Keep only the identifying and location columns. errors='ignore' lets this cell
# be re-run after the columns are already gone instead of raising a KeyError.
centers = centers.drop(columns=['PHONE NUMBER', 'HOURS', 'DESCRIPTION', 'GEOLOCATION'], errors='ignore')
centers

Unnamed: 0,FACILITY NAME,ADDRESS,CITY,STATE,ZIP CODE,LATITUDE,LONGITUDE
0,Latino Cultural Center,2600 Live Oak St,Dallas,TX,75204,32.7878587,-96.7920986
1,Bath House Cultural Center,521 E. Lawther Drive,Dallas,TX,75218,32.8407873,-96.7175475
2,Oak Cliff Cultural Center,223 W. Jefferson Blvd.,Dallas,TX,75208,32.7434762,-96.8279692
3,Dallas City Performance Hall,2520 Flora St.,Dallas,TX,75201,32.7906865,-96.7979007
4,Majestic Theatre,1925 Elm Street,Dallas,TX,75201,32.7821214,-96.7967078
5,Morton H. Meyerson Symphony Center,2301 Flora,Dallas,TX,75201,32.7898987,-96.8007843
6,South Dallas Cultural Center,3400 South Fitzhugh,Dallas,TX,75210,32.7715501,-96.7596742


#### Convert latitude and longitude to float to create a map of Dallas with the cultural centers.

In [37]:
# Cast both coordinate columns to float in a single call.
centers = centers.astype({'LATITUDE': float, 'LONGITUDE': float})

In [38]:
address = 'Dallas, TX'

# Resolve the city name to coordinates with the Nominatim geocoder.
geolocator = Nominatim(user_agent="dallas_explorer")
location = geolocator.geocode(address)

# geocode() yields None when nothing matches; stop here with a clear message
# instead of failing on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Dallas are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Dallas are 32.7762719, -96.7968559.


#### Created a map to show the location of the cultural centers with their zipcodes.

In [39]:
# Base map centred on the Dallas coordinates obtained from the geocoder.
map_dallas = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per cultural center; the popup reads "name, zip code".
marker_fields = zip(centers['LATITUDE'], centers['LONGITUDE'], centers['FACILITY NAME'], centers['ZIP CODE'])
for lat, lng, name, zipcode in marker_fields:
    popup = folium.Popup('{}, {}'.format(name, zipcode), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7)
    marker.add_to(map_dallas)

map_dallas

# Methodology

#### Now, I will use the Foursquare API to explore the area surrounding the cultural centers. I will start by exploring the venues around the Latino Cultural Center.

In [40]:
import os
from getpass import getpass

# Credentials must not be stored in the notebook: read them from the environment
# and fall back to an interactive prompt when a variable is unset or empty.
# NOTE(review): keys that were previously committed here should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare CLIENT_ID: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare CLIENT_SECRET: ')
VERSION = '20180605'  # sent as the `v` query parameter of every request

In [41]:
# Name stored in the first row of the centers table.
centers.loc[0, 'FACILITY NAME']

'Latino Cultural Center'

In [42]:
# Pull the first row's name and coordinates for the single-center exploration.
facility_name = centers.loc[0, 'FACILITY NAME']
facility_latitude = centers.loc[0, 'LATITUDE']
facility_longitude = centers.loc[0, 'LONGITUDE']

summary = 'Latitude and longitude values of {} are {}, {}.'.format(
    facility_name, facility_latitude, facility_longitude)
print(summary)

Latitude and longitude values of Latino Cultural Center are 32.7878587, -96.7920986.


In [43]:
LIMIT = 50    # sent as the `limit` query parameter
radius = 500  # sent as the `radius` query parameter

# Fill the explore-endpoint template with the credentials, API version,
# the selected center's coordinates, and the two search settings above.
explore_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
url = explore_template.format(
    CLIENT_ID, CLIENT_SECRET, VERSION,
    facility_latitude, facility_longitude,
    radius, LIMIT)

# NOTE(review): echoing the URL writes the client secret into the saved output.
url

'https://api.foursquare.com/v2/venues/explore?&client_id=ZJTZRXYC1EG4C1LKMK3AOWDXEBTATXORJEN1HILYXO3EI5ZE&client_secret=02YL4XROAQK2WJRQPRRSZBKW2TJ5SU2IWJBINGOVHGO2GQ3E&v=20180605&ll=32.7878587,-96.7920986&radius=500&limit=50'

In [44]:
# Call the explore endpoint and decode the JSON body.
response = requests.get(url)
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5ebc7ea3949393001bd5d439'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'},
    {'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Central Dallas',
  'headerFullLocation': 'Central Dallas, Dallas',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 25,
  'suggestedBounds': {'ne': {'lat': 32.7923587045, 'lng': -96.78675578807776},
   'sw': {'lat': 32.7833586955, 'lng': -96.79744141192225}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4dd06bede4cd130e169844f7',
       'name': 'Wyly Theatre',
       'location': {'address': '2400 Flora St',
        'crossStreet': 'Leonard street',
        'lat': 32.79008761997244,
        'lng': -96.7964906852331

#### After requesting up to 50 venues within a 500-meter radius, I extracted each venue's category and organized the results in a pandas dataframe.

In [45]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Must expose the category list under 'categories' or, failing that,
        under 'venue.categories'.

    Returns
    -------
    str or None
        The 'name' of the first category, or None when the list is empty
        or missing (None).

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is real
        # and must surface instead of being swallowed.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

In [47]:
# The venue items sit in the first group of the explore response.
venues = results['response']['groups'][0]['items']

# Flatten the nested records and keep only name, categories and coordinates.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Collapse each categories list to the name of its first entry.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Strip the dotted prefixes, e.g. 'venue.location.lat' -> 'lat'.
nearby_venues = nearby_venues.rename(columns=lambda col: col.split(".")[-1])

nearby_venues.head(25)

Unnamed: 0,name,categories,lat,lng
0,Wyly Theatre,Theater,32.790088,-96.796491
1,Lizard Lounge,Nightclub,32.784884,-96.790225
2,Levú,Nightclub,32.784939,-96.78922
3,Smoothie King,Smoothie Shop,32.787142,-96.795705
4,Dunlap-Swain Tire Co. Inc,Auto Garage,32.790072,-96.794021
5,Dallas Theater Center,Theater,32.790035,-96.796617
6,Sheraton Fitness Center,Gym,32.784972,-96.794999
7,Andrew's Fine Beverages,Liquor Store,32.790195,-96.795006
8,Sheraton Dallas Hotel,Hotel,32.785128,-96.794956
9,Freshii,Salad Place,32.787039,-96.795685


In [48]:
# Number of rows (venues) in the table built from the response.
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

25 venues were returned by Foursquare.


#### Now I'll do the same exploration on the remaining cultural centers.

In [49]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint around each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences describing the locations to search around; they are
        consumed together with ``zip``.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'FACILITY NAME', 'LATITUDE',
        'LONGITUDE', 'VENUE', 'VENUE LATITUDE', 'VENUE LONGITUDE' and
        'VENUE CATEGORY'. The frame is empty, but keeps these columns, when no
        venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT from the notebook's
    global namespace. Prints each location name as it is processed.
    """
    columns = ['FACILITY NAME',
               'LATITUDE',
               'LONGITUDE',
               'VENUE',
               'VENUE LATITUDE',
               'VENUE LONGITUDE',
               'VENUE CATEGORY']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # A timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status surfaces HTTP errors before the JSON is indexed.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        for item in results:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue may carry no category at all; record None rather
                # than failing on the [0] lookup.
                categories[0]['name'] if categories else None))

    # Passing the headers to the constructor keeps them in place even when
    # no rows were collected.
    return pd.DataFrame(rows, columns=columns)

In [51]:
# Run the venue search for every cultural center in the table.
dallas_centers_venues = getNearbyVenues(
    centers['FACILITY NAME'],
    centers['LATITUDE'],
    centers['LONGITUDE'],
)

Latino Cultural Center
Bath House Cultural Center
Oak Cliff Cultural Center
Dallas City Performance Hall
Majestic Theatre
Morton H. Meyerson Symphony Center
South Dallas Cultural Center


In [52]:
# Report the table's dimensions, then preview its first rows.
print(dallas_centers_venues.shape)
dallas_centers_venues.head()

(201, 7)


Unnamed: 0,FACILITY NAME,LATITUDE,LONGITUDE,VENUE,VENUE LATITUDE,VENUE LONGITUDE,VENUE CATEGORY
0,Latino Cultural Center,32.787859,-96.792099,Wyly Theatre,32.790088,-96.796491,Theater
1,Latino Cultural Center,32.787859,-96.792099,Lizard Lounge,32.784884,-96.790225,Nightclub
2,Latino Cultural Center,32.787859,-96.792099,Levú,32.784939,-96.78922,Nightclub
3,Latino Cultural Center,32.787859,-96.792099,Smoothie King,32.787142,-96.795705,Smoothie Shop
4,Latino Cultural Center,32.787859,-96.792099,Dunlap-Swain Tire Co. Inc,32.790072,-96.794021,Auto Garage


#### The number of venues returned for each cultural center. We can see that the number of nearby venues for the Bath House Cultural Center and the South Dallas Cultural Center is low compared to the other centers.

In [53]:
# Count the rows returned for each cultural center.
dallas_centers_venues.groupby('FACILITY NAME').count()

Unnamed: 0_level_0,LATITUDE,LONGITUDE,VENUE,VENUE LATITUDE,VENUE LONGITUDE,VENUE CATEGORY
FACILITY NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bath House Cultural Center,3,3,3,3,3,3
Dallas City Performance Hall,33,33,33,33,33,33
Latino Cultural Center,25,25,25,25,25,25
Majestic Theatre,50,50,50,50,50,50
Morton H. Meyerson Symphony Center,50,50,50,50,50,50
Oak Cliff Cultural Center,32,32,32,32,32,32
South Dallas Cultural Center,8,8,8,8,8,8


There's an opportunity to further develop the offerings around the Bath House Cultural Center and the South Dallas Cultural Center.

In [54]:
# Number of distinct venue categories across all centers.
print('There are {} uniques categories.'.format(len(dallas_centers_venues['VENUE CATEGORY'].unique())))

There are 80 uniques categories.


# Further analyzed each center area. Added dummies for each venue category.


In [55]:
# One indicator column per venue category, with no prefix on the column names.
dallas_onehot = pd.get_dummies(dallas_centers_venues[['VENUE CATEGORY']], prefix="", prefix_sep="")

# Place the owning facility's name in front as the first column.
dallas_onehot.insert(0, 'FACILITY NAME', dallas_centers_venues['FACILITY NAME'])

dallas_onehot.head()

Unnamed: 0,FACILITY NAME,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Athletics & Sports,Auto Garage,BBQ Joint,Bank,Bar,Basketball Court,Bistro,Botanical Garden,Boutique,Breakfast Spot,Brewery,Building,Burger Joint,Café,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Department Store,Diner,Discount Store,Dive Bar,Dog Run,Fast Food Restaurant,Food Truck,French Restaurant,Fried Chicken Joint,Gift Shop,Grocery Store,Gym,Harbor / Marina,Hotel,Hotel Bar,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Laundromat,Liquor Store,Mediterranean Restaurant,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Movie Theater,New American Restaurant,Nightclub,Noodle House,Opera House,Park,Performing Arts Venue,Pet Store,Pharmacy,Pie Shop,Pizza Place,Planetarium,Playground,Plaza,Pool,Public Art,Resort,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Smoothie Shop,Sports Bar,Steakhouse,Supermarket,Taco Place,Thai Restaurant,Theater,Volleyball Court,Yoga Studio
0,Latino Cultural Center,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,Latino Cultural Center,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Latino Cultural Center,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Latino Cultural Center,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4,Latino Cultural Center,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [56]:
# Average the indicator columns per facility: each value is the share of that
# facility's venues that fall in the category.
dallas_grouped = dallas_onehot.groupby('FACILITY NAME', as_index=False).mean()
dallas_grouped

Unnamed: 0,FACILITY NAME,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Athletics & Sports,Auto Garage,BBQ Joint,Bank,Bar,Basketball Court,Bistro,Botanical Garden,Boutique,Breakfast Spot,Brewery,Building,Burger Joint,Café,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Department Store,Diner,Discount Store,Dive Bar,Dog Run,Fast Food Restaurant,Food Truck,French Restaurant,Fried Chicken Joint,Gift Shop,Grocery Store,Gym,Harbor / Marina,Hotel,Hotel Bar,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Laundromat,Liquor Store,Mediterranean Restaurant,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Movie Theater,New American Restaurant,Nightclub,Noodle House,Opera House,Park,Performing Arts Venue,Pet Store,Pharmacy,Pie Shop,Pizza Place,Planetarium,Playground,Plaza,Pool,Public Art,Resort,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Smoothie Shop,Sports Bar,Steakhouse,Supermarket,Taco Place,Thai Restaurant,Theater,Volleyball Court,Yoga Studio
0,Bath House Cultural Center,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Dallas City Performance Hall,0.060606,0.0,0.060606,0.030303,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.030303,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.121212,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.060606,0.030303,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.030303,0.030303,0.090909,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.030303,0.0,0.060606,0.0,0.030303,0.0,0.030303,0.0,0.0,0.0,0.060606,0.0,0.0
2,Latino Cultural Center,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.16,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0
3,Majestic Theatre,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.02,0.06,0.08,0.08,0.0,0.0,0.0,0.02,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.04,0.0,0.12,0.02,0.02,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.04,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.02,0.0,0.0,0.04,0.02,0.02,0.0,0.0
4,Morton H. Meyerson Symphony Center,0.04,0.0,0.06,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.02,0.02,0.0,0.0,0.04,0.06,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.04,0.0,0.04,0.02,0.0,0.0,0.1,0.0,0.0,0.0,0.04,0.0,0.02
5,Oak Cliff Cultural Center,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.03125,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.03125,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.03125,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.21875,0.0,0.03125,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.03125,0.0,0.0,0.03125,0.03125,0.03125,0.0,0.0,0.0,0.0
6,South Dallas Cultural Center,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0


Top 10 most common venues per center.

In [58]:
num_top_venues = 10

for facility in dallas_grouped['FACILITY NAME']:
    print("----"+facility+"----")
    # Transpose the facility's single row into a two-column (venue, freq) table.
    freq_table = dallas_grouped[dallas_grouped['FACILITY NAME'] == facility].T.reset_index()
    freq_table.columns = ['venue','freq']
    # The first row holds the facility name itself, so skip it before casting.
    freq_table = freq_table.iloc[1:]
    freq_table = freq_table.assign(freq=freq_table['freq'].astype(float)).round({'freq': 2})
    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Bath House Cultural Center----
                     venue  freq
0          Harbor / Marina  0.33
1                     Park  0.33
2               Public Art  0.33
3      American Restaurant  0.00
4  New American Restaurant  0.00
5                Pet Store  0.00
6    Performing Arts Venue  0.00
7              Opera House  0.00
8             Noodle House  0.00
9                Nightclub  0.00


----Dallas City Performance Hall----
                   venue  freq
0             Food Truck  0.12
1  Performing Arts Venue  0.09
2    American Restaurant  0.06
3             Art Museum  0.06
4         Sandwich Place  0.06
5    Japanese Restaurant  0.06
6                Theater  0.06
7                Dog Run  0.03
8      Convenience Store  0.03
9         Cosmetics Shop  0.03


----Latino Cultural Center----
                  venue  freq
0                 Hotel  0.16
1               Theater  0.08
2             Nightclub  0.08
3           Coffee Shop  0.08
4   American Restaurant  0.04
5        

In [60]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped, the remaining entries are ordered
    from largest to smallest value, and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [61]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column headers: the facility name followed by "1st", "2nd", "3rd", "4th", ...
columns = ['FACILITY NAME']
for ind in np.arange(num_top_venues):
    # Only the first len(indicators) ranks take a special suffix; every later
    # rank uses "th". An explicit test replaces the former bare `except:`,
    # which would also have hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# One row per facility, in the same order as dallas_grouped.
centers_venues_sorted = pd.DataFrame(columns=columns)
centers_venues_sorted['FACILITY NAME'] = dallas_grouped['FACILITY NAME']

# Fill the ranked-category columns of each row from the frequency table.
for ind in np.arange(dallas_grouped.shape[0]):
    centers_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dallas_grouped.iloc[ind, :], num_top_venues)

centers_venues_sorted

Unnamed: 0,FACILITY NAME,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bath House Cultural Center,Harbor / Marina,Park,Public Art,Discount Store,Convenience Store,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Department Store,Diner
1,Dallas City Performance Hall,Food Truck,Performing Arts Venue,Theater,Sandwich Place,Japanese Restaurant,American Restaurant,Art Museum,Steakhouse,Breakfast Spot,Dog Run
2,Latino Cultural Center,Hotel,Theater,Nightclub,Coffee Shop,American Restaurant,Plaza,Gym,Fast Food Restaurant,Liquor Store,Mexican Restaurant
3,Majestic Theatre,Hotel,Cocktail Bar,Coffee Shop,Café,New American Restaurant,Taco Place,Gym,French Restaurant,Mobile Phone Shop,Mexican Restaurant
4,Morton H. Meyerson Symphony Center,Steakhouse,Food Truck,Art Museum,Burger Joint,Mexican Restaurant,Performing Arts Venue,American Restaurant,Theater,BBQ Joint,New American Restaurant
5,Oak Cliff Cultural Center,Mexican Restaurant,Fried Chicken Joint,Discount Store,Modern European Restaurant,Movie Theater,Coffee Shop,Café,Burger Joint,Pet Store,Brewery
6,South Dallas Cultural Center,Pharmacy,Volleyball Court,Athletics & Sports,Pizza Place,Planetarium,Basketball Court,Botanical Garden,Miscellaneous Shop,Discount Store,Cosmetics Shop


# K Means Clustering 

#### Not strictly necessary given the small amount of data, but I added cluster labels to uncover any other insights. With k = 5, two pairs of centers share a cluster (the Dallas City Performance Hall with the Morton H. Meyerson Symphony Center, and the Latino Cultural Center with the Majestic Theatre), while the remaining three centers each form a cluster of their own. If I had more data about other cultural centers in other parts of Texas or adjacent cities, I could compare these centers further and determine if there's any pattern.

In [78]:
kclusters = 5

# Cluster on the category frequencies only. `columns=` replaces the positional
# axis argument of drop(), which pandas 2.x no longer accepts.
dallas_grouped_clustering = dallas_grouped.drop(columns='FACILITY NAME')

# random_state is fixed so the label assignment is repeatable.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dallas_grouped_clustering)

# One label per row of dallas_grouped (the slice shows at most the first ten).
kmeans.labels_[0:10]

array([2, 0, 3, 3, 0, 4, 1], dtype=int32)

In [79]:


# Attach each center's k-means label to its ranked-venue row. The labels follow
# the row order of dallas_grouped, which centers_venues_sorted shares. Working
# on a fresh frame, and discarding any label column left over from an earlier
# run, keeps this cell safe to re-execute; without this step the
# 'Cluster Labels' column read by the cells below is never created.
venues_with_labels = centers_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
venues_with_labels.insert(0, 'Cluster Labels', kmeans.labels_)

# Add the facility details (address, coordinates) to the labelled rows.
dallas_venues_merged = centers.join(venues_with_labels.set_index('FACILITY NAME'), on='FACILITY NAME', how='right')

dallas_venues_merged



Unnamed: 0,FACILITY NAME,ADDRESS,CITY,STATE,ZIP CODE,LATITUDE,LONGITUDE,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Latino Cultural Center,2600 Live Oak St,Dallas,TX,75204,32.787859,-96.792099,3,Hotel,Theater,Nightclub,Coffee Shop,American Restaurant,Plaza,Gym,Fast Food Restaurant,Liquor Store,Mexican Restaurant
1,Bath House Cultural Center,521 E. Lawther Drive,Dallas,TX,75218,32.840787,-96.717547,2,Harbor / Marina,Park,Public Art,Discount Store,Convenience Store,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Department Store,Diner
2,Oak Cliff Cultural Center,223 W. Jefferson Blvd.,Dallas,TX,75208,32.743476,-96.827969,4,Mexican Restaurant,Fried Chicken Joint,Discount Store,Modern European Restaurant,Movie Theater,Coffee Shop,Café,Burger Joint,Pet Store,Brewery
3,Dallas City Performance Hall,2520 Flora St.,Dallas,TX,75201,32.790686,-96.797901,0,Food Truck,Performing Arts Venue,Theater,Sandwich Place,Japanese Restaurant,American Restaurant,Art Museum,Steakhouse,Breakfast Spot,Dog Run
4,Majestic Theatre,1925 Elm Street,Dallas,TX,75201,32.782121,-96.796708,3,Hotel,Cocktail Bar,Coffee Shop,Café,New American Restaurant,Taco Place,Gym,French Restaurant,Mobile Phone Shop,Mexican Restaurant
5,Morton H. Meyerson Symphony Center,2301 Flora,Dallas,TX,75201,32.789899,-96.800784,0,Steakhouse,Food Truck,Art Museum,Burger Joint,Mexican Restaurant,Performing Arts Venue,American Restaurant,Theater,BBQ Joint,New American Restaurant
6,South Dallas Cultural Center,3400 South Fitzhugh,Dallas,TX,75210,32.77155,-96.759674,1,Pharmacy,Volleyball Court,Athletics & Sports,Pizza Place,Planetarium,Basketball Court,Botanical Garden,Miscellaneous Shop,Discount Store,Cosmetics Shop


In [80]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One colour per cluster, sampled evenly from the rainbow colormap. The former
# x / ys helper arrays were only ever used for their length (kclusters), and
# markers_colors was never read, so all three are gone.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per center, coloured by its cluster label.
for lat, lon, poi, cluster in zip(dallas_venues_merged['LATITUDE'], dallas_venues_merged['LONGITUDE'], dallas_venues_merged['FACILITY NAME'], dallas_venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 sends label 0 to the last colour (index -1); every label still
    # gets its own distinct colour.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

Since two pairs of centers belong to two different clusters, I want to further examine these two clusters.

In [81]:
# Centers labelled 0: keep column 1 (the address) plus every column from
# position 5 onwards.
dallas_venues_merged[dallas_venues_merged['Cluster Labels'] == 0].iloc[:, [1] + list(range(5, dallas_venues_merged.shape[1]))]



Unnamed: 0,ADDRESS,LATITUDE,LONGITUDE,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,2520 Flora St.,32.790686,-96.797901,0,Food Truck,Performing Arts Venue,Theater,Sandwich Place,Japanese Restaurant,American Restaurant,Art Museum,Steakhouse,Breakfast Spot,Dog Run
5,2301 Flora,32.789899,-96.800784,0,Steakhouse,Food Truck,Art Museum,Burger Joint,Mexican Restaurant,Performing Arts Venue,American Restaurant,Theater,BBQ Joint,New American Restaurant


In [82]:
# Centers labelled 3: keep column 1 (the address) plus every column from
# position 5 onwards.
dallas_venues_merged[dallas_venues_merged['Cluster Labels'] == 3].iloc[:, [1] + list(range(5, dallas_venues_merged.shape[1]))]


Unnamed: 0,ADDRESS,LATITUDE,LONGITUDE,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2600 Live Oak St,32.787859,-96.792099,3,Hotel,Theater,Nightclub,Coffee Shop,American Restaurant,Plaza,Gym,Fast Food Restaurant,Liquor Store,Mexican Restaurant
4,1925 Elm Street,32.782121,-96.796708,3,Hotel,Cocktail Bar,Coffee Shop,Café,New American Restaurant,Taco Place,Gym,French Restaurant,Mobile Phone Shop,Mexican Restaurant
