# Best place to open a new Movie Theater as per business competition in Hyderbad, India

##### Importing the necessary libraries.

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from IPython.display import display
import numpy as np

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

from geopy.geocoders import Nominatim
import folium

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)

##### Scraping neighborhood data of hyderabad from wikipedia [page](https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Hyderabad,_India) 

Function to find latitude and longitude of a place.

In [2]:
geolocator = Nominatim(user_agent="ny_explorer")

def latlng(place):
    address = 'Hyderabad, '+place
    location = None
    i = 0
    while location is None:
        i += 1
        if(i == 10):
            return ('Not Found', 'Not Found')
        try:
            location = geolocator.geocode(address)
        except:
            pass
    
    return (location.latitude, location.longitude)

Scarapping and generating a dataframe.

In [5]:
source = requests.get('https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Hyderabad,_India').text
soup = BeautifulSoup(source, 'lxml')

hyd_neighborhood = {'Neighborhood':[],'Latitude':[],'Longitude':[]}
divs = soup.find_all('div', class_="mw-category-group")

for div in divs:
    ulist = div.find_all('a')
    for li in ulist:
        hyd_neighborhood['Neighborhood'].append(li.text)
        lat, lng = latlng(li.text)
        # print(li.text, lat, lng)
        hyd_neighborhood['Latitude'].append(lat)
        hyd_neighborhood['Longitude'].append(lng)

hyd_neighborhood = pd.DataFrame(hyd_neighborhood)
hyd_neighborhood = hyd_neighborhood[hyd_neighborhood['Latitude'] != 'Not Found']
display(hyd_neighborhood.head(10))
print('Shape of the resulting dataframe is '+str(hyd_neighborhood.shape))
hyd_neighborhood.to_csv('hyderabad.csv')

Unnamed: 0,Neighborhood,Latitude,Longitude
0,A. S. Rao Nagar,17.4799,78.5568
1,A.C. Guards,17.4028,78.4595
2,Abhyudaya Nagar,17.3377,78.5647
3,Abids,17.3895,78.4772
4,Adikmet,17.4095,78.5131
6,Aghapura,17.3892,78.4653
8,Alijah Kotla,17.3605,78.4801
9,Allwyn Colony,17.5044,78.415
10,Alwal,17.5022,78.5089
11,Amberpet,17.3903,78.5165


Shape of the resulting dataframe is (159, 3)


Plotting the neighborhood points on map.

In [8]:
location = None
while location is None:
    try:
        location = geolocator.geocode('Hyderabad, India')
    except:
        pass

latitude = location.latitude
longitude = location.longitude
    
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, neighborhood in zip(hyd_neighborhood['Latitude'], hyd_neighborhood['Longitude'], hyd_neighborhood['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

##### Using the foursquare api to get venue details.

Define API credentials.

In [10]:
clientID = 'LKYB2D43HOP4CK5MYWZ0YGHHJFLCKMDVIJVFS2WRWZJVVF03'
clientSecret = 'IE4HFIDTJHUSLMSNIBJ40UGURYBBRGXFD4UIPKX5YUKC0RO4'
version = '20180605'

print('Credentails:')
print('CLIENT ID: ' + clientID)
print('CLIENT SECRET:' + clientSecret)

Credentails:
CLIENT ID: LKYB2D43HOP4CK5MYWZ0YGHHJFLCKMDVIJVFS2WRWZJVVF03
CLIENT SECRET:IE4HFIDTJHUSLMSNIBJ40UGURYBBRGXFD4UIPKX5YUKC0RO4


Using the API to to get the venue data.

In [13]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500, limit=100):
    venues_list=[]
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id='+clientID+'&client_secret='+clientSecret+'&v='+version+'&ll='+str(lat)+','+str(lng)+'&radius='+str(radius)+'&limit='+str(limit)     
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

venues = getNearbyVenues(names=hyd_neighborhood['Neighborhood'], latitudes=hyd_neighborhood['Latitude'], longitudes=hyd_neighborhood['Longitude'])
display(venues.head(10))
print('Shape of the resulting dataframe is'+str(venues.shape))

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,A. S. Rao Nagar,17.47995,78.556834,The Coffee Cup,17.48318,78.552104,Café
1,A. S. Rao Nagar,17.47995,78.556834,Cafe Coffee Day,17.481262,78.555077,Café
2,A. S. Rao Nagar,17.47995,78.556834,Domino's Pizza,17.475035,78.553141,Pizza Place
3,A. S. Rao Nagar,17.47995,78.556834,Woodland Restaurant,17.476646,78.566404,Snack Place
4,A. S. Rao Nagar,17.47995,78.556834,Parivaar Restaurant,17.47685,78.563525,Indian Restaurant
5,A. S. Rao Nagar,17.47995,78.556834,KFC,17.47504,78.553137,Fast Food Restaurant
6,A. S. Rao Nagar,17.47995,78.556834,Swagath Grand,17.482022,78.553261,Indian Restaurant
7,A. S. Rao Nagar,17.47995,78.556834,McDonald's,17.476961,78.564754,Fast Food Restaurant
8,A. S. Rao Nagar,17.47995,78.556834,HDFC Bank,17.480961,78.55558,Bank
9,A. S. Rao Nagar,17.47995,78.556834,Ratnadeep Supermarket,17.481483,78.55416,Department Store


Shape of the resulting dataframe is(4557, 7)


Number of unique venue categories.

In [14]:
categories = venues['Venue Category'].unique()
print(categories)

['Café' 'Pizza Place' 'Snack Place' 'Indian Restaurant'
 'Fast Food Restaurant' 'Bank' 'Department Store' 'Diner' 'Shopping Mall'
 'Hotel' 'Gastropub' 'Hyderabadi Restaurant' 'Bakery' 'Lounge'
 'Middle Eastern Restaurant' 'South Indian Restaurant' 'Bistro'
 'Ice Cream Shop' 'Vegetarian / Vegan Restaurant' 'Science Museum'
 'Multiplex' 'Chaat Place' 'Hotel Bar' 'Park' 'Performing Arts Venue'
 'Coffee Shop' 'Stadium' 'Pub' 'Shoe Store' 'Electronics Store'
 'Chinese Restaurant' 'Breakfast Spot' 'Dessert Shop' 'Clothing Store'
 "Men's Store" 'Movie Theater' 'Juice Bar' 'Food Truck' 'Neighborhood'
 'Mobile Phone Shop' 'Smoke Shop' 'Bar' 'Farmers Market'
 'Indie Movie Theater' 'Fried Chicken Joint' 'Bookstore' 'Sandwich Place'
 'Gym' 'Light Rail Station' 'Convenience Store' 'Pharmacy' 'Flea Market'
 'Food' 'History Museum' 'Monument / Landmark' 'Food Court'
 'Train Station' 'Athletics & Sports' 'Grocery Store' 'Bus Station'
 'Food & Drink Shop' 'Bengali Restaurant' 'Thai Restaurant'
 'Furnit