## Analysis of business opportunities near Metro stations in Delhi

Before we get the data and start exploring it, let's download all the dependencies that we will need.

In [1]:
import requests # library to handle requests
from lxml import html # library to read html page
from bs4 import BeautifulSoup

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00   5.33 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  23.35 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  39.68 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  32.65 MB/s
Libraries imported.


## 1. Download and Explore Dataset

In [16]:
#Fetch metro stations list from wikipedia page.
r = requests.get("https://en.wikipedia.org/wiki/List_of_Delhi_Metro_stations")
soup = BeautifulSoup(r.content, 'html.parser')
data = []
table = soup.find('table', class_="wikitable sortable")
table_body = table.find('tbody')
rows = table_body.find_all('tr')

for row in rows:    
    cols = row.find_all(['td','th'])
    cols = [ele.text.strip() for ele in cols]
    data.append([ele for ele in cols if ele]) # Get rid of empty values

# Create dataframe
df = pd.DataFrame(data)
df = df[1:]

# Define column header
df.columns = ['Sr','StationName','Hindi Name','Line','Opened','Layout','Notes','Refs','Platform Layout']

# Ignore additional columns
df=df[['StationName','Line','Opened','Layout']]

# Ignore cells with metro lines name as station name
df=df[~df.StationName.str.contains("Line")]

# Ignore cells with station layout 'At Grade', as these stations are for interchange only and have no exit points.
df=df[~df.Layout.str.contains("Grade")]

# Remove special symbols from column names.
df['StationName']=df['StationName'].str.replace('*','')
df['StationName']=df['StationName'].str.replace('†','')
df['StationName']=df['StationName'].str.replace('¤','')

# Converting station opening date to datetime formt.
df['Opened'] = pd.to_datetime(df['Opened'])

df.reset_index(drop=True)

Unnamed: 0,StationName,Line,Opened,Layout
0,Adarsh Nagar,Yellow Line,2009-02-04,Elevated
1,AIIMS,Yellow Line,2010-09-03,Underground
2,Akshardham,Blue Line,2009-11-12,Elevated
3,Anand Vihar ISBT,Blue Line branch,2010-01-06,Elevated
4,Arjan Garh,Yellow Line,2010-06-21,Elevated
5,Arthala,Red Line,2019-03-08,Elevated
6,Ashok Park Main,Green Line,2010-04-02,Elevated
7,Ashram,Pink Line,2018-12-31,Underground
8,Azadpur,Yellow Line,2009-02-04,Elevated
9,Badarpur Border,Violet Line,2011-01-14,Elevated


#### Let's create a function to get latitude, longitude of each cell

In [17]:
def getLatiLong(row):
    address=row['StationName']
    add=''
    latitude = 0
    longitude = 0
    geolocator = Nominatim(user_agent="ny_explorer")
    try:
        add = address+', National Capital Territory of Delhi, IN'
        location = geolocator.geocode(add)
        latitude = location.latitude
        longitude = location.longitude
    except:
        if '–' in address:
            add = address.split('–')[0]
            add+=', National Capital Territory of Delhi, IN'
        else:  
            add = address+', IN'
        try:
            location = geolocator.geocode(add)
            latitude = location.latitude
            longitude = location.longitude 
        except:
            latitude = 0
            longitude = 0
            
    return pd.Series([latitude,longitude])

In [2]:
df[['Latitude','Longitude']] = df.apply(getLatiLong, axis=1)

NameError: name 'df' is not defined

In [3]:
df = df[df.Latitude != 0.000000]

NameError: name 'df' is not defined

In [20]:
df.reset_index(drop=True)

Unnamed: 0,StationName,Line,Opened,Layout,Latitude,Longitude
0,Adarsh Nagar,Yellow Line,2009-02-04,Elevated,28.614193,77.071541
1,AIIMS,Yellow Line,2010-09-03,Underground,28.569016,77.207612
2,Akshardham,Blue Line,2009-11-12,Elevated,28.612517,77.277318
3,Anand Vihar ISBT,Blue Line branch,2010-01-06,Elevated,28.646702,77.315509
4,Arjan Garh,Yellow Line,2010-06-21,Elevated,28.480716,77.125784
5,Ashok Park Main,Green Line,2010-04-02,Elevated,28.671633,77.155301
6,Ashram,Pink Line,2018-12-31,Underground,28.575177,77.256932
7,Azadpur,Yellow Line,2009-02-04,Elevated,28.707069,77.180383
8,Badarpur Border,Violet Line,2011-01-14,Elevated,28.493416,77.303334
9,Bahadurgarh City,Green Line,2018-06-24,Elevated,28.693324,76.933237


#### Save final processed data as DelhiMetroData.csv file. 

In [21]:
# The code was removed by Watson Studio for sharing.

{'asset_id': 'b32ec158-1b7e-466b-9d78-44cfafcbd5f9',
 'bucket_name': 'advancedatascience-donotdelete-pr-w2gymqcfyoewwc',
 'file_name': 'DelhiMetroData.csv',
 'message': 'File saved to project storage.'}

In [2]:
# The code was removed by Watson Studio for sharing.

In [3]:
df.head()

Unnamed: 0,StationName,Line,Opened,Layout,Latitude,Longitude
0,Adarsh Nagar,Yellow Line,2009-02-04,Elevated,28.614192,77.071541
1,AIIMS,Yellow Line,2010-09-03,Underground,28.569016,77.207612
2,Akshardham,Blue Line,2009-11-12,Elevated,28.612517,77.277318
3,Anand Vihar ISBT,Blue Line branch,2010-01-06,Elevated,28.646702,77.315509
4,Arjan Garh,Yellow Line,2010-06-21,Elevated,28.480716,77.125784


#### Use geopy library to get the latitude and longitude values of National Capital Territory of Delhi, India.

In [4]:
address = 'National Capital Territory of Delhi, IN'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of National Capital Territory of Delhi, India are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of National Capital Territory of Delhi, India are 28.6273928, 77.1716954.


#### Create a map of Delhi Metro with stations superimposed on top.

In [5]:
# create map of National Capital Territory of Delhi using latitude and longitude values
map_nct = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, stationName in zip(df['Latitude'], df['Longitude'], df['StationName']):
    label = '{}'.format(stationName)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        zoom_start=100,
        parse_html=False).add_to(map_nct)  
    
map_nct

Next, we are going to start utilizing the Foursquare API to explore the metro stations and segment them.

#### Define Foursquare Credentials and Version

In [6]:
# The code was removed by Watson Studio for sharing.

#### Let's explore the first metro station in our dataframe.

Get the station's name.

In [7]:
df.loc[0, 'StationName']

'Adarsh Nagar'

Get the station's latitude and longitude values.

In [8]:
station_latitude = df.loc[0, 'Latitude'] # station latitude value
station_longitude = df.loc[0, 'Longitude'] # station longitude value

station_name = df.loc[0, 'StationName'] # station name

print('Latitude and longitude values of {} are {}, {}.'.format(station_name, 
                                                               station_latitude, 
                                                               station_longitude))

Latitude and longitude values of Adarsh Nagar are 28.614192499999998, 77.0715411848447.


#### Now, let's get the top venues that are in Adarsh Nagarl within a radius of 500 meters.

Let's create the GET request URL.

In [9]:
# type your answer here
LIMIT = 100 
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, station_latitude, station_longitude, VERSION, radius, LIMIT)


Send the GET request and examine the results

In [10]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c9fc2c44c1f6729012d7bb7'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4e664a628877954de9d0030d-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/indian_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d10f941735',
         'name': 'Indian Restaurant',
         'pluralName': 'Indian Restaurants',
         'primary': True,
         'shortName': 'Indian'}],
       'id': '4e664a628877954de9d0030d',
       'location': {'cc': 'IN',
        'country': 'India',
        'distance': 452,
        'formattedAddress': ['India'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 28.61339089253902,
          'lng': 77.07608403328531}],
        'lat': 28.61339089253902,
        'lng': 77.07608403328531},
       'n

From the Foursquare lab in the previous module, we know that all the information is in the *items* key. Before we proceed, let's borrow the **get_category_type** function from the Foursquare lab.

In [11]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Now we are ready to clean the json and structure it into a *pandas* dataframe.

In [12]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Bikanerwala,Indian Restaurant,28.613391,77.076084
1,McDonald's,Burger Joint,28.61633,77.067034


In [13]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

2 venues were returned by Foursquare.


## 2. Explore Metro Stations in NCT of Delhi

In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['StationName', 
                  'Station Latitude', 
                  'Station Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Run the above function on each metro station and create a new dataframe called *metro_venues*.

In [15]:
metro_venues = getNearbyVenues(names=df['StationName'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Adarsh Nagar
AIIMS
Akshardham
Anand Vihar ISBT
Arjan Garh
Ashok Park Main
Ashram
Azadpur
Badarpur Border
Bahadurgarh City
Barakhambha Road
Bata Chowk
Bhikaji Cama Place
Botanical Garden
Central Secretariat
Chandni Chowk
Chawri Bazar
Chhatarpur
Chirag Delhi
Dabri Mor-Janakpuri South
Dashrath Puri
Delhi Aerocity
Delhi Cantonment
Delhi Gate
Dhaula Kuan
Dilshad Garden
Durgabai Deshmukh South Campus
Dwarka
Dwarka Mor
Dwarka Sector 8
Dwarka Sector 9
Dwarka Sector 10
Dwarka Sector 11
Dwarka Sector 12
Dwarka Sector 13
Dwarka Sector 14
Dwarka Sector 21
East Azad Nagar
East Vinod Nagar – Mayur Vihar-II
Escorts Mujesar
ESI Hospital
Ghitorni
Gokulpuri
Golf Course
Govind Puri
Greater Kailash
Green Park
GTB Nagar
Guru Dronacharya
Haiderpur
Hauz Khas
Hazrat Nizamuddin
Hindon
HUDA City Centre
IFFCO Chowk
IIT Delhi
INA
Inderlok
Indira Gandhi International Airport
Indraprastha
IP Extension
ITO
Jaffrabad
Jahangirpuri
Jama Masjid
Jamia Millia Islamia
Janakpuri East
Janakpuri West
Jangpura
Janpath
Jasola V

Let's check the size of the resulting dataframe

In [36]:
print(metro_venues.shape)
metro_venues.head()

(1496, 7)


Unnamed: 0,StationName,Station Latitude,Station Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,,,,,,,
1,,,,,,,
2,,,,,,,
3,,,,,,,
4,,,,,,,


Let's check how many venues were returned for each metro station

In [33]:
metro_venue_count=metro_venues.groupby('StationName').count()
metro_venue_count=metro_venue_count[['Venue']].sort_values(by=['Venue'], ascending=False).reset_index()
metro_venue_count

Unnamed: 0,StationName,Venue


#### Let's find most common venue types

In [18]:
metro_venue_category_count=metro_venues.groupby('Venue Category').count()
metro_venue_category_count=metro_venue_category_count[['Venue']].sort_values(by=['Venue'], ascending=False).reset_index()
metro_venue_category_count

Unnamed: 0,Venue Category,Venue
0,Indian Restaurant,144
1,Hotel,80
2,Café,80
3,Fast Food Restaurant,70
4,Coffee Shop,68
5,Pizza Place,53
6,Light Rail Station,51
7,Chinese Restaurant,40
8,Restaurant,35
9,Train Station,32


#### Dropping venues which are metro stations itself.

In [20]:
metro_venue_category_count=metro_venue_category_count[~metro_venue_category_count['Venue Category'].str.contains("Station")]

#### Taking into consideration top 25 venue types.

In [21]:
metro_venue_category_count.reset_index(drop=True).head(25)

Unnamed: 0,Venue Category,Venue
0,Indian Restaurant,144
1,Hotel,80
2,Café,80
3,Fast Food Restaurant,70
4,Coffee Shop,68
5,Pizza Place,53
6,Chinese Restaurant,40
7,Restaurant,35
8,Shopping Mall,27
9,Bar,25


#### Let's find out how many unique categories can be curated from all the returned venues

In [22]:
print('There are {} uniques categories.'.format(len(metro_venues['Venue Category'].unique())))

There are 217 uniques categories.


## 3. Analyze Each Metro Station

In [23]:
# one hot encoding
metro_onehot = pd.get_dummies(metro_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
metro_onehot['StationName'] = metro_venues['StationName'] 

# move neighborhood column to the first column
fixed_columns = [metro_onehot.columns[-1]] + list(metro_onehot.columns[:-1])
metro_onehot = metro_onehot[fixed_columns]

metro_onehot.head()

Unnamed: 0,StationName,ATM,Accessories Store,Advertising Agency,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Baby Store,Bakery,Bank,Bar,Basketball Court,Bed & Breakfast,Beer Bar,Beer Garden,Belgian Restaurant,Bengali Restaurant,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Building,Burger Joint,Bus Station,Bus Stop,Business Center,Business Service,Cafeteria,Café,Campground,Candy Store,Chaat Place,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Cricket Ground,Czech Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Donut Shop,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,Gift Shop,Go Kart Track,Golf Course,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hardware Store,Health & Beauty Service,Health Food Store,Herbs & Spices Store,Hindu Temple,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Light Rail Station,Liquor Store,Lounge,Luggage Store,Market,Massage Studio,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Mosque,Motel,Motorcycle Shop,Movie Theater,Moving Target,Multicuisine Indian Restaurant,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Nightlife Spot,Noodle House,North Indian Restaurant,Office,Other Great Outdoors,Outdoor Supply Store,Outlet Store,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Service,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Public Art,Punjabi Restaurant,Racetrack,Ramen Restaurant,Recreation Center,Rental Car Location,Resort,Restaurant,River,Road,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Shoe Store,Shop & Service,Shopping Mall,Ski Chalet,Smoke Shop,Snack Place,Soccer Stadium,South Indian Restaurant,Souvenir Shop,Spa,Spiritual Center,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Sushi Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Adarsh Nagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Adarsh Nagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,AIIMS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,AIIMS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,AIIMS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [24]:
metro_onehot.shape

(1496, 218)

#### Next, let's group rows by StationName and by taking the mean of the frequency of occurrence of each category

In [25]:
metro_grouped = metro_onehot.groupby('StationName').mean().reset_index()
metro_grouped

Unnamed: 0,StationName,ATM,Accessories Store,Advertising Agency,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Baby Store,Bakery,Bank,Bar,Basketball Court,Bed & Breakfast,Beer Bar,Beer Garden,Belgian Restaurant,Bengali Restaurant,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Building,Burger Joint,Bus Station,Bus Stop,Business Center,Business Service,Cafeteria,Café,Campground,Candy Store,Chaat Place,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Cricket Ground,Czech Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Donut Shop,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,Gift Shop,Go Kart Track,Golf Course,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hardware Store,Health & Beauty Service,Health Food Store,Herbs & Spices Store,Hindu Temple,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Light Rail Station,Liquor Store,Lounge,Luggage Store,Market,Massage Studio,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Mosque,Motel,Motorcycle Shop,Movie Theater,Moving Target,Multicuisine Indian Restaurant,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Nightlife Spot,Noodle House,North Indian Restaurant,Office,Other Great Outdoors,Outdoor Supply Store,Outlet Store,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Service,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Public Art,Punjabi Restaurant,Racetrack,Ramen Restaurant,Recreation Center,Rental Car Location,Resort,Restaurant,River,Road,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Shoe Store,Shop & Service,Shopping Mall,Ski Chalet,Smoke Shop,Snack Place,Soccer Stadium,South Indian Restaurant,Souvenir Shop,Spa,Spiritual Center,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Sushi Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,AIIMS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Adarsh Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Akshardham,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Anand Vihar ISBT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Arjan Garh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2
5,Ashok Park Main,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Ashram,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Azadpur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Badarpur Border,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Bahadurgarh City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Let's confirm the new size

In [26]:
metro_grouped.shape

(195, 218)

First, let's write a function to sort the venues in descending order.

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each station.

In [29]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['StationName']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
metro_venues_sorted = pd.DataFrame(columns=columns)
metro_venues_sorted['StationName'] = metro_grouped['StationName']

for ind in np.arange(metro_grouped.shape[0]):
    metro_venues_sorted.iloc[ind, 1:] = return_most_common_venues(metro_grouped.iloc[ind, :], num_top_venues)

metro_venues_sorted

Unnamed: 0,StationName,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,AIIMS,Indian Restaurant,Memorial Site,Market,Restaurant,Yoga Studio
1,Adarsh Nagar,Indian Restaurant,Burger Joint,Yoga Studio,Fabric Shop,Food Truck
2,Akshardham,Athletics & Sports,Hindu Temple,Yoga Studio,Food Truck,Food Court
3,Anand Vihar ISBT,Clothing Store,Multiplex,Indian Restaurant,Café,Gym
4,Arjan Garh,Hotel,Light Rail Station,Furniture / Home Store,Yoga Studio,History Museum
5,Ashok Park Main,Train Station,Yoga Studio,Electronics Store,Food Truck,Food Court
6,Ashram,Dessert Shop,Bowling Alley,Bakery,Neighborhood,Yoga Studio
7,Azadpur,Indian Restaurant,Park,Bus Station,Yoga Studio,Electronics Store
8,Badarpur Border,IT Services,Train Station,Yoga Studio,Eastern European Restaurant,Food Court
9,Bahadurgarh City,Pharmacy,Bus Station,French Restaurant,Food Truck,Food Court


## 4. Cluster Metro Stations

Run *k*-means to cluster the neighborhood into 5 clusters.

In [30]:
# set number of clusters
kclusters =5

metro_grouped_clustering = metro_grouped.drop('StationName', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(metro_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 2, 2, 2, 0, 2, 1, 0, 2], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each station.

In [31]:
# add clustering labels
metro_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
metro_merged = df

metro_merged = metro_merged.join(metro_venues_sorted.set_index('StationName'), on='StationName')

metro_merged.head() # check the last columns!

Unnamed: 0,StationName,Line,Opened,Layout,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Adarsh Nagar,Yellow Line,2009-02-04,Elevated,28.614192,77.071541,1.0,Indian Restaurant,Burger Joint,Yoga Studio,Fabric Shop,Food Truck
1,AIIMS,Yellow Line,2010-09-03,Underground,28.569016,77.207612,1.0,Indian Restaurant,Memorial Site,Market,Restaurant,Yoga Studio
2,Akshardham,Blue Line,2009-11-12,Elevated,28.612517,77.277318,2.0,Athletics & Sports,Hindu Temple,Yoga Studio,Food Truck,Food Court
3,Anand Vihar ISBT,Blue Line branch,2010-01-06,Elevated,28.646702,77.315509,2.0,Clothing Store,Multiplex,Indian Restaurant,Café,Gym
4,Arjan Garh,Yellow Line,2010-06-21,Elevated,28.480716,77.125784,2.0,Hotel,Light Rail Station,Furniture / Home Store,Yoga Studio,History Museum


In [37]:
metro_merged['Cluster Labels'] = metro_merged['Cluster Labels'].fillna(0).astype(int).astype(int)

Finally, let's visualize the resulting clusters

In [38]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(metro_merged['Latitude'], metro_merged['Longitude'], metro_merged['StationName'], metro_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 5. Examine Clusters

#### Cluster 1

In [63]:
cluster1=metro_merged.loc[metro_merged['Cluster Labels'] == 0, metro_merged.columns[[0] + list(range(2, metro_merged.shape[1]))]]
print(cluster1.shape)
cluster1

(25, 11)


Unnamed: 0,StationName,Opened,Layout,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
5,Ashok Park Main,2010-04-02,Elevated,28.671633,77.155301,0,Train Station,Yoga Studio,Electronics Store,Food Truck,Food Court
8,Badarpur Border,2011-01-14,Elevated,28.493416,77.303334,0,IT Services,Train Station,Yoga Studio,Eastern European Restaurant,Food Court
30,Dwarka Sector 9,2006-04-01,Elevated,28.574282,77.065351,0,Herbs & Spices Store,Train Station,Fast Food Restaurant,Chaat Place,Burger Joint
43,Golf Course,2009-11-12,Elevated,28.597781,77.159817,0,Snack Place,Train Station,Sake Bar,Golf Course,Yoga Studio
50,Hauz Khas,2010-09-03,Underground,28.544258,77.20674,0,Bar,Train Station,Chinese Restaurant,Lounge,Nightclub
51,Hazrat Nizamuddin,2018-12-31,Underground,28.588749,77.257249,0,Train Station,Food Court,Moving Target,Café,Yoga Studio
52,Hindon,2019-03-08,Elevated,29.082006,77.458046,0,,,,,
78,Kanhaiya Nagar,2004-03-31,Elevated,28.680063,77.164845,0,Train Station,Bakery,Light Rail Station,Yoga Studio,Fabric Shop
83,Kaushambi,2011-07-14,Elevated,25.543086,81.44866,0,,,,,
93,Madipur,2010-04-02,Elevated,28.676443,77.119353,0,Train Station,Yoga Studio,Electronics Store,Food Truck,Food Court


#### Cluster 2

In [64]:
cluster2=metro_merged.loc[metro_merged['Cluster Labels'] == 1, metro_merged.columns[[0] + list(range(2, metro_merged.shape[1]))]]
print(cluster2.shape)
cluster2

(39, 11)


Unnamed: 0,StationName,Opened,Layout,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Adarsh Nagar,2009-02-04,Elevated,28.614192,77.071541,1,Indian Restaurant,Burger Joint,Yoga Studio,Fabric Shop,Food Truck
1,AIIMS,2010-09-03,Underground,28.569016,77.207612,1,Indian Restaurant,Memorial Site,Market,Restaurant,Yoga Studio
7,Azadpur,2009-02-04,Elevated,28.707069,77.180383,1,Indian Restaurant,Park,Bus Station,Yoga Studio,Electronics Store
10,Barakhambha Road,2005-12-30,Underground,28.629753,77.224996,1,Indian Restaurant,Hotel Bar,Coffee Shop,Molecular Gastronomy Restaurant,Gastropub
14,Central Secretariat,2005-07-03,Underground,28.614128,77.205615,1,Music Venue,Indian Restaurant,Spiritual Center,Yoga Studio,Food Court
15,Chandni Chowk,2005-07-03,Underground,28.656265,77.229909,1,Indian Restaurant,Market,Snack Place,Dessert Shop,Train Station
16,Chawri Bazar,2005-07-03,Underground,28.65016,77.229501,1,Indian Restaurant,Hotel,Mosque,Paper / Office Supplies Store,Snack Place
25,Dilshad Garden,2008-06-04,Elevated,28.680682,77.322064,1,Convenience Store,Indian Restaurant,Outlet Store,Fast Food Restaurant,Electronics Store
39,Escorts Mujesar,2015-09-06,Elevated,28.370234,77.31492,1,Shopping Mall,Indian Restaurant,Burger Joint,Fabric Shop,Food Truck
44,Govind Puri,2010-10-03,Elevated,28.544377,77.264266,1,Mobile Phone Shop,Park,Beer Garden,Indian Restaurant,Train Station


#### Cluster 3

In [65]:
cluster3=metro_merged.loc[metro_merged['Cluster Labels'] == 2, metro_merged.columns[[0] + list(range(2, metro_merged.shape[1]))]]
print(cluster3.shape)
cluster3

(133, 11)


Unnamed: 0,StationName,Opened,Layout,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Akshardham,2009-11-12,Elevated,28.612517,77.277318,2,Athletics & Sports,Hindu Temple,Yoga Studio,Food Truck,Food Court
3,Anand Vihar ISBT,2010-01-06,Elevated,28.646702,77.315509,2,Clothing Store,Multiplex,Indian Restaurant,Café,Gym
4,Arjan Garh,2010-06-21,Elevated,28.480716,77.125784,2,Hotel,Light Rail Station,Furniture / Home Store,Yoga Studio,History Museum
6,Ashram,2018-12-31,Underground,28.575177,77.256932,2,Dessert Shop,Bowling Alley,Bakery,Neighborhood,Yoga Studio
9,Bahadurgarh City,2018-06-24,Elevated,28.693324,76.933237,2,Pharmacy,Bus Station,French Restaurant,Food Truck,Food Court
11,Bata Chowk,2015-09-06,Elevated,28.385836,77.313462,2,Asian Restaurant,Shopping Mall,Hotel,Multiplex,Food
12,Bhikaji Cama Place,2018-08-06,Underground,28.5679,77.187016,2,Lounge,Hotel,Café,Asian Restaurant,Gym / Fitness Center
13,Botanical Garden,2009-11-12,Elevated,28.563914,77.33435,2,Bus Station,Hotel,Light Rail Station,Flea Market,Bakery
17,Chhatarpur,2010-08-26,Elevated,28.506709,77.175027,2,Flea Market,Japanese Restaurant,Metro Station,Indian Restaurant,Arts & Crafts Store
18,Chirag Delhi,2018-05-29,Underground,28.538141,77.228069,2,Yoga Studio,Trail,Motel,Hotel,Afghan Restaurant


#### Cluster 4

In [66]:
cluster4=metro_merged.loc[metro_merged['Cluster Labels'] == 3, metro_merged.columns[[0] + list(range(2, metro_merged.shape[1]))]]
print(cluster4.shape)
cluster4

(3, 11)


Unnamed: 0,StationName,Opened,Layout,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
129,Noida Sector 59,2019-03-09,Elevated,28.606529,77.372642,3,Business Service,Electronics Store,Food Truck,Food Court,Food & Drink Shop
172,Shaheed Nagar,2019-03-08,Elevated,28.67823,77.335647,3,Business Service,Music Venue,Yoga Studio,Food Court,Food & Drink Shop
177,Shiv Vihar,2018-10-31,Elevated,28.648123,77.050612,3,Business Service,Electronics Store,Food Truck,Food Court,Food & Drink Shop


#### Cluster 5

In [67]:
cluster5=metro_merged.loc[metro_merged['Cluster Labels'] == 4, metro_merged.columns[[0] + list(range(2, metro_merged.shape[1]))]]
print(cluster5.shape)
cluster5

(3, 11)


Unnamed: 0,StationName,Opened,Layout,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
22,Delhi Cantonment,2018-03-14,Elevated,28.588856,77.132399,4,ATM,Electronics Store,Food Truck,Food Court,Food & Drink Shop
163,Sadar Bazaar Cantonment,2018-05-29,Elevated,28.577151,77.111153,4,ATM,Food,Electronics Store,Food Truck,Food Court
181,Sikandarpur,2010-06-21,Elevated,28.528424,77.415394,4,ATM,Electronics Store,Food Truck,Food Court,Food & Drink Shop
