In [40]:
from pymongo import MongoClient
import folium
import numpy as np
import pandas as pd

In [41]:
def connectCollection(database, collection):
    """Select a database and one of its collections on a default MongoClient.

    Args:
        database: name of the database to select on the client.
        collection: name of the collection to select inside that database.

    Returns:
        A ``(db, coll)`` tuple holding the database handle and the
        collection handle, in that order.
    """
    # MongoClient() is created with no arguments, i.e. the driver defaults.
    mongo_db = MongoClient()[database]
    return mongo_db, mongo_db[collection]

In [42]:
db, coll = connectCollection('companies','companies')

In [43]:
def getLocation(comp,i):
    """Build a GeoJSON-like Point dict for the i-th office of a company document.

    Args:
        comp: company document with an ``offices`` sequence whose entries
            carry ``latitude`` and ``longitude`` keys.
        i: position of the office inside ``comp['offices']``.

    Returns:
        ``{'type': 'Point', 'coordinates': [longitude, latitude]}`` — note
        that longitude comes first in the coordinate pair.
    """
    office = comp['offices'][i]
    lat, lon = office['latitude'], office['longitude']
    return {'type': 'Point', 'coordinates': [lon, lat]}

In [44]:
def getName(comp,i):
    """Return the ``name`` field of a company document.

    Args:
        comp: company document; must contain a ``name`` key.
        i: accepted for signature symmetry with ``getLocation`` but not used.

    Returns:
        The value stored under ``comp['name']``.
    """
    return comp['name']

I am going to keep only the companies that were founded after 2003, that have raised more than 1M dollars in at least one funding round, and whose category code is similar to that of a gaming company.

In [45]:
# Aggregation pipeline:
#   1. $unwind emits one document per entry of the `offices` array, so every
#      result carries a single office sub-document under `offices`.
#   2. $match keeps documents founded after 2003, with some funding round
#      above 1,000,000, and whose category code is one of the listed values.
# The conditions are sibling keys of one $match document (implicit AND) and
# the category test uses $in, which matches when the field equals any value
# in the list — the same documents the original $and/$or chain selected.
pipeline = [
    {"$unwind": "$offices"},
    {"$match": {
        "founded_year": {"$gt": 2003},
        "funding_rounds.raised_amount": {"$gt": 1000000},
        "category_code": {"$in": [
            "web", "software", "games_video", "hardware",
            "mobile", "music", "photo_video", "design",
        ]},
    }},
]
results = list(coll.aggregate(pipeline))

In [46]:
# Flatten the aggregation results into parallel lists, one entry per
# (company, office) document. After the $unwind stage each result holds a
# single office sub-document under "offices".
name = []
city = []
country_code = []
longitude = []
latitude = []

for e in results:
    office = e["offices"]
    name.append(e["name"])
    city.append(office["city"])
    country_code.append(office["country_code"])
    # .get() always appends exactly one value per office (None when the key
    # is absent), so the five lists can never end up with different lengths.
    longitude.append(office.get("longitude"))
    latitude.append(office.get("latitude"))

In [47]:
print(len(name),len(city),len(country_code),len(longitude),len(latitude))

1211 1211 1211 1211 1211


In [48]:
myData = {"name":name, "city":city, "country":country_code, "latitude":latitude, "longitude":longitude}

In [49]:
startup_df = pd.DataFrame(data=myData)

In [50]:
startup_df

Unnamed: 0,name,city,country,latitude,longitude
0,Wetpaint,Seattle,USA,47.603122,-122.333253
1,Wetpaint,New York,USA,40.723731,-73.996431
2,Geni,West Hollywood,USA,34.090368,-118.393064
3,Slacker,San Diego,USA,33.022176,-117.081406
4,Joost,New York,USA,40.746497,-74.009447
5,Babelgum,London,GBR,53.344104,-6.267494
6,Mahalo,Culver City,USA,34.017606,-118.487267
7,Kyte,San Francisco,USA,37.788482,-122.409173
8,Veoh,San Diego,USA,32.902266,-117.20834
9,Wesabe,San Francisco,USA,37.793148,-122.402567


Here I could use geocoding to fill in the missing coordinates from the address of each office, but to keep things simple I am going to drop those rows instead.

In [51]:
startup_clean = startup_df.dropna()

In [52]:
startup_clean

Unnamed: 0,name,city,country,latitude,longitude
0,Wetpaint,Seattle,USA,47.603122,-122.333253
1,Wetpaint,New York,USA,40.723731,-73.996431
2,Geni,West Hollywood,USA,34.090368,-118.393064
3,Slacker,San Diego,USA,33.022176,-117.081406
4,Joost,New York,USA,40.746497,-74.009447
5,Babelgum,London,GBR,53.344104,-6.267494
6,Mahalo,Culver City,USA,34.017606,-118.487267
7,Kyte,San Francisco,USA,37.788482,-122.409173
8,Veoh,San Diego,USA,32.902266,-117.20834
9,Wesabe,San Francisco,USA,37.793148,-122.402567


In [53]:
startup_clean.index = pd.RangeIndex(len(startup_clean.index))

In [54]:
startup_clean.head(30)

Unnamed: 0,name,city,country,latitude,longitude
0,Wetpaint,Seattle,USA,47.603122,-122.333253
1,Wetpaint,New York,USA,40.723731,-73.996431
2,Geni,West Hollywood,USA,34.090368,-118.393064
3,Slacker,San Diego,USA,33.022176,-117.081406
4,Joost,New York,USA,40.746497,-74.009447
5,Babelgum,London,GBR,53.344104,-6.267494
6,Mahalo,Culver City,USA,34.017606,-118.487267
7,Kyte,San Francisco,USA,37.788482,-122.409173
8,Veoh,San Diego,USA,32.902266,-117.20834
9,Wesabe,San Francisco,USA,37.793148,-122.402567


In [55]:
import folium

In [56]:
# Base map; the initial view is centred on the hard-coded coordinates below.
map_city = folium.Map(location=[40.7221, -73.9712], zoom_start=12)

# One red "cloud" marker per office. Only the two coordinate columns are
# needed, so iterate over them directly instead of materialising every row.
# `radius` was dropped: it is a circle-marker option and has no effect on a
# plain Marker.
for lat, lon in zip(startup_clean['latitude'], startup_clean['longitude']):
    folium.Marker(
        (lat, lon),
        icon=folium.Icon(icon='cloud', color='red'),
    ).add_to(map_city)
    


In [57]:
map_city

## APIs

In [58]:
import os
from dotenv import load_dotenv
import requests
load_dotenv()


True

In [59]:
def googleRequestAuthorized(lat,lon,r,keyword):
    """Call the Google Places Nearby Search endpoint and return the decoded JSON.

    Args:
        lat: latitude of the search centre (string or number).
        lon: longitude of the search centre (string or number).
        r: value sent as the ``radius`` query parameter.
        keyword: search term. A ``+`` is treated as a word separator, which
            is how callers in this notebook already pass multi-word terms
            (e.g. ``"vegan+restaurant"``); every other character is
            URL-encoded by ``requests``.

    Returns:
        The parsed JSON body of the response.

    Raises:
        ValueError: if the ``GOOGLE_API_TOKEN`` environment variable is
            unset or empty.
        requests.exceptions.RequestException: on connection problems or when
            the server does not answer within the timeout.
    """
    authToken = os.getenv("GOOGLE_API_TOKEN")
    if not authToken:
        raise ValueError("NECESITAS UN TOKEN")

    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    # Let requests build and escape the query string instead of formatting
    # raw values into the URL, so characters such as '&' or spaces in the
    # keyword cannot corrupt the request.
    params = {
        "location": "{},{}".format(lat, lon),
        "radius": r,
        "keyword": str(keyword).replace("+", " "),
        "key": authToken,
    }
    # Without a timeout a single stalled connection would block the calling
    # loop indefinitely.
    res = requests.get(url, params=params, timeout=10)
    return res.json()

In [47]:
starbucks = googleRequestAuthorized("40.723731","-73.996431","200","starbucks")

In [23]:
vegan = googleRequestAuthorized("40.723731","-73.996431","200","vegan+restaurant")

In [49]:
club = googleRequestAuthorized("40.723731","-73.996431","500","club")

In [50]:
airport = googleRequestAuthorized("40.723731","-73.996431","30000","airport")

In [106]:
kindergarten = googleRequestAuthorized("40.723731","-73.996431","200","daycare")

In [24]:
l = vegan['results']

In [25]:
print(l[0]['name'])
print(l[0]['geometry']['location']['lat'])
print(l[0]['geometry']['location']['lng'])

by CHLOE.
40.7227818
-73.9972192


## There is a Starbucks in a radius of 100 meters

In [60]:
# For every office, look up "starbucks" within a radius of "100" and store
# the first hit's name and coordinates in three new columns.
for i in startup_clean.index:
    lat = startup_clean.loc[i, "latitude"]
    lon = startup_clean.loc[i, "longitude"]
    # The request itself is outside the try block, so network or token errors
    # still surface instead of being recorded as "nothing nearby".
    starbucks = googleRequestAuthorized(str(lat),str(lon),"100","starbucks")
    try:
        first_hit = starbucks['results'][0]
        hit_name = first_hit['name']
        hit_lat = first_hit['geometry']['location']['lat']
        hit_lon = first_hit['geometry']['location']['lng']
    except (KeyError, IndexError, TypeError):
        # Empty or malformed result list: mark the row as missing. Catching
        # only these lookup errors keeps KeyboardInterrupt and real bugs visible.
        hit_name = hit_lat = hit_lon = np.nan

    startup_clean.loc[i, 'starbucks'] = hit_name
    startup_clean.loc[i, 'starbucks_lat'] = hit_lat
    startup_clean.loc[i, 'starbucks_lon'] = hit_lon

In [61]:
pd.set_option('display.max_rows', None)
display(startup_clean)

Unnamed: 0,name,city,country,latitude,longitude,starbucks,starbucks_lat,starbucks_lon
0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827
1,Wetpaint,New York,USA,40.723731,-73.996431,Starbucks,40.722553,-73.997942
2,Geni,West Hollywood,USA,34.090368,-118.393064,,,
3,Slacker,San Diego,USA,33.022176,-117.081406,Starbucks,33.023491,-117.081837
4,Joost,New York,USA,40.746497,-74.009447,,,
5,Babelgum,London,GBR,53.344104,-6.267494,Starbucks Crampton Quay,53.346055,-6.262395
6,Mahalo,Culver City,USA,34.017606,-118.487267,Starbucks,34.01724,-118.489752
7,Kyte,San Francisco,USA,37.788482,-122.409173,Starbucks,37.789146,-122.408456
8,Veoh,San Diego,USA,32.902266,-117.20834,,,
9,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792183,-122.40377


In [88]:
startup_clean.to_csv(r'./input/starbucks_with_nan.csv')

In [62]:
starbucks_clean = startup_clean.dropna()

In [63]:
starbucks_clean.index = pd.RangeIndex(len(starbucks_clean.index))

In [67]:
pd.set_option('display.max_rows', None)
display(starbucks_clean)

Unnamed: 0,name,city,country,latitude,longitude,starbucks,starbucks_lat,starbucks_lon
0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827
1,Wetpaint,New York,USA,40.723731,-73.996431,Starbucks,40.722553,-73.997942
2,Slacker,San Diego,USA,33.022176,-117.081406,Starbucks,33.023491,-117.081837
3,Babelgum,London,GBR,53.344104,-6.267494,Starbucks Crampton Quay,53.346055,-6.262395
4,Mahalo,Culver City,USA,34.017606,-118.487267,Starbucks,34.01724,-118.489752
5,Kyte,San Francisco,USA,37.788482,-122.409173,Starbucks,37.789146,-122.408456
6,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792183,-122.40377
7,AdaptiveBlue,NYC,USA,40.801358,-74.3372,Starbucks,40.798594,-74.340772
8,Pando Networks,New York,USA,40.722655,-73.99873,Starbucks,40.722553,-73.997942
9,Livestream,New York,USA,40.726155,-73.995625,Starbucks,40.727212,-73.995485


In [66]:
starbucks_clean.to_csv(r'./input/starbucks_without_nan.csv')

## There is an airport in a radius of 20 km.

In [68]:
# For every remaining office, look up "airport" within a radius of "20000"
# and store the first hit's name and coordinates in three new columns.
for i in starbucks_clean.index:
    lat = starbucks_clean.loc[i, "latitude"]
    lon = starbucks_clean.loc[i, "longitude"]
    # The request itself is outside the try block, so network or token errors
    # still surface instead of being recorded as "nothing nearby".
    airport = googleRequestAuthorized(str(lat),str(lon),"20000","airport")
    try:
        first_hit = airport['results'][0]
        hit_name = first_hit['name']
        hit_lat = first_hit['geometry']['location']['lat']
        hit_lon = first_hit['geometry']['location']['lng']
    except (KeyError, IndexError, TypeError):
        # Empty or malformed result list: mark the row as missing. Catching
        # only these lookup errors keeps KeyboardInterrupt and real bugs visible.
        hit_name = hit_lat = hit_lon = np.nan

    starbucks_clean.loc[i, 'airport'] = hit_name
    starbucks_clean.loc[i, 'air_lat'] = hit_lat
    starbucks_clean.loc[i, 'air_lon'] = hit_lon

In [72]:
pd.set_option('display.max_rows', None)
# `airports_clean` is only assigned in a later cell, so referencing it here
# fails on a fresh top-to-bottom run. Display the equivalent frame instead:
# `starbucks_clean` without NaN rows and with a fresh 0..n-1 index.
display(starbucks_clean.dropna().reset_index(drop=True))

Unnamed: 0,name,city,country,latitude,longitude,starbucks,starbucks_lat,starbucks_lon,airport,air_lat,air_lon
0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827,Seattle-Tacoma International Airport,47.45025,-122.308817
1,Wetpaint,New York,USA,40.723731,-73.996431,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139
2,Slacker,San Diego,USA,33.022176,-117.081406,Starbucks,33.023491,-117.081837,McClellan-Palomar Airport,33.126822,-117.279241
3,Babelgum,London,GBR,53.344104,-6.267494,Starbucks Crampton Quay,53.346055,-6.262395,Dublin Airport,53.426448,-6.24991
4,Mahalo,Culver City,USA,34.017606,-118.487267,Starbucks,34.01724,-118.489752,Los Angeles International Airport,33.941589,-118.40853
5,Kyte,San Francisco,USA,37.788482,-122.409173,Starbucks,37.789146,-122.408456,San Francisco International Airport,37.621313,-122.378955
6,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792183,-122.40377,San Francisco International Airport,37.621313,-122.378955
7,AdaptiveBlue,NYC,USA,40.801358,-74.3372,Starbucks,40.798594,-74.340772,Newark Liberty International Airport,40.689531,-74.174462
8,Pando Networks,New York,USA,40.722655,-73.99873,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139
9,Livestream,New York,USA,40.726155,-73.995625,Starbucks,40.727212,-73.995485,John F. Kennedy International Airport,40.641311,-73.778139


In [70]:
airports_clean = starbucks_clean.dropna()

In [71]:
airports_clean.index = pd.RangeIndex(len(airports_clean.index))

In [73]:
airports_clean.to_csv(r'./input/airports_without_nan.csv')

## There is a Day Care Center in a radius of 100 meters

In [74]:
def add_places(df,place,radius):
    """Add the first nearby-search hit for ``place`` to every row of ``df``.

    For each row the function queries ``googleRequestAuthorized`` with the
    row's ``latitude``/``longitude``, the given radius and ``place`` as the
    keyword, then writes three columns into ``df`` in place:
    ``<place>`` (hit name), ``<place>_lat`` and ``<place>_lon``. Rows with no
    usable hit get NaN in all three columns.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns. It is
            modified in place; any index (not only 0..n-1) is supported.
        place: search keyword; also used (via ``str``) as the column prefix.
        radius: search radius, converted with ``str`` before the request.

    Returns:
        None.
    """
    column = str(place)
    names, lats, lons = [], [], []

    # Iterate positionally over the coordinate columns so the function does
    # not depend on the labels of df's index.
    for lat, lon in zip(df["latitude"], df["longitude"]):
        answer = googleRequestAuthorized(str(lat),str(lon),str(radius),column)
        try:
            first_hit = answer['results'][0]
            location = first_hit['geometry']['location']
            hit = (first_hit['name'], location['lat'], location['lng'])
        except (KeyError, IndexError, TypeError):
            # Empty or malformed result list: record the row as missing.
            # Other exceptions are deliberately not swallowed.
            hit = (np.nan, np.nan, np.nan)
        names.append(hit[0])
        lats.append(hit[1])
        lons.append(hit[2])

    # Assign each column once instead of enlarging the frame cell by cell.
    df[column] = names
    df[column + '_lat'] = lats
    df[column + '_lon'] = lons

In [75]:
add_places(airports_clean,"daycare","100")

In [76]:
airports_clean

Unnamed: 0,name,city,country,latitude,longitude,starbucks,starbucks_lat,starbucks_lon,airport,air_lat,air_lon,daycare,daycare_lat,daycare_lon
0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827,Seattle-Tacoma International Airport,47.45025,-122.308817,Cosmopolitan Kids Downtown Academy,47.605686,-122.334997
1,Wetpaint,New York,USA,40.723731,-73.996431,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139,,,
2,Slacker,San Diego,USA,33.022176,-117.081406,Starbucks,33.023491,-117.081837,McClellan-Palomar Airport,33.126822,-117.279241,,,
3,Babelgum,London,GBR,53.344104,-6.267494,Starbucks Crampton Quay,53.346055,-6.262395,Dublin Airport,53.426448,-6.24991,,,
4,Mahalo,Culver City,USA,34.017606,-118.487267,Starbucks,34.01724,-118.489752,Los Angeles International Airport,33.941589,-118.40853,,,
5,Kyte,San Francisco,USA,37.788482,-122.409173,Starbucks,37.789146,-122.408456,San Francisco International Airport,37.621313,-122.378955,,,
6,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792183,-122.40377,San Francisco International Airport,37.621313,-122.378955,Bright Horizons,37.792111,-122.403927
7,AdaptiveBlue,NYC,USA,40.801358,-74.3372,Starbucks,40.798594,-74.340772,Newark Liberty International Airport,40.689531,-74.174462,,,
8,Pando Networks,New York,USA,40.722655,-73.99873,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139,SoHo Child Care,40.723374,-74.00106
9,Livestream,New York,USA,40.726155,-73.995625,Starbucks,40.727212,-73.995485,John F. Kennedy International Airport,40.641311,-73.778139,Sompit Child Care,40.727806,-73.997972


In [77]:
daycare_clean = airports_clean.dropna()

In [78]:
daycare_clean.index = pd.RangeIndex(len(daycare_clean.index))

In [79]:
daycare_clean.to_csv(r'./input/daycare_without_nan.csv')

In [80]:
daycare_clean

Unnamed: 0,name,city,country,latitude,longitude,starbucks,starbucks_lat,starbucks_lon,airport,air_lat,air_lon,daycare,daycare_lat,daycare_lon
0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827,Seattle-Tacoma International Airport,47.45025,-122.308817,Cosmopolitan Kids Downtown Academy,47.605686,-122.334997
1,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792183,-122.40377,San Francisco International Airport,37.621313,-122.378955,Bright Horizons,37.792111,-122.403927
2,Pando Networks,New York,USA,40.722655,-73.99873,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139,SoHo Child Care,40.723374,-74.00106
3,Livestream,New York,USA,40.726155,-73.995625,Starbucks,40.727212,-73.995485,John F. Kennedy International Airport,40.641311,-73.778139,Sompit Child Care,40.727806,-73.997972
4,Netvibes,Paris,FRA,48.870806,2.34668,Starbucks,48.870675,2.346912,Paris-Charles De Gaulle,49.009691,2.547925,Municipal Collective Nursery - Thorel,48.869689,2.349065
5,FeedBurner,Chicago,USA,41.889474,-87.628912,Starbucks,41.89016,-87.631251,Chicago Midway International Airport,41.786776,-87.752188,Nannies of Children's Learning Place,41.889873,-87.627482
6,TVtrip,Paris,FRA,48.856667,2.350987,Starbucks,48.85682,2.354893,Paris-Charles De Gaulle,49.009691,2.547925,Day Care Municipal - Lobau,48.856031,2.352951
7,Bebo,San Francisco,USA,37.782103,-122.401116,Starbucks,37.784833,-122.39996,San Francisco International Airport,37.621313,-122.378955,Modern Education Family Childcare - Yerba Buena,37.781321,-122.401216
8,Loomia,San Francisco,USA,37.796396,-122.404869,Starbucks,37.794632,-122.402728,San Francisco International Airport,37.621313,-122.378955,Wu Yee Children's Services - Lok Yuen CDC & Jo...,37.794183,-122.407689
9,Terabitz,Palo Alto,USA,37.437328,-122.159928,Peet's Coffee,37.438064,-122.159322,San Carlos Airport,37.515375,-122.250305,Odyssey Montessori,37.439599,-122.157198


## There is a nightclub in less than 200 meters

In [15]:
add_places(daycare_clean,"nightclub","200")

In [16]:
daycare_clean

Unnamed: 0.1,Unnamed: 0,name,city,country,latitude,longitude,Starbucks,stbk_lat,stbk_lon,airport,air_lat,air_lon,daycare,daycare_lat,daycare_lon,nightclub,nightclub_lat,nightclub_lon
0,0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827,Seattle-Tacoma International Airport,47.45025,-122.308817,Cosmopolitan Kids Downtown Academy,47.605686,-122.334997,Trinity,47.601513,-122.333296
1,1,Wetpaint,New York,USA,40.723731,-73.996431,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139,SoHo Child Care,40.723374,-74.00106,Vandal Nightclub,40.721201,-73.993455
2,2,Mahalo,Culver City,USA,34.017606,-118.487267,Starbucks,34.01724,-118.489752,Los Angeles International Airport,33.941589,-118.40853,Piper Preschool,34.021145,-118.492888,,,
3,3,Kyte,San Francisco,USA,37.788482,-122.409173,Starbucks,37.787288,-122.407432,San Francisco International Airport,37.621313,-122.378955,Bright Horizons,37.792111,-122.403927,Feinstein's at the Nikko,37.785469,-122.409306
4,4,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792846,-122.404304,San Francisco International Airport,37.621313,-122.378955,Bright Horizons,37.792111,-122.403927,15 Whaleship Plaza,37.796551,-122.398939
5,5,Jangl SMS,Pleasanton,USA,37.697805,-121.907768,Starbucks,37.700953,-121.910039,Oakland International Airport,37.712569,-122.219743,Child Care Links,37.697898,-121.90804,Mavericks Country Lounge,37.692343,-121.901755
6,6,Pando Networks,New York,USA,40.722655,-73.99873,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139,SoHo Child Care,40.723374,-74.00106,GoldBar,40.720375,-73.997118
7,7,Livestream,New York,USA,40.726155,-73.995625,Starbucks,40.727212,-73.995485,John F. Kennedy International Airport,40.641311,-73.778139,Learning and Fun Daycare at St. George Academy,40.728055,-73.98966,House of Hunter,40.724738,-73.997656
8,8,YouTube,San Bruno,USA,37.627971,-122.426804,Starbucks,37.625729,-122.42607,San Francisco International Airport,37.621313,-122.378955,Crestmoor Montessori Care,37.624456,-122.43318,,,
9,9,CastTV,San Francisco,USA,37.780716,-122.393913,Starbucks,37.77867,-122.396701,San Francisco International Airport,37.621313,-122.378955,Bright Horizons at 2nd Street,37.784808,-122.394899,The Grand,37.7791,-122.3979


In [17]:
nightclub_clean = daycare_clean.dropna()

In [18]:
nightclub_clean.index = pd.RangeIndex(len(nightclub_clean.index))

In [19]:
nightclub_clean

Unnamed: 0.1,Unnamed: 0,name,city,country,latitude,longitude,Starbucks,stbk_lat,stbk_lon,airport,air_lat,air_lon,daycare,daycare_lat,daycare_lon,nightclub,nightclub_lat,nightclub_lon
0,0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827,Seattle-Tacoma International Airport,47.45025,-122.308817,Cosmopolitan Kids Downtown Academy,47.605686,-122.334997,Trinity,47.601513,-122.333296
1,1,Wetpaint,New York,USA,40.723731,-73.996431,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139,SoHo Child Care,40.723374,-74.00106,Vandal Nightclub,40.721201,-73.993455
2,3,Kyte,San Francisco,USA,37.788482,-122.409173,Starbucks,37.787288,-122.407432,San Francisco International Airport,37.621313,-122.378955,Bright Horizons,37.792111,-122.403927,Feinstein's at the Nikko,37.785469,-122.409306
3,4,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792846,-122.404304,San Francisco International Airport,37.621313,-122.378955,Bright Horizons,37.792111,-122.403927,15 Whaleship Plaza,37.796551,-122.398939
4,5,Jangl SMS,Pleasanton,USA,37.697805,-121.907768,Starbucks,37.700953,-121.910039,Oakland International Airport,37.712569,-122.219743,Child Care Links,37.697898,-121.90804,Mavericks Country Lounge,37.692343,-121.901755
5,6,Pando Networks,New York,USA,40.722655,-73.99873,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,40.641311,-73.778139,SoHo Child Care,40.723374,-74.00106,GoldBar,40.720375,-73.997118
6,7,Livestream,New York,USA,40.726155,-73.995625,Starbucks,40.727212,-73.995485,John F. Kennedy International Airport,40.641311,-73.778139,Learning and Fun Daycare at St. George Academy,40.728055,-73.98966,House of Hunter,40.724738,-73.997656
7,9,CastTV,San Francisco,USA,37.780716,-122.393913,Starbucks,37.77867,-122.396701,San Francisco International Airport,37.621313,-122.378955,Bright Horizons at 2nd Street,37.784808,-122.394899,The Grand,37.7791,-122.3979
8,10,EQO,Vancouver,CAN,49.263588,-123.138565,Starbucks,49.263138,-123.138847,Vancouver International Airport,49.196691,-123.181512,False Creek YMCA Child Care,49.267916,-123.135252,Bunker Studios at The Avalon,49.262727,-123.129692
9,11,iSkoot,Cambridge,USA,42.375392,-71.118487,Starbucks Reserve,42.373333,-71.119214,Boston Logan International Airport,42.365613,-71.00956,Oxford Street Day Care Co-Op,42.379076,-71.11309,Club Latinos Fridays,42.374195,-71.12065


In [26]:
nightclub_clean.to_csv(r'./input/nightclub_without_nan.csv')

## There is a vegan restaurant in a radius of 200 meters

In [27]:
add_places(nightclub_clean,"vegan+restaurant","200")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [28]:
nightclub_clean

Unnamed: 0.1,Unnamed: 0,name,city,country,latitude,longitude,Starbucks,stbk_lat,stbk_lon,airport,...,air_lon,daycare,daycare_lat,daycare_lon,nightclub,nightclub_lat,nightclub_lon,vegan+restaurant,vegan+restaurant_lat,vegan+restaurant_lon
0,0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827,Seattle-Tacoma International Airport,...,-122.308817,Cosmopolitan Kids Downtown Academy,47.605686,-122.334997,Trinity,47.601513,-122.333296,Cafe Zum Zum,47.604655,-122.333455
1,1,Wetpaint,New York,USA,40.723731,-73.996431,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,...,-73.778139,SoHo Child Care,40.723374,-74.00106,Vandal Nightclub,40.721201,-73.993455,by CHLOE.,40.722782,-73.997219
2,3,Kyte,San Francisco,USA,37.788482,-122.409173,Starbucks,37.787288,-122.407432,San Francisco International Airport,...,-122.378955,Bright Horizons,37.792111,-122.403927,Feinstein's at the Nikko,37.785469,-122.409306,Million Thai Restaurant,37.785729,-122.411463
3,4,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792846,-122.404304,San Francisco International Airport,...,-122.378955,Bright Horizons,37.792111,-122.403927,15 Whaleship Plaza,37.796551,-122.398939,The Plant Cafe Organic,37.79169,-122.402312
4,5,Jangl SMS,Pleasanton,USA,37.697805,-121.907768,Starbucks,37.700953,-121.910039,Oakland International Airport,...,-122.219743,Child Care Links,37.697898,-121.90804,Mavericks Country Lounge,37.692343,-121.901755,Gotta Eatta Pita-Pleasanton,37.699598,-121.9034
5,6,Pando Networks,New York,USA,40.722655,-73.99873,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,...,-73.778139,SoHo Child Care,40.723374,-74.00106,GoldBar,40.720375,-73.997118,by CHLOE.,40.722782,-73.997219
6,7,Livestream,New York,USA,40.726155,-73.995625,Starbucks,40.727212,-73.995485,John F. Kennedy International Airport,...,-73.778139,Learning and Fun Daycare at St. George Academy,40.728055,-73.98966,House of Hunter,40.724738,-73.997656,Sacred Chow,40.729616,-73.99949
7,9,CastTV,San Francisco,USA,37.780716,-122.393913,Starbucks,37.77867,-122.396701,San Francisco International Airport,...,-122.378955,Bright Horizons at 2nd Street,37.784808,-122.394899,The Grand,37.7791,-122.3979,SAJJ Mediterranean SoMa,37.781306,-122.391719
8,10,EQO,Vancouver,CAN,49.263588,-123.138565,Starbucks,49.263138,-123.138847,Vancouver International Airport,...,-123.181512,False Creek YMCA Child Care,49.267916,-123.135252,Bunker Studios at The Avalon,49.262727,-123.129692,Heirloom Vegetarian Restaurant,49.260897,-123.139159
9,11,iSkoot,Cambridge,USA,42.375392,-71.118487,Starbucks Reserve,42.373333,-71.119214,Boston Logan International Airport,...,-71.00956,Oxford Street Day Care Co-Op,42.379076,-71.11309,Club Latinos Fridays,42.374195,-71.12065,Dado Tea,42.374166,-71.120683


In [29]:
all_clean = nightclub_clean.dropna()

In [30]:
all_clean.index = pd.RangeIndex(len(all_clean.index))

In [31]:
all_clean.to_csv(r'./input/all_clean.csv')

In [32]:
all_clean

Unnamed: 0.1,Unnamed: 0,name,city,country,latitude,longitude,Starbucks,stbk_lat,stbk_lon,airport,...,air_lon,daycare,daycare_lat,daycare_lon,nightclub,nightclub_lat,nightclub_lon,vegan+restaurant,vegan+restaurant_lat,vegan+restaurant_lon
0,0,Wetpaint,Seattle,USA,47.603122,-122.333253,Starbucks,47.604156,-122.330827,Seattle-Tacoma International Airport,...,-122.308817,Cosmopolitan Kids Downtown Academy,47.605686,-122.334997,Trinity,47.601513,-122.333296,Cafe Zum Zum,47.604655,-122.333455
1,1,Wetpaint,New York,USA,40.723731,-73.996431,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,...,-73.778139,SoHo Child Care,40.723374,-74.00106,Vandal Nightclub,40.721201,-73.993455,by CHLOE.,40.722782,-73.997219
2,3,Kyte,San Francisco,USA,37.788482,-122.409173,Starbucks,37.787288,-122.407432,San Francisco International Airport,...,-122.378955,Bright Horizons,37.792111,-122.403927,Feinstein's at the Nikko,37.785469,-122.409306,Million Thai Restaurant,37.785729,-122.411463
3,4,Wesabe,San Francisco,USA,37.793148,-122.402567,Starbucks,37.792846,-122.404304,San Francisco International Airport,...,-122.378955,Bright Horizons,37.792111,-122.403927,15 Whaleship Plaza,37.796551,-122.398939,The Plant Cafe Organic,37.79169,-122.402312
4,5,Jangl SMS,Pleasanton,USA,37.697805,-121.907768,Starbucks,37.700953,-121.910039,Oakland International Airport,...,-122.219743,Child Care Links,37.697898,-121.90804,Mavericks Country Lounge,37.692343,-121.901755,Gotta Eatta Pita-Pleasanton,37.699598,-121.9034
5,6,Pando Networks,New York,USA,40.722655,-73.99873,Starbucks,40.722553,-73.997942,John F. Kennedy International Airport,...,-73.778139,SoHo Child Care,40.723374,-74.00106,GoldBar,40.720375,-73.997118,by CHLOE.,40.722782,-73.997219
6,7,Livestream,New York,USA,40.726155,-73.995625,Starbucks,40.727212,-73.995485,John F. Kennedy International Airport,...,-73.778139,Learning and Fun Daycare at St. George Academy,40.728055,-73.98966,House of Hunter,40.724738,-73.997656,Sacred Chow,40.729616,-73.99949
7,9,CastTV,San Francisco,USA,37.780716,-122.393913,Starbucks,37.77867,-122.396701,San Francisco International Airport,...,-122.378955,Bright Horizons at 2nd Street,37.784808,-122.394899,The Grand,37.7791,-122.3979,SAJJ Mediterranean SoMa,37.781306,-122.391719
8,10,EQO,Vancouver,CAN,49.263588,-123.138565,Starbucks,49.263138,-123.138847,Vancouver International Airport,...,-123.181512,False Creek YMCA Child Care,49.267916,-123.135252,Bunker Studios at The Avalon,49.262727,-123.129692,Heirloom Vegetarian Restaurant,49.260897,-123.139159
9,11,iSkoot,Cambridge,USA,42.375392,-71.118487,Starbucks Reserve,42.373333,-71.119214,Boston Logan International Airport,...,-71.00956,Oxford Street Day Care Co-Op,42.379076,-71.11309,Club Latinos Fridays,42.374195,-71.12065,Dado Tea,42.374166,-71.120683


In [33]:
city_count = all_clean.groupby(['city']).count()


In [34]:
city_count

Unnamed: 0_level_0,Unnamed: 0,name,country,latitude,longitude,Starbucks,stbk_lat,stbk_lon,airport,air_lat,air_lon,daycare,daycare_lat,daycare_lon,nightclub,nightclub_lat,nightclub_lon,vegan+restaurant,vegan+restaurant_lat,vegan+restaurant_lon
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Amsterdam,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Austin,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Bangalore,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
Barcelona,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Bellevue,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
Belmont,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Berkeley,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Berlin,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Beverly Hills,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Birmingham,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [38]:
city_sorted = city_count.sort_values(['name'], ascending=[False]).head(30)


In [39]:
city_sorted['name']

city
San Francisco    83
New York         59
Seattle          19
London           13
Palo Alto        12
Cambridge        10
Paris             7
Chicago           7
Boston            6
Sunnyvale         5
Los Angeles       5
Mountain View     5
Santa Monica      4
San Mateo         4
Bangalore         4
New York City     4
Menlo Park        4
Boulder           4
Burlington        3
Singapore         3
San Jose          3
Redwood City      3
Bellevue          3
Toronto           3
Foster City       3
Vancouver         3
Pleasanton        2
Reston            2
San Francsico     2
San Diego         2
Name: name, dtype: int64