In [1]:
from pymongo import MongoClient
import pandas as pd
from near_funciones import getCompaniesNear, getEmployees, getMoney
import requests
import time
import os
from dotenv import load_dotenv
load_dotenv()


True

In [2]:
client = MongoClient("mongodb://localhost:27017/")

In [3]:
db = client.companies

In [4]:
collection_companies = db.clean_companies

In [5]:
query = collection_companies.find()

In [6]:
data = pd.DataFrame(query)

In [7]:
data.head()

Unnamed: 0,_id,name,category_code,number_of_employees,founded_year,deadpooled_year,description,latitude,longitude,country,state,city,monedas,total_amount_raised,geo
0,5d83a90255488441b3fff55a,Plaxo,web,50,2002.0,,Contact Management,37.387845,-122.055197,USA,CA,Sunnyvale,Dolares estadounidenses,28300000.0,"{'type': 'Point', 'coordinates': [-122.055197,..."
1,5d83a90255488441b3fff55b,Kyte,games_video,40,2006.0,,Online & Mobile Video Platform,37.788482,-122.409173,USA,CA,San Francisco,Dolares estadounidenses,23400000.0,"{'type': 'Point', 'coordinates': [-122.409173,..."
2,5d83a90255488441b3fff55c,Twitter,social,1300,2006.0,,Real time communication platform,37.776805,-122.416924,USA,CA,San Francisco,Dolares estadounidenses,1160000000.0,"{'type': 'Point', 'coordinates': [-122.4169244..."
3,5d83a90255488441b3fff55d,eBay,web,15000,1995.0,,Online Marketplace,37.295005,-121.930035,USA,CA,San Jose,Dolares estadounidenses,6700000.0,"{'type': 'Point', 'coordinates': [-121.930035,..."
4,5d83a90255488441b3fff55e,Jajah,mobile,110,2005.0,,IP Communications Platform,37.42339,-122.089951,USA,CA,Mountain View,Dolares estadounidenses,33000000.0,"{'type': 'Point', 'coordinates': [-122.0899512..."


In [8]:
data.shape

(200, 15)

In [9]:
# Creo una nueva columna con el número de empresas que están a un km de distancia
data["num_companies"] = data["geo"].apply(lambda x: getCompaniesNear(collection_companies, x))

In [10]:
# Creo una nueva columna con el número de empleados totales en un radio de un km de distancia

In [11]:
data["num_employees"] = data["geo"].apply(lambda x: getEmployees(collection_companies, x))

In [12]:
# Creo una nueva columna con el dinero total a un kilometro de distancia
data["total_money"] = data["geo"].apply(lambda x: getMoney(collection_companies, x))

In [13]:
data.head(3)

Unnamed: 0,_id,name,category_code,number_of_employees,founded_year,deadpooled_year,description,latitude,longitude,country,state,city,monedas,total_amount_raised,geo,num_companies,num_employees,total_money
0,5d83a90255488441b3fff55a,Plaxo,web,50,2002.0,,Contact Management,37.387845,-122.055197,USA,CA,Sunnyvale,Dolares estadounidenses,28300000.0,"{'type': 'Point', 'coordinates': [-122.055197,...",1,50,28300000.0
1,5d83a90255488441b3fff55b,Kyte,games_video,40,2006.0,,Online & Mobile Video Platform,37.788482,-122.409173,USA,CA,San Francisco,Dolares estadounidenses,23400000.0,"{'type': 'Point', 'coordinates': [-122.409173,...",14,1532,545170000.0
2,5d83a90255488441b3fff55c,Twitter,social,1300,2006.0,,Real time communication platform,37.776805,-122.416924,USA,CA,San Francisco,Dolares estadounidenses,1160000000.0,"{'type': 'Point', 'coordinates': [-122.4169244...",3,1487,1233400000.0


In [14]:
# San Francisco, Nueva York y Seattle son las tres ciudades que más empresas de videojuegos tienen.
# Selecciono la empresa de videojuegos que en cada una de estas tres ciudadesque ha conseguido más dinero. 
# Utilizo estas tres empresas como una posible referencia para situar la mía.
# Luego llamo a la api para buscar bares y cafes. Y después hago los dos mapas con Folium

In [16]:
video_games = data.loc[data["category_code"] == "games_video"]

In [17]:
san_francisco = video_games.loc[video_games["city"] == "San Francisco"]
new_york = video_games.loc[video_games["city"] == "New York"]
seattle = video_games.loc[video_games["city"] == "Seattle"]

In [80]:
# Oficina de San Francisco con más dinero
sf_office = san_francisco.loc[san_francisco["total_amount_raised"].idxmax()]

In [65]:
# Coordenadas de la oficina de San Francisco
sf_coord = (sf_office["longitude"], sf_office["latitude"])
sf_coord

(-122.404234, 37.765158)

In [70]:
# Oficina de Nueva York con más dinero
ny_office = new_york.loc[new_york["total_amount_raised"].idxmax()]

In [66]:
# Coordenadas de la oficina de Nueva York
ny_coord = (ny_office["longitude"], ny_office["latitude"])
ny_coord

(-73.97593, 40.752672)

In [68]:
# Oficina de Seattle con más dinero
s_office = seattle.loc[seattle["total_amount_raised"].idxmax()]

In [69]:
# Coordenadas de la oficina de Seattle
s_coord = (s_office["longitude"], s_office["latitude"])
s_coord

(-122.323408, 47.615313)

In [21]:
# Exporto el dataframe entero como un csv para visualizar los datos

In [22]:
data.to_csv("./output/data.csv", index=False)

In [23]:
# Extracción de datos de la Api

In [24]:
zomato_key = os.getenv("zomato_key")

In [25]:
def getVeganRestaurants(lat, lon):
    
    # Hacer una llamada a la api para buscar restaurantes veganos
    # Utilizo un radio de 1 km a la redonda
    # utilizo la clave 308, la cifra que se corresponde con los locales vegetarianos
    
    headers = {
        "user-key": "{}".format(zomato_key)
    }
    url = "https://developers.zomato.com/api/v2.1/search?lat={}&lon={}&radius=1000&cuisines=308".format(lat, lon)
    response = requests.get(url, headers=headers)
    response = response.json()
    return response

In [26]:
restaurants = data.apply(lambda x: getVeganRestaurants(x["latitude"], x["longitude"]), axis=1)

In [27]:
def getInfoRestaurants(data):
    
    # función para extraer la info del json que devuelve la api
    
    info_restaurants = []
    for rest in data:
        for element in rest["restaurants"]:
            restaurant_dict = {
                   "name": element["restaurant"]["name"],
                    "address": element["restaurant"]["location"]["address"],
                   "lat": element["restaurant"]["location"]["latitude"],
                   "lon": element["restaurant"]["location"]["longitude"],
                   "timings": element["restaurant"]["timings"]
               }
            info_restaurants.append(restaurant_dict)
    return info_restaurants

In [28]:
vegan_restaurants = pd.DataFrame(getInfoRestaurants(restaurants))

In [29]:
vegan_restaurants.to_csv("./output/vegan_restaurants.csv", index=False)

In [81]:
vegan_restaurants.head()

Unnamed: 0,name,address,lat,lon,timings
0,Merit Vegetarian,"548 Lawrence Expressway, Sunnyvale 94085",37.3849611111,-121.9949333333,11 AM to 9 PM (Mon-Sun)
1,Swathi Tiffins,"1202 Apollo Way, Sunnyvale 94085",37.379015,-121.994962,"10 AM to 10 PM (Mon-Fri),8:30 AM to 10 PM (Sat..."
2,Great Vegi Land,"562 S Murphy Avenue, Sunnyvale 94086",37.369241,-122.032456,"11 AM to 2:30 PM, 5 PM to 9 PM (Mon-Sun)"
3,Panchavati Kitchen Indian Vegetarian Restaurant,"460 Persian Dr, Sunnyvale 94089",37.405595,-122.013319,"4 PM to 8 PM (Mon),11 AM to 8 PM (Tue-Sun)"
4,City Kabob,"755 S Mathilda Avenue, Sunnyvale 94087",37.366032,-122.036183,"11 AM to 7 PM (Mon-Fri),11 AM to 5 PM (Sat-Sun)"


In [None]:
# llamada api para buscar bares

In [31]:
def getBares(lat, lon):
    # Hacer una llamada a la api para buscar bares
    # Utilizo un radio de 1 km a la redonda
    # utilizo la clave 3, la cifra que se corresponde con los locales de ocio nocturno
    
    headers = {
        "user-key": "{}".format(zomato_key)
    }
    url = "https://developers.zomato.com/api/v2.1/search?entity_type=zone&lat={}&lon={}&radius=1000&category=3".format(lat, lon)
    response = requests.get(url, headers=headers)
    response = response.json()
    return response

In [None]:
# Para agilizar las llamadas a la api solo busco por una localización concreta y no por las 200 empresas
#bares = data.apply(lambda x: getBares(x["latitude"], x["longitude"]), axis=1)

In [83]:
lon, lat = sf_coord
sf_bares = getBares(lat, lon)

In [88]:
sf_bares["restaurants"]

[{'restaurant': {'R': {'has_menu_status': {'delivery': -1, 'takeaway': -1},
    'res_id': 16843658},
   'apikey': '3d0f2fd0075d2cd6d2c5bea504648158',
   'id': '16843658',
   'name': 'Foreign Cinema',
   'url': 'https://www.zomato.com/san-francisco/foreign-cinema-mission-district?utm_source=api_basic_user&utm_medium=api&utm_campaign=v2.1',
   'location': {'address': '2534 Mission District Street, San Francisco 94110',
    'locality': 'Mission District',
    'city': 'San Francisco',
    'city_id': 306,
    'latitude': '37.7565305556',
    'longitude': '-122.4190444444',
    'zipcode': '94110',
    'country_id': 216,
    'locality_verbose': 'Mission District, San Francisco'},
   'switch_to_order_menu': 0,
   'cuisines': 'California, Seafood',
   'timings': '5:30 PM to 10 PM (Mon-Wed),5:30 PM to 11 PM (Thu-Fri),11 AM to 2:30 PM, 5:30 PM to 11 PM (Sat),11 AM to 2:30 PM, 5:30 PM to 10 PM (Sun)',
   'average_cost_for_two': 120,
   'price_range': 4,
   'currency': '$',
   'highlights': ['Dinne

In [91]:
def getInfoBares(data):
    
    # función para extraer la info del json que devuelve la api
    
    info_bares = []
    for bar in data["restaurants"]:
        bares_dict = {
                   "name": bar["restaurant"]["name"],
                    "address": bar["restaurant"]["location"]["address"],
                   "lat": bar["restaurant"]["location"]["latitude"],
                   "lon": bar["restaurant"]["location"]["longitude"],
                   "timings": bar["restaurant"]["timings"]
               }
        info_bares.append(bares_dict)
    return info_bares

In [92]:
data_sfbares = pd.DataFrame(getInfoBares(sf_bares))

In [93]:
data_sfbares

Unnamed: 0,name,address,lat,lon,timings
0,Foreign Cinema,"2534 Mission District Street, San Francisco 94110",37.7565305556,-122.4190444444,"5:30 PM to 10 PM (Mon-Wed),5:30 PM to 11 PM (T..."
1,Gary Danko,"800 North Point Street, San Francisco 94109",37.8057055556,-122.4205638889,5 PM to 12 Midnight (Mon-Sun)
2,Nopa,"560 Divisadero Street, San Francisco 94117",37.7748527778,-122.4378805556,"5 PM to 1 AM (Mon-Fri),11 AM to 2:30 PM, 5 PM ..."
3,The Buena Vista,"2765 Hyde Street, San Francisco 94109",37.806625,-122.4206611111,"9 AM to 2 AM (Mon-Fri),8 AM to 2 AM (Sat-Sun)"
4,Absinthe Brasserie & Bar,398 Hayes Street 94102,37.77697,-122.42294,"11:30 AM to 11 PM (Mon-Wed), 11:30 AM to 12 Mi..."
5,Tommy's Joynt,"1101 Geary Boulevard, San Francisco 94109",37.785519,-122.421811,10 AM to 2 AM
6,21st Amendment Brewery,"563 2nd Street, San Francisco 94107",37.782427,-122.392611,"11:30 AM to 12 Midnight (Mon-Sat),10 AM to 12 ..."
7,Thirsty Bear Brewing Company,"661 Howard Street, SOMA 94105",37.78556,-122.39964,"11:30 AM to 10 PM (Mon-Thu), 11:30 AM to 11 PM..."
8,Range,"842 Valencia Street, San Francisco 94110",37.7594305556,-122.4214555556,"Closed (Mon),6 PM to 10 PM (Tue-Thu),5:30 PM t..."
9,Johnny Foley's,"243 O'Farrell Street, San Francisco 94102",37.7864611111,-122.4088833333,11:30 AM to 1:30 AM (Mon-Sun)


In [None]:
# llamada api para buscar cafés

In [51]:
def getCafes(lat, lon):
    # Hacer una llamada a la api para buscar cafes
    # Utilizo un radio de 1 km a la redonda
    # Utilizo la clave 6, la cifra que se corresponde con los cafes
    
    headers = {
        "user-key": "{}".format(zomato_key)
    }
    url = "https://developers.zomato.com/api/v2.1/search?entity_type=zone&lat={}&lon={}&radius=1000&category=6".format(lat, lon)
    response = requests.get(url, headers=headers)
    response = response.json()
    return response

In [52]:
# cafes = data.apply(lambda x: getCafes(x["latitude"], x["longitude"]), axis=1)

In [None]:
# Exporto la información a un json para no tener que hacer más llamadas a la api

In [None]:
def getInfoCafes(data):
    
    # función para extraer la info del json que devuelve la api
    
    info_restaurants = []
    for rest in data:
        for element in rest["restaurants"]:
            restaurant_dict = {
                   "name": element["restaurant"]["name"],
                    "address": element["restaurant"]["location"]["address"],
                   "lat": element["restaurant"]["location"]["latitude"],
                   "lon": element["restaurant"]["location"]["longitude"],
                   "timings": element["restaurant"]["timings"]
               }
            info_restaurants.append(restaurant_dict)
    return info_restaurants

In [None]:
for index, row in starbucks_spain.iterrows():
    folium.CircleMarker([row['Latitude'], row['Longitude']],
                        radius=9,
                        popup="City: {}, latitude {}, longitude {}".format(row['City'], row['Latitude'], row['Longitude']),
                        fill_color="#F35C50",
                       ).add_to(map_starbucks)
    
    folium.Marker([latitude_restaurant, longitude_restaurant],
                        radius=2,
                        icon=folium.Icon(icon='cloud'), # Icono nube, hay más en la documentación
                        fill_color="#F35C50",
                       ).add_to(map_starbucks)
    folium.Circle(location=[40.42, -3.7],
                    radius=100
                   ).add_to(map_starbucks)
map_starbucks.add_child(MeasureControl())

In [None]:
# Visualizaciones

In [94]:
import folium

In [121]:
sf_map = folium.Map(location=[37.765158, -122.404234], tiles='Stamen Toner',zoom_start=14)

folium.Circle(
    radius=100,
    location=[37.765158, -122.404234],
    popup='Office reference',
    color='crimson',
    fill=False,
).add_to(sf_map)


for lat, lon in list(zip(data_sfbares["lat"], data_sf ["lon"])):
    folium.Marker([lat, lon], radius=2, icon=folium.Icon(icon="cloud"), fill_color="#F35C50").add_to(sf_map)

SyntaxError: invalid syntax (<ipython-input-121-bf22f443dd13>, line 12)

In [109]:
sf_map