In [18]:
import pandas as pd
from pymongo import MongoClient
import pprint
import re
import numpy as np
import folium
import os
import requests
from dotenv import load_dotenv
load_dotenv()

# The API key must be present in the environment (load_dotenv() above may
# populate it from a .env file); fail early with a clear message otherwise.
if "KEY" not in os.environ:
    raise ValueError("You should pass a KEY")
KEY = os.environ["KEY"]

In [19]:
# Connect to the MongoDB server on localhost:27017 and select the "companies" database.
client = MongoClient('mongodb://localhost:27017/')
db = client['companies']

In [20]:
# First filter: keep only companies whose total_money_raised is at least 1,000,000.
companies = db.data_companies_clean.find({"total_money_raised": {"$gte": 1000000}})

In [21]:
# Materialise the query results and load them into a DataFrame (one row per company).
data_companies = pd.DataFrame(list(companies))

In [22]:
# As a first look, show the row(s) holding the largest total_money_raised.
data_companies.loc[
    data_companies['total_money_raised'].eq(data_companies['total_money_raised'].max())
]

Unnamed: 0,_id,name,category_code,number_of_employees,founded_year,total_money_raised,geoDescription,latitude,longitude,city,country
216,5dd655028accc5aeaf32f7f6,Groupon,tech,10000.0,2008,114999999999,"{'type': 'Point', 'coordinates': [-87.643622, ...",41.896516,-87.643622,Chicago,USA


In [23]:
# List with every company's geoDescription value, in row order.
list_geo = data_companies['geoDescription'].tolist()

In [24]:
# Helper used to find which companies lie around a given location.

def findNear(list_geo, radio_meters):
    """Return the documents of db.data_companies_clean located near a point.

    Parameters
    ----------
    list_geo
        The geometry passed as "$geometry" of the "$near" query. Despite the
        name, callers in this notebook pass a single geoDescription value,
        not a list.
    radio_meters
        Value passed as "$maxDistance" of the "$near" query.

    Returns
    -------
    list
        Every matching document, fully fetched from the cursor.

    Notes
    -----
    The search runs against the whole data_companies_clean collection with no
    other filter applied.
    NOTE(review): "$near" presumably relies on a geospatial index existing on
    "geoDescription" - confirm it was created when the collection was built.
    """
    near_query = {
        "geoDescription": {
            "$near": {
                "$geometry": list_geo,
                "$maxDistance": radio_meters,
            }
        }
    }
    return list(db.data_companies_clean.find(near_query))

# For every company, fetch the documents found within radio_meters of its
# location and record how many there are.
radio_meters = 5000
list_offices = [findNear(geo_point, radio_meters) for geo_point in list_geo]
list_number_offices = [len(nearby) for nearby in list_offices]
data_companies['number of offices near'] = list_number_offices

In [25]:
data_companies.head()

Unnamed: 0,_id,name,category_code,number_of_employees,founded_year,total_money_raised,geoDescription,latitude,longitude,city,country,number of offices near
0,5dd655028accc5aeaf32f6c0,AdaptiveBlue,tech,15.0,2007,24000000,"{'type': 'Point', 'coordinates': [-74.3372, 40...",40.801358,-74.3372,NYC,USA,1
1,5dd655028accc5aeaf32f6c1,Livestream,tech,120.0,2007,14700000,"{'type': 'Point', 'coordinates': [-73.995625, ...",40.726155,-73.995625,New York,USA,42
2,5dd655028accc5aeaf32f6c2,Seesmic,tech,13.0,2007,16000000,"{'type': 'Point', 'coordinates': [-122.419204,...",37.775196,-122.419204,San Francisco,USA,80
3,5dd655028accc5aeaf32f6c5,MeetMoi,social,15.0,2007,5580000,"{'type': 'Point', 'coordinates': [-73.985506, ...",40.757929,-73.985506,New York City,USA,45
4,5dd655028accc5aeaf32f6c7,Scrybe,enterprise,20.0,2007,1310000,"{'type': 'Point', 'coordinates': [-74.761403, ...",40.270666,-74.761403,San Francisco,USA,1


In [26]:
# New column: money raised divided by the number of nearby offices.
data_companies['money by offices'] = data_companies['total_money_raised'].div(
    data_companies['number of offices near']
)

In [27]:
def sumEmployees(list_offices):
    """Sum the employee counts of each group of nearby offices.

    Parameters
    ----------
    list_offices : iterable of iterables of mappings
        One inner collection per company; every element must provide a
        'number_of_employees' entry.

    Returns
    -------
    list
        One total per inner collection, in input order. An empty inner
        collection yields 0 and an empty input yields an empty list.
    """
    return [
        sum(office['number_of_employees'] for office in company)
        for company in list_offices
    ]

In [28]:
# New column: total employees of the nearby offices divided by how many there are.
# The totals are position-matched to the rows of data_companies.
employees_near = pd.Series(sumEmployees(list_offices), index=data_companies.index)
data_companies['employee by offices'] = employees_near / data_companies['number of offices near']

In [29]:
# Keep only companies with more than one office in range.
# NOTE(review): the count presumably includes the company itself, so "> 1"
# means "at least one other office nearby" - confirm.
# .copy() gives an independent frame, so the columns added to it afterwards
# do not trigger pandas' SettingWithCopyWarning.
data_companies = data_companies[data_companies['number of offices near'] > 1].copy()

In [30]:
# Put the three metrics on a common scale by replacing each with its rank,
# so they can be combined into a single ranking.
rank_sources = (
    ('Rank_employee', 'employee by offices'),
    ('Rank_money', 'money by offices'),
    ('Rank_offices', 'number of offices near'),
)
for rank_column, metric_column in rank_sources:
    data_companies[rank_column] = data_companies[metric_column].rank()

data_companies.head()

Unnamed: 0,_id,name,category_code,number_of_employees,founded_year,total_money_raised,geoDescription,latitude,longitude,city,country,number of offices near,money by offices,employee by offices,Rank_employee,Rank_money,Rank_offices
1,5dd655028accc5aeaf32f6c1,Livestream,tech,120.0,2007,14700000,"{'type': 'Point', 'coordinates': [-73.995625, ...",40.726155,-73.995625,New York,USA,42,350000.0,50.0,206.0,118.5,237.0
2,5dd655028accc5aeaf32f6c2,Seesmic,tech,13.0,2007,16000000,"{'type': 'Point', 'coordinates': [-122.419204,...",37.775196,-122.419204,San Francisco,USA,80,200000.0,52.575,251.0,80.0,292.0
3,5dd655028accc5aeaf32f6c5,MeetMoi,social,15.0,2007,5580000,"{'type': 'Point', 'coordinates': [-73.985506, ...",40.757929,-73.985506,New York City,USA,45,124000.0,46.711111,187.0,51.0,251.5
5,5dd655028accc5aeaf32f6c8,Wakoopa,tech,8.0,2007,1000000,"{'type': 'Point', 'coordinates': [4.8948623, 5...",52.374523,4.894862,Amsterdam,NLD,6,166666.7,12.0,19.5,67.5,128.0
6,5dd655028accc5aeaf32f6c9,Dropbox,tech,350.0,2007,257000000,"{'type': 'Point', 'coordinates': [-122.4084994...",37.790943,-122.408499,San Francisco,USA,80,3212500.0,52.575,251.0,272.0,292.0


In [31]:
# Overall score: weighted sum of the three ranks (money 0.8, employees 0.6, offices 0.4).
data_companies['ranking_total'] = (
    data_companies['Rank_money'] * 0.8
    + data_companies['Rank_employee'] * 0.6
    + data_companies['Rank_offices'] * 0.4
)

In [32]:
# Order the companies from highest to lowest ranking_total and preview the top rows.
data_companies = data_companies.sort_values(by='ranking_total', ascending=False)
data_companies.head()

Unnamed: 0,_id,name,category_code,number_of_employees,founded_year,total_money_raised,geoDescription,latitude,longitude,city,country,number of offices near,money by offices,employee by offices,Rank_employee,Rank_money,Rank_offices,ranking_total
216,5dd655028accc5aeaf32f7f6,Groupon,tech,10000.0,2008,114999999999,"{'type': 'Point', 'coordinates': [-87.643622, ...",41.896516,-87.643622,Chicago,USA,9,12777780000.0,1126.666667,322.5,328.0,151.0,516.3
47,5dd655028accc5aeaf32f6fb,Zynga,tech,115.0,2007,860000000,"{'type': 'Point', 'coordinates': [-122.404234,...",37.765158,-122.404234,San Francisco,USA,81,10617280.0,52.148148,221.0,315.0,322.0,513.4
15,5dd655028accc5aeaf32f6d3,Ooyala,tech,300.0,2007,122000000,"{'type': 'Point', 'coordinates': [-122.084171,...",37.386665,-122.084171,Mountain View,USA,18,6777778.0,54.055556,278.5,304.0,215.0,496.3
370,5dd655028accc5aeaf32f8dd,Sunrun,tech,180.0,2007,337000000,"{'type': 'Point', 'coordinates': [-122.4041075...",37.787131,-122.404107,San Francisco,USA,80,4212500.0,52.575,251.0,285.0,292.0,495.4
342,5dd655028accc5aeaf32f8b6,HootSuite,social,370.0,2008,190000000,"{'type': 'Point', 'coordinates': [-123.0952381...",49.285173,-123.095238,Vancouver,CAN,6,31666670.0,78.5,307.5,324.0,128.0,494.9


In [33]:
# Base endpoint of the Google Places "nearbysearch" web service; the request
# cells append the output format ("/json") and the query parameters to it.
BASE_URL = "https://maps.googleapis.com/maps/api/place/nearbysearch"

In [34]:
# Query the nearbysearch endpoint for places of type "school" within
# radius=1500 of the Vancouver coordinates, and decode the JSON body.
res = requests.get(
    "{}/json".format(BASE_URL),
    # Let requests build and escape the query string instead of formatting it by hand.
    params={
        "location": "49.285173,-123.095238",
        "radius": 1500,
        "type": "school",
        "key": KEY,
    },
    # Without a timeout an unresponsive endpoint would block the notebook indefinitely.
    timeout=10,
).json()

In [35]:
# From the Google API response, collect the latitude, longitude and name of
# each school found near the Vancouver-based company.
# NOTE(review): the original remark called that company the first in the
# ranking, but in the sorted table shown in this notebook the Vancouver row
# (HootSuite) is the fifth one - confirm which company was intended.
school_places = res['results']
coordinates_schools_latitude = [place['geometry']['location']["lat"] for place in school_places]
coordinates_schools_longitude = [place['geometry']['location']["lng"] for place in school_places]
name_schools = [place['name'] for place in school_places]
print(coordinates_schools_latitude, coordinates_schools_longitude, name_schools)

[49.28071019999999, 49.2724496, 49.2847903, 49.2803269, 49.2819276, 49.2811966, 49.283943, 49.2842096, 49.2786576, 49.2777529, 49.285232, 49.28302299999999, 49.2787275, 49.28409800000001, 49.2813601, 49.27962349999999, 49.28030099999999, 49.2843832, 49.28005579999999, 49.276186] [-123.1115084, -123.0957245, -123.1136753, -123.1060349, -123.1077491, -123.1083781, -123.1114593, -123.1094381, -123.0802777, -123.0983909, -123.1153523, -123.1127501, -123.093274, -123.0939572, -123.0924816, -123.1006497, -123.1025561, -123.1070491, -123.0867231, -123.0947707] ['Vancouver Community College', 'Eton College Canada', 'ILAC - International Language Academy of Canada', 'International House Vancouver - Modern Languages', 'London School', 'Kalev Fitness Solution', 'Cambridge Western Academy', 'GEOS Language Plus', 'Admiral Seymour Elementary School', 'Brandywine Bartending School', 'Sprott Shaw College Downtown Vancouver - Pender', 'Fine Art Bartending School Vancouver', 'Benedict Marsh: Composer | 

In [76]:
# Column name -> values mapping used to build the schools table.
dicc = dict(
    latitude_school=coordinates_schools_latitude,
    longitude_school=coordinates_schools_longitude,
    name_school=name_schools,
)

In [77]:
# Tabulate the school coordinates and names: one row per school.
dataframe_schools = pd.DataFrame(dicc)

In [78]:
# Render the schools table as notebook output.
display(dataframe_schools)

Unnamed: 0,latitude_school,longitude_school,name_school
0,49.28071,-123.111508,Vancouver Community College
1,49.27245,-123.095725,Eton College Canada
2,49.28479,-123.113675,ILAC - International Language Academy of Canada
3,49.280327,-123.106035,International House Vancouver - Modern Languages
4,49.281928,-123.107749,London School
5,49.281197,-123.108378,Kalev Fitness Solution
6,49.283943,-123.111459,Cambridge Western Academy
7,49.28421,-123.109438,GEOS Language Plus
8,49.278658,-123.080278,Admiral Seymour Elementary School
9,49.277753,-123.098391,Brandywine Bartending School


In [79]:
# Preview the first rows again; the frame is still sorted by ranking_total, descending.
data_companies.head()

Unnamed: 0,_id,name,category_code,number_of_employees,founded_year,total_money_raised,geoDescription,latitude,longitude,city,country,number of offices near,money by offices,employee by offices,Rank_employee,Rank_money,Rank_offices,ranking_total
216,5dd655028accc5aeaf32f7f6,Groupon,tech,10000.0,2008,114999999999,"{'type': 'Point', 'coordinates': [-87.643622, ...",41.896516,-87.643622,Chicago,USA,9,12777780000.0,1126.666667,322.5,328.0,151.0,516.3
47,5dd655028accc5aeaf32f6fb,Zynga,tech,115.0,2007,860000000,"{'type': 'Point', 'coordinates': [-122.404234,...",37.765158,-122.404234,San Francisco,USA,81,10617280.0,52.148148,221.0,315.0,322.0,513.4
15,5dd655028accc5aeaf32f6d3,Ooyala,tech,300.0,2007,122000000,"{'type': 'Point', 'coordinates': [-122.084171,...",37.386665,-122.084171,Mountain View,USA,18,6777778.0,54.055556,278.5,304.0,215.0,496.3
370,5dd655028accc5aeaf32f8dd,Sunrun,tech,180.0,2007,337000000,"{'type': 'Point', 'coordinates': [-122.4041075...",37.787131,-122.404107,San Francisco,USA,80,4212500.0,52.575,251.0,285.0,292.0,495.4
342,5dd655028accc5aeaf32f8b6,HootSuite,social,370.0,2008,190000000,"{'type': 'Point', 'coordinates': [-123.0952381...",49.285173,-123.095238,Vancouver,CAN,6,31666670.0,78.5,307.5,324.0,128.0,494.9


In [80]:
# Replace the row labels left over from filtering and sorting with a fresh
# 0..n-1 index; drop=True discards the old labels instead of keeping them as a column.
data_companies = data_companies.reset_index(drop=True)

In [81]:
# Save the ranked selection to the current working directory. The index is
# written as well (to_csv default), so the file gets an unnamed first column.
data_companies.to_csv('./data_companies_select.csv')

In [82]:
# Query the nearbysearch endpoint for places of type "cafe" named "Starbucks"
# within radius=1500 of the Vancouver coordinates.
res = requests.get(
    "{}/json".format(BASE_URL),
    # Let requests build and escape the query string instead of formatting it by hand.
    params={
        "location": "49.285173,-123.095238",
        "radius": 1500,
        "type": "cafe",
        "key": KEY,
        "name": "Starbucks",
    },
    # Without a timeout an unresponsive endpoint would block the notebook indefinitely.
    timeout=10,
)

In [83]:
# Decode the JSON body of the response.
# NOTE: `res` is rebound from the response object to the decoded payload, so
# this cell cannot be re-run on its own; re-run the request cell first.
res = res.json()

In [84]:
# Collect the latitude and longitude of every Starbucks result in the response.
starbucks_places = res['results']
coordinates_starbucks_latitude = [place['geometry']['location']["lat"] for place in starbucks_places]
coordinates_starbucks_longitude = [place['geometry']['location']["lng"] for place in starbucks_places]
print(coordinates_starbucks_latitude, coordinates_starbucks_longitude)

[49.2844427, 49.28460980000001, 49.2827536, 49.2856442, 49.27997, 49.2859207, 49.2730065, 49.2876188, 49.282976, 49.2767968, 49.2810692, 49.28765929999999, 49.271067, 49.2885143, 49.2844893, 49.27930010000001, 49.2820826, 49.2798197] [-123.108513, -123.1119558, -123.0856851, -123.111855, -123.1072445, -123.1151827, -123.0996974, -123.1155677, -123.11564, -123.1148211, -123.0739561, -123.1131222, -123.0877751, -123.1176877, -123.1134266, -123.1169541, -123.1184525, -123.1179367]


In [85]:
# Tabulate the Starbucks coordinates (one row per result) and show the table.
dicc_starbucks = dict(
    latitude_starbucks=coordinates_starbucks_latitude,
    longitude_starbucks=coordinates_starbucks_longitude,
)
dataframe_starbucks = pd.DataFrame(dicc_starbucks)
display(dataframe_starbucks)

Unnamed: 0,latitude_starbucks,longitude_starbucks
0,49.284443,-123.108513
1,49.28461,-123.111956
2,49.282754,-123.085685
3,49.285644,-123.111855
4,49.27997,-123.107244
5,49.285921,-123.115183
6,49.273007,-123.099697
7,49.287619,-123.115568
8,49.282976,-123.11564
9,49.276797,-123.114821


In [86]:
#Map with all the companies

In [87]:
# World-level map (zoom 2) with one small circle per company; clicking a
# circle shows its country, money raised and name.
map_companies = folium.Map(location=[51.510880, -0.141897], width=750, height=500, zoom_start=2)
for _, company in data_companies.iterrows():
    popup_text = "Country: {}. Money: {}. Name:{}".format(
        company['country'], company['total_money_raised'], company['name']
    )
    folium.CircleMarker(
        [company['latitude'], company['longitude']],
        radius=3,
        popup=popup_text,
        fill_color="#F35C50",
    ).add_to(map_companies)

# Write a standalone HTML copy and render the map inline as the cell output.
map_companies.save('map_companies.html')
map_companies

In [88]:
#Map of the Vancouver area: companies (red markers), nearby schools (cloud icons), Starbucks (dark green markers) and the proposed location of our company (pink marker with a 300 m circle around it)

In [32]:
# Street-level map (zoom 15) centred on the Vancouver coordinates used for
# the Places searches.
map_vancouver = folium.Map(location=[49.285173, -123.095238], width=750, height=500, zoom_start=15)

# radius / fill_color are options of vector layers (CircleMarker, Circle);
# they have no effect on folium.Marker, so the markers below do not pass them.

# Companies: red markers with name, money raised and employee count.
for _, company in data_companies.iterrows():
    folium.Marker(
        [company['latitude'], company['longitude']],
        popup="Name company: {}. Money: {}. Employees: {}".format(
            company['name'], company['total_money_raised'], company['number_of_employees']
        ),
        icon=folium.Icon(color='red', icon='info-sign'),
    ).add_to(map_vancouver)

# Schools: default-coloured markers with a cloud icon and the school name.
for _, school in dataframe_schools.iterrows():
    folium.Marker(
        [school['latitude_school'], school['longitude_school']],
        popup="School: {}".format(school['name_school']),
        icon=folium.Icon(icon='cloud'),
    ).add_to(map_vancouver)

# Starbucks: dark green markers.
for _, starbucks in dataframe_starbucks.iterrows():
    folium.Marker(
        [starbucks['latitude_starbucks'], starbucks['longitude_starbucks']],
        icon=folium.Icon(color='darkgreen', icon='info-sign'),
    ).add_to(map_vancouver)

# Proposed location: pink marker plus a circle of radius 300 around it.
folium.Marker(
    [49.282223, -123.109113],
    icon=folium.Icon(color='pink', icon='info-sign'),
    popup="Our company",
).add_to(map_vancouver)
folium.Circle(
    [49.282223, -123.109113],
    radius=300,
).add_to(map_vancouver)

# Write a standalone HTML copy and render the map inline as the cell output.
map_vancouver.save('map_vancouver.html')
map_vancouver