In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
from pymongo import MongoClient
from bs4 import BeautifulSoup
import geocoder
import requests
import folium
import geopy.distance
import json
from folium.plugins import MousePosition


load_dotenv()


def connectCollection(database, collection):
    client = MongoClient()
    db = client[database]
    coll = db[collection]
    return db, coll

db, austin = connectCollection('companies','companies')
db, start = connectCollection('companies','startups')
db, filtered = connectCollection('companies','filtered')

In [2]:
# encontramos todas las empresas con oficinas en Boston, MA
# estas empresas deben seguir existiendo (deadpooled==None)
# debemos saber el año en el que fueron fundadas (founded_year==exists)


austin_comp = list(austin.find({"$and":[{"offices.city":"Boston"},{"deadpooled_year":None},
                                        {"founded_year":{"$exists":True}}]}))



In [3]:
# creamos una nueva coleccion con los datos filtrados
# filtramos por el año en el que fueron fundadas (no mas de 10 años)
# separamos las empresas en sus distintas oficinas


for y in austin_comp:
    if y["founded_year"]==None or y["founded_year"]>2009:
        pass
    else:
        for of in range(len(y["offices"])):
            if y["offices"][of]["city"]=="Boston":
                db.filtered.insert_one(
                {"name":y["name"],"homepage_url":y["homepage_url"],"category_code":y["category_code"],
                "number_of_employees":y["number_of_employees"],"founded_year":y["founded_year"],
                "total_money_raised":y["total_money_raised"],"offices":y["offices"][of]})
                

In [4]:
# sacamos la posicion de todas las empresas posibles de coleccion filtrada
# guardamos esta posicion en formato GEOJSON

def getLocation(company):
    longitude = company["offices"]["longitude"]
    latitude = company["offices"]["latitude"]
    try:
        loc = {
            'type':'Point',
            'coordinates':[float(longitude), float(latitude)]
        }
        return loc
    except:
        pass
    
filter_list = filtered.find()
for i in filter_list:
    value = {"$set": {'location':getLocation(i)}}
    filtered.update_one(i,value)


In [5]:
# de angel.co sacamos un csv filtrado de startups en Boston que han ganado entre 2 y 100 M
# https://angel.co/
# subimos ese csv a mongo db compass desde la terminal
# mongoimport --db companies --collection companies --file startups.csv

startups = list(start.find())
startups = [startups[s] for s in range(len(startups)) if startups[s]["Total Raised"]>1000000]


In [6]:
# funcion para geolocalizar direcciones

def place_request(direction):
    if not os.getenv("google"):
        raise ValueError("No API token!")
    else:
        g = geocoder.google(direction,key=os.getenv("google"))
        return g.json
    

In [7]:
# obtenemos la direccion de todas las startups en Boston
# buscamos por nombre y estado

startup_loc = []
for startup in startups:
    x = place_request(startup["Name"]+",MA")
    try:
        latitude = x["raw"]["geometry"]["location"]["lat"]
        longitude = x["raw"]["geometry"]["location"]["lng"]
        startup_loc.append([latitude,longitude])
    except:
        pass
    

In [8]:
# obtenemos la direccion de todas las compañias filtradas de GEOJSON

companies = []
filtered_c = list(filtered.find())
for s in range(len(filtered_c)):
    try:
        companies.append(filtered_c[s]["location"]["coordinates"])
    except:
        pass
    

In [9]:
# https://www.kaggle.com/crawford/boston-public-schools
# CSV con los colegios publicos en Boston de los que sacamos la localizacion

schools = pd.read_csv("public_schools.csv")
school_loc = []
for s in range(len(schools)):
    longitude = schools["X"][s]
    latitude = schools["Y"][s]
    school_loc.append([latitude,longitude])
    

In [10]:
def request_json(url):
    res = requests.get(url).json()
    return res["results"]

def getPosition(results):
    output = []
    for i in range(len(results)):
        latitude = results[i]["geometry"]["location"]["lat"]
        longitude = results[i]["geometry"]["location"]["lng"]
        output.append([latitude,longitude])
    return output

def request_soup(url,search_class):
    output = []
    res = requests.get(url)
    soup = BeautifulSoup(res.content)
    bars = soup.find_all(search_class)
    for b in range(20):
        club = bars[b].text.strip()
        url = f"https://maps.googleapis.com/maps/api/place/textsearch/json?query={club}+Boston&key={os.getenv('google')}"
        output.append(request_json(url))
    return output

    
### starbucks search
url_starbucks = (f"https://maps.googleapis.com/maps/api/place/textsearch/json?query=starbucks+in+Boston&key={os.getenv('google')}")
#starbucks_info = request_json(url_starbucks) 
#starbucks = getPosition(starbucks_info)

### night search
url_night = f"https://maps.googleapis.com/maps/api/place/textsearch/json?query=night+bars+Boston&key={os.getenv('google')}"
#night_info = request_json(url_starbucks) 
#night = getPosition(starbucks_info)
    

In [None]:
# we set the initial location as the center of Austin, from googlemaps

#city = place_request("boston,MA")
#airport = place_request("boston airport")
#air_coord = airport.get('lat'),airport.get('lng')

In [360]:
m = folium.Map([city.get("lat"),city.get("lng")], zoom_start=15,tiles='cartodbpositron')
     
school_group = folium.FeatureGroup(name="Schools").add_to(m)
start_group = folium.FeatureGroup(name="Startups").add_to(m)
companies_group = folium.FeatureGroup(name="Companies").add_to(m)
starbucks_group = folium.FeatureGroup(name="Starbucks").add_to(m) 
night_group = folium.FeatureGroup(name="Night bars").add_to(m)
plane_group = folium.FeatureGroup(name="Airport").add_to(m)
    
for school in school_loc:
    icon=folium.Icon(color='darkblue', icon='home', icon_color="red", prefix='School')
    school_group.add_child(folium.Marker(school,popup="School "+str(school),icon=icon))

for start_loc in startup_loc:
    icon=folium.Icon(color='orange', prefix='startup')
    start_group.add_child(folium.Marker(start_loc,popup="Startup "+str(start_loc),icon=icon))
    
for c in companies:
    icon=folium.Icon(color='black',prefix='companies')
    companies_group.add_child(folium.Marker(c[::-1],popup="Old company "+str(c[::-1]),icon=icon))
    
for star in starbucks:
    icon=folium.Icon(color='darkgreen', prefix='starbucks')
    starbucks_group.add_child(folium.Marker(star,popup="Starbucks "+str(star),icon=icon))
    
#for club in night:
 #   icon=folium.Icon(color='purple', prefix='Night bar')
  #  start_group.add_child(folium.Marker(club,popup="Night bar "+str(club),icon=icon))
    
icon=folium.Icon(icon='cloud',color='lightblue', prefix='Airport')
plane_group.add_child(folium.Marker(air_coord,popup="Airport "+str(air_coord),icon=icon))
    
folium.LayerControl(collapsed=True).add_to(m)
folium.LatLngPopup().add_to(m)

draw = Draw(export=True)



formatter = "function(num) {return L.Util.formatNum(num, 3) + ' º ';};"
MousePosition(
    position='topright',
    separator=' | ',
    lng_first=True,
    prefix='Coordinates:',
    lat_formatter=formatter,
    lng_formatter=formatter,
).add_to(m)



draw.add_to(m)
m

In [342]:
def minDist(c1,c2):
    dist = geopy.distance.vincenty(c1,c2).km
    return f"Distance: {round(dist*100,2)} Km"
        
minDist([-71.056168,42.363618],[-71.0540,42.3644])

  


'Distance: 24.36 Km'