In [11]:
import os
import pandas as pd
from dotenv import load_dotenv
from pymongo import MongoClient
from bs4 import BeautifulSoup
import geocoder
import requests
import folium
import geopy.distance
load_dotenv()


def connectCollection(database, collection):
    """Open a MongoDB database and one of its collections.

    A new ``MongoClient()`` is created with default arguments on every call.

    Args:
        database: name of the database to open.
        collection: name of the collection inside that database.

    Returns:
        A ``(db, coll)`` tuple: the database handle and the collection handle.
    """
    db = MongoClient()[database]
    return db, db[collection]

# Handles for the three collections of the `companies` database used below.
# `db` is rebound on every call and always refers to the `companies` database.
db, austin = connectCollection(database='companies', collection='companies')
db, start = connectCollection(database='companies', collection='startups')
db, filtered = connectCollection(database='companies', collection='filtered')

In [12]:
# Find every company with an office in Boston.
# The company must still exist (deadpooled_year is None) and the document
# must carry a founded_year field.
boston_query = {"$and": [
    {"offices.city": "Boston"},
    {"deadpooled_year": None},
    {"founded_year": {"$exists": True}},
]}

austin_comp = list(austin.find(boston_query))



In [13]:
# Fill the `filtered` collection with the matching companies.
# Companies without a founding year, or founded after 2009, are skipped.
# Each Boston office of a company becomes its own document, so `offices`
# holds a single office sub-document instead of a list.
# NOTE(review): re-running this cell inserts the same documents again.

for company in austin_comp:
    founded = company["founded_year"]
    if founded is None or founded > 2009:
        continue
    for office in company["offices"]:
        if office["city"] != "Boston":
            continue
        db.filtered.insert_one({
            "name": company["name"],
            "homepage_url": company["homepage_url"],
            "category_code": company["category_code"],
            "number_of_employees": company["number_of_employees"],
            "founded_year": founded,
            "total_money_raised": company["total_money_raised"],
            "offices": office,
        })

In [14]:
# Extract the position of every company in the filtered collection that has
# one, and store it as a GeoJSON Point.

def getLocation(company):
    """Build a GeoJSON Point from a company's single office sub-document.

    Args:
        company: mapping whose ``"offices"`` value is one office mapping with
            ``"longitude"`` and ``"latitude"`` entries.

    Returns:
        ``{'type': 'Point', 'coordinates': [longitude, latitude]}`` with both
        values converted to ``float``, or ``None`` when the office or either
        coordinate is missing, ``None`` or not convertible to ``float``.
    """
    try:
        office = company["offices"]
        # GeoJSON order is [longitude, latitude].
        coordinates = [float(office["longitude"]), float(office["latitude"])]
    except (KeyError, TypeError, ValueError):
        # Missing key, None coordinate or non-numeric text: no usable location.
        return None
    return {
        'type': 'Point',
        'coordinates': coordinates,
    }
    
# Materialize the documents first so the collection is not modified while a
# live cursor is still iterating over it.
filter_list = list(filtered.find())
for doc in filter_list:
    # Match on the primary key instead of using the whole document as filter.
    filtered.update_one({"_id": doc["_id"]}, {"$set": {'location': getLocation(doc)}})


In [15]:
# From angel.co we exported a CSV of Boston startups that raised between
# 2 and 100 M.
# https://angel.co/
# The CSV was loaded into MongoDB from the terminal:
# $ mongoimport --db companies --collection companies --file startups.csv
# NOTE(review): the command above targets the `companies` collection, while
# `start` reads from `startups` — confirm which collection holds the CSV.
# NOTE(review): the filter below keeps rows above 1,000,000, not 2 M — confirm.

startups = [doc for doc in start.find() if doc["Total Raised"] > 1000000]

In [16]:
# Helper to geocode an address.

def place_request(direction):
    """Geocode ``direction`` through ``geocoder.google``.

    The API key is read from the ``google`` environment variable.

    Args:
        direction: the address or place text to look up.

    Returns:
        The ``json`` attribute of the geocoder result.

    Raises:
        ValueError: if the ``google`` environment variable is unset or empty.
    """
    api_key = os.getenv("google")
    if api_key:
        return geocoder.google(direction, key=api_key).json
    raise ValueError("No API token!")

In [22]:
# Geocode every Boston startup, searching by name plus state.
# Startups the geocoder cannot resolve are left out of the list.

startup_loc = []
for startup in startups:
    geocoded = place_request(startup["Name"]+",MA")
    try:
        point = geocoded["raw"]["geometry"]["location"]
        startup_loc.append([point["lat"], point["lng"]])
    except (KeyError, TypeError):
        # No result (not subscriptable) or a response without geometry.
        continue

In [23]:
# Collect the GeoJSON coordinates ([longitude, latitude]) of every filtered
# company that has a location.

companies = []
filtered_c = list(filtered.find())
for doc in filtered_c:
    try:
        companies.append(doc["location"]["coordinates"])
    except (KeyError, TypeError):
        # No `location` field, or it was stored as None: nothing to plot.
        continue

In [290]:
# https://www.kaggle.com/crawford/boston-public-schools
# CSV of Boston public schools, from which we take each school's location.
# Column X is used as the longitude and column Y as the latitude.

schools = pd.read_csv("public_schools.csv")
school_loc = [[schools["Y"][row], schools["X"][row]] for row in range(len(schools))]

In [286]:
def request_json(url, timeout=10):
    """GET ``url`` and return the ``"results"`` entry of its JSON body.

    Args:
        url: full request URL, query string included.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The value stored under ``"results"`` in the decoded JSON response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        KeyError: if the JSON body has no ``"results"`` entry.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()["results"]

def getPosition(results):
    """Extract ``[lat, lng]`` pairs from a list of place results.

    Args:
        results: sequence of mappings, each holding
            ``["geometry"]["location"]`` with ``"lat"`` and ``"lng"`` entries.

    Returns:
        A list of ``[latitude, longitude]`` lists, in the order of ``results``.
    """
    return [
        [place["geometry"]["location"]["lat"], place["geometry"]["location"]["lng"]]
        for place in results
    ]

def request_soup(url, search_class, limit=20):
    """Scrape place names from a page and text-search each one in Boston.

    Args:
        url: page to download and parse.
        search_class: passed as the first argument of ``soup.find_all``.
            NOTE(review): despite the name, that argument is matched against
            tag names, not CSS classes — confirm the intended selector.
        limit: maximum number of matched elements to look up (default 20).

    Returns:
        A list with one ``request_json`` result per looked-up element; it is
        shorter than ``limit`` when the page has fewer matches.
    """
    from urllib.parse import quote_plus

    output = []
    res = requests.get(url, timeout=10)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(res.content, "html.parser")
    # Slicing avoids an IndexError when fewer than `limit` elements match.
    for element in soup.find_all(search_class)[:limit]:
        club = element.text.strip()
        # Encode the scraped text so spaces, '&' or '#' cannot break the query.
        search_url = f"https://maps.googleapis.com/maps/api/place/textsearch/json?query={quote_plus(club)}+Boston&key={os.getenv('google')}"
        output.append(request_json(search_url))
    return output

    
### starbucks search
url_starbucks = f"https://maps.googleapis.com/maps/api/place/textsearch/json?query=starbucks+in+Boston&key={os.getenv('google')}"
starbucks_info = request_json(url_starbucks)
starbucks = getPosition(starbucks_info)

### night search
url_night = f"https://maps.googleapis.com/maps/api/place/textsearch/json?query=night+bars+Boston&key={os.getenv('google')}"
# Query the night-bars URL and read its own results (the Starbucks URL and
# results were previously reused here by mistake).
night_info = request_json(url_night)
night = getPosition(night_info)
    

In [294]:
# Center the map on Boston. The city and its airport are geocoded here so the
# cell also runs on a fresh kernel (`city` and `air_coord` were previously
# defined only in commented-out code).

city = place_request("boston,MA")
airport = place_request("boston airport")
air_coord = airport.get('lat'),airport.get('lng')

map_city=folium.Map(location=[city.get("lat"),city.get("lng")], zoom_start=13.45)

for school in school_loc:
    folium.Marker(school,popup="School "+str(school),icon=folium.Icon(icon="home",color='blue')).add_to(map_city)

for start_loc in startup_loc:
    folium.Marker(start_loc,popup="Startup "+str(start_loc), icon=folium.Icon(color='green')).add_to(map_city)

folium.Marker(air_coord,popup="Airport "+str(air_coord),icon=folium.Icon(icon='cloud',color='black')).add_to(map_city)

# `companies` holds GeoJSON [lng, lat] pairs; folium expects [lat, lng].
for c in companies:
    folium.Marker(c[::-1],popup="Company "+str(c),icon=folium.Icon(color='red')).add_to(map_city)

# Each popup shows the marker's own coordinates (not the leftover `c`).
for star in starbucks:
    folium.Marker(star,popup="Starbucks "+str(star),icon=folium.Icon(icon='cloud',color='pink')).add_to(map_city)

for club in night:
    folium.Marker(club,popup="Night club "+str(club),icon=folium.Icon(color='purple')).add_to(map_city)



In [None]:
def minDist(air_coord, companies, distance_km=None):
    """Find the company closest to ``air_coord``.

    Args:
        air_coord: reference point as ``(lat, lng)``.
        companies: non-empty sequence of ``[lng, lat]`` pairs; each pair is
            reversed to ``[lat, lng]`` before the distance is measured.
        distance_km: optional callable ``(point_a, point_b) -> distance``.
            Defaults to the geodesic distance in kilometres from geopy.

    Returns:
        ``(min_dist, position, coords)``: the smallest distance, the index of
        that company in ``companies`` and its coordinates as ``[lat, lng]``.
        The first company wins a tie.

    Raises:
        ValueError: if ``companies`` is empty.
    """
    if not companies:
        raise ValueError("companies must contain at least one coordinate pair")

    if distance_km is None:
        # `vincenty` was removed in geopy 2.0; `geodesic` is its replacement.
        def distance_km(point_a, point_b):
            return geopy.distance.geodesic(point_a, point_b).km

    min_dist = None
    position = 0
    for i, company in enumerate(companies):
        dist = distance_km(company[::-1], air_coord)
        # Keep the running minimum across iterations rather than resetting it.
        if min_dist is None or dist < min_dist:
            min_dist = dist
            position = i
    # Return the closest company, not the last one visited.
    return min_dist, position, companies[position][::-1]
        
# Run minDist for the airport coordinates against the company coordinates;
# as the last expression of the cell, the returned tuple is shown as output.
minDist(air_coord,companies)

In [297]:
# Render the folium map with all of its markers inline in the notebook.
display(map_city)