In [125]:
# antes de empezar importar a mongo compass los archivos csv con los siguientes comandos:
# mongoimport --db companies --collection companies --file input/companies.json
# mongoimport --db companies --collection startups --input/startups.csv
# mongoimport --db companies --collection schools --input/public_schools.csv

import os
import pandas as pd
from dotenv import load_dotenv
from pymongo import MongoClient
from bs4 import BeautifulSoup
import geocoder
import requests
import folium
import geopy.distance
import json
from folium.plugins import MousePosition
from folium.plugins import Draw
load_dotenv()


def connectCollection(database, collection):
    client = MongoClient()
    db = client[database]
    coll = db[collection]
    return db, coll

db, boston = connectCollection('companies','companies')
db, start = connectCollection('companies','startups')
db, old_comp = connectCollection('companies','old_comp')
db, bankrupt = connectCollection('companies','bankrupt')
db, starbucks = connectCollection('companies','starbucks')
db, schools = connectCollection('companies','schools')
db, night = connectCollection('companies','night')

In [126]:
# encontramos todas las empresas con oficinas en Boston, MA
# estas empresas deben seguir existiendo (deadpooled==None)
# debemos saber el año en el que fueron fundadas (founded_year==exists)


boston_old = list(boston.find({"$and":[{"offices.city":"Boston"},
                                        {"deadpooled_year":None},
                                        {"founded_year":{"$exists":True}}]}))

# filtramos por el año en el que fueron fundadas (no mas de 10 años)
# separamos las empresas en sus distintas oficinas

for y in boston_old:
    if y["founded_year"]==None or y["founded_year"]>2009:
        pass
    else:
        for of in range(len(y["offices"])):
            if y["offices"][of]["city"]=="Boston":
                db.old_comp.insert_one(
                {"name":y["name"],"homepage_url":y["homepage_url"],"category_code":y["category_code"],
                "number_of_employees":y["number_of_employees"],"founded_year":y["founded_year"],
                "total_money_raised":y["total_money_raised"],"offices":y["offices"][of]})
            
                

In [127]:
boston_bankrupt = list(boston.find({"$and":[{"offices.city":"Boston"},{"deadpooled_year":{"$ne":None}}]}))

for y in boston_bankrupt:
    for of in range(len(y["offices"])):
        if y["offices"][of]["city"]=="Boston":
            db.bankrupt.insert_one(
            {"name":y["name"],"homepage_url":y["homepage_url"],"category_code":y["category_code"],                
             "number_of_employees":y["number_of_employees"],"deadpooled_year":y["deadpooled_year"],
                "total_money_raised":y["total_money_raised"],"offices":y["offices"][of]})
            
            
                

In [128]:
def getLocation(_lng,_lat):
    try:
        loc = {
            'type':'Point',
            'coordinates':[float(_lng), float(_lat)]
        }
        return loc
    except:
        pass

def createLocation(lng,lat,coll,i):
    geocode = {"$set": {'location':getLocation(lng,lat)}}
    coll.update_one(i,geocode)
    
def setLocation(coll):
    for i in list(coll.find()):
        lng = i["offices"].get("longitude")
        lat = i["offices"].get("latitude")
        coll.delete_one({"offices.longitude": None})
        createLocation(lng,lat,coll,i)
        


In [129]:
setLocation(old_comp)
setLocation(bankrupt)

In [130]:
# funcion para geolocalizar direcciones

def place_request(direction):
    if not os.getenv("google"):
        raise ValueError("No API token!")
    else:
        g = geocoder.google(direction,key=os.getenv("google"))
        return g.json
    

In [131]:
# de angel.co sacamos un csv filtrado de startups en Boston que han ganado entre 2 y 100 M

startups = list(start.find())
startups = [startups[s] for s in range(len(startups)-1) if int(startups[s]["Total Raised"])>1000000]

# obtenemos la direccion de todas las startups en Boston (nombre y estado)

for startup in startups[:-1]:
    #x = place_request(startup["Name"]+",MA")
    try:
        lat = x["raw"]["geometry"]["location"]["lat"]
        lng = x["raw"]["geometry"]["location"]["lng"]
        createLocation(lng,lat,start,startup)
    except:
        pass


In [132]:
# https://www.kaggle.com/crawford/boston-public-schools

school = list(schools.find())
for s in school:
    lng = s["\ufeffX"]
    lat = s["Y"]
    createLocation(lng,lat,schools,s)


In [133]:
def request_json(url):
    res = requests.get(url).json()
    return res["results"]

def getAdress(results):
    return results["formatted_address"]

def getPosition(results):
    lng = results["geometry"]["location"]["lng"]
    lat = results["geometry"]["location"]["lat"]
    return lng,lat
    

In [134]:
url = "https://maps.googleapis.com/maps/api/place/textsearch/json?query="

def insertMongo(coll,s,lng,lat):
    coll.insert_one({'location':getLocation(lng,lat),'adress':getAdress(s)})
    

### starbucks search
url_starbucks = url+f"starbucks+in+Boston&key={os.getenv('google')}"
#starbucks_info = request_json(url_starbucks)
for s in starbucks_info:
    lng,lat = getPosition(s)
    starbucks.insert_one({'location':getLocation(lng,lat),'adress':getAdress(s)})
    insertMongo(starbucks,s,lng,lat)

### night search
url_night = url+f"night+bars+Boston&key={os.getenv('google')}"
#night_info = request_json(url_night) 
for n in night_info:
    lng,lat = getPosition(n)
    insertMongo(night,n,lng,lat)

    

In [135]:
# we set the initial location as the center of Boston, from googlemaps

#city = place_request("boston,MA")
#airport = place_request("boston airport")

air_coord = [airport.get('lat'),airport.get('lng')]
city_coord = [city.get("lat"),city.get("lng")]


In [166]:
coll = [old_comp,bankrupt,starbucks,night,schools,start]
colls = []
for i in range(6):
    colls.append(list(coll[i].find({"location.coordinates":{"$exists":True}})))
    

In [152]:
 m = folium.Map(city_coord, zoom_start=15,tiles='cartodbpositron')
     
school_group = folium.FeatureGroup(name="Schools").add_to(m)
start_group = folium.FeatureGroup(name="Startups").add_to(m)
old_comp_group = folium.FeatureGroup(name="Old companies").add_to(m)
bankrupt_group = folium.FeatureGroup(name="Bankrupt companies").add_to(m)
starbucks_group = folium.FeatureGroup(name="Starbucks").add_to(m) 
night_group = folium.FeatureGroup(name="Night bars").add_to(m)
plane_group = folium.FeatureGroup(name="Airport").add_to(m)
    
    
for c in colls[0]:
    coord = c["location"]["coordinates"][::-1]
    icon=folium.Icon(color='red')
    old_comp_group.add_child(folium.Marker(coord,popup="Old company "+str(coord),icon=icon))   
    
for c in colls[1]:
    coord = c["location"]["coordinates"][::-1]
    icon=folium.Icon(color='blue',prefix='bankrupt companies')
    bankrupt_group.add_child(folium.Marker(coord,popup="bankrupt company "+str(coord),icon=icon)) 
    
for star in colls[2]:
    coord = star["location"]["coordinates"][::-1]
    icon=folium.Icon(color='darkgreen')
    starbucks_group.add_child(folium.Marker(coord,popup="Starbucks "+str(coord),icon=icon))

for club in colls[3]:
    coord = club["location"]["coordinates"][::-1]
    icon=folium.Icon(color='lightgreen')
    night_group.add_child(folium.Marker(coord,popup="Night bar "+str(coord),icon=icon))
    
for school in colls[4]:
    coord = school["location"]["coordinates"][::-1]
    icon=folium.Icon(color='green', icon='home', icon_color="lightblue")
    school_group.add_child(folium.Marker(coord,popup="School "+str(coord),icon=icon))

for start_loc in colls[5]:
    coord = start_loc["location"]["coordinates"][::-1]
    icon=folium.Icon(color='green')
    start_group.add_child(folium.Marker(coord,popup="Startup "+str(coord),icon=icon))

icon=folium.Icon(icon='cloud',color='lightblue', prefix='Airport')
plane_group.add_child(folium.Marker(air_coord,popup="Airport "+str(air_coord),icon=icon))
    
folium.LayerControl(collapsed=True).add_to(m)
folium.LatLngPopup().add_to(m)

draw = Draw(export=True)



formatter = "function(num) {return L.Util.formatNum(num, 3) + ' º ';};"
MousePosition(
    position='topright',
    separator=' | ',
    lng_first=True,
    prefix='Coordinates:',
    lat_formatter=formatter,
    lng_formatter=formatter,
).add_to(m)



draw.add_to(m)



<folium.plugins.draw.Draw at 0x11d26d450>

In [None]:
# crear geoindex desde mongo compass

In [198]:
def nearestPlaces(collection,selector,dist):
    conv_factor = 0.0001572065389467467/100
    x = collection.find({"location":
    {"$geoWithin":{"$centerSphere":[position,conv_factor*dist]}}})
    return list(x)

In [199]:
for pos in range(len(colls[1])):
    position = colls[1][pos]["location"].get("coordinates")
    for i in range(1,6):
        d = nearestPlaces(old_comp,position,i*100)