In [1]:
from pymongo import MongoClient
import pandas as pd
import pymongo
import folium
from folium import plugins
from folium.plugins import MiniMap
from folium.plugins import FastMarkerCluster
import numpy as np
import re
import requests as req

In [2]:
# Connect to the local MongoDB server and open a cursor over every document
# of the `companies_clean` collection in the `DBcompanies_cb` database.

client = MongoClient('mongodb://localhost:27017/')
db = client['DBcompanies_cb']
data = db['companies_clean'].find()

In [3]:
# Materialise the cursor into a DataFrame (one row per document).
# Only the last expression of the cell (the row count) is rendered.

records = list(data)
df_comp = pd.DataFrame(records)
df_comp.head()
df_comp.shape[0]

1726

In [4]:
# Quick sanity check: frame dimensions, then the row index, one per line.
print(df_comp.shape, df_comp.index, sep='\n')

(1726, 10)
RangeIndex(start=0, stop=1726, step=1)


In [5]:
# Keep only the analysis columns, in display order (the id column is dropped
# by omission).
# `.copy()` makes `data_comp` an independent frame: the cells below add and
# overwrite columns on it, which would otherwise operate on a slice of
# `df_comp` and trigger pandas' SettingWithCopyWarning.

data_comp = df_comp[[
    'name',
    'lat',
    'lng',
    'geopoint',
    'number_of_employees',
    'amount_raised_k$',
    'category_code',
    'wealth',
    'news_agencies',
]].copy()
data_comp.head()


Unnamed: 0,name,lat,lng,geopoint,number_of_employees,amount_raised_k$,category_code,wealth,news_agencies
0,Geni,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,...",18,16500.0,web,0.296509,0
1,Scribd,37.789634,-122.404052,"{'type': 'Point', 'coordinates': [-122.404052,...",50,25800.0,news,0.861548,1
2,Wetpaint,40.723731,-73.996431,"{'type': 'Point', 'coordinates': [-73.9964312,...",47,39800.0,web,0.844415,0
3,Wetpaint,47.603122,-122.333253,"{'type': 'Point', 'coordinates': [-122.333253,...",47,39800.0,web,0.844415,0
4,MeetMoi,40.757929,-73.985506,"{'type': 'Point', 'coordinates': [-73.985506, ...",15,5580.0,social,0.219505,0


In [6]:
#Querying near companies

def nearComps(df, rad_max_meters=1000, collection=None):
    """Count the documents found near each GeoJSON geometry.

    For every geometry in ``df`` a ``$near`` query is run against the
    ``geopoint`` field and the number of returned documents is recorded.

    Parameters
    ----------
    df : iterable of GeoJSON geometries
        Typically the ``geopoint`` column. Values are consumed in iteration
        order, so a Series with any index (not only a default RangeIndex)
        is handled correctly.
    rad_max_meters : number, default 1000
        Passed to MongoDB as ``$maxDistance``.
    collection : object with a ``find(query)`` method, optional
        Collection to query. Defaults to ``db.companies_clean``.

    Returns
    -------
    list of int
        One count per input geometry, in input order.

    Notes
    -----
    NOTE(review): when the geometries come from the queried collection
    itself, each count presumably includes the originating document
    (distance 0) - confirm this is intended.
    """
    if collection is None:
        # Module-level handle created in the connection cell.
        collection = db.companies_clean

    counts = []
    for point in df:
        # A single-clause `$and` wrapper adds nothing, so the `$near` filter
        # is sent directly.
        cursor = collection.find({
            "geopoint": {
                "$near": {
                    "$geometry": point,
                    "$maxDistance": rad_max_meters,
                }
            }
        })
        # Count by exhausting the cursor; no need to build a DataFrame.
        counts.append(sum(1 for _doc in cursor))
    return counts

# One `$near` query per row; the resulting counts become a new column.
offices_near_counts = nearComps(data_comp["geopoint"])
data_comp['offices_near'] = offices_near_counts

In [7]:
#Querying near news_agencies

def nearNews(df, rad_max_meters=1000, collection=None):
    """Count the nearby documents flagged ``news_agencies == 1``.

    For every geometry in ``df`` a ``$near`` query on the ``geopoint`` field
    is combined (via ``$and``) with the filter ``{'news_agencies': 1}`` and
    the number of returned documents is recorded.

    Parameters
    ----------
    df : iterable of GeoJSON geometries
        Typically the ``geopoint`` column. Values are consumed in iteration
        order, so a Series with any index (not only a default RangeIndex)
        is handled correctly.
    rad_max_meters : number, default 1000
        Passed to MongoDB as ``$maxDistance``.
    collection : object with a ``find(query)`` method, optional
        Collection to query. Defaults to ``db.companies_clean``.

    Returns
    -------
    list of int
        One count per input geometry, in input order.
    """
    if collection is None:
        # Module-level handle created in the connection cell.
        collection = db.companies_clean

    counts = []
    for point in df:
        cursor = collection.find({'$and': [
            {
                "geopoint": {
                    "$near": {
                        "$geometry": point,
                        "$maxDistance": rad_max_meters,
                    }
                }
            },
            {'news_agencies': 1},
        ]})
        # Count by exhausting the cursor; no need to build a DataFrame.
        counts.append(sum(1 for _doc in cursor))
    return counts

# Replaces the `news_agencies` values loaded from the database with the
# number of nearby news-agency documents for each row.
news_near_counts = nearNews(data_comp["geopoint"])
data_comp['news_agencies'] = news_near_counts


In [8]:
# Normalize offices_near and news_agencies by their maxima; news_agencies is then weighted by -0.1 (a 10% penalty in the final score).

def normalizator(df):
    """Return ``df`` divided by its own maximum (``df.max()``)."""
    peak = df.max()
    return df / peak

# Weight applied to the normalised news-agency count (negative => penalty).
NEWS_WEIGHT = -0.1

# NOTE: both columns are overwritten in place, so running this cell a second
# time re-normalises values that were already scaled.
data_comp['offices_near'] = normalizator(data_comp['offices_near'])
data_comp['news_agencies'] = NEWS_WEIGHT * normalizator(data_comp['news_agencies'])

In [9]:
#Summing up wealth, news_agencies and offices_near to obtain a final score.

def final_score(df, col1, col2, col3):
    """Return the row-wise sum of the three named columns of ``df``."""
    score_columns = [col1, col2, col3]
    selected = df[score_columns]
    return selected.sum(axis=1)

# Columns that make up the score: wealth plus the two proximity terms.
score_inputs = ('wealth', 'news_agencies', 'offices_near')
data_comp['final_score'] = final_score(data_comp, *score_inputs)

In [16]:
# Row count after adding the score columns.
data_comp.shape[0]

1726

In [24]:
# Rank every office by final score, best first, and show the top 1000.
ranked = data_comp.sort_values(by='final_score', ascending=False)
ranked.head(1000)

Unnamed: 0,name,lat,lng,geopoint,number_of_employees,amount_raised_k$,category_code,wealth,news_agencies,offices_near,final_score
881,Wishabi,40.744549,-73.988071,"{'type': 'Point', 'coordinates': [-73.988071, ...",50,16000.00,ecommerce,0.821025,-0.00,0.764706,1.585731
1,Scribd,37.789634,-122.404052,"{'type': 'Point', 'coordinates': [-122.404052,...",50,25800.00,news,0.861548,-0.05,0.725490,1.537038
50,Echo,37.786942,-122.401245,"{'type': 'Point', 'coordinates': [-122.401245,...",50,4800.00,enterprise,0.718912,-0.05,0.843137,1.512049
1119,ReachForce,40.752539,-73.987871,"{'type': 'Point', 'coordinates': [-73.987871, ...",40,14500.00,enterprise,0.650141,-0.00,0.823529,1.473670
448,Pivot,37.787646,-122.402759,"{'type': 'Point', 'coordinates': [-122.402759,...",40,13100.00,software,0.643252,-0.05,0.803922,1.397173
1166,CrowdFlower,37.795141,-122.401194,"{'type': 'Point', 'coordinates': [-122.401194,...",49,13200.00,enterprise,0.788615,-0.05,0.647059,1.385674
1428,SpaBooker,37.778991,-122.401803,"{'type': 'Point', 'coordinates': [-122.401803,...",42,14700.00,software,0.683624,-0.00,0.666667,1.350291
83,Worklight,37.786906,-122.397672,"{'type': 'Point', 'coordinates': [-122.397672,...",30,17600.00,mobile,0.497465,-0.05,0.901961,1.349426
209,Replay Solutions,40.752143,-73.990675,"{'type': 'Point', 'coordinates': [-73.990675, ...",30,15200.00,software,0.490005,-0.00,0.745098,1.235103
476,Lat49,37.787076,-122.399412,"{'type': 'Point', 'coordinates': [-122.399412,...",24,3800.00,advertising,0.335567,-0.05,0.921569,1.207136


In [17]:
# top1000 = data_comp.sort_values('final_score', ascending = False).head(1000)

In [20]:
# Heatmap input: array with one (lat, lng) row per office.
coordinate_columns = ['lat', 'lng']
latlng = data_comp[coordinate_columns].values

# m = folium.Map(location=[40, -45], zoom_start=4)
# m.add_child(plugins.HeatMap(latlng, radius=20))
# m

In [21]:
# Cluster map: all office coordinates added as a single clustered layer.
# NOTE(review): no `location` is given to the map - confirm that folium
# honours `zoom_start` in that case.

m2 = folium.Map(zoom_start=15)
cluster_layer = FastMarkerCluster(latlng)
cluster_layer.add_to(m2)
m2


In [23]:
# from pandas.io.json import json_normalize
# json_normalize(data_comp['geopoint'])

In [30]:
# def findNear(df,radio_max_meters = 2000):
#     location = db.geo_offices.find({'main_office':{
#         '$near':{
#             '$geometry':df,
#             '$maxDistance': radio_max_meters,
#         }
#     }
#     })
#     df1 = pd.DataFrame(location)
#     return len(df1)
# near_offices = findNear(data_comp['main_office'][0])


def find_old(df1, radio_max_meters=2000, collection=None):
    """Count the nearby documents flagged ``is_young == 0``.

    For every geometry in ``df1`` a ``$near`` query on the ``geo_point``
    field is combined (via ``$and``) with the filter ``{'is_young': 0}`` and
    the number of returned documents is recorded.

    Parameters
    ----------
    df1 : iterable of GeoJSON geometries
        Values are consumed in iteration order, so a Series with any index
        (not only a default RangeIndex) is handled correctly.
    radio_max_meters : number, default 2000
        Passed to MongoDB as ``$maxDistance``.
    collection : object with a ``find(query)`` method, optional
        Collection to query. Defaults to ``db.offices_ok``.

    Returns
    -------
    list of int
        One count per input geometry, in input order.

    Notes
    -----
    MongoDB rejects ``$near`` with "unable to find index for $geoNear query"
    when the queried field has no geospatial index, so the collection needs
    one on ``geo_point`` before this function is called.
    """
    if collection is None:
        # Module-level handle created in the connection cell.
        collection = db.offices_ok

    counts = []
    for point in df1:
        cursor = collection.find({'$and': [
            {
                "geo_point": {
                    "$near": {
                        "$geometry": point,
                        "$maxDistance": radio_max_meters,
                    }
                }
            },
            {'is_young': 0},
        ]})
        # Count by exhausting the cursor; no need to build a DataFrame.
        counts.append(sum(1 for _doc in cursor))
    return counts


OperationFailure: error processing query: ns=DBcompanies_cb.geo_officesTree: GEONEAR  field=geo_point maxdist=2000 isNearSphere=0
Sort: {}
Proj: {}
 planner returned error: unable to find index for $geoNear query

In [27]:
def findNear(df, radio_max_meters = 20, collection=None):
    """Count the documents found near each GeoJSON geometry.

    For every geometry in ``df`` a ``$near`` query is run against the
    ``main_office`` field and the number of returned documents is recorded.

    Parameters
    ----------
    df : iterable of GeoJSON geometries
        Values are consumed in iteration order, so a Series with any index
        (not only a default RangeIndex) is handled correctly.
    radio_max_meters : number, default 20
        Passed to MongoDB as ``$maxDistance``.
    collection : object with a ``find(query)`` method, optional
        Collection to query. Defaults to ``db.data_companies_clean``.

    Returns
    -------
    list of int
        One count per input geometry, in input order.

    Notes
    -----
    ``$near`` requires a geospatial index on the queried field; without one
    MongoDB fails with "unable to find index for $geoNear query".
    """
    if collection is None:
        # Module-level handle created in the connection cell.
        collection = db.data_companies_clean

    counts = []
    for point in df:
        cursor = collection.find({
            'main_office': {
                '$near': {
                    '$geometry': point,
                    '$maxDistance': radio_max_meters,
                }
            }
        })
        # Count by exhausting the cursor; no need to build a DataFrame.
        counts.append(sum(1 for _doc in cursor))
    return counts

# NOTE(review): `main_office` is not among the columns selected into
# `data_comp` when that frame is built (name, lat, lng, geopoint, ...), so
# this lookup looks like it would raise KeyError on a fresh run - confirm the
# intended column (possibly `geopoint`).
near_offices = findNear(data_comp['main_office'])
# Displays how many counts were returned (one per input row).
len(near_offices)

OperationFailure: error processing query: ns=DBcompanies_cb.geo_officesTree: GEONEAR  field=geo_point maxdist=20 isNearSphere=0
Sort: {}
Proj: {}
 planner returned error: unable to find index for $geoNear query

In [None]:
# Assign the counts positionally. Wrapping the list in a DataFrame first made
# the assignment depend on index alignment and would silently fill NaN on a
# mismatch; a plain list raises if the lengths differ.
data_comp['Near offices'] = near_offices

In [None]:
# Preview the first five rows, including the newly added column.
data_comp.head(n=5)

In [None]:
# Empty base map for the per-office markers added in the next cell.
base_map = folium.Map(location=None, zoom_start=15)

In [None]:
# (lat, lng) pairs for every office; `a` is reused by later cells.
a = list(zip(data_comp['lat'], data_comp['lng']))

# One individual marker per office.
for lat, lng in a:
    b = folium.Marker(location=[lat, lng]).add_to(base_map)

base_map

In [None]:
from folium.plugins import FastMarkerCluster
# Second map: the same coordinates rendered as one clustered marker layer.
base_map_two = folium.Map(zoom_start=15)

office_coords = list(zip(data_comp['lat'].values, data_comp['lng'].values))
cluster = FastMarkerCluster(data=office_coords)
cluster.add_to(base_map_two)
base_map_two



In [None]:
import json
import requests
from dotenv import load_dotenv
import os

In [None]:
# Load variables from a local .env file, then require the Google API key
# (environment variable KEYG) before any request is made.
load_dotenv()

if "KEYG" not in os.environ:
    raise ValueError("You should pass a Google Key")

google_key = os.environ["KEYG"]

In [None]:
# Sample Places "Nearby Search" request: schools within 1500 m of one point.
BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch'
res = requests.get(
    "{}/json".format(BASE_URL),
    # Let requests build and URL-encode the query string instead of
    # formatting it by hand.
    params={
        "location": "34.017606,-118.487267",
        "radius": 1500,
        "type": "school",
        "key": google_key,
    },
    # Without a timeout a stalled connection blocks the kernel indefinitely.
    timeout=10,
)





In [None]:
# Independent list copy of the (lat, lng) pairs built earlier.
locat = list(a)



In [None]:
# One Nearby Search request (schools within 1500 m) per office location.
# Expects BASE_URL without the trailing "/json" part, as defined in the
# sample-request cell.
asd = []
# A single Session reuses the HTTPS connection across all requests.
with requests.Session() as session:
    for lat, lng in locat:
        response = session.get(
            "{}/json".format(BASE_URL),
            # Let requests build and URL-encode the query string.
            params={
                "location": "{},{}".format(lat, lng),
                "radius": 1500,
                "type": "school",
                "key": google_key,
            },
            # Without a timeout one stalled request blocks the whole loop.
            timeout=10,
        )
        asd.append(response)
                 


In [None]:
# Decode the JSON body of every collected response, preserving order.
asd_jsons = [response.json() for response in asd]


In [None]:
#https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=-33.8670522,151.1957362&radius=1500&type=restaurant&keyword=cruise&key=YOUR_API_KEY

In [None]:
# NOTE(review): this rebinds BASE_URL to a value that already ends in
# "/json?location=". The earlier request cells append "/json?location=..."
# themselves, so re-running them after this cell builds a different URL.
BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location='


In [None]:
# Sample request (restaurants matching "cruise" within 1500 m). Expects
# BASE_URL to already end in "/json?location=".
# The original call was missing its closing parenthesis (SyntaxError).
res = requests.get(
    '{}-33.8670522,151.1957362&radius=1500&type=restaurant&keyword=cruise&key={}'.format(BASE_URL, google_key),
    # Without a timeout a stalled connection blocks the kernel indefinitely.
    timeout=10,
)


In [None]:
# Endpoint root of the Places "Nearby Search" API. The original line used
# typographic quotes, which Python rejects as a SyntaxError.
BASE_URL = "https://maps.googleapis.com/maps/api/place/nearbysearch"