In [1]:
#import libraries
from pymongo import MongoClient
import pandas as pd

In [2]:
# Connection to the local MongoDB server
def connect_mongo():
    """Return a handle to the ``companies_clean`` database on localhost:27017.

    The underlying ``MongoClient`` is created on the first call and then
    reused: it is stored as an attribute on this function so that callers
    that invoke ``connect_mongo()`` repeatedly (for example once per row
    in a loop) do not open a new client every time.

    Returns
    -------
    The ``companies_clean`` database object of the shared client.
    """
    client = getattr(connect_mongo, "_client", None)
    if client is None:
        client = MongoClient('localhost', 27017)
        # Remember the client so later calls share it instead of reconnecting.
        connect_mongo._client = client
    return client['companies_clean']

# Query MongoDB for the companies located within a radius of a point
def geoquery(lat, long, radius=800):
    """Return the documents of ``comps`` found near a point, as a DataFrame.

    A ``$near`` filter on the ``coord`` field is sent to the ``comps``
    collection of the database returned by ``connect_mongo()``.

    Parameters
    ----------
    lat : latitude of the centre point.
    long : longitude of the centre point.
    radius : value used as ``$maxDistance`` (in meters); defaults to 800.

    Returns
    -------
    pandas.DataFrame built from the documents yielded by the query.
    """
    centre = {
        "type": "Point",
        "coordinates": [long, lat],  # longitude goes first, then latitude
    }
    near_filter = {
        "coord": {
            "$near": {
                "$geometry": centre,
                "$maxDistance": radius,  # In meters
            }
        }
    }
    matches = connect_mongo().comps.find(near_filter)
    return pd.DataFrame(matches)


# Sums one column of a neighbourhood query result, or None if it is absent
def _neighborhood_total(neighbors, column):
    """Return the NaN-skipping sum of ``column`` in ``neighbors``.

    ``None`` is returned when the frame has no such column, which is the
    case when the neighbourhood query produced no documents at all.
    """
    if column not in neighbors.columns:
        return None
    return neighbors[column].sum(skipna=True)


# Adds columns of employees and money in the area surrounding the company
def add_money_emp():
    """Load every company and attach totals for its surrounding area.

    All documents of the ``comps`` collection are read into a DataFrame
    (without the ``_id`` column). For each row, ``geoquery`` is called with
    the row's ``latitude`` and ``longitude`` (default radius) and the
    ``number_of_employees`` and ``total_money_raised`` columns of the
    result are summed.

    Returns
    -------
    pandas.DataFrame with two extra columns, ``employees_neighborhood`` and
    ``money_neighborhood``. A value is ``None``/NaN when the neighbourhood
    query returned no such column.
    """
    db = connect_mongo()
    all_comps = pd.DataFrame(db.comps.find())
    # errors='ignore' keeps an empty collection (no columns) from raising.
    all_comps = all_comps.drop(columns='_id', errors='ignore')

    if all_comps.empty:
        coordinates = []
    else:
        coordinates = zip(all_comps['latitude'], all_comps['longitude'])

    money = []
    employees = []
    for lat, lng in coordinates:
        neighbors = geoquery(lat, lng)
        # Only the two needed columns are summed: summing the whole frame
        # would also try to add up non-numeric columns (ids, names,
        # coordinate lists), which recent pandas versions reject.
        money.append(_neighborhood_total(neighbors, 'total_money_raised'))
        employees.append(_neighborhood_total(neighbors, 'number_of_employees'))

    all_comps['employees_neighborhood'] = employees
    all_comps['money_neighborhood'] = money
    return all_comps

# Finds the top company for a metric and returns the companies around it.
def get_neighborhood(companies, sortby, r):
    """Query the surroundings of the company that ranks first by ``sortby``.

    Parameters
    ----------
    companies : DataFrame with ``latitude`` and ``longitude`` columns plus
        the column named by ``sortby``.
    sortby : column to rank by, in descending order.
    r : radius forwarded to ``geoquery``.

    Returns
    -------
    Whatever ``geoquery`` returns for the coordinates of the first row
    after sorting.
    """
    ranked = (
        companies
        .sort_values(by=sortby, ascending=False)
        .reset_index(drop=True)
    )
    # After the index reset, label 0 is the highest-ranked row.
    top_lat = ranked.loc[0, 'latitude']
    top_lng = ranked.loc[0, 'longitude']
    return geoquery(top_lat, top_lng, r)

In [4]:
# Runs the functions and saves the queries as CSV files for Tableau
companies = add_money_emp()
display(companies.head())

# One export per metric: the companies within a radius of 5000 around the
# company that ranks first for that metric.
get_neighborhood(companies, 'money_neighborhood', 5000).to_csv('money_companies.csv')
get_neighborhood(companies, 'employees_neighborhood', 5000).to_csv('employees_companies.csv')


Unnamed: 0,category_code,coord,founded_year,latitude,longitude,name,number_of_employees,raised_amount,raised_currency_code,total_money_raised,employees_neighborhood,money_neighborhood
0,web,"[-122.333253, 47.603122]",2005,47.603122,-122.333253,Wetpaint,47,5250000,USD,39800000.0,1033.0,318315000.0
1,news,"[-122.394523, 37.764726]",2004,37.764726,-122.394523,Digg,60,8500000,USD,45000000.0,416.0,188600000.0
2,web,"[-118.393064, 34.090368]",2006,34.090368,-118.393064,Geni,18,1500000,USD,16500000.0,18.0,16500000.0
3,news,"[-122.404052, 37.789634]",2007,37.789634,-122.404052,Scribd,50,12000,USD,25800000.0,3387.0,1368430000.0
4,social,"[-73.985506, 40.757929]",2007,40.757929,-73.985506,MeetMoi,15,1500000,USD,5580000.0,940.0,349890000.0
