In [119]:
import pandas as pd
import numpy as np
import re
import requests as req
import os
from dotenv import load_dotenv
load_dotenv()
from pymongo import MongoClient
import folium
from folium import plugins
from folium.plugins import MiniMap


# Read the API key from the environment (populated by load_dotenv() above).
# Raises KeyError when the "KEY" variable is not set.
# NOTE(review): api_key is not referenced by any of the cells visible here.
api_key = os.environ["KEY"] 

In [120]:
# Connect to the local MongoDB server and select the "companies" database.
client = MongoClient('mongodb://localhost:27017/')
db = client['companies']

In [121]:
# Handle to the "companies" collection. Despite its name this is a pymongo
# collection, not a pandas DataFrame.
df = db['companies']

## Query Pymongo eligiendo empresas tecnológicas, con información sobre oficinas, con empleados, fundadas a partir de 2005 y que no hayan quebrado

In [122]:
# Load the companies that satisfy every condition below into a DataFrame:
#   * category is one of the listed technology-related codes,
#   * number_of_employees is not null,
#   * founded in 2005 or later,
#   * not dead-pooled (deadpooled_year is null),
#   * office latitude/longitude fields exist and are not null.
companies_with_coord = pd.DataFrame(df.find({
    'category_code': {'$in': [
        'games_video',
        'web',
        'software',
        'mobile',
        'advertising',
        'ecommerce',
        'design',
        'biotech',
        'cleantech',
        'travel',
    ]},
    'number_of_employees': {'$ne': None},
    'founded_year': {'$gte': 2005},
    'deadpooled_year': {'$eq': None},
    'offices.latitude': {'$exists': True, '$ne': None},
    'offices.longitude': {'$exists': True, '$ne': None},
}))

In [123]:
companies_with_coord.shape

(2102, 42)

In [124]:
companies_with_coord['founded_year'].describe()

count    2102.000000
mean     2007.091342
std         1.204557
min      2005.000000
25%      2006.000000
50%      2007.000000
75%      2008.000000
max      2013.000000
Name: founded_year, dtype: float64

## Limpiando campo total_money_raised

In [125]:
# Collect every distinct combination of non-numeric tokens (currency symbol,
# magnitude suffix) that appears in total_money_raised, in first-seen order.
letters = []
for amount in companies_with_coord['total_money_raised']:
    symbols = re.findall('[^0-9.,]+', amount)
    if symbols not in letters:
        letters.append(symbols)
    

In [126]:
letters

[['$', 'M'],
 ['$'],
 ['$', 'k'],
 ['€', 'M'],
 ['€', 'k'],
 ['£', 'k'],
 ['£', 'M'],
 ['C$', 'k']]

In [127]:
# Currency symbol -> currency code.
dict_coin = dict([
    ('$', 'USD'),
    ('£', 'GBP'),
    ('C$', 'CAD'),
    ('kr', 'SEK'),
    ('€', 'EUR'),
])
# Codes whose exchange rate is looked up; EUR is left out of the lookup.
coins = [code for code in dict_coin.values() if code != 'EUR']

In [128]:
coindict={}
for i in coins:
    res = req.get('https://api.exchangeratesapi.io/latest')
    data = res.json()
    coindict[i] = data['rates'][i]

In [129]:
coindict

{'USD': 1.1226, 'GBP': 0.89635, 'CAD': 1.4655, 'SEK': 10.5195}

In [130]:
# Multipliers applied to the numeric part of total_money_raised to obtain USD.
# Magnitude suffixes scale the number; currency symbols convert it to USD.
#
# The rates in `coindict` are units of each currency per 1 EUR (the recorded
# output shows USD 1.1226 and GBP 0.89635, i.e. EUR-based quotes), therefore
#   1 unit of currency X = coindict['USD'] / coindict[X] USD
#   1 EUR                = coindict['USD'] USD
# The earlier version used the reciprocal of each factor, which undervalued
# GBP/EUR amounts and overvalued CAD/SEK amounts.
dict_quantity = {
    '$': 1,
    'M': 1000000,
    'B': 1000000000,
    'k': 1000,
    '£': coindict['USD'] / coindict['GBP'],
    'C$': coindict['USD'] / coindict['CAD'],
    'kr': coindict['USD'] / coindict['SEK'],
    '€': coindict['USD'],
}

In [131]:
# Convert every total_money_raised string (e.g. "$16.5M") into a number:
# the first numeric run is the magnitude, and every non-numeric token
# (currency symbol, k/M/B suffix) contributes a factor from dict_quantity.
total_money_raised_normalized = []
for raw_amount in companies_with_coord['total_money_raised']:
    text = str(raw_amount)
    magnitude = float(re.findall('[0-9.]+', text)[0])
    multiplier = 1
    for symbol in re.findall('[^0-9.]+', text):
        multiplier = multiplier * dict_quantity[symbol]
    total_money_raised_normalized.append(magnitude * multiplier)

In [132]:
len(total_money_raised_normalized)

2102

In [133]:
# Attach the normalised amounts as a new column (one value per company row).
companies_with_coord['total_money_raised_normalized_USD']= total_money_raised_normalized

In [134]:
companies_with_coord['total_money_raised_normalized_USD'].head()

0    16500000.0
1    45000000.0
2    21000000.0
3    23400000.0
4    88700000.0
Name: total_money_raised_normalized_USD, dtype: float64

In [135]:
#companies_first_filter_2 = companies_first_filter[companies_first_filter['total_money_raised_normalized_USD']>=1000000]

In [136]:
#companies_first_filter_2.head()

## Duplicando registros de empresas con varias oficinas, creando un registro por cada oficina

In [137]:
# Copy of the company table without the nested 'offices' column.
# NOTE(review): this frame is not used by any later cell visible here.
companies_with_coord_intermediate=companies_with_coord.drop(columns='offices')

In [138]:
# Make sure every company's 'offices' value is a list of office dicts.
# A single office stored as a bare dict is wrapped in a one-element list
# (list(dict) would yield the dict's keys instead of the office itself);
# any other iterable is materialised as a list, as before.
office_list = []
for offices in companies_with_coord['offices']:
    if isinstance(offices, list):
        office_list.append(offices)
    elif isinstance(offices, dict):
        office_list.append([offices])
    else:
        office_list.append(list(offices))

In [139]:
#companies_first_filter_3 = companies_first_filter_2.copy()
# Write the normalised per-company office lists back onto the frame.
companies_with_coord['offices']=office_list

In [140]:
companies_with_coord['offices'].head()

0    [{'description': 'Headquarters', 'address1': '...
1    [{'description': '', 'address1': '100 5th Ave ...
2    [{'description': '', 'address1': '3525 Eastham...
3    [{'description': None, 'address1': '442 Post S...
4    [{'description': '', 'address1': '475 Park Ave...
Name: offices, dtype: object

In [141]:
# Build a long table with one row per (company, office): the office lists are
# spread into columns, stacked into rows, and the office-position level is
# dropped so that only the company's row position ('index') and the office
# dict ('office') remain.
companies_with_coord2 = (
    pd.DataFrame(companies_with_coord['offices'].tolist())
    .stack()
    .reset_index()
    .drop(columns='level_1')
)
companies_with_coord2.columns = ['index', 'office']

In [142]:
display(companies_with_coord2.head())

Unnamed: 0,index,office
0,0,"{'description': 'Headquarters', 'address1': '9..."
1,1,"{'description': '', 'address1': '100 5th Ave F..."
2,2,"{'description': '', 'address1': '3525 Eastham ..."
3,3,"{'description': None, 'address1': '442 Post St..."
4,4,"{'description': '', 'address1': '475 Park Ave ..."


In [143]:
# Expose the row position as an 'index' column; it is the key used to merge
# the per-office table back onto the company data in a later cell.
# NOTE(review): in-place mutation - re-running this cell on the same frame adds
# further index columns; verify before executing it more than once.
companies_with_coord.reset_index(inplace=True)

In [144]:
companies_with_coord.head()

Unnamed: 0,index,_id,acquisition,acquisitions,alias_list,blog_feed_url,blog_url,category_code,competitions,created_at,...,products,providerships,relationships,screenshots,tag_list,total_money_raised,twitter_username,updated_at,video_embeds,total_money_raised_normalized_USD
0,0,52cdef7c4bab8bd675297d91,"{'price_amount': None, 'price_currency_code': ...",[],,http://blog.geni.com/index.rdf,http://blog.geni.com,web,"[{'competitor': {'name': 'Ancestry', 'permalin...",Thu May 31 19:52:34 UTC 2007,...,"[{'name': 'Geni', 'permalink': 'geni'}]",[],"[{'is_past': False, 'title': 'CEO', 'person': ...",[],"geni, geneology, social, family, genealogy",$16.5M,geni,Wed Oct 10 14:01:29 UTC 2012,"[{'embed_code': '<object width=""425"" height=""3...",16500000.0
1,1,52cdef7c4bab8bd675297d9d,"{'price_amount': None, 'price_currency_code': ...",[],,http://blog.joost.com/atom.xml,http://blog.joost.com,games_video,"[{'competitor': {'name': 'Babelgum', 'permalin...",Sat Jun 09 07:05:30 UTC 2007,...,"[{'name': 'Joost', 'permalink': 'joost'}]",[],"[{'is_past': False, 'title': 'CEO', 'person': ...",[],"iptv, babelgum, television, video, thevenicepr...",$45M,,Fri Mar 15 23:31:11 UTC 2013,"[{'embed_code': '<object width=""425"" height=""3...",45000000.0
2,2,52cdef7c4bab8bd675297da9,,[],,http://blog.mahalo.com/feed/,http://blog.mahalo.com/,web,"[{'competitor': {'name': 'Topicle', 'permalink...",Thu Jun 14 03:42:20 UTC 2007,...,"[{'name': 'Mahalo', 'permalink': 'mahalo'}]",[],"[{'is_past': False, 'title': 'Founder, CEO', '...",[],"search, search-engine, human-powered-search, aaa",$21M,MahaloDotCom,Fri May 17 04:34:19 UTC 2013,"[{'embed_code': '<embed src=""http://blip.tv/pl...",21000000.0
3,3,52cdef7c4bab8bd675297dab,"{'price_amount': None, 'price_currency_code': ...",[],,http://feeds.feedburner.com/kyte_blog,http://kyte.com/blog,games_video,"[{'competitor': {'name': 'Ustream', 'permalink...",Thu Jun 14 18:26:11 UTC 2007,...,"[{'name': 'Kyte', 'permalink': 'kyte'}, {'name...",[],"[{'is_past': False, 'title': 'CTO and co-found...",[],"video, mobile, iphone-app, video-platform, mob...",$23.4M,kyte,Mon Oct 28 09:34:37 UTC 2013,"[{'embed_code': '<embed src=""http://blip.tv/pl...",23400000.0
4,4,52cdef7c4bab8bd675297daf,"{'price_amount': 62500000, 'price_currency_cod...",[],,,,mobile,[],Sat Jun 16 05:53:38 UTC 2007,...,"[{'name': 'MyFree411', 'permalink': 'myfree411'}]","[{'title': 'Public Relations', 'is_past': Fals...","[{'is_past': False, 'title': 'Founder and CTO'...",[],"directoryassistance, advertising, mobile",$88.7M,Marchex,Mon Jun 03 05:26:22 UTC 2013,[],88700000.0


In [145]:
# Left-join the company attributes onto every office row via the 'index' key.
companies_with_coord_offices = pd.merge(
    companies_with_coord2,
    companies_with_coord,
    how='left',
    on='index',
)

In [146]:
companies_with_coord_offices.columns

Index(['index', 'office', '_id', 'acquisition', 'acquisitions', 'alias_list',
       'blog_feed_url', 'blog_url', 'category_code', 'competitions',
       'created_at', 'crunchbase_url', 'deadpooled_day', 'deadpooled_month',
       'deadpooled_url', 'deadpooled_year', 'description', 'email_address',
       'external_links', 'founded_day', 'founded_month', 'founded_year',
       'funding_rounds', 'homepage_url', 'image', 'investments', 'ipo',
       'milestones', 'name', 'number_of_employees', 'offices', 'overview',
       'partners', 'permalink', 'phone_number', 'products', 'providerships',
       'relationships', 'screenshots', 'tag_list', 'total_money_raised',
       'twitter_username', 'updated_at', 'video_embeds',
       'total_money_raised_normalized_USD'],
      dtype='object')

In [147]:
def getFirst(data):
    """Extract the coordinates of one office and build its GeoJSON point.

    Parameters
    ----------
    data : mapping
        A row-like object whose ``'office'`` entry is a mapping containing
        ``'latitude'`` and ``'longitude'``.

    Returns
    -------
    dict
        ``{"lat": ..., "lng": ..., "oficina_principal": ...}`` where
        ``oficina_principal`` is a GeoJSON ``Point`` (``[longitude, latitude]``
        order) or ``None`` when either coordinate is missing.
    """
    office = data['office']
    latitude = office['latitude']
    longitude = office['longitude']

    # Only create the GeoJSON object if all geodata is available. Compare with
    # None explicitly: 0.0 is a valid latitude/longitude and must not be
    # treated as "missing" the way a plain truthiness test would.
    principal = None
    if latitude is not None and longitude is not None:
        principal = {
            "type": "Point",
            "coordinates": [longitude, latitude]
        }

    return {
        "lat": latitude,
        "lng": longitude,
        "oficina_principal": principal
    }

In [148]:
# Run getFirst on every office row; result_type="expand" turns the returned
# dict into the columns lat / lng / oficina_principal.
companies_with_coord_offices_sub = companies_with_coord_offices[["office"]].apply(
    getFirst,
    axis=1,
    result_type="expand",
)

In [149]:
companies_with_coord_offices_sub.head()

Unnamed: 0,lat,lng,oficina_principal
0,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,..."
1,40.746497,-74.009447,"{'type': 'Point', 'coordinates': [-74.0094471,..."
2,34.017606,-118.487267,"{'type': 'Point', 'coordinates': [-118.487267,..."
3,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,..."
4,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,..."


In [150]:
# Put the extracted coordinate columns next to the per-office company data.
companies_with_coord_offices_clean = pd.concat(
    (companies_with_coord_offices, companies_with_coord_offices_sub),
    axis='columns',
)

In [151]:
companies_with_coord_offices_clean.head()

Unnamed: 0,index,office,_id,acquisition,acquisitions,alias_list,blog_feed_url,blog_url,category_code,competitions,...,screenshots,tag_list,total_money_raised,twitter_username,updated_at,video_embeds,total_money_raised_normalized_USD,lat,lng,oficina_principal
0,0,"{'description': 'Headquarters', 'address1': '9...",52cdef7c4bab8bd675297d91,"{'price_amount': None, 'price_currency_code': ...",[],,http://blog.geni.com/index.rdf,http://blog.geni.com,web,"[{'competitor': {'name': 'Ancestry', 'permalin...",...,[],"geni, geneology, social, family, genealogy",$16.5M,geni,Wed Oct 10 14:01:29 UTC 2012,"[{'embed_code': '<object width=""425"" height=""3...",16500000.0,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,..."
1,1,"{'description': '', 'address1': '100 5th Ave F...",52cdef7c4bab8bd675297d9d,"{'price_amount': None, 'price_currency_code': ...",[],,http://blog.joost.com/atom.xml,http://blog.joost.com,games_video,"[{'competitor': {'name': 'Babelgum', 'permalin...",...,[],"iptv, babelgum, television, video, thevenicepr...",$45M,,Fri Mar 15 23:31:11 UTC 2013,"[{'embed_code': '<object width=""425"" height=""3...",45000000.0,40.746497,-74.009447,"{'type': 'Point', 'coordinates': [-74.0094471,..."
2,2,"{'description': '', 'address1': '3525 Eastham ...",52cdef7c4bab8bd675297da9,,[],,http://blog.mahalo.com/feed/,http://blog.mahalo.com/,web,"[{'competitor': {'name': 'Topicle', 'permalink...",...,[],"search, search-engine, human-powered-search, aaa",$21M,MahaloDotCom,Fri May 17 04:34:19 UTC 2013,"[{'embed_code': '<embed src=""http://blip.tv/pl...",21000000.0,34.017606,-118.487267,"{'type': 'Point', 'coordinates': [-118.487267,..."
3,3,"{'description': None, 'address1': '442 Post St...",52cdef7c4bab8bd675297dab,"{'price_amount': None, 'price_currency_code': ...",[],,http://feeds.feedburner.com/kyte_blog,http://kyte.com/blog,games_video,"[{'competitor': {'name': 'Ustream', 'permalink...",...,[],"video, mobile, iphone-app, video-platform, mob...",$23.4M,kyte,Mon Oct 28 09:34:37 UTC 2013,"[{'embed_code': '<embed src=""http://blip.tv/pl...",23400000.0,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,..."
4,4,"{'description': '', 'address1': '475 Park Ave ...",52cdef7c4bab8bd675297daf,"{'price_amount': 62500000, 'price_currency_cod...",[],,,,mobile,[],...,[],"directoryassistance, advertising, mobile",$88.7M,Marchex,Mon Jun 03 05:26:22 UTC 2013,[],88700000.0,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,..."


In [152]:
# Drop URL/media/bookkeeping columns that are not needed for the analysis.
# NOTE(review): the next cell assigns companies_with_coord_offices_clean_final
# again starting from companies_with_coord_offices_clean, so the result of this
# drop is discarded (the recorded head() output below still shows these columns).
companies_with_coord_offices_clean_final = companies_with_coord_offices_clean.drop(columns=['index','alias_list','blog_feed_url','blog_url','crunchbase_url','deadpooled_url','external_links','homepage_url','image','offices','permalink','twitter_username','video_embeds'])

In [153]:
# Remove the MongoDB '_id' column.
# NOTE(review): this starts from companies_with_coord_offices_clean, not from
# the frame produced by the previous cell, so that cell's column drops are lost.
# Later cells select the 'index' column, which the previous cell would have
# removed - chaining the two drops as-is would break them; resolve deliberately.
companies_with_coord_offices_clean_final = companies_with_coord_offices_clean.drop(columns=['_id'])

In [154]:
companies_with_coord_offices_clean_final.head()

Unnamed: 0,index,office,acquisition,acquisitions,alias_list,blog_feed_url,blog_url,category_code,competitions,created_at,...,screenshots,tag_list,total_money_raised,twitter_username,updated_at,video_embeds,total_money_raised_normalized_USD,lat,lng,oficina_principal
0,0,"{'description': 'Headquarters', 'address1': '9...","{'price_amount': None, 'price_currency_code': ...",[],,http://blog.geni.com/index.rdf,http://blog.geni.com,web,"[{'competitor': {'name': 'Ancestry', 'permalin...",Thu May 31 19:52:34 UTC 2007,...,[],"geni, geneology, social, family, genealogy",$16.5M,geni,Wed Oct 10 14:01:29 UTC 2012,"[{'embed_code': '<object width=""425"" height=""3...",16500000.0,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,..."
1,1,"{'description': '', 'address1': '100 5th Ave F...","{'price_amount': None, 'price_currency_code': ...",[],,http://blog.joost.com/atom.xml,http://blog.joost.com,games_video,"[{'competitor': {'name': 'Babelgum', 'permalin...",Sat Jun 09 07:05:30 UTC 2007,...,[],"iptv, babelgum, television, video, thevenicepr...",$45M,,Fri Mar 15 23:31:11 UTC 2013,"[{'embed_code': '<object width=""425"" height=""3...",45000000.0,40.746497,-74.009447,"{'type': 'Point', 'coordinates': [-74.0094471,..."
2,2,"{'description': '', 'address1': '3525 Eastham ...",,[],,http://blog.mahalo.com/feed/,http://blog.mahalo.com/,web,"[{'competitor': {'name': 'Topicle', 'permalink...",Thu Jun 14 03:42:20 UTC 2007,...,[],"search, search-engine, human-powered-search, aaa",$21M,MahaloDotCom,Fri May 17 04:34:19 UTC 2013,"[{'embed_code': '<embed src=""http://blip.tv/pl...",21000000.0,34.017606,-118.487267,"{'type': 'Point', 'coordinates': [-118.487267,..."
3,3,"{'description': None, 'address1': '442 Post St...","{'price_amount': None, 'price_currency_code': ...",[],,http://feeds.feedburner.com/kyte_blog,http://kyte.com/blog,games_video,"[{'competitor': {'name': 'Ustream', 'permalink...",Thu Jun 14 18:26:11 UTC 2007,...,[],"video, mobile, iphone-app, video-platform, mob...",$23.4M,kyte,Mon Oct 28 09:34:37 UTC 2013,"[{'embed_code': '<embed src=""http://blip.tv/pl...",23400000.0,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,..."
4,4,"{'description': '', 'address1': '475 Park Ave ...","{'price_amount': 62500000, 'price_currency_cod...",[],,,,mobile,[],Sat Jun 16 05:53:38 UTC 2007,...,[],"directoryassistance, advertising, mobile",$88.7M,Marchex,Mon Jun 03 05:26:22 UTC 2013,[],88700000.0,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,..."


In [155]:
companies_with_coord_offices_clean_final.shape

(2247, 47)

## Filtrando por empresas en las que se ha invertido al menos 1.000.000 USD

In [156]:
# Keep the offices of companies that raised at least 1,000,000 (normalised USD)
# and only the columns used from here on. reset_index() renumbers the rows
# 0..n-1; because an 'index' column already exists, the previous row labels are
# stored in a new 'level_0' column.
companies_with_coord_offices_clean_final_2 = (
    companies_with_coord_offices_clean_final
    .loc[
        companies_with_coord_offices_clean_final['total_money_raised_normalized_USD'] >= 1000000,
        [
            'index', 'office', 'acquisition', 'category_code',
            'deadpooled_year', 'founded_year', 'funding_rounds',
            'investments', 'ipo', 'name', 'number_of_employees', 'products',
            'total_money_raised_normalized_USD', 'lat', 'lng',
            'oficina_principal',
        ],
    ]
    .reset_index()
)

## Exportando a JSON

In [157]:
# Write the filtered offices to data_clean.json as a JSON array of records.
# NOTE(review): presumably this is the file loaded into the `offices`
# collection by the commented mongoimport command further down - confirm.
companies_with_coord_offices_clean_final_2.to_json('data_clean.json', orient="records")

## Exportando a CSV

In [158]:
# Write the same table to data_clean.csv (the row index is written as well,
# since index=False is not passed).
companies_with_coord_offices_clean_final_2.to_csv('data_clean.csv')

In [159]:
#mongoimport --db companies --collection offices --jsonArray ./data_clean.json

## Geoquery NEAR

In [160]:
# World map with a heat-map layer built from every office's (lat, lng) pair.
mapa = folium.Map(location=[40, -45], zoom_start=2.5)
latlng = companies_with_coord_offices_clean_final_2[['lat', 'lng']].values
plugins.HeatMap(latlng, radius=20).add_to(mapa)
mapa

In [161]:
companies_with_coord_offices_clean_final_2.head()

Unnamed: 0,level_0,index,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,number_of_employees,products,total_money_raised_normalized_USD,lat,lng,oficina_principal
0,0,0,"{'description': 'Headquarters', 'address1': '9...","{'price_amount': None, 'price_currency_code': ...",web,,2006,"[{'id': 6, 'round_code': 'a', 'source_url': ''...",[],,Geni,18,"[{'name': 'Geni', 'permalink': 'geni'}]",16500000.0,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,..."
1,1,1,"{'description': '', 'address1': '100 5th Ave F...","{'price_amount': None, 'price_currency_code': ...",games_video,,2006,"[{'id': 19, 'round_code': 'a', 'source_url': '...",[],,Joost,0,"[{'name': 'Joost', 'permalink': 'joost'}]",45000000.0,40.746497,-74.009447,"{'type': 'Point', 'coordinates': [-74.0094471,..."
2,2,2,"{'description': '', 'address1': '3525 Eastham ...",,web,,2007,"[{'id': 323, 'round_code': 'a', 'source_url': ...",[],,Mahalo,40,"[{'name': 'Mahalo', 'permalink': 'mahalo'}]",21000000.0,34.017606,-118.487267,"{'type': 'Point', 'coordinates': [-118.487267,..."
3,3,3,"{'description': None, 'address1': '442 Post St...","{'price_amount': None, 'price_currency_code': ...",games_video,,2006,"[{'id': 32, 'round_code': 'a', 'source_url': '...",[],,Kyte,40,"[{'name': 'Kyte', 'permalink': 'kyte'}, {'name...",23400000.0,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,..."
4,4,4,"{'description': '', 'address1': '475 Park Ave ...","{'price_amount': 62500000, 'price_currency_cod...",mobile,,2005,"[{'id': 36, 'round_code': 'c', 'source_url': '...",[],,Jingle Networks,35,"[{'name': 'MyFree411', 'permalink': 'myfree411'}]",88700000.0,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,..."


## Cálculo de la densidad monetaria, definida como el dinero total invertido en las empresas situadas dentro de un radio determinado, dividido entre el número de empresas situadas dentro de ese mismo radio

In [162]:
def findNear(geopoint, radio_max_meters=1000):
    """Query the ``offices`` collection for documents near a GeoJSON point.

    Parameters
    ----------
    geopoint : dict
        GeoJSON geometry passed as ``$geometry`` of the ``$near`` operator.
    radio_max_meters : int, optional
        Value passed as ``$maxDistance`` (default 1000).

    Returns
    -------
    The cursor returned by ``db.offices.find`` for the ``$near`` query on the
    ``oficina_principal`` field.

    NOTE(review): MongoDB's ``$near`` needs a geospatial index on
    ``oficina_principal``; no index is created in the cells visible here.
    """
    near_clause = {
        "$geometry": geopoint,
        "$maxDistance": radio_max_meters,
    }
    query = {"oficina_principal": {"$near": near_clause}}
    return db.offices.find(query)

In [163]:
# Number of offices stored in db.offices within findNear's default radius of
# each office. Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0,
# so the matches are counted by consuming the cursor instead.
near = [
    len(list(findNear(geopoint)))
    for geopoint in companies_with_coord_offices_clean_final_2['oficina_principal']
]

#print(companies_with_coord_offices_clean_final_2['oficina_principal'][1])

  This is separate from the ipykernel package so we can avoid doing imports until


In [164]:
# Total money raised (normalised USD) by the offices found near each office.
# The cursor is iterated once: Cursor.count() no longer exists in PyMongo 4,
# and indexing a cursor (cursor[j]) re-runs the query for every element.
near_money = [
    sum(doc['total_money_raised_normalized_USD'] for doc in findNear(geopoint))
    for geopoint in companies_with_coord_offices_clean_final_2['oficina_principal']
]
#print(companies_with_coord_offices_clean_final_2['oficina_principal'][1])

  """


In [165]:
# Money density per office: money raised nearby divided by the number of
# nearby offices (raises ZeroDivisionError if a count is 0).
money_density = [
    money / count
    for money, count in zip(near_money, near)
]

In [166]:
# Store the density next to each office row and preview the result.
companies_with_coord_offices_clean_final_2['money_density']=money_density
companies_with_coord_offices_clean_final_2.head()

Unnamed: 0,level_0,index,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,number_of_employees,products,total_money_raised_normalized_USD,lat,lng,oficina_principal,money_density
0,0,0,"{'description': 'Headquarters', 'address1': '9...","{'price_amount': None, 'price_currency_code': ...",web,,2006,"[{'id': 6, 'round_code': 'a', 'source_url': ''...",[],,Geni,18,"[{'name': 'Geni', 'permalink': 'geni'}]",16500000.0,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,...",16500000.0
1,1,1,"{'description': '', 'address1': '100 5th Ave F...","{'price_amount': None, 'price_currency_code': ...",games_video,,2006,"[{'id': 19, 'round_code': 'a', 'source_url': '...",[],,Joost,0,"[{'name': 'Joost', 'permalink': 'joost'}]",45000000.0,40.746497,-74.009447,"{'type': 'Point', 'coordinates': [-74.0094471,...",21875000.0
2,2,2,"{'description': '', 'address1': '3525 Eastham ...",,web,,2007,"[{'id': 323, 'round_code': 'a', 'source_url': ...",[],,Mahalo,40,"[{'name': 'Mahalo', 'permalink': 'mahalo'}]",21000000.0,34.017606,-118.487267,"{'type': 'Point', 'coordinates': [-118.487267,...",13766670.0
3,3,3,"{'description': None, 'address1': '442 Post St...","{'price_amount': None, 'price_currency_code': ...",games_video,,2006,"[{'id': 32, 'round_code': 'a', 'source_url': '...",[],,Kyte,40,"[{'name': 'Kyte', 'permalink': 'kyte'}, {'name...",23400000.0,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,...",28617710.0
4,4,4,"{'description': '', 'address1': '475 Park Ave ...","{'price_amount': 62500000, 'price_currency_cod...",mobile,,2005,"[{'id': 36, 'round_code': 'c', 'source_url': '...",[],,Jingle Networks,35,"[{'name': 'MyFree411', 'permalink': 'myfree411'}]",88700000.0,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,...",66350000.0


## Ordenando el dataframe por la densidad monetaria

In [167]:
# Highest money density first; the two leftover index columns are removed.
companies_with_coord_offices_clean_final_2 = (
    companies_with_coord_offices_clean_final_2
    .sort_values(by='money_density', ascending=False)
    .drop(columns=['level_0', 'index'])
)

In [168]:
# Renumber the rows 0..n-1 in the new sort order (old labels are discarded);
# later cells address rows by these positions.
companies_with_coord_offices_clean_final_2.reset_index(drop=True, inplace=True)
companies_with_coord_offices_clean_final_2.head()

Unnamed: 0,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,number_of_employees,products,total_money_raised_normalized_USD,lat,lng,oficina_principal,money_density
0,"{'description': '', 'address1': '1051 East Hil...",,software,,2007,"[{'id': 1763, 'round_code': 'a', 'source_url':...",[],,Zuora,250,[],128000000.0,37.559628,-122.270987,"{'type': 'Point', 'coordinates': [-122.2709868...",319666700.0
1,"{'description': 'Elevance Renewable', 'address...",,cleantech,,2007,"[{'id': 6041, 'round_code': 'a', 'source_url':...",[],,Elevance Renewable Sciences,68,[],294000000.0,41.675004,-88.064215,"{'type': 'Point', 'coordinates': [-88.0642145,...",294000000.0
2,"{'description': None, 'address1': '130 West Un...",,cleantech,,2007,"[{'id': 2102, 'round_code': 'b', 'source_url':...",[],,eSolar,140,[],192000000.0,34.146436,-118.153136,"{'type': 'Point', 'coordinates': [-118.153136,...",192000000.0
3,"{'description': 'TestPlant', 'address1': '3000...",,software,,2008,"[{'id': 7135, 'round_code': 'unattributed', 's...",[],,Testplant,50,"[{'name': 'eggPlant', 'permalink': 'eggplant'}]",2560000.0,40.023005,-105.253284,"{'type': 'Point', 'coordinates': [-105.2532841...",179780000.0
4,"{'description': '', 'address1': '1500 Fashion ...",,web,,2007,"[{'id': 634, 'round_code': 'a', 'source_url': ...",[],,SpeedDate,0,"[{'name': 'SpeedDate.com', 'permalink': 'speed...",11600000.0,37.557637,-122.285714,"{'type': 'Point', 'coordinates': [-122.285714,...",133058600.0


In [169]:
# For each office, count the nearby offices whose company was founded in 2010
# or later. The cursor is iterated once instead of using Cursor.count()
# (removed in PyMongo 4) and cursor[j] (which re-runs the query per element).
young_companies = [
    sum(1 for doc in findNear(geopoint) if doc['founded_year'] >= 2010)
    for geopoint in companies_with_coord_offices_clean_final_2['oficina_principal']
]
#print(companies_with_coord_offices_clean_final_2['oficina_principal'][1])

  """


In [170]:
# Number of nearby companies founded in 2010 or later, one value per office.
companies_with_coord_offices_clean_final_2['young_companies']=young_companies

In [171]:
# For each office, count the nearby offices whose company category is 'design'.
# The cursor is iterated once instead of using Cursor.count() (removed in
# PyMongo 4) and cursor[j] (which re-runs the query per element).
design_companies = [
    sum(1 for doc in findNear(geopoint) if doc['category_code'] == 'design')
    for geopoint in companies_with_coord_offices_clean_final_2['oficina_principal']
]

  """


In [172]:
# Number of nearby design companies, one value per office.
companies_with_coord_offices_clean_final_2['design_companies']=design_companies

## Ranking ponderado: un 33,33 % para la densidad monetaria, un 33,33 % para la cercanía de empresas jóvenes y un 33,33 % para la cercanía de empresas de diseño.

In [105]:
# Largest observed values of the two metrics.
# NOTE(review): neither value is used by the cells visible here.
max_comp, max_mon_den = (
    companies_with_coord_offices_clean_final_2[column].max()
    for column in ('young_companies', 'money_density')
)

In [173]:
def standarize(x):
    """Min-max scale ``x`` to the [0, 1] range.

    Parameters
    ----------
    x : array-like (numpy array or pandas Series)
        Numeric values to rescale.

    Returns
    -------
    Same container type as ``x - scalar`` produces, holding
    ``(x - min) / (max - min)``. When every value is identical (max == min)
    the result is all zeros instead of the NaN produced by dividing 0 by 0,
    so a constant indicator contributes nothing to a weighted score rather
    than turning the whole score into NaN.
    """
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        # Constant input: multiply by 0.0 to get float zeros of the same shape.
        return (x - lowest) * 0.0
    return (x - lowest) / span

In [174]:
# Min-max scale the three ranking inputs with standarize(), one column each.
(
    money_density_standard,
    young_companies_standard,
    design_companies_standard,
) = (
    standarize(companies_with_coord_offices_clean_final_2[column])
    for column in ('money_density', 'young_companies', 'design_companies')
)

In [175]:
# Store the scaled indicators next to the raw ones, in the same column order.
for column, scaled in (
    ('money_density_standard', money_density_standard),
    ('young_companies_standard', young_companies_standard),
    ('design_companies_standard', design_companies_standard),
):
    companies_with_coord_offices_clean_final_2[column] = scaled

In [176]:
# Composite score: one third each of the min-max scaled money density,
# young-company count and design-company count.
# The third term uses 'design_companies_standard'; the raw 'design_companies'
# count is not limited to [0, 1] and would outweigh the other two indicators
# as soon as an office has more than one design company nearby.
# Each term is divided by 3 before adding to keep the original evaluation order.
ranking=(
    companies_with_coord_offices_clean_final_2['money_density_standard']/3
    + companies_with_coord_offices_clean_final_2['young_companies_standard']/3
    + companies_with_coord_offices_clean_final_2['design_companies_standard']/3
).tolist()

In [115]:
# Remove a stale 'ranking' column before it is recomputed.
# errors='ignore' keeps this cell from raising KeyError when the column does
# not exist yet, e.g. on a fresh Restart & Run All, where 'ranking' is only
# created by a later cell.
companies_with_coord_offices_clean_final_2.drop(columns='ranking',errors='ignore',inplace=True)

In [177]:
# Attach the composite score as the 'ranking' column.
companies_with_coord_offices_clean_final_2['ranking']=ranking

In [178]:
# Preview the first rows, including the newly added scaled columns and 'ranking'.
companies_with_coord_offices_clean_final_2.head()

Unnamed: 0,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,number_of_employees,...,lat,lng,oficina_principal,money_density,young_companies,design_companies,money_density_standard,young_companies_standard,design_companies_standard,ranking
0,"{'description': '', 'address1': '1051 East Hil...",,software,,2007,"[{'id': 1763, 'round_code': 'a', 'source_url':...",[],,Zuora,250,...,37.559628,-122.270987,"{'type': 'Point', 'coordinates': [-122.2709868...",319666700.0,0,0,1.0,0.0,0.0,0.333333
1,"{'description': 'Elevance Renewable', 'address...",,cleantech,,2007,"[{'id': 6041, 'round_code': 'a', 'source_url':...",[],,Elevance Renewable Sciences,68,...,41.675004,-88.064215,"{'type': 'Point', 'coordinates': [-88.0642145,...",294000000.0,0,0,0.919456,0.0,0.0,0.306485
2,"{'description': None, 'address1': '130 West Un...",,cleantech,,2007,"[{'id': 2102, 'round_code': 'b', 'source_url':...",[],,eSolar,140,...,34.146436,-118.153136,"{'type': 'Point', 'coordinates': [-118.153136,...",192000000.0,0,0,0.599372,0.0,0.0,0.199791
3,"{'description': 'TestPlant', 'address1': '3000...",,software,,2008,"[{'id': 7135, 'round_code': 'unattributed', 's...",[],,Testplant,50,...,40.023005,-105.253284,"{'type': 'Point', 'coordinates': [-105.2532841...",179780000.0,0,0,0.561025,0.0,0.0,0.187008
4,"{'description': '', 'address1': '1500 Fashion ...",,web,,2007,"[{'id': 634, 'round_code': 'a', 'source_url': ...",[],,SpeedDate,0,...,37.557637,-122.285714,"{'type': 'Point', 'coordinates': [-122.285714,...",133058600.0,0,0,0.41441,0.0,0.0,0.138137


In [181]:
# Order offices from best to worst score; ties are broken by the raw
# indicators, all in descending order. Row labels are left untouched here.
ranking_sort_keys = ['ranking','money_density','young_companies','design_companies']
companies_with_coord_offices_clean_final_2 = companies_with_coord_offices_clean_final_2.sort_values(
    by=ranking_sort_keys,
    ascending=False,
)
companies_with_coord_offices_clean_final_2.head()

Unnamed: 0,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,number_of_employees,...,lat,lng,oficina_principal,money_density,young_companies,design_companies,money_density_standard,young_companies_standard,design_companies_standard,ranking
52,"{'description': 'Europe', 'address1': 'Schlesi...",,design,,2008,"[{'id': 22714, 'round_code': 'a', 'source_url'...",[],,99designs,100,...,52.49862,13.446903,"{'type': 'Point', 'coordinates': [13.4469031, ...",35000000.0,0,1,0.106695,0.0,1.0,0.368898
53,"{'description': 'Australia', 'address1': '204 ...",,design,,2008,"[{'id': 22714, 'round_code': 'a', 'source_url'...",[],,99designs,100,...,-37.802659,144.986855,"{'type': 'Point', 'coordinates': [144.9868546,...",35000000.0,0,1,0.106695,0.0,1.0,0.368898
72,"{'description': '', 'address1': '580 Market St...","{'price_amount': None, 'price_currency_code': ...",games_video,,2008,"[{'id': 2113, 'round_code': 'unattributed', 's...",[],,Serious Business,22,...,37.789321,-122.401362,"{'type': 'Point', 'coordinates': [-122.4013624...",29192550.0,0,1,0.08847,0.0,1.0,0.362823
73,"{'description': '', 'address1': '50 Beale St. ...","{'price_amount': None, 'price_currency_code': ...",software,,2007,"[{'id': 1442, 'round_code': 'a', 'source_url':...",[],,Roost,15,...,37.789129,-122.402028,"{'type': 'Point', 'coordinates': [-122.4020276...",29192550.0,0,1,0.08847,0.0,1.0,0.362823
74,"{'description': 'HQ', 'address1': '525 Market ...",,web,,2007,"[{'id': 18509, 'round_code': 'a', 'source_url'...",[],,Huddler,52,...,37.789321,-122.401362,"{'type': 'Point', 'coordinates': [-122.4013624...",29192550.0,0,1,0.08847,0.0,1.0,0.362823


In [182]:
# Keep only offices whose score reaches the cut-off, then renumber the rows.
# reset_index() without drop=True preserves the previous labels in a new
# 'index' column.
# NOTE(review): 0.191648 is a hard-coded threshold whose origin is not shown
# in this notebook section -- confirm how it was chosen.
above_cut = companies_with_coord_offices_clean_final_2['ranking'] >= 0.191648
companies_with_coord_offices_clean_final_2 = (
    companies_with_coord_offices_clean_final_2
    .loc[above_cut]
    .reset_index()
)
companies_with_coord_offices_clean_final_2.head()

Unnamed: 0,index,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,...,lat,lng,oficina_principal,money_density,young_companies,design_companies,money_density_standard,young_companies_standard,design_companies_standard,ranking
0,52,"{'description': 'Europe', 'address1': 'Schlesi...",,design,,2008,"[{'id': 22714, 'round_code': 'a', 'source_url'...",[],,99designs,...,52.49862,13.446903,"{'type': 'Point', 'coordinates': [13.4469031, ...",35000000.0,0,1,0.106695,0.0,1.0,0.368898
1,53,"{'description': 'Australia', 'address1': '204 ...",,design,,2008,"[{'id': 22714, 'round_code': 'a', 'source_url'...",[],,99designs,...,-37.802659,144.986855,"{'type': 'Point', 'coordinates': [144.9868546,...",35000000.0,0,1,0.106695,0.0,1.0,0.368898
2,72,"{'description': '', 'address1': '580 Market St...","{'price_amount': None, 'price_currency_code': ...",games_video,,2008,"[{'id': 2113, 'round_code': 'unattributed', 's...",[],,Serious Business,...,37.789321,-122.401362,"{'type': 'Point', 'coordinates': [-122.4013624...",29192550.0,0,1,0.08847,0.0,1.0,0.362823
3,73,"{'description': '', 'address1': '50 Beale St. ...","{'price_amount': None, 'price_currency_code': ...",software,,2007,"[{'id': 1442, 'round_code': 'a', 'source_url':...",[],,Roost,...,37.789129,-122.402028,"{'type': 'Point', 'coordinates': [-122.4020276...",29192550.0,0,1,0.08847,0.0,1.0,0.362823
4,74,"{'description': 'HQ', 'address1': '525 Market ...",,web,,2007,"[{'id': 18509, 'round_code': 'a', 'source_url'...",[],,Huddler,...,37.789321,-122.401362,"{'type': 'Point', 'coordinates': [-122.4013624...",29192550.0,0,1,0.08847,0.0,1.0,0.362823


In [183]:
# Heat map of the offices that passed the ranking cut-off.
mapa = folium.Map(zoom_start=2.5, location=[40, -45])
latlng = companies_with_coord_offices_clean_final_2.loc[:, ['lat', 'lng']].to_numpy()
# add_to() registers the layer on the map, same as mapa.add_child(layer).
plugins.HeatMap(latlng, radius=20).add_to(mapa)
mapa

## Usando API Google para encontrar Starbucks en un radio de 50 metros

In [188]:
# Count the Starbucks places returned by the Google Places Nearby Search API
# within `radio` metres of every office.
starbucks=[]
radio = 50
for lat, lng in zip(companies_with_coord_offices_clean_final_2['lat'],
                    companies_with_coord_offices_clean_final_2['lng']):
    response=req.get(
        'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
        # params lets requests URL-encode every value.
        params={
            'location': '{},{}'.format(str(lat),str(lng)),
            'radius': str(radio),
            'keyword': 'starbucks',
            'key': api_key,
        },
        timeout=10,  # do not hang the notebook on a stalled connection
    )
    response.raise_for_status()
    places=response.json()
    # The API reports key/quota problems in the JSON body; fail loudly instead
    # of silently recording 0 places for that office.
    if places.get('status') not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError('Places API error: {}'.format(places.get('status')))
    # NOTE(review): only the first page of results is counted -- confirm that
    # a capped count is acceptable.
    starbucks.append(len(places['results']))

In [189]:
# Attach the per-office Starbucks count as a new column.
companies_with_coord_offices_clean_final_2['starbucks']=starbucks

In [190]:
# Discard offices with no Starbucks in range and renumber the rows from 0.
has_starbucks = companies_with_coord_offices_clean_final_2['starbucks'] != 0
companies_with_coord_offices_clean_final_2 = (
    companies_with_coord_offices_clean_final_2
    .loc[has_starbucks]
    .reset_index(drop=True)
)

In [191]:
# Number of (rows, columns) left after the Starbucks filter.
companies_with_coord_offices_clean_final_2.shape

(42, 24)

In [192]:
# Count the places of type 'school' matching the keyword 'kinder garden'
# returned by the Google Places Nearby Search API within `radio` metres of
# every office.
kinder_garden=[]
radio = 500
for lat, lng in zip(companies_with_coord_offices_clean_final_2['lat'],
                    companies_with_coord_offices_clean_final_2['lng']):
    response=req.get(
        'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
        # params lets requests URL-encode every value (the keyword has a space).
        params={
            'location': '{},{}'.format(str(lat),str(lng)),
            'radius': str(radio),
            'type': 'school',
            'keyword': 'kinder garden',
            'key': api_key,
        },
        timeout=10,  # do not hang the notebook on a stalled connection
    )
    response.raise_for_status()
    places=response.json()
    # The API reports key/quota problems in the JSON body; fail loudly instead
    # of silently recording 0 places for that office.
    if places.get('status') not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError('Places API error: {}'.format(places.get('status')))
    kinder_garden.append(len(places['results']))

In [193]:
# Count the places of type 'school' matching the keyword 'school' returned by
# the Google Places Nearby Search API within `radio` metres of every office.
schools=[]
radio = 500
for lat, lng in zip(companies_with_coord_offices_clean_final_2['lat'],
                    companies_with_coord_offices_clean_final_2['lng']):
    response=req.get(
        'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
        # params lets requests URL-encode every value.
        params={
            'location': '{},{}'.format(str(lat),str(lng)),
            'radius': str(radio),
            'type': 'school',
            'keyword': 'school',
            'key': api_key,
        },
        timeout=10,  # do not hang the notebook on a stalled connection
    )
    response.raise_for_status()
    places=response.json()
    # The API reports key/quota problems in the JSON body; fail loudly instead
    # of silently recording 0 places for that office.
    if places.get('status') not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError('Places API error: {}'.format(places.get('status')))
    # NOTE(review): only the first page of results is counted, so dense areas
    # all report the same maximum -- confirm that a capped count is acceptable.
    schools.append(len(places['results']))

In [194]:
# Attach the per-office kindergarten count as a new column.
companies_with_coord_offices_clean_final_2['kinder_garden']=kinder_garden

In [195]:
# Attach the per-office school count as a new column.
companies_with_coord_offices_clean_final_2['schools']=schools

In [196]:
# Preview the first rows with the starbucks, kinder_garden and schools counts.
companies_with_coord_offices_clean_final_2.head()

Unnamed: 0,index,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,...,money_density,young_companies,design_companies,money_density_standard,young_companies_standard,design_companies_standard,ranking,starbucks,kinder_garden,schools
0,72,"{'description': '', 'address1': '580 Market St...","{'price_amount': None, 'price_currency_code': ...",games_video,,2008,"[{'id': 2113, 'round_code': 'unattributed', 's...",[],,Serious Business,...,29192550.0,0,1,0.08847,0.0,1.0,0.362823,8,0,20
1,73,"{'description': '', 'address1': '50 Beale St. ...","{'price_amount': None, 'price_currency_code': ...",software,,2007,"[{'id': 1442, 'round_code': 'a', 'source_url':...",[],,Roost,...,29192550.0,0,1,0.08847,0.0,1.0,0.362823,8,0,19
2,74,"{'description': 'HQ', 'address1': '525 Market ...",,web,,2007,"[{'id': 18509, 'round_code': 'a', 'source_url'...",[],,Huddler,...,29192550.0,0,1,0.08847,0.0,1.0,0.362823,8,0,20
3,76,"{'description': 'San Francisco HQ', 'address1'...",,cleantech,,2007,"[{'id': 6290, 'round_code': 'a', 'source_url':...",[],,Sunrun,...,28881520.0,0,1,0.087494,0.0,1.0,0.362498,9,0,16
4,80,"{'description': 'United States (HQ)', 'address...",,design,,2008,"[{'id': 22714, 'round_code': 'a', 'source_url'...",[],,99designs,...,28583720.0,0,1,0.08656,0.0,1.0,0.362187,5,1,20


In [197]:
# Accumulator for the per-office airport counts, filled by the loop in the next cell.
airports=[]

In [198]:
# Count the places of type 'airport' matching the keyword 'airport' returned
# by the Google Places Nearby Search API within `radio` metres of every office.
# The list is (re)created here so that re-running this cell does not append a
# second batch of counts to a list created elsewhere, which would break the
# later column assignment with a length mismatch.
airports=[]
radio = 1500
for lat, lng in zip(companies_with_coord_offices_clean_final_2['lat'],
                    companies_with_coord_offices_clean_final_2['lng']):
    response=req.get(
        'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
        # params lets requests URL-encode every value.
        params={
            'location': '{},{}'.format(str(lat),str(lng)),
            'radius': str(radio),
            'type': 'airport',
            'keyword': 'airport',
            'key': api_key,
        },
        timeout=10,  # do not hang the notebook on a stalled connection
    )
    response.raise_for_status()
    places=response.json()
    # The API reports key/quota problems in the JSON body; fail loudly instead
    # of silently recording 0 places for that office.
    if places.get('status') not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError('Places API error: {}'.format(places.get('status')))
    airports.append(len(places['results']))

In [199]:
# Count the places of type 'night_club' matching the keyword 'night_club'
# returned by the Google Places Nearby Search API within `radio` metres of
# every office.
night_club=[]
radio = 500
for lat, lng in zip(companies_with_coord_offices_clean_final_2['lat'],
                    companies_with_coord_offices_clean_final_2['lng']):
    response=req.get(
        'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
        # params lets requests URL-encode every value.
        params={
            'location': '{},{}'.format(str(lat),str(lng)),
            'radius': str(radio),
            'type': 'night_club',
            'keyword': 'night_club',
            'key': api_key,
        },
        timeout=10,  # do not hang the notebook on a stalled connection
    )
    response.raise_for_status()
    places=response.json()
    # The API reports key/quota problems in the JSON body; fail loudly instead
    # of silently recording 0 places for that office.
    if places.get('status') not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError('Places API error: {}'.format(places.get('status')))
    night_club.append(len(places['results']))

In [200]:
# Attach the airport and night-club counts as new columns, in that order.
for column, counts in (('airports', airports), ('night_club', night_club)):
    companies_with_coord_offices_clean_final_2[column] = counts

In [206]:
# Preview the first rows with the amenity counts.
# NOTE(review): the recorded output already shows 'vegan_restaurants', which is
# only added by a cell further down -- the cells were executed out of order.
companies_with_coord_offices_clean_final_2.head()

Unnamed: 0,index,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,...,money_density_standard,young_companies_standard,design_companies_standard,ranking,starbucks,kinder_garden,schools,airports,night_club,vegan_restaurants
0,72,"{'description': '', 'address1': '580 Market St...","{'price_amount': None, 'price_currency_code': ...",games_video,,2008,"[{'id': 2113, 'round_code': 'unattributed', 's...",[],,Serious Business,...,0.08847,0.0,1.0,0.362823,8,0,20,1,8,4
1,73,"{'description': '', 'address1': '50 Beale St. ...","{'price_amount': None, 'price_currency_code': ...",software,,2007,"[{'id': 1442, 'round_code': 'a', 'source_url':...",[],,Roost,...,0.08847,0.0,1.0,0.362823,8,0,19,1,9,4
2,74,"{'description': 'HQ', 'address1': '525 Market ...",,web,,2007,"[{'id': 18509, 'round_code': 'a', 'source_url'...",[],,Huddler,...,0.08847,0.0,1.0,0.362823,8,0,20,1,8,4
3,76,"{'description': 'San Francisco HQ', 'address1'...",,cleantech,,2007,"[{'id': 6290, 'round_code': 'a', 'source_url':...",[],,Sunrun,...,0.087494,0.0,1.0,0.362498,9,0,16,1,14,2
4,80,"{'description': 'United States (HQ)', 'address...",,design,,2008,"[{'id': 22714, 'round_code': 'a', 'source_url'...",[],,99designs,...,0.08656,0.0,1.0,0.362187,5,1,20,1,10,1


In [202]:
# Count the places of type 'restaurant' matching the keyword 'vegan' returned
# by the Google Places Nearby Search API within `radio` metres of every office.
vegan_restaurants=[]
radio = 100
for lat, lng in zip(companies_with_coord_offices_clean_final_2['lat'],
                    companies_with_coord_offices_clean_final_2['lng']):
    response=req.get(
        'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
        # params lets requests URL-encode every value.
        params={
            'location': '{},{}'.format(str(lat),str(lng)),
            'radius': str(radio),
            'type': 'restaurant',
            'keyword': 'vegan',
            'key': api_key,
        },
        timeout=10,  # do not hang the notebook on a stalled connection
    )
    response.raise_for_status()
    places=response.json()
    # The API reports key/quota problems in the JSON body; fail loudly instead
    # of silently recording 0 places for that office.
    if places.get('status') not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError('Places API error: {}'.format(places.get('status')))
    vegan_restaurants.append(len(places['results']))

In [205]:
# Attach the per-office vegan-restaurant count as a new column.
companies_with_coord_offices_clean_final_2['vegan_restaurants']=vegan_restaurants


Unnamed: 0,index,office,acquisition,category_code,deadpooled_year,founded_year,funding_rounds,investments,ipo,name,...,money_density_standard,young_companies_standard,design_companies_standard,ranking,starbucks,kinder_garden,schools,airports,night_club,vegan_restaurants
0,72,"{'description': '', 'address1': '580 Market St...","{'price_amount': None, 'price_currency_code': ...",games_video,,2008,"[{'id': 2113, 'round_code': 'unattributed', 's...",[],,Serious Business,...,0.08847,0.0,1.0,0.362823,8,0,20,1,8,4
1,73,"{'description': '', 'address1': '50 Beale St. ...","{'price_amount': None, 'price_currency_code': ...",software,,2007,"[{'id': 1442, 'round_code': 'a', 'source_url':...",[],,Roost,...,0.08847,0.0,1.0,0.362823,8,0,19,1,9,4
2,74,"{'description': 'HQ', 'address1': '525 Market ...",,web,,2007,"[{'id': 18509, 'round_code': 'a', 'source_url'...",[],,Huddler,...,0.08847,0.0,1.0,0.362823,8,0,20,1,8,4
3,76,"{'description': 'San Francisco HQ', 'address1'...",,cleantech,,2007,"[{'id': 6290, 'round_code': 'a', 'source_url':...",[],,Sunrun,...,0.087494,0.0,1.0,0.362498,9,0,16,1,14,2
4,80,"{'description': 'United States (HQ)', 'address...",,design,,2008,"[{'id': 22714, 'round_code': 'a', 'source_url'...",[],,99designs,...,0.08656,0.0,1.0,0.362187,5,1,20,1,10,1


In [207]:
# Min-max scale every amenity count with standarize(), one column each.
(
    starbucks_standard,
    kinder_garden_standard,
    schools_standard,
    airports_standard,
    night_club_standard,
    vegan_restaurants_standard,
) = (
    standarize(companies_with_coord_offices_clean_final_2[column])
    for column in (
        'starbucks',
        'kinder_garden',
        'schools',
        'airports',
        'night_club',
        'vegan_restaurants',
    )
)

In [208]:
# Store the scaled amenity indicators as new columns, in the same order.
for column, scaled in (
    ('starbucks_standard', starbucks_standard),
    ('kinder_garden_standard', kinder_garden_standard),
    ('schools_standard', schools_standard),
    ('airports_standard', airports_standard),
    ('night_club_standard', night_club_standard),
    ('vegan_restaurants_standard', vegan_restaurants_standard),
):
    companies_with_coord_offices_clean_final_2[column] = scaled

In [211]:
# List the column names to check that the raw and scaled indicators are present.
companies_with_coord_offices_clean_final_2.columns

Index(['index', 'office', 'acquisition', 'category_code', 'deadpooled_year',
       'founded_year', 'funding_rounds', 'investments', 'ipo', 'name',
       'number_of_employees', 'products', 'total_money_raised_normalized_USD',
       'lat', 'lng', 'oficina_principal', 'money_density', 'young_companies',
       'design_companies', 'money_density_standard',
       'young_companies_standard', 'design_companies_standard', 'ranking',
       'starbucks', 'kinder_garden', 'schools', 'airports', 'night_club',
       'vegan_restaurants', 'starbucks_standard', 'kinder_garden_standard',
       'schools_standard', 'airports_standard', 'night_club_standard',
       'vegan_restaurants_standard'],
      dtype='object')

In [213]:
# Final score: weighted sum of the nine min-max scaled indicators, added
# column by column in the order listed below.
# NOTE(review): six weights are fractions of 87, but kinder_garden and schools
# use 0.15 and night_club uses 1. The weights therefore do not sum to 1 and
# night_club outweighs every other indicator -- confirm the intended values.
indicator_weights = (
    ('money_density_standard', (15/87)),
    ('young_companies_standard', (15/87)),
    ('design_companies_standard', (20/87)),
    ('starbucks_standard', (10/87)),
    ('kinder_garden_standard', (0.15)),
    ('schools_standard', (0.15)),
    ('airports_standard', (20/87)),
    ('night_club_standard', (1)),
    ('vegan_restaurants_standard', (1/87)),
)
weighted_terms = [
    companies_with_coord_offices_clean_final_2[column]*weight
    for column, weight in indicator_weights
]
weighted_total = weighted_terms[0]
for term in weighted_terms[1:]:
    weighted_total = weighted_total + term
new_ranking = weighted_total.tolist()

In [215]:
# Attach the weighted final score as the 'new_ranking' column.
companies_with_coord_offices_clean_final_2['new_ranking']=new_ranking

In [259]:
# Order offices from best to worst final score (ties broken by the raw
# indicators, all descending), renumber the rows from 0 and keep the leaders.
final_sort_keys = ['new_ranking','money_density','young_companies','design_companies']
companies_with_coord_offices_clean_final_2 = (
    companies_with_coord_offices_clean_final_2
    .sort_values(by=final_sort_keys, ascending=False)
    .reset_index(drop=True)
)
top_5 = companies_with_coord_offices_clean_final_2.head(5)
top_1 = companies_with_coord_offices_clean_final_2.head(1)

In [262]:
# Map centred on the top-ranked office, highlighted with a green circle whose
# popup shows the company name, head count and money density.
best_location = top_1[['lat', 'lng']].values
tooltip='Best place'  # not used by the marker below
popup_text = 'Best Place\n\nCompany: {}\n\nNumber of employees: {}\n\nMoney Density: ${}'.format(
    top_1.loc[0, 'name'],
    top_1.loc[0, 'number_of_employees'],
    top_1.loc[0, 'money_density'],
)
mapa = folium.Map(location=best_location, zoom_start=12, tiles='Stamen Terrain')
best_marker = folium.CircleMarker(
    location=best_location,
    radius=10,
    popup=popup_text,
    color='green',
    fill=True,
    fill_color='green',
)
best_marker.add_to(mapa)

mapa