# location pinning based on given criteria

In [1]:
import requests
import pymongo
import pandas as pd
import math

import dotenv
import json
import os
import requests
from dotenv import load_dotenv

load_dotenv()

import geopandas as gpd
from geopy.distance import distance
from shapely.geometry import Point
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

## Preparing the data

In [2]:
#In order to import the DB into MongoDB:

from pymongo import MongoClient
client = MongoClient("mongodb://localhost/companies")
db = client.get_database()

#To keep just the columns we need:

companies = list(db["companies"].find({}, {"name":1, "category_code":1, "description":1, "total_money_raised":1, "offices":1,}))

In [3]:
#Convert into a dataframe:

import pandas as pd
df = pd.DataFrame(companies)
df.head()

Unnamed: 0,_id,name,category_code,description,total_money_raised,offices
0,52cdef7c4bab8bd675297d8b,AdventNet,enterprise,Server Management Software,$0,"[{'description': 'Headquarters', 'address1': '..."
1,52cdef7c4bab8bd675297d8a,Wetpaint,web,Technology Platform Company,$39.8M,"[{'description': '', 'address1': '710 - 2nd Av..."
2,52cdef7c4bab8bd675297d8c,Zoho,software,Online Business Apps Suite,$0,"[{'description': 'Headquarters', 'address1': '..."
3,52cdef7c4bab8bd675297d8f,Omnidrive,network_hosting,,$800k,"[{'description': '', 'address1': 'Suite 200', ..."
4,52cdef7c4bab8bd675297d92,Flektor,games_video,,$0,"[{'description': None, 'address1': '8536 Natio..."


In [4]:
df=df.rename(columns={"description": "description_company"})

### In order to obtain coordinates

In [5]:
df = df.explode('offices')

In [6]:
dfOfficeData = df[["offices"]].apply(lambda r: r.offices, result_type="expand", axis=1)
cleanData = pd.concat([df,dfOfficeData], axis=1)

cleanData.head()

Unnamed: 0,_id,name,category_code,description_company,total_money_raised,offices,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,52cdef7c4bab8bd675297d8b,AdventNet,enterprise,Server Management Software,$0,"{'description': 'Headquarters', 'address1': '4...",Headquarters,4900 Hopyard Rd.,Suite 310,94588,Pleasanton,CA,USA,37.692934,-121.904945
1,52cdef7c4bab8bd675297d8a,Wetpaint,web,Technology Platform Company,$39.8M,"{'description': '', 'address1': '710 - 2nd Ave...",,710 - 2nd Avenue,Suite 1100,98104,Seattle,WA,USA,47.603122,-122.333253
1,52cdef7c4bab8bd675297d8a,Wetpaint,web,Technology Platform Company,$39.8M,"{'description': '', 'address1': '270 Lafayette...",,270 Lafayette Street,Suite 505,10012,New York,NY,USA,40.723731,-73.996431
2,52cdef7c4bab8bd675297d8c,Zoho,software,Online Business Apps Suite,$0,"{'description': 'Headquarters', 'address1': '4...",Headquarters,4900 Hopyard Rd,Suite 310,94588,Pleasanton,CA,USA,37.692934,-121.904945
3,52cdef7c4bab8bd675297d8f,Omnidrive,network_hosting,,$800k,"{'description': '', 'address1': 'Suite 200', '...",,Suite 200,654 High Street,94301,Palo Alto,CA,ISR,,


In [7]:
cleanData = cleanData.drop(columns=["_id","offices", "address1", "address2", "zip_code", "description"])
cleanData.head()

Unnamed: 0,name,category_code,description_company,total_money_raised,city,state_code,country_code,latitude,longitude
0,AdventNet,enterprise,Server Management Software,$0,Pleasanton,CA,USA,37.692934,-121.904945
1,Wetpaint,web,Technology Platform Company,$39.8M,Seattle,WA,USA,47.603122,-122.333253
1,Wetpaint,web,Technology Platform Company,$39.8M,New York,NY,USA,40.723731,-73.996431
2,Zoho,software,Online Business Apps Suite,$0,Pleasanton,CA,USA,37.692934,-121.904945
3,Omnidrive,network_hosting,,$800k,Palo Alto,CA,ISR,,


In [8]:
cleanData['latitude'].replace('', np.nan, inplace=True)
cleanData.dropna(subset=['latitude'], inplace=True)

In [9]:
cleanData['longitude'].replace('', np.nan, inplace=True)
cleanData.dropna(subset=['longitude'], inplace=True)

## 1st condition: money raised > 1M

In [10]:
cleanData.astype('str').dtypes #Conversion to string to operate with the column
cleanData['total_money_raised'].value_counts()
cleanData.head()

Unnamed: 0,name,category_code,description_company,total_money_raised,city,state_code,country_code,latitude,longitude
0,AdventNet,enterprise,Server Management Software,$0,Pleasanton,CA,USA,37.692934,-121.904945
1,Wetpaint,web,Technology Platform Company,$39.8M,Seattle,WA,USA,47.603122,-122.333253
1,Wetpaint,web,Technology Platform Company,$39.8M,New York,NY,USA,40.723731,-73.996431
2,Zoho,software,Online Business Apps Suite,$0,Pleasanton,CA,USA,37.692934,-121.904945
4,Flektor,games_video,,$0,Culver City,CA,USA,34.025958,-118.379768


In [11]:
#Only those companies whose total_money_raised is above 1M

cleanData = cleanData[cleanData['total_money_raised'].str.contains("M")]
cleanData["total_money_raised"].value_counts()
cleanData.head()
print(len(cleanData))

3364


## 2nd condition: field

In [12]:
cleanData.description_company = cleanData.description_company.fillna('')
cleanData["description_company"].value_counts()

                                            773
Microsoft Business Solutions Consultancy     16
Innovative Software Product Creation         12
Data Archiving Software                      10
Car Buying Marketplace                        8
                                           ... 
Proprietary regenerative cell therapies       1
Solar Module Manufacturing                    1
handheld diagnostic & monitoring devices      1
mobile communications                         1
Family history network                        1
Name: description_company, Length: 2154, dtype: int64

In [13]:
#Only those companies with keywords in their description: design, cloud, microsoft, mobile

cleanData = cleanData[cleanData['description_company'].str.contains("design|Cloud|Microsoft|Mobile", case=False, regex=True)]

cleanData["description_company"].value_counts()
cleanData.head()
print(len(cleanData))

269


### Format location in the dataframe

In [14]:
import math

def asGeoJSON(lat,lng):
    try:
        lat = float(lat)
        lng = float(lng)
        if not math.isnan(lat) and not math.isnan(lng):
            return {
                "type":"Point",
                "coordinates":[lng,lat]
            }
    except Exception:
        print("Invalid data")
        return None
        

cleanData["location"] = cleanData[["latitude","longitude"]].apply(lambda x:asGeoJSON(x.latitude,x.longitude), axis=1)

cleanData[["latitude","longitude","location"]].head()

Unnamed: 0,latitude,longitude,location
15,40.757929,-73.985506,"{'type': 'Point', 'coordinates': [-73.985506, ..."
29,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,..."
33,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,..."
69,42.375392,-71.118487,"{'type': 'Point', 'coordinates': [-71.118487, ..."
69,37.780134,-122.396744,"{'type': 'Point', 'coordinates': [-122.396744,..."


In [15]:
cleanData = cleanData.rename(columns={"location":"location_company"})
cleanData.head()

Unnamed: 0,name,category_code,description_company,total_money_raised,city,state_code,country_code,latitude,longitude,location_company
15,MeetMoi,social,Mobile Dating,$5.58M,New York City,NY,USA,40.757929,-73.985506,"{'type': 'Point', 'coordinates': [-73.985506, ..."
29,Kyte,games_video,Online & Mobile Video Platform,$23.4M,San Francisco,CA,USA,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,..."
33,Jingle Networks,mobile,Voice and Mobile Search,$88.7M,New York,NY,USA,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,..."
69,iSkoot,mobile,Mobile VOIP service,$32.2M,Cambridge,MA,USA,42.375392,-71.118487,"{'type': 'Point', 'coordinates': [-71.118487, ..."
69,iSkoot,mobile,Mobile VOIP service,$32.2M,San Francisco,CA,USA,37.780134,-122.396744,"{'type': 'Point', 'coordinates': [-122.396744,..."


## 3rd condition: near places

In [16]:
#FourSquare API

CLIENT_ID = os.getenv("CLIENT_ID")
token = os.getenv("CLIENT_SECRET")

In [17]:
def fourSquareRestaurants (lat, long, venue):
    
    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
      client_id=CLIENT_ID,
      client_secret=token,
      v='20200210',
      ll=lat+ "," + long,
      query=venue,
      limit=1
    )

    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)

    latitud = data['response']['groups'][0]['items'][0]['venue']['location']['lat']
    longitude = data['response']['groups'][0]['items'][0]['venue']['location']['lng']

    return {
        "type":"Point",
        "coordinates":[float(latitud),float(longitude)]}

In [18]:
print(fourSquareRestaurants('40.4168','3.7038', "Starbucks"))
print(fourSquareRestaurants('40.4168','3.7038', "Burguer King"))

{'type': 'Point', 'coordinates': [39.886202, 4.258711]}
{'type': 'Point', 'coordinates': [40.00121564143177, 3.835936916891427]}


In [19]:
def fourSquareStarbucks (lat, long):
    
    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
      client_id=CLIENT_ID,
      client_secret=token,
      v='20200210',
      ll=lat+ "," + long,
      query="Starbucks",
      limit=1
    )

    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)

    latitud = data['response']['groups'][0]['items'][0]['venue']['location']['lat']
    longitude = data['response']['groups'][0]['items'][0]['venue']['location']['lng']

    return {
        "type":"Point",
        "coordinates":[float(latitud),float(longitude)]}

In [20]:
print(fourSquareStarbucks('40.987','3.7978'))

{'type': 'Point', 'coordinates': [41.69967462810277, 2.84411189644026]}


In [21]:
# Query by category instead of name

def fourSquareCateory (lat, long, venue):
    
    url = 'https://api.foursquare.com/v2/venues/categories'

    params = dict(
      client_id=CLIENT_ID,
      client_secret=token,
      v='20200210',
      ll=lat+ "," + long,
      query="4bf58dd8d48988d1d3941735",
      limit=1
    )

    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
    
    return data

In [22]:
#fourSquareCateory('40.4168','3.7038', "vegano")

In [23]:
#4bf58dd8d48988d1d3941735

## To join the information

In [24]:
#Crear una nueva columna para transformar la latitud y longitud en un type:point, YAS.
#Run the function through every row of the column in the dataframe. YAS.
#create a second column with the coordinates
#print a map with both columns as different layers

In [25]:
#cleanData.insert(3, “location_of_starbucks”, sold_in_bulk)

latitud = cleanData['latitude']
longitude = cleanData['longitude']

def coordsToPoints(latitud, longitude):
    for i in latitud:
        for t in longitude:
            return {
                "type":"Point",
                "coordinates":[float(i),float(t)]}
coordsToPoints(latitud, longitude)

def coordsToPoints():
    for latitude, longitude in cleanData.itertuples(index=False):
        
        return {
                "type":"Point",
                "coordinates":[float(i),float(t)]}
    
coordsToPoints(latitud, longitude)

In [28]:
cleanData.head()

Unnamed: 0,name,category_code,description_company,total_money_raised,city,state_code,country_code,latitude,longitude,location_company
15,MeetMoi,social,Mobile Dating,$5.58M,New York City,NY,USA,40.757929,-73.985506,"{'type': 'Point', 'coordinates': [-73.985506, ..."
29,Kyte,games_video,Online & Mobile Video Platform,$23.4M,San Francisco,CA,USA,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,..."
33,Jingle Networks,mobile,Voice and Mobile Search,$88.7M,New York,NY,USA,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,..."
69,iSkoot,mobile,Mobile VOIP service,$32.2M,Cambridge,MA,USA,42.375392,-71.118487,"{'type': 'Point', 'coordinates': [-71.118487, ..."
69,iSkoot,mobile,Mobile VOIP service,$32.2M,San Francisco,CA,USA,37.780134,-122.396744,"{'type': 'Point', 'coordinates': [-122.396744,..."


In [32]:
cleanData['Starbucks']=cleanData.apply(lambda x: fourSquareStarbucks(str(x.latitude), str(x.longitude)), axis=1)

In [None]:
#cleanData = cleanData.drop(columns=["latitude","longitude"])
#cleanData.head()

In [33]:
cleanData.head()

Unnamed: 0,name,category_code,description_company,total_money_raised,city,state_code,country_code,latitude,longitude,location_company,Starbucks
15,MeetMoi,social,Mobile Dating,$5.58M,New York City,NY,USA,40.757929,-73.985506,"{'type': 'Point', 'coordinates': [-73.985506, ...","{'type': 'Point', 'coordinates': [40.759393, -..."
29,Kyte,games_video,Online & Mobile Video Platform,$23.4M,San Francisco,CA,USA,37.788482,-122.409173,"{'type': 'Point', 'coordinates': [-122.409173,...","{'type': 'Point', 'coordinates': [37.789148154..."
33,Jingle Networks,mobile,Voice and Mobile Search,$88.7M,New York,NY,USA,37.480999,-122.173887,"{'type': 'Point', 'coordinates': [-122.173887,...","{'type': 'Point', 'coordinates': [37.459665187..."
69,iSkoot,mobile,Mobile VOIP service,$32.2M,Cambridge,MA,USA,42.375392,-71.118487,"{'type': 'Point', 'coordinates': [-71.118487, ...","{'type': 'Point', 'coordinates': [42.373383, -..."
69,iSkoot,mobile,Mobile VOIP service,$32.2M,San Francisco,CA,USA,37.780134,-122.396744,"{'type': 'Point', 'coordinates': [-122.396744,...","{'type': 'Point', 'coordinates': [37.778704130..."
