# Location pinning based on given criteria

In [None]:
import requests
import pymongo
import pandas as pd
import math

import dotenv
import json
import os
import requests
from dotenv import load_dotenv

load_dotenv()

import geopandas as gpd
from geopy.distance import distance
from shapely.geometry import Point
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

## Preparing the data

In [None]:
# Open a connection to the local MongoDB server; get_database() with no
# argument selects the database named in the connection URI.

from pymongo import MongoClient
client = MongoClient("mongodb://localhost/companies")
db = client.get_database()

# Fetch every document, projecting only the fields used further down.

wanted_fields = ("name", "category_code", "description", "total_money_raised", "offices")
companies = list(db["companies"].find({}, dict.fromkeys(wanted_fields, 1)))

In [None]:
# Load the queried documents into a DataFrame and preview the first rows.

import pandas as pd
df = pd.DataFrame(data=companies)
df.head()

In [None]:
# Rename the company-level description column.
# NOTE(review): presumably so it does not clash with the "description" column
# that appears once the office records are expanded — confirm.
df = df.rename({"description": "description_company"}, axis="columns")

### In order to obtain coordinates

In [None]:
# Turn each element of the "offices" list into its own row.
df = df.explode(column="offices")

In [None]:
# Expand each office record into columns, then attach those columns
# side by side with the original company columns.
dfOfficeData = df[["offices"]].apply(
    lambda row: row["offices"],
    axis=1,
    result_type="expand",
)
cleanData = pd.concat((df, dfOfficeData), axis="columns")

cleanData.head()

In [None]:
# Discard the columns that are not needed for the rest of the analysis.
unused_columns = ["_id","offices", "address1", "address2", "zip_code", "description"]
cleanData = cleanData.drop(unused_columns, axis=1)
cleanData.head()

In [None]:
# Treat empty strings as missing, then discard rows without a latitude.
# The replacement is assigned back to the column instead of calling
# replace(..., inplace=True) on the selected column: the in-place form acts
# on an intermediate object and is not applied to cleanData when pandas
# copy-on-write is active.
cleanData['latitude'] = cleanData['latitude'].replace('', np.nan)
cleanData = cleanData.dropna(subset=['latitude'])

In [None]:
# Treat empty strings as missing, then discard rows without a longitude.
# The replacement is assigned back to the column instead of calling
# replace(..., inplace=True) on the selected column: the in-place form acts
# on an intermediate object and is not applied to cleanData when pandas
# copy-on-write is active.
cleanData['longitude'] = cleanData['longitude'].replace('', np.nan)
cleanData = cleanData.dropna(subset=['longitude'])

## 1st condition: money raised > 1M

In [None]:
# Make sure the amounts are text so the .str accessor can be used on them.
# The previous `cleanData.astype('str').dtypes` built a converted copy and
# threw it away, so no conversion actually took place. Only this column is
# converted; the coordinate columns must stay numeric.
cleanData['total_money_raised'] = cleanData['total_money_raised'].astype(str)
cleanData['total_money_raised'].value_counts()
cleanData.head()

In [None]:
#Only those companies whose total_money_raised is above 1M

# NOTE(review): assumes the amounts are abbreviated strings carrying a
# magnitude suffix such as "k", "M" or "B" — confirm against the data.
# Matching only "M" would discard companies that raised billions, so "B" is
# accepted as well. na=False makes missing values count as "no match"
# instead of producing an invalid boolean mask.
cleanData = cleanData[cleanData['total_money_raised'].str.contains("[MB]", regex=True, na=False)]
cleanData["total_money_raised"].value_counts()
cleanData.head()
print(len(cleanData))

## 2nd condition: field

In [None]:
# Replace missing descriptions with an empty string so that string
# matching on the column does not have to deal with NaN.
cleanData["description_company"] = cleanData["description_company"].fillna('')
cleanData["description_company"].value_counts()

In [None]:
# Keep the companies whose description mentions any of the keywords
# design, cloud, microsoft or mobile (case-insensitive regex match).

matches_keyword = cleanData['description_company'].str.contains(
    "design|Cloud|Microsoft|Mobile",
    case=False,
    regex=True,
)
cleanData = cleanData.loc[matches_keyword]

cleanData["description_company"].value_counts()
cleanData.head()
print(len(cleanData))

### Format location in the dataframe

In [None]:
import math

def asGeoJSON(lat,lng):
    """Build a GeoJSON Point from a latitude/longitude pair.

    Args:
        lat: Latitude, as a number or anything ``float()`` can parse.
        lng: Longitude, as a number or anything ``float()`` can parse.

    Returns:
        A dict ``{"type": "Point", "coordinates": [lng, lat]}`` (GeoJSON
        orders coordinates longitude first), or ``None`` when either value
        is NaN or cannot be converted to a float.
    """
    try:
        lat = float(lat)
        lng = float(lng)
    except (TypeError, ValueError):
        # Only conversion failures are expected here; anything else should
        # surface instead of being reported as bad data.
        print("Invalid data")
        return None

    if math.isnan(lat) or math.isnan(lng):
        return None

    return {
        "type":"Point",
        "coordinates":[lng,lat]
    }
        

# Build a GeoJSON point per row from its latitude/longitude pair.
coordinate_columns = cleanData[["latitude","longitude"]]
cleanData["location"] = coordinate_columns.apply(
    lambda row: asGeoJSON(row["latitude"], row["longitude"]),
    axis=1,
)

cleanData[["latitude","longitude","location"]].head()

In [None]:
# Mark the GeoJSON column as belonging to the company.
cleanData = cleanData.rename({"location":"location_company"}, axis="columns")
cleanData.head()

## 3rd condition: a Starbucks nearby

In [None]:
# Foursquare API credentials, read from the environment.
# Each value is None when the corresponding variable is not set.

CLIENT_ID = os.environ.get("CLIENT_ID")
token = os.environ.get("CLIENT_SECRET")

In [None]:
def fourSquareStarbucksLat (lat, long, timeout=10):
    """Return the latitude of the first venue Foursquare returns for "Starbucks".

    Sends one request to the Foursquare ``venues/explore`` endpoint centred
    on the given point, with ``query="Starbucks"`` and ``limit=1``.

    Args:
        lat: Latitude of the search centre (string or number).
        long: Longitude of the search centre (string or number).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``lat`` field of the first venue in the response, or ``None``
        when the response contains no venue.

    Raises:
        requests.RequestException: On connection problems, timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
      client_id=CLIENT_ID,
      client_secret=token,
      v='20200210',
      # Formatting instead of "+" so numeric coordinates work as well as strings.
      ll=f"{lat},{long}",
      query="Starbucks",
      limit=1
    )

    # Without a timeout a stalled connection would block the whole run.
    resp = requests.get(url=url, params=params, timeout=timeout)
    # Surface API failures as HTTP errors rather than as an obscure
    # KeyError further down.
    resp.raise_for_status()
    data = resp.json()

    try:
        return data['response']['groups'][0]['items'][0]['venue']['location']['lat']
    except (KeyError, IndexError):
        # No venue in the response for this location.
        return None

In [None]:
# Query Foursquare once per row and store the latitude of the venue found.
cleanData['StarbucksLat'] = cleanData.apply(
    lambda row: fourSquareStarbucksLat(str(row["latitude"]), str(row["longitude"])),
    axis="columns",
)
cleanData.head()

In [None]:
def fourSquareStarbucksLong (lat, long, timeout=10):
    """Return the longitude of the first venue Foursquare returns for "Starbucks".

    Sends one request to the Foursquare ``venues/explore`` endpoint centred
    on the given point, with ``query="Starbucks"`` and ``limit=1``.

    Args:
        lat: Latitude of the search centre (string or number).
        long: Longitude of the search centre (string or number).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``lng`` field of the first venue in the response, or ``None``
        when the response contains no venue.

    Raises:
        requests.RequestException: On connection problems, timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
      client_id=CLIENT_ID,
      client_secret=token,
      v='20200210',
      # Formatting instead of "+" so numeric coordinates work as well as strings.
      ll=f"{lat},{long}",
      query="Starbucks",
      limit=1
    )

    # Without a timeout a stalled connection would block the whole run.
    resp = requests.get(url=url, params=params, timeout=timeout)
    # Surface API failures as HTTP errors rather than as an obscure
    # KeyError further down.
    resp.raise_for_status()
    data = resp.json()

    try:
        return data['response']['groups'][0]['items'][0]['venue']['location']['lng']
    except (KeyError, IndexError):
        # No venue in the response for this location.
        return None

In [None]:
# Query Foursquare once per row and store the longitude of the venue found.
cleanData['StarbucksLong'] = cleanData.apply(
    lambda row: fourSquareStarbucksLong(str(row["latitude"]), str(row["longitude"])),
    axis="columns",
)
cleanData.head()

In [None]:
def fourSquareStarbucks (lat, long, timeout=10):
    """Return a GeoJSON Point for the first venue Foursquare returns for "Starbucks".

    Sends one request to the Foursquare ``venues/explore`` endpoint centred
    on the given point, with ``query="Starbucks"`` and ``limit=1``.

    Args:
        lat: Latitude of the search centre (string or number).
        long: Longitude of the search centre (string or number).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``{"type": "Point", "coordinates": [lng, lat]}`` for the first venue
        in the response, or ``None`` when the response contains no venue.

    Raises:
        requests.RequestException: On connection problems, timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
      client_id=CLIENT_ID,
      client_secret=token,
      v='20200210',
      # Formatting instead of "+" so numeric coordinates work as well as strings.
      ll=f"{lat},{long}",
      query="Starbucks",
      limit=1
    )

    # Without a timeout a stalled connection would block the whole run.
    resp = requests.get(url=url, params=params, timeout=timeout)
    # Surface API failures as HTTP errors rather than as an obscure
    # KeyError further down.
    resp.raise_for_status()
    data = resp.json()

    try:
        location = data['response']['groups'][0]['items'][0]['venue']['location']
        latitud = location['lat']
        longitude = location['lng']
    except (KeyError, IndexError):
        # No venue in the response for this location.
        return None

    # GeoJSON positions are ordered longitude first, then latitude.
    return {
        "type":"Point",
        "coordinates":[float(longitude),float(latitud)]}

In [None]:
# Query Foursquare once per row and store the venue found as a GeoJSON point.
cleanData['Starbucks'] = cleanData.apply(
    lambda row: fourSquareStarbucks(str(row["latitude"]), str(row["longitude"])),
    axis="columns",
)
cleanData.head()

In [None]:
# Give the company coordinate columns explicit names so they are not
# confused with the Starbucks coordinate columns.
coordinate_renames = {"latitude":"lat_comp", "longitude":"long_comp"}
cleanData = cleanData.rename(coordinate_renames, axis="columns")
cleanData.head()

In [None]:
# Create the target folder first: to_csv fails when "output/" is missing.
os.makedirs('output', exist_ok=True)
cleanData.to_csv(r'output/dfstar.csv')

## To pin the coordinates in a map

In [None]:
import geopandas as gpd
import pandas as pd

from cartoframes.viz import Map, Layer
from cartoframes.viz.helpers import size_continuous_layer
from cartoframes.viz.widgets import histogram_widget

import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster

In [None]:
# Reload the table saved earlier and preview it.
data = pd.read_csv(filepath_or_buffer="output/dfstar.csv")
dfstar = pd.DataFrame(data=data)
dfstar.head()

In [None]:
# Company coordinates as a GeoDataFrame.
# .copy() detaches the selection from dfstar so that adding the geometry
# column does not write into a slice of another frame.
coorcomp = dfstar[["lat_comp", "long_comp"]].copy()
# points_from_xy expects x = longitude and y = latitude; passing latitude
# first would swap the axes of every point.
gdfCompanies = gpd.GeoDataFrame(
    coorcomp,
    geometry=gpd.points_from_xy(x=coorcomp.long_comp, y=coorcomp.lat_comp),
    crs="EPSG:4326",  # WGS 84 longitude/latitude
)
gdfCompanies.head()

In [None]:
# Starbucks coordinates as a GeoDataFrame.
# .copy() detaches the selection from dfstar so that adding the geometry
# column does not write into a slice of another frame.
coorstar = dfstar[["StarbucksLat","StarbucksLong"]].copy()
# points_from_xy expects x = longitude and y = latitude; passing latitude
# first would swap the axes of every point.
gdfStarbucks = gpd.GeoDataFrame(
    coorstar,
    geometry=gpd.points_from_xy(x=coorstar.StarbucksLong, y=coorstar.StarbucksLat),
    crs="EPSG:4326",  # WGS 84 longitude/latitude
)
gdfStarbucks.head()

In [None]:
# Save the Starbucks points as GeoJSON and load them back.
gdfStarbucks.to_file('output/starbucks.geojson', driver='GeoJSON')
# The former crs='EPSG:4346' argument was dropped: it looks like a typo for
# EPSG:4326 (WGS 84 longitude/latitude), and the CRS of the file is
# determined when it is read, so no override is needed.
gdfStarbucks = gpd.read_file('output/starbucks.geojson')

In [None]:
# One Layer per dataset. Layer takes a single data source as its first
# argument (the second positional argument is the style), so the two
# GeoDataFrames cannot share one Layer; Map accepts a list of layers.
Map([Layer(gdfCompanies), Layer(gdfStarbucks)])

# To do:

### - Add more venue filters: vegan places, etc.
### - Calculate minimum distance to rank possible places