## Librerías que utilizaremos

In [1]:
from pymongo import MongoClient
from pymongo import GEOSPHERE
import re
import geopandas as gpd
from cartoframes.viz import Map, Layer, popup_element
from scipy.spatial import distance
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## Conectamos con MongoDB

In [2]:
# Open a client for the local MongoDB server and take a handle to "ironhack".
# NOTE(review): `db` is rebound to "Taller_Geo" a few cells below and the
# server lists "Ironhack" (capital I) — this handle looks unused; confirm.
client = MongoClient("localhost:27017")
db = client["ironhack"]
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'ironhack')

In [3]:
client.list_database_names()

['Ironhack', 'Taller_Geo', 'admin', 'config', 'local']

## Obtenemos la Base de datos con la que trabajaremos

In [4]:
# Switch to the database that holds the three city collections.
db = client["Taller_Geo"]

In [5]:
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'Taller_Geo')

In [6]:
db.list_collection_names()

['Bucarest', 'Dublin', 'Madrid']

In [7]:
# Handle to the Bucarest collection.
B = db["Bucarest"]

In [8]:
B

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'Taller_Geo'), 'Bucarest')

In [9]:
# Handle to the Madrid collection.
M = db["Madrid"]

In [10]:
M

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'Taller_Geo'), 'Madrid')

In [11]:
# Handle to the Dublin collection.
D = db["Dublin"]

In [12]:
D

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'Taller_Geo'), 'Dublin')

## Creamos los índices para cada colección

In [13]:
# Geospatial index on "location" so $near queries can run on Bucarest.
db["Bucarest"].create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [14]:
# Geospatial index on "location" so $near queries can run on Madrid.
db["Madrid"].create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [15]:
# Geospatial index on "location" so $near queries can run on Dublin.
db["Dublin"].create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [16]:
#db.Dublin.find_one() #comprobamos que las 3 colecciones nos brindan información correctamente

## Utilizamos `$near` para buscar los lugares cercanos a nuestro punto central

In [17]:
# Central reference point for each city.
# NOTE(review): each pair is written as [latitude, longitude], the same order
# the stored documents use in "coordinates". GeoJSON specifies
# [longitude, latitude], so geo queries over this data may measure distances
# incorrectly — confirm, and fix data and queries together if so.
madrid_coord = [40.42955,-3.6793]
dublin_coord = [53.34919,-6.2606] 
bucarest_coord = [44.42724,26.09208]

In [18]:
# Maximum search radius, passed as $maxDistance in the $near queries.
metros = 5000

In [19]:
def type_point(lista):
    """Wrap a coordinate pair in a GeoJSON-style Point dictionary.

    Args:
        lista: The coordinate pair; stored as-is (not copied) under
            "coordinates".

    Returns:
        A dict with "type" set to "Point" and "coordinates" set to ``lista``.
    """
    punto = dict(type="Point", coordinates=lista)
    return punto

In [20]:
# Wrap each city's centre in a Point dict for use in the $near queries.
coord_tp_M, coord_tp_D, coord_tp_B = (
    type_point(centro)
    for centro in (madrid_coord, dublin_coord, bucarest_coord)
)

In [21]:
coord_tp_M 

{'type': 'Point', 'coordinates': [40.42955, -3.6793]}

In [22]:
# Filter: documents whose "location" is within `metros` of the Madrid centre.
query_M = {
    "location": {
        "$near": {
            "$geometry": coord_tp_M,
            "$maxDistance": metros,
        }
    }
}

In [23]:
query_M

{'location': {'$near': {'$geometry': {'type': 'Point',
    'coordinates': [40.42955, -3.6793]},
   '$maxDistance': 5000}}}

In [24]:
# Run the proximity query on the Madrid collection and materialise the
# cursor into a list of documents.
resultado_M = list(M.find(query_M))
#resultado_M

In [25]:
# Filter: documents whose "location" is within `metros` of the Dublin centre.
query_D = {
    "location": {
        "$near": {
            "$geometry": coord_tp_D,
            "$maxDistance": metros,
        }
    }
}

In [26]:
query_D

{'location': {'$near': {'$geometry': {'type': 'Point',
    'coordinates': [53.34919, -6.2606]},
   '$maxDistance': 5000}}}

In [27]:
# Run the proximity query on the Dublin collection and materialise the
# cursor into a list of documents.
resultado_D = list(D.find(query_D))
#resultado_D

In [28]:
coord_tp_B

{'type': 'Point', 'coordinates': [44.42724, 26.09208]}

In [29]:
# Filter: documents whose "location" is within `metros` of the Bucarest centre.
query_B = {
    "location": {
        "$near": {
            "$geometry": coord_tp_B,
            "$maxDistance": metros,
        }
    }
}

In [30]:
# Run the proximity query on the Bucarest collection and materialise the
# cursor into a list of documents.
resultado_B = list(B.find(query_B))
#resultado_B

In [31]:
resultado_B[0]

{'_id': ObjectId('618fac476bd838c298f8a588'),
 'nombre': 'bella dog',
 'latitud': 44.429934133909114,
 'longitud': 26.09415232523072,
 'location': {'type': 'Point',
  'coordinates': [44.429934133909114, 26.09415232523072]}}

## Generamos un DataFrame para cada ciudad y hacemos una primera exploración visual

In [32]:
# Load the Madrid query results into a DataFrame, one row per document.
df_M = pd.DataFrame(resultado_M)
#df_M.head()

In [33]:
# Build point geometries (x = longitud, y = latitud) and wrap the Madrid
# frame as a GeoDataFrame for mapping.
gdf_M = gpd.GeoDataFrame(
    df_M,
    geometry=gpd.points_from_xy(df_M["longitud"], df_M["latitud"]),
)
#gdf_M

In [34]:
#Map(Layer(gdf_M, "color:purple", popup_hover=[popup_element("nombre", "Madrid")]))

In [35]:
# Load the Dublin query results into a DataFrame, one row per document.
df_D = pd.DataFrame(resultado_D)
#df_D.head(2)

In [36]:
# Build point geometries (x = longitud, y = latitud) and wrap the Dublin
# frame as a GeoDataFrame for mapping.
gdf_D = gpd.GeoDataFrame(
    df_D,
    geometry=gpd.points_from_xy(df_D["longitud"], df_D["latitud"]),
)
#gdf_D

In [37]:
#Map(Layer(gdf_D, "color:purple", popup_hover=[popup_element("nombre", "Madrid")]))

In [38]:
# Load the Bucarest query results into a DataFrame, one row per document.
df_B = pd.DataFrame(resultado_B)
#df_B.head(2)

In [39]:
# Build point geometries (x = longitud, y = latitud) and wrap the Bucarest
# frame as a GeoDataFrame for mapping.
gdf_B = gpd.GeoDataFrame(
    df_B,
    geometry=gpd.points_from_xy(df_B["longitud"], df_B["latitud"]),
)
#gdf_B

In [40]:
#Map(Layer(gdf_B, "color:purple", popup_hover=[popup_element("nombre", "Madrid")]))

## Agrupamos los nombres de los requisitos según categorías

In [41]:
def limpiar(x):
    """Map a venue name to one of the requirement categories.

    The value is converted with ``str`` and checked against each category
    keyword in a fixed order; the first keyword found anywhere in the name
    wins, so a name containing several keywords gets the earliest category
    (e.g. "Night School" is "Disco"). Matching ignores letter case entirely,
    so mixed-case spellings such as "AirPort" or "StarBucks" are recognised
    too.

    Args:
        x: The venue name (any value; it is stringified before matching).

    Returns:
        One of "Disco", "Airport", "School", "Dog grooming", "Starbucks",
        or 'Others' when no keyword is present.
    """
    # Ordered (category, keyword) pairs; order decides ties between keywords.
    patrones = (
        ("Disco", "night"),
        ("Airport", "airport"),
        ("School", "school"),
        ("Dog grooming", "dog"),
        ("Starbucks", "starbucks"),
    )
    nombre = str(x)
    for categoria, palabra in patrones:
        # Stop at the first hit instead of evaluating every pattern up front.
        if re.search(palabra, nombre, re.IGNORECASE):
            return categoria
    return 'Others'

In [42]:
# Label each Madrid venue with its requirement category.
df_M["categoria"] = df_M["nombre"].map(limpiar)
#df_M.head(2)

In [43]:
# Label each Dublin venue with its requirement category.
df_D["categoria"] = df_D["nombre"].map(limpiar)
#df_D.head(2)

In [44]:
# Label each Bucarest venue with its requirement category.
df_B["categoria"] = df_B["nombre"].map(limpiar)
#df_B.head(2)

## Calculamos la distancia de cada punto con el punto central

In [45]:
# Pull the coordinate pair out of every Madrid document's "location" field.
# Iterating the column directly avoids building a Series per row, which is
# what iterrows() does.
mis_puntos = [ubicacion["coordinates"] for ubicacion in df_M["location"]]

In [46]:
# Store the extracted coordinate pairs as a column (same row order as df_M).
df_M["latlong"] = mis_puntos

In [47]:
#df_M.head()

In [48]:
def distancia_Madrid(coordin):
    """Scaled straight-line separation between ``coordin`` and the Madrid centre.

    Computes the Euclidean distance between the centre's coordinate pair and
    ``coordin`` and multiplies it by 1000.

    NOTE(review): both pairs appear to hold degrees, so the result is in
    thousandths of a degree rather than metres, and a degree of longitude
    covers a different ground distance in each city — confirm before
    comparing cities on this value.
    """
    centro = coord_tp_M['coordinates']
    separacion = distance.euclidean(centro, coordin)
    return separacion * 1000

In [49]:
# Scaled distance from every Madrid venue to the city centre.
df_M["Distancia"] = df_M["latlong"].map(distancia_Madrid)
#df_M["Distancia"]

In [50]:
# Tag every row with its city so rows stay identifiable once the three
# frames are combined.
df_M["ciudad"] = 'Madrid'
df_M.head(2)

Unnamed: 0,_id,nombre,latitud,longitud,location,geometry,categoria,latlong,Distancia,ciudad
0,618fad036bd838c298f8a6c1,MSMK Madrid School of Marketing,40.429084,-3.679913,"{'type': 'Point', 'coordinates': [40.429084011...",POINT (-3.67991 40.42908),School,"[40.42908401194077, -3.6799125837274125]",0.769678,Madrid
1,618fad036bd838c298f8a6d2,Deusto Business School,40.430948,-3.681059,"{'type': 'Point', 'coordinates': [40.430948485...",POINT (-3.68106 40.43095),School,"[40.43094848530408, -3.681059215078213]",2.247354,Madrid


In [51]:
# Pull the coordinate pair out of every Dublin document's "location" field.
# Iterating the column directly avoids building a Series per row, which is
# what iterrows() does.
mis_puntos = [ubicacion["coordinates"] for ubicacion in df_D["location"]]

In [52]:
# Store the extracted coordinate pairs as a column (same row order as df_D).
df_D["latlong"] = mis_puntos

In [53]:
#df_D.head()

In [54]:
def distancia_Dublin(coordin):
    """Scaled straight-line separation between ``coordin`` and the Dublin centre.

    Computes the Euclidean distance between the centre's coordinate pair and
    ``coordin`` and multiplies it by 1000.

    NOTE(review): both pairs appear to hold degrees, so the result is in
    thousandths of a degree rather than metres, and a degree of longitude
    covers a different ground distance in each city — confirm before
    comparing cities on this value.
    """
    centro = coord_tp_D['coordinates']
    separacion = distance.euclidean(centro, coordin)
    return separacion * 1000

In [55]:
# Scaled distance from every Dublin venue to the city centre.
df_D["Distancia"] = df_D["latlong"].map(distancia_Dublin)
#df_D["Distancia"]

In [56]:
# Tag every row with its city so rows stay identifiable once the three
# frames are combined.
df_D["ciudad"] = 'Dublin'
df_D.head(2)

Unnamed: 0,_id,nombre,latitud,longitud,location,geometry,categoria,latlong,Distancia,ciudad
0,618faca16bd838c298f8a5ee,Toronto Airport Limo,53.349732,-6.260254,"{'type': 'Point', 'coordinates': [53.349731569...",POINT (-6.26025 53.34973),Airport,"[53.34973156905316, -6.260254383087158]",0.642455,Dublin
1,618facda6bd838c298f8a668,Starbucks,53.349679,-6.259835,"{'type': 'Point', 'coordinates': [53.349679131...",POINT (-6.25983 53.34968),Starbucks,"[53.34967913192385, -6.259834819120199]",0.908158,Dublin


In [57]:
# Pull the coordinate pair out of every Bucarest document's "location" field.
# Iterating the column directly avoids building a Series per row, which is
# what iterrows() does.
mis_puntos = [ubicacion["coordinates"] for ubicacion in df_B["location"]]

In [58]:
# Store the extracted coordinate pairs as a column (same row order as df_B).
df_B["latlong"] = mis_puntos

In [59]:
#df_B.head()

In [60]:
def distancia_Bucarest(coordin):
    """Scaled straight-line separation between ``coordin`` and the Bucarest centre.

    Computes the Euclidean distance between the centre's coordinate pair and
    ``coordin`` and multiplies it by 1000.

    NOTE(review): both pairs appear to hold degrees, so the result is in
    thousandths of a degree rather than metres, and a degree of longitude
    covers a different ground distance in each city — confirm before
    comparing cities on this value.
    """
    centro = coord_tp_B['coordinates']
    separacion = distance.euclidean(centro, coordin)
    return separacion * 1000

In [61]:
# Scaled distance from every Bucarest venue to the city centre.
df_B["Distancia"] = df_B["latlong"].map(distancia_Bucarest)
#df_B["Distancia"]

In [62]:
#df_B.head()

In [63]:
# Tag every row with its city so rows stay identifiable once the three
# frames are combined.
df_B["ciudad"] = 'Bucarest'
df_B.head(2)

Unnamed: 0,_id,nombre,latitud,longitud,location,geometry,categoria,latlong,Distancia,ciudad
0,618fac476bd838c298f8a588,bella dog,44.429934,26.094152,"{'type': 'Point', 'coordinates': [44.429934133...",POINT (26.09415 44.42993),Dog grooming,"[44.429934133909114, 26.09415232523072]",3.398954,Bucarest
1,618fac7f6bd838c298f8a5b2,ICEP Hotel School,44.42088,26.09475,"{'type': 'Point', 'coordinates': [44.42088, 26...",POINT (26.09475 44.42088),School,"[44.42088, 26.09475]",6.897717,Bucarest


## Ahora intentemos hacer un ranking

In [79]:
# Stack the three city frames in the order Madrid, Bucarest, Dublin, keeping
# each frame's own row labels. DataFrame.append was deprecated in pandas 1.4
# and removed in 2.0; pd.concat is the supported equivalent.
data = pd.concat([df_M, df_B, df_D])

In [80]:
data.shape

(283, 10)

In [89]:
def puntuacion(x):
    """Return the weight assigned to a category label.

    'School' scores 5, 'Starbucks' 4, 'Disco' 3 and 'Airport' 2; every other
    value (including 'Dog grooming' and 'Others') scores 1.
    """
    pesos = {
        'Airport': 2,
        'School': 5,
        'Starbucks': 4,
        'Disco': 3,
    }
    return pesos.get(x, 1)

In [90]:
# Weight of each row's category.
data["Ranking"] = data["categoria"].map(puntuacion)

In [91]:
data.head(2)

Unnamed: 0,_id,nombre,latitud,longitud,location,geometry,categoria,latlong,Distancia,ciudad,ranking,Ranking
0,618fad036bd838c298f8a6c1,MSMK Madrid School of Marketing,40.429084,-3.679913,"{'type': 'Point', 'coordinates': [40.429084011...",POINT (-3.67991 40.42908),School,"[40.42908401194077, -3.6799125837274125]",0.769678,Madrid,5,5
1,618fad036bd838c298f8a6d2,Deusto Business School,40.430948,-3.681059,"{'type': 'Point', 'coordinates': [40.430948485...",POINT (-3.68106 40.43095),School,"[40.43094848530408, -3.681059215078213]",2.247354,Madrid,5,5


In [92]:
# Keep only the venues that matched one of the requirement categories.
data_final = data.loc[data["categoria"].ne('Others')]

In [93]:
# Average the numeric columns for each (categoria, ciudad) pair.
# numeric_only=True is needed on pandas >= 2.0, where mean() no longer drops
# the object columns (ids, names, location dicts) silently and raises instead.
total_data = data_final.groupby(["categoria", "ciudad"]).mean(numeric_only=True)

In [97]:
total_data

Unnamed: 0_level_0,Unnamed: 1_level_0,latitud,longitud,Distancia,Ranking
categoria,ciudad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Airport,Bucarest,44.437259,26.091341,17.402089,2.0
Airport,Dublin,53.350183,-6.26168,16.862564,2.0
Airport,Madrid,40.430468,-3.695076,25.885293,2.0
Disco,Bucarest,44.438146,26.096557,21.067706,3.0
Disco,Dublin,53.342643,-6.262321,8.816538,3.0
Disco,Madrid,40.446783,-3.681514,22.529341,3.0
Dog grooming,Bucarest,44.43384,26.096344,25.599482,1.0
Dog grooming,Dublin,53.346605,-6.259738,20.385157,1.0
Dog grooming,Madrid,40.430027,-3.691807,24.703066,1.0
School,Bucarest,44.437523,26.098311,21.029913,5.0


In [98]:
total_data.reset_index(drop = False)

Unnamed: 0,categoria,ciudad,latitud,longitud,Distancia,Ranking
0,Airport,Bucarest,44.437259,26.091341,17.402089,2.0
1,Airport,Dublin,53.350183,-6.26168,16.862564,2.0
2,Airport,Madrid,40.430468,-3.695076,25.885293,2.0
3,Disco,Bucarest,44.438146,26.096557,21.067706,3.0
4,Disco,Dublin,53.342643,-6.262321,8.816538,3.0
5,Disco,Madrid,40.446783,-3.681514,22.529341,3.0
6,Dog grooming,Bucarest,44.43384,26.096344,25.599482,1.0
7,Dog grooming,Dublin,53.346605,-6.259738,20.385157,1.0
8,Dog grooming,Madrid,40.430027,-3.691807,24.703066,1.0
9,School,Bucarest,44.437523,26.098311,21.029913,5.0


In [107]:
# Weighted score per (categoria, ciudad): mean distance times category weight.
total_data['Totales'] = total_data['Distancia'] * total_data['Ranking']
# Flatten the group keys into columns only while they are still index levels,
# so re-running this cell does not add a spurious "index" column.
if 'ciudad' in total_data.index.names:
    total_data = total_data.reset_index()
total_data.head()

Unnamed: 0,categoria,ciudad,latitud,longitud,Distancia,Ranking,Totales
0,Airport,Bucarest,44.437259,26.091341,17.402089,2.0,34.804177
1,Airport,Dublin,53.350183,-6.26168,16.862564,2.0,33.725128
2,Airport,Madrid,40.430468,-3.695076,25.885293,2.0,51.770587
3,Disco,Bucarest,44.438146,26.096557,21.067706,3.0,63.203118
4,Disco,Dublin,53.342643,-6.262321,8.816538,3.0,26.449615


In [108]:
# Add up the weighted scores of all categories for each city.
# NOTE(review): a city with no venues in some category simply has no row for
# it, so that category adds nothing to the city's sum — confirm this is the
# intended treatment before ranking cities on the totals.
total_data_final = total_data.groupby('ciudad')["Totales"].sum()

In [109]:
total_data_final

ciudad
Bucarest    340.355169
Dublin      159.879332
Madrid      292.242400
Name: Totales, dtype: float64

In [111]:
# Save the per-city totals; the path is relative to the working directory.
total_data_final.to_csv('../Data/Ciudades_ready.csv')