## Librerías que utilizaremos

In [343]:
from pymongo import MongoClient
from pymongo import GEOSPHERE
import re
import geopandas as gpd
from cartoframes.viz import Map, Layer, popup_element
from scipy.spatial import distance
import pandas as pd

## Conectamos con MongoDB

In [2]:
# Open a client against the local MongoDB server.
client = MongoClient("localhost:27017")
# NOTE(review): the database list printed a few cells below shows 'Ironhack'
# (capital I), not 'ironhack', and `db` is rebound to "Taller_Geo" before any
# query runs, so this handle appears to be unused — confirm and consider removing.
db = client.get_database("ironhack")
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'ironhack')

In [3]:
client.list_database_names()

['Ironhack', 'Taller_Geo', 'admin', 'config', 'local']

## Obtenemos la Base de datos con la que trabajaremos

In [4]:
# Switch `db` to the "Taller_Geo" database, the one every later cell works with.
db = client.get_database("Taller_Geo")

In [5]:
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'Taller_Geo')

In [6]:
db.list_collection_names()

['Bucarest', 'Dublin', 'Madrid']

In [7]:
# Handle to the "Bucarest" collection.
B = db.get_collection("Bucarest")

In [8]:
B

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'Taller_Geo'), 'Bucarest')

In [9]:
# Handle to the "Madrid" collection.
M = db.get_collection("Madrid")

In [10]:
M

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'Taller_Geo'), 'Madrid')

In [11]:
# Handle to the "Dublin" collection.
D = db.get_collection("Dublin")

In [12]:
D

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'Taller_Geo'), 'Dublin')

## Creamos los índices para cada colección

In [14]:
# Create a 2dsphere index on the `location` field of the Bucarest collection;
# the `$near` query issued later against this collection relies on it.
db.Bucarest.create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [15]:
# Create a 2dsphere index on the `location` field of the Madrid collection;
# the `$near` query issued later against this collection relies on it.
db.Madrid.create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [16]:
# Create a 2dsphere index on the `location` field of the Dublin collection;
# the `$near` query issued later against this collection relies on it.
db.Dublin.create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [20]:
#db.Dublin.find_one() #comprobamos que las 3 colecciones nos brindan información correctamente

## Utilizamos `$near` para ver la cercanía de los datos a nuestro punto central

In [169]:
# Central reference point for each city.
# NOTE(review): the pairs are written as [latitude, longitude], the same order the
# stored documents use in `location.coordinates`. GeoJSON (and MongoDB's 2dsphere
# queries) define a position as [longitude, latitude], so the radius applied by
# `$near` may be distorted — confirm and, if so, swap BOTH the stored data and
# these points together (swapping only one side would break the queries).
madrid_coord = [40.42955,-3.6793]
dublin_coord = [53.34919,-6.2606] 
bucarest_coord = [44.42724,26.09208]

In [170]:
# Maximum search radius passed as `$maxDistance` to the `$near` queries
# (MongoDB interprets it in metres for GeoJSON points).
metros = 5000

In [171]:
def type_point(lista):
    """Wrap a coordinate pair in a GeoJSON-style ``Point`` mapping.

    Args:
        lista: Coordinate sequence; it is stored as-is (neither copied nor
            reordered).

    Returns:
        dict with the keys ``"type"`` (always ``"Point"``) and
        ``"coordinates"`` (the very object passed in).
    """
    punto = dict(type="Point", coordinates=lista)
    return punto

In [172]:
# Build the Point mapping for each city's central coordinates (Madrid, Dublin, Bucarest).
coord_tp_M, coord_tp_D, coord_tp_B = (
    type_point(centro) for centro in (madrid_coord, dublin_coord, bucarest_coord)
)

In [173]:
coord_tp_M 

{'type': 'Point', 'coordinates': [40.42955, -3.6793]}

In [174]:
# Filter for documents whose `location` lies within `metros` of the Madrid centre.
query_M = {"location": {"$near": {"$geometry": coord_tp_M, "$maxDistance": metros}}}

In [175]:
query_M

{'location': {'$near': {'$geometry': {'type': 'Point',
    'coordinates': [40.42955, -3.6793]},
   '$maxDistance': 5000}}}

In [176]:
# Run the Madrid query and materialise the cursor into a list of documents.
resultado_M = list(M.find(query_M))
#resultado_M

In [177]:
# Filter for documents whose `location` lies within `metros` of the Dublin centre.
query_D = {"location": {"$near": {"$geometry": coord_tp_D, "$maxDistance": metros}}}

In [178]:
query_D

{'location': {'$near': {'$geometry': {'type': 'Point',
    'coordinates': [53.34919, -6.2606]},
   '$maxDistance': 5000}}}

In [179]:
# Run the Dublin query and materialise the cursor into a list of documents.
resultado_D = list(D.find(query_D))
#resultado_D

In [180]:
coord_tp_B

{'type': 'Point', 'coordinates': [44.42724, 26.09208]}

In [181]:
# Filter for documents whose `location` lies within `metros` of the Bucarest centre.
query_B = {"location": {"$near": {"$geometry": coord_tp_B, "$maxDistance": metros}}}

In [275]:
# Run the Bucarest query and materialise the cursor into a list of documents.
resultado_B = list(B.find(query_B))
#resultado_B

In [183]:
resultado_B[0]

{'_id': ObjectId('618fac476bd838c298f8a588'),
 'nombre': 'bella dog',
 'latitud': 44.429934133909114,
 'longitud': 26.09415232523072,
 'location': {'type': 'Point',
  'coordinates': [44.429934133909114, 26.09415232523072]}}

## Generamos un DataFrame para cada ciudad y hacemos una primera exploración visual

In [277]:
# Load the Madrid query results (one document per row) into a DataFrame.
df_M = pd.DataFrame(resultado_M)
#df_M.head()

In [225]:
# GeoDataFrame for mapping Madrid; points_from_xy takes x=longitud, y=latitud.
gdf_M = gpd.GeoDataFrame(df_M, geometry=gpd.points_from_xy(df_M.longitud, df_M.latitud))
#gdf_M

In [283]:
#Map(Layer(gdf_M, "color:purple", popup_hover=[popup_element("nombre", "Madrid")]))

In [284]:
# Load the Dublin query results (one document per row) into a DataFrame.
df_D = pd.DataFrame(resultado_D)
#df_D.head(2)

In [224]:
# GeoDataFrame for mapping Dublin; points_from_xy takes x=longitud, y=latitud.
gdf_D = gpd.GeoDataFrame(df_D, geometry=gpd.points_from_xy(df_D.longitud, df_D.latitud))
#gdf_D

In [285]:
#Map(Layer(gdf_D, "color:purple", popup_hover=[popup_element("nombre", "Madrid")]))

In [286]:
# Load the Bucarest query results (one document per row) into a DataFrame.
df_B = pd.DataFrame(resultado_B)
#df_B.head(2)

In [223]:
# GeoDataFrame for mapping Bucarest; points_from_xy takes x=longitud, y=latitud.
gdf_B = gpd.GeoDataFrame(df_B, geometry=gpd.points_from_xy(df_B.longitud, df_B.latitud))
#gdf_B

In [287]:
#Map(Layer(gdf_B, "color:purple", popup_hover=[popup_element("nombre", "Madrid")]))

## Agrupamos los nombres de los requisitos según categorías

In [240]:
def limpiar(x):
    """Map a venue name to one of the notebook's requirement categories.

    The name is converted with ``str`` and tested against each category's
    pattern in a fixed order; the first category whose pattern matches wins,
    so a name matching several patterns gets the earliest category.

    Args:
        x: Venue name (any object; ``str(x)`` is what gets searched).

    Returns:
        ``"Disco"``, ``"Airport"``, ``"School"``, ``"Dog grooming"`` or
        ``"Starbucks"``; ``'Others'`` when no pattern matches.
    """
    texto = str(x)
    # (category, pattern) pairs, listed in priority order.
    reglas = (
        ("Disco", ".*[Nn](ight|IGHT).*"),
        ("Airport", ".*[Aa](irport|IRPORT).*"),
        ("School", ".*[Ss](chool|CHOOL).*"),
        ("Dog grooming", ".*[Dd](og|OG).*"),
        ("Starbucks", ".*[Ss](tarbucks|TARBUCKS).*"),
    )
    for categoria, patron in reglas:
        if re.search(patron, texto) is not None:
            return categoria
    return 'Others'

In [290]:
# Tag every Madrid venue with the category that limpiar derives from its name.
df_M["categoria"] = df_M["nombre"].apply(limpiar)
#df_M.head(2)

In [293]:
# Tag every Dublin venue with the category that limpiar derives from its name.
df_D["categoria"] = df_D["nombre"].apply(limpiar)
#df_D.head(2)

In [295]:
# Tag every Bucarest venue with the category that limpiar derives from its name.
df_B["categoria"] = df_B["nombre"].apply(limpiar)
#df_B.head(2)

## Calculamos la distancia de cada punto con el punto central

In [297]:
# Collect each Madrid venue's stored coordinate pair from its `location` sub-document.
# Iterating the column directly avoids building a Series per row as iterrows() does.
mis_puntos = [ubicacion["coordinates"] for ubicacion in df_M["location"]]

In [298]:
# Keep the extracted coordinate pairs as a column, one pair per Madrid venue.
df_M["latlong"] = mis_puntos

In [381]:
#df_M.head()

In [311]:
def distancia_Madrid(coordin):
    """Return the scaled straight-line separation between a point and Madrid's centre.

    The value is the plain Euclidean distance between the two raw coordinate
    pairs, multiplied by 1000. NOTE(review): because the inputs are
    latitude/longitude values, the result is in thousandths of a degree, not
    in metres or kilometres — confirm this is the intended measure.

    Args:
        coordin: Coordinate pair in the same order as ``coord_tp_M['coordinates']``.

    Returns:
        Euclidean distance to the Madrid central point, times 1000.
    """
    centro = coord_tp_M['coordinates']
    separacion = distance.euclidean(centro, coordin)
    return separacion * 1000

In [312]:
# Score every Madrid venue by its separation from the city's central point.
df_M["Distancia"] = df_M["latlong"].apply(distancia_Madrid)
#df_M["Distancia"]

In [339]:
# Label every row with its city so rows stay identifiable once the frames are combined.
df_M["ciudad"] = 'Madrid'
df_M.head(2)

Unnamed: 0,_id,nombre,latitud,longitud,location,categoria,latlong,Distancia,ciudad
0,618fad036bd838c298f8a6c1,MSMK Madrid School of Marketing,40.429084,-3.679913,"{'type': 'Point', 'coordinates': [40.429084011...",School,"[40.42908401194077, -3.6799125837274125]",0.769678,Madrid
1,618fad036bd838c298f8a6d2,Deusto Business School,40.430948,-3.681059,"{'type': 'Point', 'coordinates': [40.430948485...",School,"[40.43094848530408, -3.681059215078213]",2.247354,Madrid


In [314]:
# Collect each Dublin venue's stored coordinate pair from its `location` sub-document.
# Iterating the column directly avoids building a Series per row as iterrows() does.
mis_puntos = [ubicacion["coordinates"] for ubicacion in df_D["location"]]

In [315]:
# Keep the extracted coordinate pairs as a column, one pair per Dublin venue.
df_D["latlong"] = mis_puntos

In [316]:
#df_D.head()

In [317]:
def distancia_Dublin(coordin):
    """Return the scaled straight-line separation between a point and Dublin's centre.

    The value is the plain Euclidean distance between the two raw coordinate
    pairs, multiplied by 1000. NOTE(review): because the inputs are
    latitude/longitude values, the result is in thousandths of a degree, not
    in metres or kilometres — confirm this is the intended measure.

    Args:
        coordin: Coordinate pair in the same order as ``coord_tp_D['coordinates']``.

    Returns:
        Euclidean distance to the Dublin central point, times 1000.
    """
    centro = coord_tp_D['coordinates']
    separacion = distance.euclidean(centro, coordin)
    return separacion * 1000

In [319]:
# Score every Dublin venue by its separation from the city's central point.
df_D["Distancia"] = df_D["latlong"].apply(distancia_Dublin)
#df_D["Distancia"]

In [338]:
# Label every row with its city so rows stay identifiable once the frames are combined.
df_D["ciudad"] = 'Dublin'
df_D.head(2)

Unnamed: 0,_id,nombre,latitud,longitud,location,categoria,latlong,Distancia,ciudad
0,618faca16bd838c298f8a5ee,Toronto Airport Limo,53.349732,-6.260254,"{'type': 'Point', 'coordinates': [53.349731569...",Airport,"[53.34973156905316, -6.260254383087158]",0.642455,Dublin
1,618facda6bd838c298f8a668,Starbucks,53.349679,-6.259835,"{'type': 'Point', 'coordinates': [53.349679131...",Starbucks,"[53.34967913192385, -6.259834819120199]",0.908158,Dublin


In [322]:
# Collect each Bucarest venue's stored coordinate pair from its `location` sub-document.
# Iterating the column directly avoids building a Series per row as iterrows() does.
mis_puntos = [ubicacion["coordinates"] for ubicacion in df_B["location"]]

In [323]:
# Keep the extracted coordinate pairs as a column, one pair per Bucarest venue.
df_B["latlong"] = mis_puntos

In [324]:
#df_B.head()

In [325]:
def distancia_Bucarest(coordin):
    """Return the scaled straight-line separation between a point and Bucarest's centre.

    The value is the plain Euclidean distance between the two raw coordinate
    pairs, multiplied by 1000. NOTE(review): because the inputs are
    latitude/longitude values, the result is in thousandths of a degree, not
    in metres or kilometres — confirm this is the intended measure.

    Args:
        coordin: Coordinate pair in the same order as ``coord_tp_B['coordinates']``.

    Returns:
        Euclidean distance to the Bucarest central point, times 1000.
    """
    centro = coord_tp_B['coordinates']
    separacion = distance.euclidean(centro, coordin)
    return separacion * 1000

In [326]:
# Score every Bucarest venue by its separation from the city's central point.
df_B["Distancia"] = df_B["latlong"].apply(distancia_Bucarest)
#df_B["Distancia"]

In [328]:
#df_B.head()

In [369]:
# Label every row with its city so rows stay identifiable once the frames are combined.
df_B["ciudad"] = 'Bucarest'
df_B.head(2)

Unnamed: 0,_id,nombre,latitud,longitud,location,categoria,latlong,Distancia,ciudad
0,618fac476bd838c298f8a588,bella dog,44.429934,26.094152,"{'type': 'Point', 'coordinates': [44.429934133...",Dog grooming,"[44.429934133909114, 26.09415232523072]",3.398954,Bucarest
1,618fac7f6bd838c298f8a5b2,ICEP Hotel School,44.42088,26.09475,"{'type': 'Point', 'coordinates': [44.42088, 26...",School,"[44.42088, 26.09475]",6.897717,Bucarest


## Ahora intentemos normalizar

In [370]:
# Stack the three per-city frames into one table (each frame keeps its own row index).
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
data = pd.concat([df_M, df_B, df_D])

In [371]:
data.shape

(283, 9)

In [373]:
# Keep only the venues that matched one of the named categories.
data_final = data.loc[data["categoria"].ne('Others')]

In [376]:
# Average the numeric columns (latitud, longitud, Distancia) per category and city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises instead of
# silently dropping non-numeric columns such as _id, nombre, location and latlong.
total_data = data_final.groupby(["categoria", "ciudad"]).mean(numeric_only=True)

In [382]:
total_data

Unnamed: 0_level_0,Unnamed: 1_level_0,latitud,longitud,Distancia
categoria,ciudad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Airport,Bucarest,44.437259,26.091341,17.402089
Airport,Dublin,53.350183,-6.26168,16.862564
Airport,Madrid,40.430468,-3.695076,25.885293
Disco,Bucarest,44.438146,26.096557,21.067706
Disco,Dublin,53.342643,-6.262321,8.816538
Disco,Madrid,40.446783,-3.681514,22.529341
Dog grooming,Bucarest,44.43384,26.096344,25.599482
Dog grooming,Dublin,53.346605,-6.259738,20.385157
Dog grooming,Madrid,40.430027,-3.691807,24.703066
School,Bucarest,44.437523,26.098311,21.029913


In [383]:
# Display the aggregate with categoria/ciudad as regular columns; the result is
# not assigned, so `total_data` itself keeps its (categoria, ciudad) index.
total_data.reset_index(drop = False)

Unnamed: 0,categoria,ciudad,latitud,longitud,Distancia
0,Airport,Bucarest,44.437259,26.091341,17.402089
1,Airport,Dublin,53.350183,-6.26168,16.862564
2,Airport,Madrid,40.430468,-3.695076,25.885293
3,Disco,Bucarest,44.438146,26.096557,21.067706
4,Disco,Dublin,53.342643,-6.262321,8.816538
5,Disco,Madrid,40.446783,-3.681514,22.529341
6,Dog grooming,Bucarest,44.43384,26.096344,25.599482
7,Dog grooming,Dublin,53.346605,-6.259738,20.385157
8,Dog grooming,Madrid,40.430027,-3.691807,24.703066
9,School,Bucarest,44.437523,26.098311,21.029913


## Entiendo que esta función no me servirá porque no voy a hacer intersección de colecciones

In [None]:
# NOTE(review): `libres_cerca` and `dis` are not defined anywhere in the visible
# notebook, so this cell cannot run as-is; it looks like a leftover from another
# exercise — confirm and remove if it is not needed.
total = [] # list holding the final result
for i in libres_cerca: # iterate over the whole list of bookshops from before
    point = type_point([i["longitud"],i["latitud"]]) # build a Point for each one
    query = {"geometry": {"$geoIntersects": {"$geometry": point}}} # query that looks up the neighbourhood
    resultado = dis.find_one(query) # run the query
    # find_one returns None when nothing matches, which would make the lookup below fail.
    libre = {i["nombre"]:resultado["properties"]["nombre"]} # bookshop name and the district name from the query
    total.append(libre)

## Algunas pruebas para cambiar el nombre de las variables desde el cleaning pero desde jyn hago ensayos