In [11]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, shape

from pymongo import MongoClient
from config.envs import mongo_usuario, mongo_credencial, mongo_port

from keplergl import KeplerGl

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import warnings
warnings.filterwarnings("ignore")

In [12]:
def gen_mapa(dataset, name_f, file_path_name):

    ''' 
    
    Esta funcion genera y guarda el mapa en Kepler

    input: 

        dataset: Archivo (csv, geopandas)
        name_f: nombre que tomara en Kepler
        file_path_name: path y nombre.html para guardar el archivo

    output: mapa

    Notas: Mirar evitar configuracion

     '''

    mapita = KeplerGl(height=500, 
                      data={name_f: dataset})
    config_mapita = mapita.config
    mapita.save_to_html(data={name_f: dataset},
                        config=config_mapita,
                        file_name=file_path_name
                        )

In [13]:
mongo_client = MongoClient(f'mongodb://{mongo_usuario}:{mongo_credencial}@localhost:{mongo_port}/')
db = mongo_client['db_javeriana']
collection_clientes = db['datos_clientes']
collection_poligonos = db['poligonos_buenos_aires']

# Calcular los más cercanos (taller 5)

¿Cuáles son los 20 clientes más cercanos y a qué grupo pertenecen?

datos a usar: 

    - df_clientes.parquet.gzip

Crear una coleccion en mongo para clientes (mas de una direccion) 

In [14]:
df_clientes = pd.read_parquet('/home/oecorrechag/jave_buenos_aires_py_cmp/insumos/df_clientes.parquet.gzip')
# df_clientes['coordinates'] = '[' + df_clientes['latitude'].astype(str) + ', ' + df_clientes['longitude'].astype(str) + ' ]'
# df_clientes['coordinates2'] = '[' + df_clientes['latitude2'].astype(str) + ', ' + df_clientes['longitude2'].astype(str) + ' ]'
df_clientes.head()

Unnamed: 0,id_cliente,sexo,edad,group,latitude,longitude,latitude2,longitude2,coordinates,coordinates2
0,1011,0,26.966101,0,-34.744849,-58.328306,,,"[-34.74484892664207, -58.32830567174994 ]","[nan, nan ]"
1,1026,1,28.789567,0,-34.736076,-58.279651,,,"[-34.736075990378815, -58.27965082252797 ]","[nan, nan ]"
2,1094,0,27.426637,0,-34.72264,-58.45509,,,"[-34.72263959913693, -58.45508997026702 ]","[nan, nan ]"
3,1107,0,23.518445,0,-34.692857,-58.486704,,,"[-34.69285747258296, -58.48670444901569 ]","[nan, nan ]"
4,1121,1,27.319498,0,-34.642551,-58.450736,,,"[-34.64255097083067, -58.45073600066322 ]","[nan, nan ]"


In [5]:
coordenadas_referencia = {"type": "Point", "coordinates": [-34.66114402162969, -58.36807293115531]}  # Punto de la empresa avellaneda

# collection.create_index([("coordinates", "2dsphere")])
cursor = collection_clientes.find({
    "coordinates": {
        "$near": {
            "$geometry": coordenadas_referencia
        }
    }
}).limit(20)

clientes_cerca = pd.DataFrame(cursor)

# coordenadas_referencia = {"type": "Point", "coordinates": [-34.550708088082665, -58.46863855979501]}  # Punto de la empresa nuñez

# # collection.create_index([("coordinates", "2dsphere")])
# cursor = collection.find({
#     "coordinates": {
#         "$near": {
#             "$geometry": coordenadas_referencia
#         }
#     }
# }).limit(20)

# clientes_cerca = pd.DataFrame(cursor)


# clientes_cerca.to_csv('clientes_cerca.csv', encoding = 'utf-8', index = False)
clientes_cerca

In [7]:
# # Esto me dice a que grupo pertenecen
# clientes_cerca['group'].value_counts()

In [None]:
# gen_mapa(clientes_cerca, 'clientes_cercanos', 
#          '/home/oecorrechag/jave_buenos_aires_py_cmp/outputs/clientes_cercanos.html')

# Calcular los más cercanos a la playa (taller 5)

¿Cuáles son los clientes más cercanos a la playa?

datos a usar: 

    - df_clientes.parquet.gzip
    - poligono playa

Crear una coleccion en mongo para clientes (mas de una direccion) 

In [8]:
coast_polygon = [
    [-58.5958566696177, -34.43454403948739],
    [-58.324964171635486, -34.67117336416664],
    [-58.19977900233804, -34.768165256713786],
    [-58.00789680947208, -34.85746565390766],
    [-57.82935402703147, -34.93236950640585],
    [-57.82935401791714, -34.934051956338855],
    [-57.43440397492876, -34.647402476158966],
    [-58.628486219863966, -34.05291632813343],
    [-58.5958566696177, -34.43454403948739]
 ]

# Definimos las columnas
columnas = ["latitude", "longitude"]

# Creamos el Geopandas
polygon = Polygon(coast_polygon)
df_poligono_costa = gpd.GeoDataFrame(geometry=[polygon], columns=['geometry'])#, crs={'init': 'epsg:4326'})
# df_poligono_costa.to_file('df_poligono_costa.geojson', driver='GeoJSON')

In [None]:
collection_clientes.create_index([("coordinates", "2dsphere")])
cursor = collection_clientes.find({
    "coordinates": {
        "$geoWithin": {
            "$geometry": {
                "type": "Polygon",
                "coordinates": [coast_polygon]
            }
        }
    }
})

clientes_costa = pd.DataFrame(cursor)
# clientes_costa.to_csv('clientes_costa.csv', encoding = 'utf-8', index = False)
clientes_costa

In [None]:
# Guardar el mapa
mapita = KeplerGl(height=500, data={'clientes costa': clientes_costa})
config_mapita = mapita.config
mapita.save_to_html(data={'clientes costa': clientes_costa, 'Playa': df_poligono_costa},
                    config=config_mapita,
                    file_name='salida.html'
                    )

# Los mejores clientes tienen mas ingresos?

¿Los clientes clasificados como los mejores tienen mayores ingresos?

datos a usar: 

    - df_clientes.parquet.gzip
    - df_encuestas.parquet.gzip

Crear una coleccion en mongo para clientes (mas de una direccion) <br>
Crear una tabla sql para encuestas por que tienen formato definido tabular

Notas: Revisar por que salen mas poligonos? se cerro la conexion a la base de datos?

In [9]:
# query = {
#  $and: [ { 'properties.PROVINCIA': 'Ciudad Autónoma de Buenos Aires' }, 
#          { 'properties.PROVINCIA': 'Buenos Aires' }] 
# }
# consulta = collection_poligonos.find(query)

query = {
    'properties.PROVINCIA': 'Buenos Aires'
}
consulta = collection_poligonos.find(query)

# Almacenar en un dataframe
df_resultado = pd.DataFrame(consulta)

# Convertir las coordenadas a objetos geométricos válidos
df_resultado['geometry'] = df_resultado['geometry'].apply(lambda x: shape(x) if isinstance(x, dict) else x)

# Crear un DataFrame a partir de la lista de registros de 'properties'
df = pd.DataFrame.from_records(df_resultado['properties'].tolist())

# Añadir la columna 'geometry' al nuevo DataFrame
df['geometry'] = df_resultado['geometry']

# Crear el GeoDataFrame
df = gpd.GeoDataFrame(df, geometry='geometry')

# Mostrar las primeras filas del GeoDataFrame
df.head()

Unnamed: 0,RADIO,DEPTO,PROVINCIA,PERSONAS,HOGARES,VIVIENDAS,VIV_HABIT,CORONA,geometry
0,60280101,Almirante Brown,Buenos Aires,1308.0,360.0,326.0,306.0,2.0,"MULTIPOLYGON (((-58.33671 -34.75432, -58.33634..."
1,60280102,Almirante Brown,Buenos Aires,1213.0,290.0,247.0,240.0,2.0,"MULTIPOLYGON (((-58.33886 -34.75551, -58.33816..."
2,60280103,Almirante Brown,Buenos Aires,1214.0,326.0,331.0,312.0,2.0,"MULTIPOLYGON (((-58.33356 -34.75868, -58.33321..."
3,60280104,Almirante Brown,Buenos Aires,985.0,295.0,340.0,293.0,2.0,"MULTIPOLYGON (((-58.34234 -34.75876, -58.34124..."
4,60280105,Almirante Brown,Buenos Aires,614.0,201.0,215.0,196.0,2.0,"MULTIPOLYGON (((-58.34123 -34.75679, -58.34112..."


In [23]:
df_polingos = gpd.read_file("/home/oecorrechag/jave_buenos_aires_py_cmp/insumos/df_ba.geojson")
df_polingos = df_polingos[df_polingos['provincia'] == 'Ciudad Autonoma de Buenos Aires']
df_polingos = df_polingos.loc[:,['comuna', 'geometry']]
df_polingos.head()

Unnamed: 0,comuna,geometry
0,15,"POLYGON ((-58.47597 -34.59192, -58.47712 -34.5..."
1,5,"POLYGON ((-58.43334 -34.60268, -58.43224 -34.6..."
2,3,"POLYGON ((-58.39293 -34.59964, -58.39239 -34.6..."
3,4,"MULTIPOLYGON (((-58.39589 -34.66185, -58.39591..."
4,13,"POLYGON ((-58.43771 -34.56251, -58.43843 -34.5..."


In [24]:
df_clientes3 = df_clientes.copy()
df_clientes3 = df_clientes3.loc[:,['group', 'latitude', 'longitude']]
df_clientes3.head()

Unnamed: 0,group,latitude,longitude
0,0,-34.744849,-58.328306
1,0,-34.736076,-58.279651
2,0,-34.72264,-58.45509
3,0,-34.692857,-58.486704
4,0,-34.642551,-58.450736


In [25]:
df_encuestas = pd.read_parquet('/home/oecorrechag/jave_buenos_aires_py_cmp/insumos/df_encuestas.parquet.gzip')
df_encuestas = df_encuestas.loc[:,['comuna', 'ingresos']]
df_encuestas = df_encuestas.groupby('comuna').mean().reset_index()
df_encuestas.head()

Unnamed: 0,comuna,ingresos
0,1,9173.671355
1,2,17640.125549
2,3,10482.039358
3,4,7818.638964
4,5,12366.673813


In [26]:
# Joins
df_polingos_encuestas = pd.merge(df_polingos, df_encuestas, how='left', on=['comuna'])  
df_polingos_encuestas.shape

(48, 3)

In [29]:
# Guardar el mapa
mapita = KeplerGl(height=500, data={'Poligonos Buenos Aires': df_polingos_encuestas})
config_mapita = mapita.config
mapita.save_to_html(data={'Poligonos Buenos Aires': df_polingos_encuestas, 'Clientes': df_clientes3},
                    config=config_mapita,
                    file_name='salida.html'
                    )

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to salida.html!


# localizacion vs compra

¿Los clientes clasificados como los mejores tienen mayores ingresos?

datos a usar: 

    - dataset.parquet.gzip
    - df_encuestas.parquet.gzip

Crear una coleccion en mongo para clientes (mas de una direccion) <br>
Crear una tabla sql para encuestas por que tienen formato definido tabular

Notas: Revisar por que salen mas poligonos? se cerro la conexion a la base de datos?

In [35]:
df_clientes3 = df_clientes.copy()
df_clientes3 = df_clientes3.loc[:,['id_cliente', 'group', 'latitude', 'longitude']]
df_clientes3.head()

Unnamed: 0,id_cliente,group,latitude,longitude
0,1011,0,-34.744849,-58.328306
1,1026,0,-34.736076,-58.279651
2,1094,0,-34.72264,-58.45509
3,1107,0,-34.692857,-58.486704
4,1121,0,-34.642551,-58.450736


In [37]:
dataset = pd.read_parquet('/home/oecorrechag/jave_buenos_aires_py_cmp/insumos/dataset.parquet.gzip')
dataset = dataset.loc[:,['id_cliente', 'sexo', 'monetary',
                         'Babucha', 'Bermuda', 'Calza', 'Camisa', 'Capri', 'Chaleco', 'Jean',
                         'Mini', 'Pantalón', 'Pollera', 'Remera', 'Short', 'Strapless',
                         'Vestido', 
                         'Time_Average']]
dataset.head()

Unnamed: 0,id_cliente,sexo,monetary,Babucha,Bermuda,Calza,Camisa,Capri,Chaleco,Jean,Mini,Pantalón,Pollera,Remera,Short,Strapless,Vestido,Time_Average
0,1001,1,894.448,0.0,0.0,0.047619,0.142857,0.047619,0.047619,0.095238,0.095238,0.0,0.0,0.190476,0.142857,0.0,0.0,86.0
1,1002,1,1127.408,0.0,0.0,0.0,0.285714,0.071429,0.0,0.071429,0.0,0.0,0.071429,0.357143,0.0,0.0,0.0,113.0
2,1003,0,699.168,0.0,0.0,0.0,0.333333,0.0,0.066667,0.066667,0.0,0.0,0.0,0.2,0.2,0.0,0.0,117.0
3,1004,0,1054.56,0.0,0.0,0.0,0.263158,0.052632,0.0,0.157895,0.052632,0.0,0.0,0.157895,0.0,0.0,0.0,85.0
4,1005,0,1608.32,0.0,0.045455,0.0,0.272727,0.0,0.0,0.181818,0.045455,0.0,0.0,0.181818,0.045455,0.045455,0.0,76.0


In [39]:
# Joins
dataset = pd.merge(df_clientes3, dataset, how='left', on=['id_cliente'])  
dataset.shape

(500, 21)

In [40]:
# Guardar el mapa
mapita = KeplerGl(height=500, data={'Compras': dataset})
config_mapita = mapita.config
mapita.save_to_html(data={'Compras Buenos Aires': dataset, 'Clientes': df_clientes3},
                    config=config_mapita,
                    file_name='salida.html'
                    )

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to salida.html!


In [None]:
print('ok_')