## Establishments proportion by comuna

### Open data

In [1]:
import geopandas as gpd
import pandas as pd
from src.geo_data_handler import GeoDataHandler
from src.constants import ciiu_interes, ciiu_interested_for_tourism


establecimientos_comercio = GeoDataHandler(
    gpd.read_file(
        "../datasets/GeoMedellin Seleccionado/establecimientos_de_indus.geojson"
    )
)

comuna_mapping = establecimientos_comercio.geodata[['comuna', 'nombre_comuna']].drop_duplicates().set_index('comuna')['nombre_comuna'].to_dict()
comuna_mapping

{'14': 'EL POBLADO',
 '16': 'BelÃ©n',
 '10': 'La Candelaria',
 '08': 'Villa Hermosa',
 '12': 'La AmÃ©rica',
 '03': 'Manrique',
 '09': 'Buenos Aires',
 '07': 'Robledo',
 '70': 'Corregimiento de Altavista',
 '11': 'Laureles Estadio',
 '06': 'Doce de Octubre',
 '15': 'Guayabal',
 '60': 'Corregimiento de San CristÃ³bal',
 '01': 'Popular',
 '02': 'Santa Cruz',
 '05': 'Castilla',
 '13': 'San Javier',
 '80': 'Corregimiento de San Antonio de Prado',
 '04': 'Aranjuez',
 '90': 'Corregimiento de Santa Elena',
 'AU': 'AU',
 'AE': 'AE',
 'In': 'In',
 None: None,
 'SN': 'SN',
 '50': 'Corregimiento de San SebastiÃ¡n de Palmitas',
 '00': '00'}

In [2]:
geodata_dict = {}

for key in ciiu_interes.keys():
    geodata_dict[key] = GeoDataHandler(
        gpd.read_file(
            f"../datasets/GeoMedellin Procesado/processed_establishments_{key}.geojson"
        )
    )


## Sum of establishments of interest by comuna

In [3]:
# Total of establishments by comuna
total_by_comuna = establecimientos_comercio.geodata.groupby("comuna").size()

In [4]:
ciiu_interes

{'alojamientos': ['5511',
  '5512',
  '5513',
  '5514',
  '5519',
  '5520',
  '5530',
  '5590'],
 'servicios_turisticos': ['7911', '7912', '7990'],
 'entretenimiento': ['8230', '9103', '9200', '9321', '9329'],
 'transporte': ['4911', '4921', '5011', '5021', '5111', '5112'],
 'alimentacion': ['5611', '5612', '5613', '5619'],
 'bares': ['5630']}

In [5]:
counts_by_comuna = {}
for key in ciiu_interes.keys():
    counts_by_comuna[key] = geodata_dict[key].geodata.groupby("comuna").size()

# Sum all counts by comuna across only alojamientos, 
total_tourism_establishments_by_comuna = pd.Series(0, index=counts_by_comuna[list(counts_by_comuna.keys())[0]].index)
for key in counts_by_comuna:
    if key in ciiu_interested_for_tourism:
        total_tourism_establishments_by_comuna += counts_by_comuna[key]

total_tourism_establishments_by_comuna

comuna
01      96.0
02      67.0
03     115.0
04     224.0
05     177.0
06     120.0
07     143.0
08     117.0
09     212.0
10    1286.0
11     867.0
12     216.0
13      95.0
14    1359.0
15     185.0
16     446.0
60      50.0
70      22.0
80      33.0
90      14.0
AE       NaN
AU       NaN
In       NaN
dtype: float64

# Establishment proporcion by comuna

In [6]:
counts_by_comuna.keys()

dict_keys(['alojamientos', 'servicios_turisticos', 'entretenimiento', 'transporte', 'alimentacion', 'bares'])

In [9]:
import pandas as pd


# Combine all proportions into a single DataFrame
proportions_df = pd.DataFrame({
    'comuna': total_by_comuna.index,
    'nombre_comuna': total_by_comuna.index.map(comuna_mapping),
    'total_establecimientos': total_by_comuna,
    'total_establecimientos_turismo': total_tourism_establishments_by_comuna,
    'prop_rel_establecimientos_turismo': total_tourism_establishments_by_comuna / total_by_comuna,
    'proporcion_relativa_alojamientos': counts_by_comuna["alojamientos"] / total_tourism_establishments_by_comuna,
    'proporcion_relativa_servicios_turisticos': counts_by_comuna["servicios_turisticos"] / total_tourism_establishments_by_comuna,
    'proporcion_relativa_entretenimiento': counts_by_comuna["entretenimiento"] / total_tourism_establishments_by_comuna,
    'proporcion_transporte': counts_by_comuna["transporte"] / total_by_comuna,
    'proporcion_alimentacion': counts_by_comuna["alimentacion"] / total_by_comuna,
    'proporcion_bares': counts_by_comuna["bares"] / total_by_comuna,
})

proportions_df

Unnamed: 0_level_0,comuna,nombre_comuna,total_establecimientos,total_establecimientos_turismo,prop_rel_establecimientos_turismo,proporcion_relativa_alojamientos,proporcion_relativa_servicios_turisticos,proporcion_relativa_entretenimiento,proporcion_transporte,proporcion_alimentacion,proporcion_bares
comuna,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
00,00,00,4,,,,,,,,
01,01,Popular,2811,96.0,0.034152,0.604167,0.135417,0.260417,0.00249,0.060121,0.070793
02,02,Santa Cruz,3090,67.0,0.021683,0.507463,0.149254,0.343284,0.002589,0.067961,0.052751
03,03,Manrique,4911,115.0,0.023417,0.434783,0.286957,0.278261,0.00224,0.067196,0.041336
04,04,Aranjuez,8759,224.0,0.025574,0.602679,0.200893,0.196429,0.00548,0.095331,0.037561
05,05,Castilla,7074,177.0,0.025021,0.542373,0.248588,0.20904,0.010744,0.080153,0.024314
06,06,Doce de Octubre,5326,120.0,0.022531,0.391667,0.333333,0.275,0.00338,0.063838,0.034923
07,07,Robledo,6718,143.0,0.021286,0.307692,0.482517,0.20979,0.008038,0.063709,0.018904
08,08,Villa Hermosa,4687,117.0,0.024963,0.307692,0.435897,0.25641,0.005761,0.056326,0.036271
09,09,Buenos Aires,7843,212.0,0.02703,0.377358,0.367925,0.254717,0.005993,0.072676,0.027923


In [10]:
proportions_df.to_csv("../datasets/GeoMedellin Procesado/summary_by_comuna.csv", index=False)