![cg.jpg](https://raw.githubusercontent.com/jdanifalcon/FundamentosIA/main/logo/logo_cg.jpg)


# Proyecto, Parte 1: Preprocesamiento de los datos de INEGI 2020 para Macuspana, Tabasco, México.

#### @date 03/04/2024


In [2]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Cargar datos socioeconómicos y geoespaciales
# Ruta del archivo CSV en Google Drive
ruta = '/content/drive/MyDrive/inegi/2020/ageb27_2020.csv'

# Lee el archivo CSV en un DataFrame de pandas
data = pd.read_csv(ruta)
data

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBFEM,...,VPH_TELEF,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC
0,27,Tabasco,0,Total de la entidad Tabasco,0,Total de la entidad,0000,0,2402598,1228927,...,118527,563390,228487,276817,66338,31004,54983,96508,404524,25731
1,27,Tabasco,1,Balancán,0,Total del municipio,0000,0,58524,29282,...,2960,12347,3499,7392,486,227,1907,3607,12387,993
2,27,Tabasco,1,Balancán,1,Total de la localidad urbana,0000,0,13944,7272,...,625,3917,1534,2402,302,133,305,271,2258,75
3,27,Tabasco,1,Balancán,1,Total AGEB urbana,0401,0,1962,1017,...,144,587,244,353,57,15,53,38,330,12
4,27,Tabasco,1,Balancán,1,Balancán,0401,1,54,32,...,9,16,13,13,4,0,*,*,6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17781,27,Tabasco,17,Tenosique,1,Tenosique de Pino Suárez,0596,18,15,*,...,*,*,*,*,*,*,*,*,*,*
17782,27,Tabasco,17,Tenosique,1,Tenosique de Pino Suárez,0596,19,5,*,...,*,*,*,*,*,*,*,*,*,*
17783,27,Tabasco,17,Tenosique,1,Tenosique de Pino Suárez,0596,20,8,*,...,*,*,*,*,*,*,*,*,*,*
17784,27,Tabasco,17,Tenosique,1,Tenosique de Pino Suárez,0596,21,2,*,...,*,*,*,*,*,*,*,*,*,*


In [None]:
# Filtrado
palabra_referencia = 'Total'

# Eliminar filas (referencia en la columna 'NOM_LOC')
data = data[~data['NOM_LOC'].str.contains(r'\b' + palabra_referencia + r'\b', case=False, na=False, regex=True)]
data

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBFEM,...,VPH_TELEF,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC
4,27,Tabasco,1,Balancán,1,Balancán,0401,1,54,32,...,9,16,13,13,4,0,*,*,6,0
5,27,Tabasco,1,Balancán,1,Balancán,0401,2,44,24,...,5,13,6,10,0,*,0,0,4,0
6,27,Tabasco,1,Balancán,1,Balancán,0401,3,28,13,...,5,10,7,6,0,0,*,*,4,0
7,27,Tabasco,1,Balancán,1,Balancán,0401,4,44,24,...,3,15,6,12,0,0,0,0,8,0
8,27,Tabasco,1,Balancán,1,Balancán,0401,5,110,58,...,9,31,16,25,7,*,0,3,16,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17781,27,Tabasco,17,Tenosique,1,Tenosique de Pino Suárez,0596,18,15,*,...,*,*,*,*,*,*,*,*,*,*
17782,27,Tabasco,17,Tenosique,1,Tenosique de Pino Suárez,0596,19,5,*,...,*,*,*,*,*,*,*,*,*,*
17783,27,Tabasco,17,Tenosique,1,Tenosique de Pino Suárez,0596,20,8,*,...,*,*,*,*,*,*,*,*,*,*
17784,27,Tabasco,17,Tenosique,1,Tenosique de Pino Suárez,0596,21,2,*,...,*,*,*,*,*,*,*,*,*,*


In [None]:
Macuspana = data.loc[data['NOM_MUN'] == 'Macuspana']
Macuspana

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBFEM,...,VPH_TELEF,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC
13912,27,Tabasco,12,Macuspana,1,Macuspana,0072,1,319,181,...,47,84,53,62,21,12,4,8,36,*
13913,27,Tabasco,12,Macuspana,1,Macuspana,0072,2,369,192,...,42,106,99,85,16,6,*,7,16,0
13914,27,Tabasco,12,Macuspana,1,Macuspana,0072,3,445,230,...,46,120,76,81,18,11,5,9,50,*
13915,27,Tabasco,12,Macuspana,1,Macuspana,0072,4,73,40,...,10,16,11,13,*,*,0,*,6,0
13916,27,Tabasco,12,Macuspana,1,Macuspana,0072,5,691,359,...,38,147,59,78,20,5,14,27,106,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14879,27,Tabasco,12,Macuspana,91,Pemex (Ciudad Pemex),0759,28,0,0,...,0,0,0,0,0,0,0,0,0,0
14880,27,Tabasco,12,Macuspana,91,Pemex (Ciudad Pemex),0759,29,0,0,...,0,0,0,0,0,0,0,0,0,0
14881,27,Tabasco,12,Macuspana,91,Pemex (Ciudad Pemex),0759,30,0,0,...,0,0,0,0,0,0,0,0,0,0
14882,27,Tabasco,12,Macuspana,91,Pemex (Ciudad Pemex),0759,31,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
#Macuspana['P15YM_AN']
listaDeseada = ['POBTOT','PROM_OCUP','P3YM_HLI','PHOGJEF_F','P15YM_AN','GRAPROES','PSINDER','PEA','POB15_64','TVIVHAB','VPH_AGUAFV','VPH_NODREN','VPH_PISOTI']
N_Macuspana = Macuspana.loc[:, listaDeseada]

In [None]:
N_Macuspana

Unnamed: 0,POBTOT,PROM_OCUP,P3YM_HLI,PHOGJEF_F,P15YM_AN,GRAPROES,PSINDER,PEA,POB15_64,TVIVHAB,VPH_AGUAFV,VPH_NODREN,VPH_PISOTI
13912,319,3.39,*,131,7,11.5,70,144,214,94,0,0,0
13913,369,3.18,*,185,8,10.45,54,169,253,116,0,*,0
13914,445,3.37,0,144,3,11.75,138,239,312,132,0,0,0
13915,73,4.06,0,19,*,11.29,7,27,47,18,0,0,0
13916,691,3.88,10,221,24,9.29,164,322,454,178,*,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14879,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0
14880,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0
14881,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0
14882,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0


In [None]:
#Macuspana['P15YM_AN']
listaDeseada = ['ENTIDAD','MUN','LOC','AGEB','MZA']
NM_Macuspana = Macuspana.loc[:, listaDeseada]
NM_Macuspana

Unnamed: 0,ENTIDAD,MUN,LOC,AGEB,MZA
13912,27,12,1,0072,1
13913,27,12,1,0072,2
13914,27,12,1,0072,3
13915,27,12,1,0072,4
13916,27,12,1,0072,5
...,...,...,...,...,...
14879,27,12,91,0759,28
14880,27,12,91,0759,29
14881,27,12,91,0759,30
14882,27,12,91,0759,31


In [None]:
NM_Macuspana['ENTIDAD']=NM_Macuspana['ENTIDAD'].astype(str).str.rjust(2, '0')
NM_Macuspana['MUN']=NM_Macuspana['MUN'].astype(str).str.rjust(3, '0')
NM_Macuspana['LOC']=NM_Macuspana['LOC'].astype(str).str.rjust(4, '0')
NM_Macuspana['AGEB']=NM_Macuspana['AGEB'].astype(str).str.rjust(4, '0')
NM_Macuspana['MZA']=NM_Macuspana['MZA'].astype(str).str.rjust(3, '0')
NM_Macuspana['Unidas'] = NM_Macuspana['ENTIDAD'].astype(str) + NM_Macuspana['MUN'].astype(str) + NM_Macuspana['LOC'].astype(str) + NM_Macuspana['AGEB'].astype(str) + NM_Macuspana['MZA'].astype(str)
print(NM_Macuspana['Unidas'])

13912    2701200010072001
13913    2701200010072002
13914    2701200010072003
13915    2701200010072004
13916    2701200010072005
               ...       
14879    2701200910759028
14880    2701200910759029
14881    2701200910759030
14882    2701200910759031
14883    2701200910759032
Name: Unidas, Length: 927, dtype: object


In [None]:
N_Macuspana['Unidas'] = NM_Macuspana['Unidas']
N_Macuspana

Unnamed: 0,POBTOT,PROM_OCUP,P3YM_HLI,PHOGJEF_F,P15YM_AN,GRAPROES,PSINDER,PEA,POB15_64,TVIVHAB,VPH_AGUAFV,VPH_NODREN,VPH_PISOTI,Unidas
13912,319,3.39,*,131,7,11.5,70,144,214,94,0,0,0,2701200010072001
13913,369,3.18,*,185,8,10.45,54,169,253,116,0,*,0,2701200010072002
13914,445,3.37,0,144,3,11.75,138,239,312,132,0,0,0,2701200010072003
13915,73,4.06,0,19,*,11.29,7,27,47,18,0,0,0,2701200010072004
13916,691,3.88,10,221,24,9.29,164,322,454,178,*,0,6,2701200010072005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14879,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0,2701200910759028
14880,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0,2701200910759029
14881,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0,2701200910759030
14882,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0,2701200910759031


In [None]:
N_Macuspana= N_Macuspana[['Unidas','POBTOT','PROM_OCUP','P3YM_HLI','PHOGJEF_F','P15YM_AN','GRAPROES','PSINDER','PEA','POB15_64','TVIVHAB','VPH_AGUAFV','VPH_NODREN','VPH_PISOTI']]
N_Macuspana

Unnamed: 0,Unidas,POBTOT,PROM_OCUP,P3YM_HLI,PHOGJEF_F,P15YM_AN,GRAPROES,PSINDER,PEA,POB15_64,TVIVHAB,VPH_AGUAFV,VPH_NODREN,VPH_PISOTI
13912,2701200010072001,319,3.39,*,131,7,11.5,70,144,214,94,0,0,0
13913,2701200010072002,369,3.18,*,185,8,10.45,54,169,253,116,0,*,0
13914,2701200010072003,445,3.37,0,144,3,11.75,138,239,312,132,0,0,0
13915,2701200010072004,73,4.06,0,19,*,11.29,7,27,47,18,0,0,0
13916,2701200010072005,691,3.88,10,221,24,9.29,164,322,454,178,*,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14879,2701200910759028,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0
14880,2701200910759029,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0
14881,2701200910759030,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0
14882,2701200910759031,0,0.00,0,0,0,0.00,0,0,0,0,0,0,0


In [None]:
# Ruta completa en Google Drive
ruta_google_drive = '/content/drive/MyDrive/inegi/macuspana/N_macuspana.csv'

# Guardar el DataFrame como un archivo CSV en Google Drive
N_Macuspana.to_csv(ruta_google_drive, index=False)

NameError: name 'N_Macuspana' is not defined

In [None]:
N_Macuspana['TVIVHAB'].max()

'97'