# Simplificar los datos municipales

## Imports

In [1]:
import pandas as pd
import warnings

from src.data.utils import zeroes_to_cve, create_index__mun_id

## Configs

## Constants

In [2]:
# Input files read by this notebook.
DROUGHT_DATA = "../data/raw/drought_data.parquet"
MUNICIPAL_DATA = "../data/raw/muni.pkl"

# Output file written by this notebook.
SIMPLIFIED_MUNICIPAL_DATA = "../data/interim/municipal_data.pkl"

# Coordinate reference systems, as EPSG authority strings.
MEXICO_EPSG = "EPSG:6362"  # https://epsg.io/6362
WGS84 = "EPSG:4326"  # World Geodetic System 1984 ensemble

## Helper functions

## Read data

In [3]:
# Load the drought table (parquet) and the municipal table (pickle).
# NOTE(review): read_pickle executes arbitrary code embedded in the file;
# only load MUNICIPAL_DATA from a trusted source.
# NOTE(review): municipal_df is used later through `.geometry`, so it is
# presumably a GeoDataFrame and geopandas must be installed — confirm.
drought_df = pd.read_parquet(DROUGHT_DATA)
municipal_df = pd.read_pickle(MUNICIPAL_DATA)

## Process data

In [4]:
def _build_mun_id(df):
    """Return the ``<CVE_ENT>_<CVE_MUN>`` key for every row of *df*.

    Each code column is passed through ``zeroes_to_cve`` (``zeroes=2`` for
    ``CVE_ENT``, ``zeroes=3`` for ``CVE_MUN``) and the two results are joined
    with an underscore.
    NOTE(review): ``zeroes_to_cve`` presumably zero-pads the code to the
    given width — confirm in ``src.data.utils``.
    """
    ent_code = df['CVE_ENT'].apply(zeroes_to_cve, zeroes=2)
    mun_code = df['CVE_MUN'].apply(zeroes_to_cve, zeroes=3)
    return ent_code + "_" + mun_code


# Build the same mun_id key on both tables so that rows can be matched.
drought_df['mun_id'] = _build_mun_id(drought_df)
municipal_df['mun_id'] = _build_mun_id(municipal_df)

# Municipalities present in municipal_df but missing from drought_df.
lacking_mun1 = set(municipal_df.mun_id) - set(drought_df.mun_id)

# Municipalities present in drought_df but missing from municipal_df.
# This set is expected to be empty; verify it instead of assuming it, so a
# changed input file does not go unnoticed.
lacking_mun2 = set(drought_df.mun_id) - set(municipal_df.mun_id)
if lacking_mun2:
    warnings.warn(
        f"{len(lacking_mun2)} municipalities in drought_df are not present "
        f"in municipal_df, e.g. {list(lacking_mun2)[:10]}"
    )

# From here on both tables are indexed by mun_id.
drought_df.set_index('mun_id', inplace=True)
municipal_df.set_index('mun_id', inplace=True)

# Keep only the municipalities that also have drought data.
municipal_df = municipal_df.drop(index=list(lacking_mun1))


In [7]:
# Derive one representative point per municipality geometry, declare its
# coordinates as MEXICO_EPSG and reproject them to WGS84.
representative_points = municipal_df.geometry.representative_point()
representative_points = representative_points.set_crs(MEXICO_EPSG)
municipal_df['representative_coordinate'] = representative_points.to_crs(WGS84)

In [18]:
# Columns of municipal_df that are not carried over to the simplified table.
redundant_columns = [
    "CVEGEO",
    "CVE_ENT",
    "CVE_MUN",
    "NOMGEO",
    "NOM_ENT",
    "COV_",
    "COV_ID",
    "AREA",
    'PERIMETER',
]
simplified_municipal_df = municipal_df.drop(columns=redundant_columns)

# New column name -> drought_df column it is copied from.
# Values are matched by index label, so both frames must share the same index.
drought_columns_by_new_name = {
    'nombre_municipio': 'NOMBRE_MUN',
    'entidad_federativa': 'ENTIDAD',
    'org_cuenca': 'ORG_CUENCA*',
    'clv_oc': 'CLV_OC',
    'con_cuenca': 'CON_CUENCA',
    'cve_conc': 'CVE_CONC',
}
for new_name, drought_column in drought_columns_by_new_name.items():
    simplified_municipal_df[new_name] = drought_df[drought_column]

In [20]:
# Preview the first rows of the simplified table.
simplified_municipal_df.head()

mun_id,geometry,representative_coordinate,nombre_municipio,entidad_federativa,org_cuenca,clv_oc,con_cuenca,cve_conc
01_008,"POLYGON ((2463679.819 1122454.043, 2463636.551...",POINT (-102.53016 22.14257),San José de Gracia,Aguascalientes,Lerma-Santiago-Pacífico,VIII,Rio Santiago,16
01_009,"POLYGON ((2483386.269 1147520.506, 2483452.327...",POINT (-102.19271 22.24361),Tepezalá,Aguascalientes,Lerma-Santiago-Pacífico,VIII,Rio Santiago,16
01_010,"POLYGON ((2510504.977 1104537.995, 2511266.668...",POINT (-102.00073 21.92874),El Llano,Aguascalientes,Lerma-Santiago-Pacífico,VIII,Rio Santiago,16
01_011,"POLYGON ((2485534.556 1117425.794, 2485536.613...",POINT (-102.25178 22.02192),San Francisco de los Romo,Aguascalientes,Lerma-Santiago-Pacífico,VIII,Rio Santiago,16
01_001,"POLYGON ((2489959.178 1111683.059, 2489874.765...",POINT (-102.30547 21.84777),Aguascalientes,Aguascalientes,Lerma-Santiago-Pacífico,VIII,Rio Santiago,16


## Conclusions

In [21]:
# Persist the simplified table as a pickle at SIMPLIFIED_MUNICIPAL_DATA.
simplified_municipal_df.to_pickle(SIMPLIFIED_MUNICIPAL_DATA)