In [31]:
import pandas as pd
import geopandas as gpd

In [32]:
def describir_shp(gdf):
    """Summarise the total area of a layer at three stages.

    The three totals are: the sum of the ``area`` column as loaded, the sum
    after recomputing ``area`` from the geometries, and the sum after
    dissolving the features that share the same ``idd`` key.

    Side effects (kept on purpose, other cells dissolve by ``idd`` on the
    same frame): ``gdf`` is modified in place. Its ``area`` column is
    overwritten with ``geometry.area / 10000`` and an ``idd`` column with the
    text ``unidad_01|unidad_03|unidad_05`` is added or replaced.

    Args:
        gdf: GeoDataFrame with the columns ``area``, ``unidad_01``,
            ``unidad_03`` and ``unidad_05``.

    Returns:
        pandas.DataFrame with the columns ``Items`` (label of each total)
        and ``Datos`` (the total rounded to 2 decimals).
    """
    area_cargado = gdf['area'].sum()

    # NOTE(review): dividing by 10000 presumably converts m2 to hectares,
    # which assumes a projected CRS in metres -- confirm.
    gdf['area'] = gdf.geometry.area / 10000
    area_recalculado = gdf['area'].sum()

    # Build the key column-wise instead of with apply(axis=1): the row-wise
    # apply returns a DataFrame (not a Series) on an empty frame, which makes
    # the column assignment fail, and it is much slower on large layers.
    gdf['idd'] = [
        f"{unidad_01}|{unidad_03}|{unidad_05}"
        for unidad_01, unidad_03, unidad_05 in zip(
            gdf['unidad_01'], gdf['unidad_03'], gdf['unidad_05']
        )
    ]

    gdf_disolve = gdf.dissolve(by='idd')
    gdf_disolve['area'] = gdf_disolve.geometry.area / 10000
    area_disolve = gdf_disolve['area'].sum()

    estado_datos = {
        'Items': ['Area cargado', 'Area recalculado', 'Area disolve'],
        'Datos': [round(area_cargado, 2), round(area_recalculado, 2), round(area_disolve, 2)],
    }
    return pd.DataFrame(estado_datos)

In [33]:
# Both layers live in the same folder; keep the folder in one place so only
# the file names differ.
dir_catastro_2024 = r'C:/Documents/Ingenio Azucarero Guabira S.A/UTEA - SEMANAL - CATASTRO/Catastro/CATASTRO 2024'
path_shp_cosecha = f'{dir_catastro_2024}/cosecha2024.shp'
path_shp_catastro = f'{dir_catastro_2024}/catastro140624.shp'

In [34]:
# Read the harvest and cadastre shapefiles, in that order.
gdf_cosecha, gdf_catastro = (
    gpd.read_file(ruta) for ruta in (path_shp_cosecha, path_shp_catastro)
)

In [35]:
# Area totals for the harvest layer. NOTE: the call also modifies gdf_cosecha
# in place (rewrites 'area' and adds the 'idd' column).
describir_shp(gdf_cosecha)

Unnamed: 0,Items,Datos
0,Area cargado,53873.13
1,Area recalculado,53873.09
2,Area disolve,53873.03


In [36]:
# Keep only the cadastre rows whose 'cultivo' is 'canha'; copy so later
# column assignments do not touch gdf_catastro.
es_canha = gdf_catastro['cultivo'] == 'canha'
gdf_catastro_canha = gdf_catastro[es_canha].copy()

In [37]:
# Area totals for the 'canha' cadastre subset. NOTE: the call also modifies
# gdf_catastro_canha in place (rewrites 'area' and adds the 'idd' column).
describir_shp(gdf_catastro_canha)

Unnamed: 0,Items,Datos
0,Area cargado,54081.38
1,Area recalculado,54033.26
2,Area disolve,54033.26


In [38]:
def comprobar_idd_repetidos(gdf):
    """Return the ``idd`` keys that appear more than once in ``gdf``.

    Side effect (kept on purpose, other cells dissolve by ``idd`` on the
    same frame): the ``idd`` column of ``gdf`` is added or replaced in place
    with the text ``unidad_01|unidad_03|unidad_05``.

    Args:
        gdf: DataFrame or GeoDataFrame with the columns ``unidad_01``,
            ``unidad_03`` and ``unidad_05``.

    Returns:
        pandas.Series indexed by ``idd`` holding the number of occurrences of
        every key that occurs more than once; empty when all keys are unique.
    """
    # Build the key column-wise instead of with apply(axis=1): the row-wise
    # apply returns a DataFrame (not a Series) on an empty frame, which makes
    # the column assignment fail, and it is much slower on large layers.
    gdf['idd'] = [
        f"{unidad_01}|{unidad_03}|{unidad_05}"
        for unidad_01, unidad_03, unidad_05 in zip(
            gdf['unidad_01'], gdf['unidad_03'], gdf['unidad_05']
        )
    ]
    # The previous version also sorted the frame by 'idd' into a local that
    # was never used; value_counts does not need sorted input.
    conteos = gdf['idd'].value_counts()
    return conteos[conteos > 1]

# COMPROBAR CATASTRO

In [39]:
# Inspect the values and dtype of unidad_03 before building keys from it.
gdf_catastro_canha['unidad_03']

0        42047
1         2913
2         8816
3        41967
4        42047
         ...  
13370    13385
13371    40152
13372    40152
13373     8014
13374     2615
Name: unidad_03, Length: 10630, dtype: int64

In [40]:
# Force unidad_03 to an integer dtype so the 'idd' text has no decimal part.
# NOTE(review): the preview in the previous cell already reports int64, so
# this cast may be redundant -- confirm.
gdf_catastro_canha['unidad_03'] = gdf_catastro_canha['unidad_03'].astype(int)

In [41]:
# List the idd keys that occur more than once in the 'canha' cadastre subset
# (the call also rebuilds the 'idd' column in place).
comprobar_idd_repetidos(gdf_catastro_canha)

idd
129|8028|L1.1      2
1979|41954|L1      2
299|1530|S7        2
129|8028|L2        2
613|2615|L3        2
184|14562|L28.2    2
Name: count, dtype: int64

In [42]:
# Merge the features that share an idd, recompute their area from the merged
# geometry and bring 'idd' back as a regular column.
# NOTE: relies on the 'idd' column that the earlier helper calls added to
# gdf_catastro_canha.
gdf_catastro_disolve = (
    gdf_catastro_canha
    .dissolve(by='idd')
    .assign(area=lambda capa: capa.geometry.area / 10000)
    .reset_index()
)
area_catastro_disolve = gdf_catastro_disolve['area'].sum()

In [43]:
# Force unidad_03 of the dissolved cadastre to an integer dtype before the
# 'idd' key is rebuilt from it in the duplicate check below.
gdf_catastro_disolve['unidad_03'] = gdf_catastro_disolve['unidad_03'].astype(int)

In [44]:
# Total area of the dissolved cadastre layer.
area_catastro_disolve

54033.25622529656

In [45]:
# After dissolving by idd no key should repeat; an empty Series confirms it.
comprobar_idd_repetidos(gdf_catastro_disolve)

Series([], Name: count, dtype: int64)

# COMPROBAR SHP COSECHA

In [46]:
# Merge the features that share an idd, recompute their area from the merged
# geometry and bring 'idd' back as a regular column.
# NOTE: gdf_cosecha only has an 'idd' column because describir_shp(gdf_cosecha)
# added it in place earlier; this cell fails if that call has not been run.
gdf_cosecha_disolve = (
    gdf_cosecha
    .dissolve(by='idd')
    .assign(area=lambda capa: capa.geometry.area / 10000)
    .reset_index()
)
area_cosecha_disolve = gdf_cosecha_disolve['area'].sum()

In [47]:
# Total area of the dissolved harvest layer.
area_cosecha_disolve

53873.02600785425

In [48]:
# After dissolving by idd no key should repeat; an empty Series confirms it.
# The call rebuilds 'idd' from the unidad_* columns of the dissolved layer.
comprobar_idd_repetidos(gdf_cosecha_disolve)

Series([], Name: count, dtype: int64)

# COMPARAR CATASTRO - COSECHA

In [49]:
# Total dissolved cadastre area, for comparison with the harvest total below.
gdf_catastro_disolve['area'].sum()

54033.25622529656

In [50]:
# Total dissolved harvest area, for comparison with the cadastre total above.
gdf_cosecha_disolve['area'].sum()

53873.02600785425

In [51]:
# List the columns available in the dissolved cadastre layer.
gdf_catastro_disolve.columns

Index(['idd', 'geometry', 'id', 'unidad_01', 'unidad_02', 'unidad_03',
       'unidad_04', 'unidad_05', 'variedad', 'soca', 'zona', 'textura',
       'cultivo', 'zafra', 'financia', 'fs', 'fc', 'area', 'tecnico', 'codcos',
       'siembra', 'fecha_siem'],
      dtype='object')

In [52]:
# Keep only the key and the area of the dissolved cadastre for the comparison.
gdf_cat_pivot = gdf_catastro_disolve[['idd', 'area']]

In [53]:
# Keep only the key and the area of the dissolved harvest for the comparison.
gdf_cos_pivot = gdf_cosecha_disolve[['idd', 'area']]

In [54]:
# Left join on idd: every cadastre row is kept and harvest rows without a
# matching cadastre idd are dropped. Both inputs carry an 'area' column, so
# pandas' default suffixes apply (area_x = cadastre, area_y = harvest).
df_cat_merged = pd.merge(gdf_cat_pivot, gdf_cos_pivot, on='idd', how='left')

In [55]:
# Preview the harvest key/area table.
gdf_cos_pivot

Unnamed: 0,idd,area
0,1000|16213|L1.1,2.484536
1,1000|16213|L1.3,0.844639
2,1000|16213|L2.2,3.241613
3,1000|16213|L3,4.265272
4,1000|16213|L4,9.282966
...,...,...
10591,9|18115|L2,1.714524
10592,9|18115|L3.1,1.639247
10593,9|18115|L3.2,1.772982
10594,9|18115|L4,4.286313


In [56]:
# Export the comparison to 'merge.xlsx' (relative path, so it lands in the
# current working directory); the DataFrame index is written as well.
df_cat_merged.to_excel('merge.xlsx')