In [13]:
!pip install rasterstats



In [23]:
import os
import geopandas as gpd
import rasterio
from rasterstats import zonal_stats
from shapely.ops import transform
import matplotlib.pyplot as plt
import pandas as pd
import pyproj
import numpy as np

In [25]:
# Load your shapefile
shapefile_path = '../../_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp'
gdf = gpd.read_file(shapefile_path)
gdf

Unnamed: 0,OBJECTID_1,OBJECTID,CCDD,NOMBDEP,CAPITAL,Shape_STAr,Shape_STLe,ORIG_FID,Shape_Leng,Shape_Area,CORREO,CONTACTO,WHATSAPP,geometry
0,1,1.0,1,AMAZONAS,CHACHAPOYAS,3.203006,12.912088,0,12.912088,3.203006,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-77.81399 -2.99278, -77.81483 -2.995..."
1,2,2.0,2,ANCASH,HUARAZ,2.954592,11.780424,1,11.780424,2.954592,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-77.64697 -8.05086, -77.64689 -8.051..."
2,3,3.0,3,APURIMAC,ABANCAY,1.765933,7.730154,2,7.730154,1.765933,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-73.74655 -13.17442, -73.7457 -13.17..."
3,4,4.0,4,AREQUIPA,AREQUIPA,5.330203,17.40504,3,17.40504,5.330203,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-71.98109 -14.64062, -71.98093 -14.6..."
4,5,5.0,5,AYACUCHO,AYACUCHO,3.643705,17.127166,4,17.127166,3.643705,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-74.34843 -12.17503, -74.35 -12.1758..."
5,6,6.0,6,CAJAMARCA,CAJAMARCA,2.684527,12.397424,5,12.397424,2.684527,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-78.22182 -7.76346, -78.22233 -7.763..."
6,7,7.0,7,CALLAO,CALLAO,0.011738,1.111221,6,1.111221,0.011738,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-77.13504 -11.8182, -77.13484 -11.81..."
7,8,8.0,8,CUSCO,CUSCO,6.000331,21.794434,7,21.794434,6.000331,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-72.9728 -11.25189, -72.97134 -11.25..."
8,9,9.0,9,HUANCAVELICA,HUANCAVELICA,1.839851,9.561245,8,9.561245,1.839851,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-74.57118 -11.9871, -74.57095 -11.98..."
9,10,10.0,10,HUANUCO,HUANUCO,3.089811,14.58984,9,14.58984,3.089811,juan.suyo@geogpsperu.com,www.geogpsperu.com,931381206,"POLYGON ((-75.992 -8.32108, -75.99015 -8.32171..."


In [3]:
# Algunas modificaciones
transformer = pyproj.Transformer.from_crs('epsg:4326', 'esri:54009', always_xy=True)

# Define a function to apply the transformation
def apply_transform(geom):
    return transform(transformer.transform, geom)

# Apply the transformation to the geometries
gdf['geometry'] = gdf['geometry'].apply(apply_transform)


In [6]:
# Loop

# Definir los archivos raster y los nombres de los DataFrames
rasters = {
        'df10_10':'../../_data/rasters_peru/R10_C10.tif',
        'df10_11':'../../_data/rasters_peru/R10_C11.tif',
        'df10_12':'../../_data/rasters_peru/R10_C12.tif',
        'df11_11':'../../_data/rasters_peru/R11_C11.tif',
        'df11_12':'../../_data/rasters_peru/R11_C12.tif',
        'df12_11':'../../_data/rasters_peru/R12_C11.tif',
        'df12_12':'../../_data/rasters_peru/R12_C12.tif'
}

# Diccionario para almacenar los DataFrames
dfs = {}

# Bucle para procesar cada archivo raster
for df_name, raster_path in rasters.items():
    # Calcular estadísticas zonales
    stats = zonal_stats(gdf, raster_path, stats="unique", categorical=True)
    
    # Crear un DataFrame para las estadísticas
    df = pd.DataFrame(stats)
    
    # Almacenar el DataFrame en el diccionario
    dfs[df_name] = df


In [39]:
## Sumar todos los dfs del diccionario

# Inicializa un DataFrame vacío con el mismo índice que los otros
result = pd.DataFrame(index=dfs[list(dfs.keys())[0]].index)

# Recorre cada DataFrame en el diccionario
for df in dfs.values():
    # Convertir todas las columnas a numéricas, forzando errores en NaN
    df = df.apply(pd.to_numeric, errors='coerce')
    # Sumar al DataFrame resultante
    result = result.add(df, fill_value=0)

# Añadir una columna ID con valores del 0 al número de filas - 1
result['ID'] = range(len(result))

if 'Source' in result.columns:
    result = result.drop(columns=['Source'])

result

Unnamed: 0,0,1,2,3,4,5,11,12,13,14,15,21,22,23,24,25,ID,unique
0,394534800.0,43674.0,89873.0,247517.0,346.0,25140.0,298134.0,15559.0,47990.0,1332.0,,114.0,87.0,438.0,36.0,,0,14.0
1,358681700.0,406426.0,201920.0,643834.0,1497.0,60090.0,882127.0,69670.0,224621.0,9348.0,353.0,2051.0,776.0,4338.0,236.0,,1,22.0
2,211050500.0,65430.0,128728.0,429495.0,416.0,40433.0,489258.0,33874.0,59269.0,5800.0,,1127.0,36.0,83.0,25.0,,2,14.0
3,632849300.0,889738.0,221526.0,206575.0,1012.0,79594.0,580927.0,112331.0,424801.0,19926.0,,1755.0,843.0,11681.0,598.0,,3,50.0
4,436164300.0,168877.0,226253.0,476678.0,541.0,48624.0,628170.0,36632.0,133719.0,15510.0,,148.0,22.0,781.0,17.0,,4,14.0
5,325503900.0,179202.0,420575.0,1956275.0,751.0,64471.0,2185768.0,64082.0,155690.0,6771.0,,1923.0,455.0,2004.0,67.0,,5,14.0
6,598760.0,366462.0,8457.0,926.0,464.0,47083.0,19657.0,28974.0,288275.0,23249.0,,137.0,1636.0,32965.0,6090.0,,6,14.0
7,719392500.0,259384.0,531592.0,1433373.0,2374.0,132449.0,1653050.0,93293.0,247894.0,99517.0,962.0,1863.0,62.0,1721.0,523.0,,7,28.0
8,220981500.0,50146.0,137166.0,381259.0,286.0,33281.0,499974.0,9416.0,23065.0,3840.0,,219.0,28.0,69.0,,,8,13.0
9,375833400.0,109302.0,196093.0,626233.0,1461.0,54833.0,751489.0,41130.0,90129.0,11663.0,,313.0,,238.0,356.0,,9,13.0


In [40]:
## Apilar los dfs

# Función para agregar columna ID y columna Source
def add_identifiers(df, source_name):
    df['ID'] = range(len(df))  # Añadir columna de IDs únicos
    df['Source'] = source_name  # Añadir columna de origen
    return df

# Aplicar la función a cada DataFrame en el diccionario
dfs_id = [add_identifiers(df, key) for key, df in dfs.items()]

# Concatenar todos los DataFrames con las columnas adicionales
combined_df = pd.concat(dfs_id, ignore_index=True)
combined_df


Unnamed: 0,unique,0,1,2,3,4,5,11,12,13,14,21,22,23,24,ID,Source,15,25
0,,,,,,,,,,,,,,,,0,df10_10,,
1,,,,,,,,,,,,,,,,1,df10_10,,
2,,,,,,,,,,,,,,,,2,df10_10,,
3,,,,,,,,,,,,,,,,3,df10_10,,
4,,,,,,,,,,,,,,,,4,df10_10,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,10.0,108725542.0,26709.0,102928.0,169676.0,28.0,6807.0,259150.0,4320.0,5421.0,,202.0,,,,20,df12_12,,
171,,,,,,,,,,,,,,,,21,df12_12,,
172,13.0,160221720.0,283916.0,24877.0,8511.0,125.0,23371.0,125069.0,27922.0,118372.0,3077.0,552.0,220.0,1269.0,,22,df12_12,,
173,,,,,,,,,,,,,,,,23,df12_12,,


In [46]:
# Importar archivo JSON
json_file_path = '../Assignment_8/apilado.json'

# Cargar JSON directamente en un DataFrame
prueba = pd.read_json(json_file_path, lines=True)

prueba


Unnamed: 0,unique,0,1,2,3,4,5,11,12,13,14,21,22,23,24,ID,Source,15,25
0,,,,,,,,,,,,,,,,0,df10_10,,
1,,,,,,,,,,,,,,,,1,df10_10,,
2,,,,,,,,,,,,,,,,2,df10_10,,
3,,,,,,,,,,,,,,,,3,df10_10,,
4,,,,,,,,,,,,,,,,4,df10_10,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,10.0,108725542.0,26709.0,102928.0,169676.0,28.0,6807.0,259150.0,4320.0,5421.0,,202.0,,,,20,df12_12,,
171,,,,,,,,,,,,,,,,21,df12_12,,
172,13.0,160221720.0,283916.0,24877.0,8511.0,125.0,23371.0,125069.0,27922.0,118372.0,3077.0,552.0,220.0,1269.0,,22,df12_12,,
173,,,,,,,,,,,,,,,,23,df12_12,,


In [None]:
# Crear una lista para almacenar los porcentajes de cobertura por variable
coverage_percentages = []

# Abrir el mosaico ráster
with rasterio.open(mosaic_raster_path) as src:
    for i in range(1, src.count + 1):  # Para cada banda del ráster
        band_coverage = []

        # Para cada polígono de departamento, calcular el porcentaje de área cubierta
        for _, department in gdf_departments.iterrows():
            # Extraer el polígono del departamento
            geometry = [department['geometry']]

            # Recortar el ráster al área del departamento
            out_image, out_transform = mask(src, geometry, crop=True, indexes=i)
            out_image = out_image[0]  # Extraer solo la banda

            # Calcular el porcentaje de píxeles no nulos
            total_pixels = out_image.size
            covered_pixels = np.count_nonzero(out_image)

            # Calcular el porcentaje de cobertura
            percentage_coverage = (covered_pixels / total_pixels) * 100
            band_coverage.append(percentage_coverage)

        # Añadir los resultados a la lista
        coverage_percentages.append(band_coverage)

# Convertir la lista de porcentajes a columnas en el GeoDataFrame
for i in range(len(coverage_percentages)):
    gdf_departments[f'Variable_{i+1}'] = coverage_percentages[i]

# 4. Crear el mapa coroplético usando Folium
center_lat, center_lon = gdf_departments.geometry.centroid.y.mean(), gdf_departments.geometry.centroid.x.mean()
folium_map = folium.Map(location=[center_lat, center_lon], zoom_start=6)

# Añadir las capas de choropleth al mapa para cada variable
for i in range(15):  # Asumimos que el ráster tiene 15 bandas
    folium.Choropleth(
        geo_data=gdf_departments.to_json(),
        name=f'Variable {i+1}',
        data=gdf_departments,
        columns=[gdf_departments.index, f'Variable_{i+1}'],
        key_on='feature.id',
        fill_color='YlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=f'Variable {i+1} (%)',
    ).add_to(folium_map)

# Añadir el control de capas
folium.LayerControl().add_to(folium_map)

# 5. Guardar el mapa en un archivo HTML
html_output_path = 'nombre_de_tu_rama.html'
folium_map.save(html_output_path)

print(f'Mapa coroplético guardado en {html_output_path}')