In [9]:
# Point pyproj at the PROJ data directory that ships with the
# 'peru_environment' conda environment.
# NOTE(review): presumably required so that geopandas/pyproj can find the PROJ
# database on this machine — confirm. The path is machine-specific.
from pyproj import datadir
datadir.set_data_dir("/home/jupyter-daniela/.conda/envs/peru_environment/share/proj")


In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from pathlib import Path
import numpy as np
import re
import warnings
warnings.filterwarnings("ignore")

class CalasDataVisualizer:
    """Load, summarise, grid and plot anchoveta "calas" CSV records.

    Typical use: call ``cargar_datos(anio)`` to populate ``self.df`` and then
    ``estadisticas_datos()``, ``visualizar_datos(anio)`` or
    ``grillado_semanal(...)`` followed by ``visualizar_grilla(...)``.

    Two column names are referenced with a trailing space
    (``'porcentaje_juvenil '`` and ``'latitud '``); presumably this mirrors the
    CSV headers, so they are used exactly as written.
    """

    # Default country-boundary shapefile drawn as the base map of every figure.
    ruta_limites = Path("/home/jupyter-daniela/suyana/geometries/ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp")
    # Default directory where the PNG figures are written.
    dir_salida = Path("/home/jupyter-daniela/suyana/peru_production/displays")
    # Longitude / latitude window shown in every map panel.
    _XLIM = (-83, -74.5)
    _YLIM = (-16, -4)
    # Cached boundary GeoDataFrame; filled on first use by _cargar_limites().
    _limites = None

    def __init__(self, ruta_base, ruta_limites=None, dir_salida=None):
        """Create a visualizer rooted at ``ruta_base``.

        Args:
            ruta_base: Directory that contains ``ihma_data/<year>/*.csv``.
            ruta_limites: Optional path of the boundary shapefile; defaults to
                the class-level ``ruta_limites``.
            dir_salida: Optional directory for saved figures; defaults to the
                class-level ``dir_salida``.
        """
        self.ruta_base = Path(ruta_base)
        self.df = None
        if ruta_limites is not None:
            self.ruta_limites = Path(ruta_limites)
        if dir_salida is not None:
            self.dir_salida = Path(dir_salida)

    # ------------------------------------------------------------------ helpers

    def _datos_cargados(self):
        """Return ``self.df`` or raise ValueError when nothing has been loaded."""
        # __init__ sets self.df = None, so the attribute always exists; the
        # meaningful check is whether it still holds None.
        df = getattr(self, "df", None)
        if df is None:
            raise ValueError("Primero debes cargar los datos con cargar_datos()")
        return df

    def _cargar_limites(self):
        """Read the boundary shapefile once and reuse it for every figure."""
        if self._limites is None:
            self._limites = gpd.read_file(self.ruta_limites)
        return self._limites

    @staticmethod
    def _meses_de_temporada(temporada, df_temp):
        """Return the calendar months to plot for one season label."""
        temp_str = str(temporada).lower()
        if '1ra' in temp_str or 'primera' in temp_str:
            return [4, 5, 6, 7]
        if '2da' in temp_str or 'segunda' in temp_str:
            return [11, 12, 1, 2]
        # Unknown label: fall back to the months actually present in the data.
        return [int(m) for m in sorted(df_temp['fecha_cala'].dt.month.dropna().unique())]

    def _mapa_mensual(self, df, meses, titulo, nombre_archivo, alto):
        """Draw one row of monthly scatter maps and save it as a PNG."""
        if len(meses) == 0:
            print(f"Sin datos para graficar: {titulo}")
            return
        # assign() works on a copy, so the caller's frame (possibly self.df)
        # does not gain a 'mes' column as a side effect.
        df = df.assign(mes=df['fecha_cala'].dt.month)
        limites = self._cargar_limites()

        fig, axes = plt.subplots(1, len(meses), figsize=(6 * len(meses), alto),
                                 sharex=True, sharey=True, squeeze=False)
        mappable = None  # last scatter drawn; drives the shared colorbar
        for ax, mes in zip(axes[0], meses):
            limites.plot(ax=ax, facecolor='none', edgecolor='black')
            df_mes = df[df['mes'] == mes]
            if not df_mes.empty:
                mappable = ax.scatter(df_mes['longitud'], df_mes['latitud '],
                                      c=df_mes['declarado_tm'],
                                      cmap='YlOrRd',
                                      alpha=0.6,
                                      s=50,
                                      vmin=0,
                                      vmax=200)
            ax.set_xlim(self._XLIM)
            ax.set_ylim(self._YLIM)
            ax.set_title(f'Mes {mes}', fontsize=12)
            ax.set_xlabel('Longitud', fontsize=10)
            ax.set_ylabel('Latitud', fontsize=10)

        fig.suptitle(titulo, fontsize=16, x=0.2, y=0.95)
        # Only add a colorbar when at least one panel actually has points;
        # otherwise there is no mappable to describe.
        if mappable is not None:
            cax = fig.add_axes([0.92, 0.15, 0.015, 0.7])
            fig.colorbar(mappable, cax=cax, label='Toneladas métricas declaradas')
        fig.savefig(self.dir_salida / nombre_archivo, bbox_inches='tight')
        plt.close(fig)  # close this figure explicitly, not "the current one"

    # --------------------------------------------------------------- public API

    def cargar_datos(self, anio):
        """Load every matching calas CSV of ``anio`` into ``self.df``.

        A file matches when its name contains 'Calas', 'anchoveta' and either
        'Primera temporada' or 'Segunda temporada' (case-insensitive). All
        matching files are concatenated; ``fecha_cala`` is parsed as
        day/month/year, ``declarado_tm`` and ``porcentaje_juvenil `` are coerced
        to numbers (invalid values become NaN) and a ``semana`` column with the
        ISO week of ``fecha_cala`` is added.

        Returns:
            The loaded DataFrame (also stored in ``self.df``).

        Raises:
            FileNotFoundError: If no file in ``<ruta_base>/ihma_data/<anio>``
                matches.
        """
        carpeta = self.ruta_base / "ihma_data" / str(anio)
        # sorted() makes the load order (and therefore the row order)
        # independent of the filesystem's directory listing order.
        archivos_filtrados = [
            a for a in sorted(carpeta.glob("*.csv"))
            if re.search(r'Calas', a.name, re.IGNORECASE)
            and re.search(r'(Primera|Segunda) temporada', a.name, re.IGNORECASE)
            and re.search(r'anchoveta', a.name, re.IGNORECASE)
        ]

        print(f"Archivos encontrados para el año {anio}: {[a.name for a in archivos_filtrados]}")

        if not archivos_filtrados:
            raise FileNotFoundError(f"No se encontró archivo con 'Calas' y 'Primera o Segunda temporada' en {carpeta}")

        df = pd.concat(
            [pd.read_csv(a, low_memory=False) for a in archivos_filtrados],
            ignore_index=True,
        )
        df['fecha_cala'] = pd.to_datetime(df['fecha_cala'], format='%d/%m/%Y')
        df['declarado_tm'] = pd.to_numeric(df['declarado_tm'], errors='coerce')
        df['porcentaje_juvenil '] = pd.to_numeric(df['porcentaje_juvenil '], errors='coerce')
        df['semana'] = df['fecha_cala'].dt.isocalendar().week
        self.df = df
        return self.df

    def estadisticas_datos(self):
        """Return summary statistics of the loaded records.

        Returns:
            dict with the record count, min/max ``fecha_cala``, ``describe()``
            of ``declarado_tm`` and ``porcentaje_juvenil ``, and the number of
            missing values in those three columns.

        Raises:
            ValueError: If ``cargar_datos()`` has not been called yet.
        """
        df = self._datos_cargados()
        return {
            "n_registros": len(df),
            "fechas": {
                "min": df['fecha_cala'].min(),
                "max": df['fecha_cala'].max()
            },
            "declarado_tm": df['declarado_tm'].describe().to_dict(),
            "porcentaje_juvenil": df['porcentaje_juvenil '].describe().to_dict(),
            "valores_faltantes": df[['fecha_cala', 'declarado_tm', 'porcentaje_juvenil ']].isna().sum().to_dict()
        }

    def visualizar_datos(self, anio):
        """Save scatter maps of the calas over the country boundaries.

        When the data has a ``temporada`` column, one figure per season is
        written (``calas_distribution_<anio>_<temporada>.png``) with one panel
        per month of that season; otherwise a single figure
        (``calas_distribution_<anio>.png``) with one panel per month present.
        Points are coloured by ``declarado_tm`` on a fixed 0-200 scale.

        Raises:
            ValueError: If ``cargar_datos()`` has not been called yet.
        """
        df = self._datos_cargados()

        if 'temporada' not in df.columns:
            meses = [int(m) for m in sorted(df['fecha_cala'].dt.month.dropna().unique())]
            self._mapa_mensual(
                df, meses,
                titulo=f'Distribución espacial de Calas - {anio}',
                nombre_archivo=f"calas_distribution_{anio}.png",
                alto=8,
            )
            return

        for temporada in df['temporada'].dropna().unique():
            df_temp = df[df['temporada'] == temporada]
            if df_temp.empty:
                print(f"Sin datos para {temporada} en {anio}")
                continue
            self._mapa_mensual(
                df_temp, self._meses_de_temporada(temporada, df_temp),
                titulo=f'Distribución espacial de Calas - {temporada} {anio}',
                nombre_archivo=f"calas_distribution_{anio}_{temporada}.png",
                alto=6,
            )

    def grillado_semanal(self, lat_bins, lon_bins):
        """Sum ``declarado_tm`` per week and grid cell, separately per season.

        Args:
            lat_bins: Latitude bin edges (any order; they are sorted here).
            lon_bins: Longitude bin edges (any order; they are sorted here).

        Returns:
            DataFrame with columns ``['semana', 'lat_bin', 'lon_bin',
            'suma_pescado', 'temporada']``. Each bin is labelled with its lower
            edge. Every (week, lat_bin, lon_bin) combination is present for a
            season, with 0 where nothing was caught. Empty (but with those
            columns) when no record has a season.

        Raises:
            ValueError: If ``cargar_datos()`` has not been called yet.
        """
        df = self._datos_cargados().copy()
        columnas = ['semana', 'lat_bin', 'lon_bin', 'suma_pescado', 'temporada']

        # pd.cut requires monotonically increasing edges.
        lat_bins = sorted(lat_bins)
        lon_bins = sorted(lon_bins)
        df['lat_bin'] = pd.cut(df['latitud '], bins=lat_bins, labels=lat_bins[:-1], include_lowest=True)
        df['lon_bin'] = pd.cut(df['longitud'], bins=lon_bins, labels=lon_bins[:-1], include_lowest=True)

        resultados = []
        for temporada in df['temporada'].dropna().unique():
            resultado = (
                df[df['temporada'] == temporada]
                # observed=False is spelled out so the zero-filled full grid
                # does not depend on the pandas version's default.
                .groupby(['semana', 'lat_bin', 'lon_bin'], dropna=False, observed=False)['declarado_tm']
                .sum()
                .reset_index()
                .rename(columns={'declarado_tm': 'suma_pescado'})
            )
            resultado['temporada'] = temporada
            resultados.append(resultado)

        if not resultados:
            # pd.concat([]) raises; return an empty frame with the same schema.
            return pd.DataFrame(columns=columnas)
        return pd.concat(resultados, ignore_index=True)

    def visualizar_grilla(self, grilla, temporada_objetivo='1ra 2022', ncols=6, semana_corte=42):
        """Save a matrix of weekly filled-contour maps for one season.

        Args:
            grilla: Output of ``grillado_semanal``.
            temporada_objetivo: Season label to plot (value of ``temporada``).
            ncols: Number of panels per row.
            semana_corte: Weeks >= this value are placed before the others.
                NOTE(review): presumably so that a season crossing the New Year
                is shown in chronological order — confirm.

        The figure is written to
        ``calas_distribution_<temporada_objetivo>.png``. Nothing is drawn when
        the season has no weeks.
        """
        grilla_temp = grilla[grilla['temporada'] == temporada_objetivo].copy()
        semanas = [int(s) for s in grilla_temp['semana'].dropna().unique()]
        semanas_grilla = (
            [s for s in semanas if s >= semana_corte]
            + [s for s in semanas if s < semana_corte]
        )
        print(f'Semanas dentro de la temporada {temporada_objetivo}: {semanas_grilla}')

        n = len(semanas_grilla)
        if n == 0:
            # plt.subplots(0, ncols) would raise; there is nothing to plot.
            print(f"Sin datos para la temporada {temporada_objetivo}")
            return

        limites = self._cargar_limites()
        nrows = int(np.ceil(n / ncols))
        fig, axes = plt.subplots(nrows, ncols, figsize=(6 * ncols, 7 * nrows),
                                 sharex=True, sharey=True, squeeze=False)
        levels = np.linspace(0, 1000, 11)
        cf = None  # last contour set drawn; drives the shared colorbar

        for idx, semana in enumerate(semanas_grilla):
            ax = axes[idx // ncols, idx % ncols]
            limites.plot(ax=ax, facecolor='none', edgecolor='black')
            datos_semana = grilla_temp[grilla_temp['semana'] == semana]
            tabla = datos_semana.pivot_table(
                index='lat_bin', columns='lon_bin', values='suma_pescado', aggfunc='sum', fill_value=0
            )
            Z = tabla.to_numpy(dtype=float)
            # contourf needs at least a 2x2 grid; skip degenerate weeks.
            if Z.shape[0] >= 2 and Z.shape[1] >= 2:
                X, Y = np.meshgrid(tabla.columns.astype(float), tabla.index.astype(float))
                # Cells without catch are masked so they are left blank.
                Z = np.where((Z == 0) | np.isnan(Z), np.nan, Z)
                cf = ax.contourf(X, Y, Z, levels=levels, cmap='viridis', alpha=0.7, extend='max', vmin=0.0001)
            ax.set_xlim(self._XLIM)
            ax.set_ylim(self._YLIM)
            ax.set_title(f'Semana {semana}', fontsize=14)
            ax.set_xlabel('Longitud', fontsize=12)
            ax.set_ylabel('Latitud', fontsize=12)

        # Remove the unused panels of the last row.
        for idx in range(n, nrows * ncols):
            fig.delaxes(axes[idx // ncols, idx % ncols])

        fig.suptitle(f'Toneladas pescadas por semana - Temporada {temporada_objetivo}', fontsize=18, x=0.5, y=0.99)
        if cf is not None:
            cax = fig.add_axes([0.92, 0.15, 0.015, 0.7])
            fig.colorbar(cf, cax=cax, label='Toneladas métricas')

        fig.savefig(self.dir_salida / f"calas_distribution_{temporada_objetivo}.png", bbox_inches='tight', dpi=400)
        plt.close(fig)


In [11]:
# Build one visualizer and produce the monthly scatter maps for every year.
visualizer = CalasDataVisualizer("/home/jupyter-daniela/suyana/peru_production/inputs/")
anios = list(range(2015, 2025))

for year in anios:
    print(f"Procesando año: {year}")
    try:
        df = visualizer.cargar_datos(year)
        df_estadisticas = visualizer.estadisticas_datos()
        visualizer.visualizar_datos(year)
    except FileNotFoundError as e:
        # A year without matching CSV files is reported and skipped.
        print(e)

Procesando año: 2015
Archivos encontrados para el año 2015: ['Segunda temporada de anchoveta – Calas y biometría.csv']
Procesando año: 2016
Archivos encontrados para el año 2016: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Procesando año: 2017
Archivos encontrados para el año 2017: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Procesando año: 2018
Archivos encontrados para el año 2018: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Procesando año: 2019
Archivos encontrados para el año 2019: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Procesando año: 2020
Archivos encontrados para el año 2020: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.cs

## grilla para temperatura

In [None]:
# Latitude bin edges, listed from north (-3.145833) to south (-19.60417) in
# steps of about 0.041667 degrees (1/24 degree). They are sorted into
# ascending order in a later cell before being passed to grillado_semanal().
# NOTE(review): presumably these reproduce the grid of the temperature product
# named in the section heading — confirm.
lat_bins = [
    -3.145833, -3.1875, -3.229167, -3.270833, -3.3125, -3.354167, -3.395833, -3.4375, -3.479167, -3.520833, -3.5625, -3.604167, -3.645833,
    -3.6875, -3.729167, -3.770833, -3.8125, -3.854167, -3.895833, -3.9375, -3.979167, -4.020833, -4.0625, -4.104167, -4.145833, -4.1875, -4.229167,
    -4.270833, -4.3125, -4.354167, -4.395833, -4.4375, -4.479167, -4.520833, -4.5625, -4.604167, -4.645833, -4.6875, -4.729167, -4.770833, -4.8125,
    -4.854167, -4.895833, -4.9375, -4.979167, -5.020833, -5.0625, -5.104167, -5.145833, -5.1875, -5.229167, -5.270833, -5.3125, -5.354167, -5.395833,
    -5.4375, -5.479167, -5.520833, -5.5625, -5.604167, -5.645833, -5.6875, -5.729167, -5.770833, -5.8125, -5.854167, -5.895833, -5.9375, -5.979167,
    -6.020833, -6.0625, -6.104167, -6.145833, -6.1875, -6.229167, -6.270833, -6.3125, -6.354167, -6.395833, -6.4375, -6.479167, -6.520833, -6.5625,
    -6.604167, -6.645833, -6.6875, -6.729167, -6.770833, -6.8125, -6.854167, -6.895833, -6.9375, -6.979167, -7.020833, -7.0625, -7.104167, -7.145833,
    -7.1875, -7.229167, -7.270833, -7.3125, -7.354167, -7.395833, -7.4375, -7.479167, -7.520833, -7.5625, -7.604167, -7.645833, -7.6875, -7.729167,
    -7.770833, -7.8125, -7.854167, -7.895833, -7.9375, -7.979167, -8.020833, -8.0625, -8.104167, -8.145833, -8.1875, -8.229167, -8.270833, -8.3125,
    -8.354167, -8.395833, -8.4375, -8.479167, -8.520834, -8.5625, -8.604167, -8.645834, -8.6875, -8.729167, -8.770834, -8.8125, -8.854167, -8.895834,
    -8.9375, -8.979167, -9.020834, -9.0625, -9.104167, -9.145834, -9.1875, -9.229167, -9.270834, -9.3125, -9.354167, -9.395834, -9.4375, -9.479167,
    -9.520834, -9.5625, -9.604167, -9.645834, -9.6875, -9.729167, -9.770834, -9.8125, -9.854167, -9.895834, -9.9375, -9.979167, -10.02083, -10.0625,
    -10.10417, -10.14583, -10.1875, -10.22917, -10.27083, -10.3125, -10.35417, -10.39583, -10.4375, -10.47917, -10.52083, -10.5625, -10.60417, -10.64583,
    -10.6875, -10.72917, -10.77083, -10.8125, -10.85417, -10.89583, -10.9375, -10.97917, -11.02083, -11.0625, -11.10417, -11.14583, -11.1875, -11.22917,
    -11.27083, -11.3125, -11.35417, -11.39583, -11.4375, -11.47917, -11.52083, -11.5625, -11.60417, -11.64583, -11.6875, -11.72917, -11.77083, -11.8125,
    -11.85417, -11.89583, -11.9375, -11.97917, -12.02083, -12.0625, -12.10417, -12.14583, -12.1875, -12.22917, -12.27083, -12.3125, -12.35417, -12.39583,
    -12.4375, -12.47917, -12.52083, -12.5625, -12.60417, -12.64583, -12.6875, -12.72917, -12.77083, -12.8125, -12.85417, -12.89583, -12.9375, -12.97917,
    -13.02083, -13.0625, -13.10417, -13.14583, -13.1875, -13.22917, -13.27083, -13.3125, -13.35417, -13.39583, -13.4375, -13.47917, -13.52083, -13.5625,
    -13.60417, -13.64583, -13.6875, -13.72917, -13.77083, -13.8125, -13.85417, -13.89583, -13.9375, -13.97917, -14.02083, -14.0625, -14.10417, -14.14583,
    -14.1875, -14.22917, -14.27083, -14.3125, -14.35417, -14.39583, -14.4375, -14.47917, -14.52083, -14.5625, -14.60417, -14.64583, -14.6875, -14.72917,
    -14.77083, -14.8125, -14.85417, -14.89583, -14.9375, -14.97917, -15.02083, -15.0625, -15.10417, -15.14583, -15.1875, -15.22917, -15.27083, -15.3125,
    -15.35417, -15.39583, -15.4375, -15.47917, -15.52083, -15.5625, -15.60417, -15.64583, -15.6875, -15.72917, -15.77083, -15.8125, -15.85417, -15.89583,
    -15.9375, -15.97917, -16.02083, -16.0625, -16.10417, -16.14583, -16.1875, -16.22917, -16.27083, -16.3125, -16.35417, -16.39583, -16.4375, -16.47917,
    -16.52083, -16.5625, -16.60417, -16.64583, -16.6875, -16.72917, -16.77083, -16.8125, -16.85417, -16.89583, -16.9375, -16.97917, -17.02083, -17.0625,
    -17.10417, -17.14583, -17.1875, -17.22917, -17.27083, -17.3125, -17.35417, -17.39583, -17.4375, -17.47917, -17.52083, -17.5625, -17.60417, -17.64583,
    -17.6875, -17.72917, -17.77083, -17.8125, -17.85417, -17.89583, -17.9375, -17.97917, -18.02083, -18.0625, -18.10417, -18.14583, -18.1875, -18.22917,
    -18.27083, -18.3125, -18.35417, -18.39583, -18.4375, -18.47917, -18.52083, -18.5625, -18.60417, -18.64583, -18.6875, -18.72917, -18.77083, -18.8125,
    -18.85417, -18.89583, -18.9375, -18.97917, -19.02083, -19.0625, -19.10417, -19.14583, -19.1875, -19.22917, -19.27083, -19.3125, -19.35417, -19.39583,
    -19.4375, -19.47917, -19.52083, -19.5625, -19.60417
]

# Longitude bin edges, listed from west (-84.60416) to east (-69.64584) in
# steps of about 0.041667 degrees (1/24 degree), passed to grillado_semanal()
# in a later cell.
# NOTE(review): presumably these reproduce the grid of the temperature product
# named in the section heading — confirm.
lon_bins = [
    -84.60416, -84.5625, -84.52084, -84.47916, -84.4375, -84.39584, -84.35416, -84.3125, -84.27084, -84.22916, -84.1875, -84.14584,
    -84.10416, -84.0625, -84.02084, -83.97916, -83.9375, -83.89584, -83.85416, -83.8125, -83.77084, -83.72916, -83.6875, -83.64584,
    -83.60416, -83.5625, -83.52084, -83.47916, -83.4375, -83.39584, -83.35416, -83.3125, -83.27084, -83.22916, -83.1875, -83.14584,
    -83.10416, -83.0625, -83.02084, -82.97916, -82.9375, -82.89584, -82.85416, -82.8125, -82.77084, -82.72916, -82.6875, -82.64584,
    -82.60416, -82.5625, -82.52084, -82.47916, -82.4375, -82.39584, -82.35416, -82.3125, -82.27084, -82.22916, -82.1875, -82.14584,
    -82.10416, -82.0625, -82.02084, -81.97916, -81.9375, -81.89584, -81.85416, -81.8125, -81.77084, -81.72916, -81.6875, -81.64584,
    -81.60416, -81.5625, -81.52084, -81.47916, -81.4375, -81.39584, -81.35416, -81.3125, -81.27084, -81.22916, -81.1875, -81.14584,
    -81.10416, -81.0625, -81.02084, -80.97916, -80.9375, -80.89584, -80.85416, -80.8125, -80.77084, -80.72916, -80.6875, -80.64584,
    -80.60416, -80.5625, -80.52084, -80.47916, -80.4375, -80.39584, -80.35416, -80.3125, -80.27084, -80.22916, -80.1875, -80.14584,
    -80.10416, -80.0625, -80.02084, -79.97916, -79.9375, -79.89584, -79.85416, -79.8125, -79.77084, -79.72916, -79.6875, -79.64584,
    -79.60416, -79.5625, -79.52084, -79.47916, -79.4375, -79.39584, -79.35416, -79.3125, -79.27084, -79.22916, -79.1875, -79.14584,
    -79.10416, -79.0625, -79.02084, -78.97916, -78.9375, -78.89584, -78.85416, -78.8125, -78.77084, -78.72916, -78.6875, -78.64584,
    -78.60416, -78.5625, -78.52084, -78.47916, -78.4375, -78.39584, -78.35416, -78.3125, -78.27084, -78.22916, -78.1875, -78.14584,
    -78.10416, -78.0625, -78.02084, -77.97916, -77.9375, -77.89584, -77.85416, -77.8125, -77.77084, -77.72916, -77.6875, -77.64584,
    -77.60416, -77.5625, -77.52084, -77.47916, -77.4375, -77.39584, -77.35416, -77.3125, -77.27084, -77.22916, -77.1875, -77.14584,
    -77.10416, -77.0625, -77.02084, -76.97916, -76.9375, -76.89584, -76.85416, -76.8125, -76.77084, -76.72916, -76.6875, -76.64584,
    -76.60416, -76.5625, -76.52084, -76.47916, -76.4375, -76.39584, -76.35416, -76.3125, -76.27084, -76.22916, -76.1875, -76.14584,
    -76.10416, -76.0625, -76.02084, -75.97916, -75.9375, -75.89584, -75.85416, -75.8125, -75.77084, -75.72916, -75.6875, -75.64584,
    -75.60416, -75.5625, -75.52084, -75.47916, -75.4375, -75.39584, -75.35416, -75.3125, -75.27084, -75.22916, -75.1875, -75.14584,
    -75.10416, -75.0625, -75.02084, -74.97916, -74.9375, -74.89584, -74.85416, -74.8125, -74.77084, -74.72916, -74.6875, -74.64584,
    -74.60416, -74.5625, -74.52084, -74.47916, -74.4375, -74.39584, -74.35416, -74.3125, -74.27084, -74.22916, -74.1875, -74.14584,
    -74.10416, -74.0625, -74.02084, -73.97916, -73.9375, -73.89584, -73.85416, -73.8125, -73.77084, -73.72916, -73.6875, -73.64584,
    -73.60416, -73.5625, -73.52084, -73.47916, -73.4375, -73.39584, -73.35416, -73.3125, -73.27084, -73.22916, -73.1875, -73.14584,
    -73.10416, -73.0625, -73.02084, -72.97916, -72.9375, -72.89584, -72.85416, -72.8125, -72.77084, -72.72916, -72.6875, -72.64584,
    -72.60416, -72.5625, -72.52084, -72.47916, -72.4375, -72.39584, -72.35416, -72.3125, -72.27084, -72.22916, -72.1875, -72.14584,
    -72.10416, -72.0625, -72.02084, -71.97916, -71.9375, -71.89584, -71.85416, -71.8125, -71.77084, -71.72916, -71.6875, -71.64584,
    -71.60416, -71.5625, -71.52084, -71.47916, -71.4375, -71.39584, -71.35416, -71.3125, -71.27084, -71.22916, -71.1875, -71.14584,
    -71.10416, -71.0625, -71.02084, -70.97916, -70.9375, -70.89584, -70.85416, -70.8125, -70.77084, -70.72916, -70.6875, -70.64584,
    -70.60416, -70.5625, -70.52084, -70.47916, -70.4375, -70.39584, -70.35416, -70.3125, -70.27084, -70.22916, -70.1875, -70.14584,
    -70.10416, -70.0625, -70.02084, -69.97916, -69.9375, -69.89584, -69.85416, -69.8125, -69.77084, -69.72916, -69.6875, -69.64584
]


In [None]:
# pd.cut needs ascending edges, so both edge lists are sorted first.
lat_bins = sorted(lat_bins)
lon_bins = sorted(lon_bins)

# Load every available year; years without matching files are skipped.
dfs = []
for year in anios:
    try:
        df = visualizer.cargar_datos(year)
    except FileNotFoundError:
        continue
    dfs.append(df)

# Grid all years at once by handing the combined frame to the visualizer.
df_todos = pd.concat(dfs, ignore_index=True)
visualizer.df = df_todos

grilla = visualizer.grillado_semanal(lat_bins, lon_bins)

In [None]:
# One weekly contour figure per (season, year) label that exists in the grid.
for year in anios:
    print(f"Procesando año: {year}")
    for temporada in ('1ra', '2da'):
        temporada_objetivo = f'{temporada} {year}'
        grilla_temp = grilla[grilla['temporada'] == temporada_objetivo]
        if not grilla_temp.empty:
            try:
                visualizer.visualizar_grilla(grilla, temporada_objetivo=temporada_objetivo)
            except FileNotFoundError as e:
                print(e)
        else:
            print(f"Sin datos para la temporada {temporada_objetivo}")


In [None]:
# Write the gridded weekly totals to CSV, omitting the DataFrame index.
grilla.to_csv("/home/jupyter-daniela/suyana/peru_production/outputs/calas_grillado_semanal.csv", index=False)

In [None]:
# Bar chart of how many grid rows exist for each value of 'semana',
# ordered by week number.
grilla.semana.value_counts().sort_index().plot(kind='bar', figsize=(12, 6), color='skyblue')

In [None]:
# Display the gridded DataFrame as the cell output.
grilla

In [None]:
import matplotlib.ticker as mticker

# Total gridded catch per season, drawn as a horizontal bar chart.
# Rows with suma_pescado == 0 are dropped first; they do not change the totals.
grilla_filtrada = grilla[grilla['suma_pescado'] != 0].copy()
# The season label carries a 4-digit year (e.g. '1ra 2022'); extract it in one
# vectorised pass instead of a per-row Python regex call.
grilla_filtrada['anio'] = (
    grilla_filtrada['temporada'].str.extract(r'(\d{4})', expand=False).astype(int)
)

suma_por_temporada = (
    grilla_filtrada
    .groupby(['anio', 'temporada'])['suma_pescado']
    .sum()
    .reset_index()
    .sort_values(['anio', 'temporada'])
)

ax = suma_por_temporada.set_index('temporada')['suma_pescado'].plot(kind='barh', figsize=(8, 8), color='salmon')
# Show the x ticks as integers with '.' as the thousands separator.
ax.xaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"{int(x):,}".replace(",", ".")))
ax.set_xlabel('Toneladas métricas')
ax.set_ylabel('Temporada')
ax.set_title('Suma de pesca grillada por temporada');


In [None]:
# Total declared catch per season computed from the raw records, drawn as a
# horizontal bar chart.
# The 4-digit year is extracted from the season label in one vectorised pass.
# Records with a missing or year-less label get <NA> instead of raising, and
# groupby then leaves them out.
df_todos['anio'] = pd.to_numeric(
    df_todos['temporada'].str.extract(r'(\d{4})', expand=False),
    errors='coerce',
).astype('Int64')

suma_por_temporada = (
    df_todos
    .groupby(['anio', 'temporada'])['declarado_tm']
    .sum()
    .reset_index()
    .sort_values(['anio', 'temporada'])
)

ax = suma_por_temporada.set_index('temporada')['declarado_tm'].plot(kind='barh', figsize=(8, 8), color='salmon')
ax.set_xlabel('Toneladas métricas declaradas')
ax.set_ylabel('Temporada')
ax.set_title('Suma de pesca declarada por temporada');


## grilla para SSS


In [3]:
# Bin edges every 0.25 degrees: latitude -15.875 .. -4.125 (48 edges) and
# longitude -82.875 .. -74.625 (34 edges). Every edge is a multiple of 0.125,
# so start + 0.25 * i is exact in floating point.
# NOTE(review): this cell relies on `anios`, `visualizer` and `pd` from earlier
# cells; run them first on a fresh kernel.
lat_bins = [-15.875 + 0.25 * i for i in range(48)]
lon_bins = [-82.875 + 0.25 * i for i in range(34)]

lat_bins = sorted(lat_bins)
lon_bins = sorted(lon_bins)

# Load every available year; years without matching files are skipped.
dfs = []
for year in anios:
    try:
        df = visualizer.cargar_datos(year)
    except FileNotFoundError:
        continue
    dfs.append(df)

df_todos = pd.concat(dfs, ignore_index=True)
visualizer.df = df_todos

grilla = visualizer.grillado_semanal(lat_bins, lon_bins)

grilla.to_csv("/home/jupyter-daniela/suyana/peru_production/outputs/calas_grillado_semanal_grilla_SSS.csv", index=False)


Archivos encontrados para el año 2015: ['Segunda temporada de anchoveta – Calas y biometría.csv']
Archivos encontrados para el año 2016: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Archivos encontrados para el año 2017: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Archivos encontrados para el año 2018: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Archivos encontrados para el año 2019: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Archivos encontrados para el año 2020: ['Segunda temporada de anchoveta – Calas y biometría.csv', 'Primera temporada de anchoveta – Calas y biometría.csv']
Archivos encontrados para el año 2021: ['Primera temporada de pesca de anchoveta – Calas y biometría.csv', 'Segunda tempor

In [8]:

# Display the gridded DataFrame as the cell output.
grilla

Unnamed: 0,semana,lat_bin,lon_bin,suma_pescado,temporada
0,1,-15.875,-82.875,0.0,2da 2015
1,1,-15.875,-82.625,0.0,2da 2015
2,1,-15.875,-82.375,0.0,2da 2015
3,1,-15.875,-82.125,0.0,2da 2015
4,1,-15.875,-81.875,0.0,2da 2015
...,...,...,...,...,...
297193,25,-4.375,-75.875,0.0,1ra 2024
297194,25,-4.375,-75.625,0.0,1ra 2024
297195,25,-4.375,-75.375,0.0,1ra 2024
297196,25,-4.375,-75.125,0.0,1ra 2024
