In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
# Make the sibling ../src directory importable (point this at wherever your source code lives)
module_path = os.path.abspath('../src')
sys.path.insert(0, module_path)

In [2]:
import pandas as pd
import geopandas as gpd
import json
import re
from time import sleep

In [3]:
# Import geolib
import kilimo_data_lib.geolib as geolib

In [4]:
# Build the Irriapp client instance; it is used below to fetch each plot's geojson
from kilimo_data_lib.irriapp import build_irriapp_client
irriapp_client = build_irriapp_client()

In [18]:
# Load the CSV that maps each plot_id to its sub_id
df = pd.read_csv('subs_plotids.csv')

In [19]:
# Preview the first three rows of the loaded mapping
df.head(3)

Unnamed: 0,plot_id,sub_id,name
0,34980,a2d677ad-702f-4d31-a33e-950b4b033331,34980
1,33537,fcd1ad78-4940-404f-ad66-305d5ffd519f,33537
2,33538,77624dfa-5e55-4210-ab94-58db79755e48,33538


In [20]:
# Number of distinct plot ids in the input mapping
df['plot_id'].nunique()

133

In [None]:
# Compute one SWC value per (plot, image) and collect the results in `df_values`.
#
# For every row of `df`, the plot geojson is fetched from Irriapp and every file
# ending in 'swc.tiff' found under `<path_prefix><sub_id>` is passed, together
# with that geojson, to `geolib.calculate_weighted_swc_value`. Images for which
# the call returns None are skipped.
value_columns = ['plot_id', 'sub_id', 'fecha_img_SWC', 'mean_value']

path_prefix = 'SWC_images/Soil_Water_Holding_Capacity/SWHC/'

# Rows are accumulated in a plain list and converted to a DataFrame once per
# plot; calling pd.concat for every image copies the whole frame each time.
records = []

# Dataframe to save images values (stays empty if `df` has no rows)
df_values = pd.DataFrame(columns=value_columns)

for plot_id, sub_id in df[['plot_id', 'sub_id']].itertuples(index=False, name=None):

    print('Processing plot:', plot_id)

    # Get geojson from irriapp
    plot_geojson = irriapp_client.get_plot_geojson(plot_id)
    geojson_str = json.dumps(plot_geojson)

    for dirpath, dirnames, filenames in os.walk(f'./{path_prefix}{sub_id}'):

        for filename in filenames:
            if not filename.endswith('swc.tiff'):
                continue

            image_path = os.path.join(dirpath, filename)

            # The three directories directly above the image are read as
            # year / month / day. The path is normalised and split on os.sep
            # so this does not depend on '/' being the platform separator.
            ano, mes, dia = os.path.normpath(dirpath).split(os.sep)[-3:]
            fecha = f'{ano}-{mes}-{dia}'

            weighted_value = geolib.calculate_weighted_swc_value(image_path, geojson_str)

            if weighted_value is not None:
                records.append({'plot_id': plot_id,
                                'sub_id': sub_id,
                                'fecha_img_SWC': fecha,
                                'mean_value': weighted_value})

    # Rebuild and save after every plot, so the results gathered so far are
    # on disk (and in `df_values`) if the run is interrupted.
    df_values = pd.DataFrame(records, columns=value_columns)
    df_values.to_csv('images_values.csv', index=False)

In [37]:
# Save the collected values to images_values.csv (without the index column)
df_values.to_csv('images_values.csv', index=False)

In [8]:
# Reload the saved per-image values from disk; fecha_img_SWC is parsed to datetime in a later cell
df_values = pd.read_csv('images_values.csv')

In [9]:
# Number of distinct plot ids that have at least one saved value
df_values['plot_id'].nunique()

133

In [11]:
# Check whether there are big gaps caused by images that were not processed

# Parse the image date strings (YYYY-MM-DD) into datetimes
df_values['fecha_img_SWC'] = pd.to_datetime(df_values['fecha_img_SWC'], format='%Y-%m-%d')

# Count the distinct image dates per plot_id, naming the count column directly
conteo_unicos = (
    df_values
    .groupby('plot_id')['fecha_img_SWC']
    .nunique()
    .reset_index(name='cantidad_valores_unicos')
)

In [13]:
# Summary statistics of the number of distinct image dates per plot
conteo_unicos['cantidad_valores_unicos'].describe()

count     133.000000
mean     1158.864662
std        79.345121
min       893.000000
25%      1180.000000
50%      1193.000000
75%      1195.000000
max      1195.000000
Name: cantidad_valores_unicos, dtype: float64

In [14]:
# Keep only the plots with fewer than 1100 distinct image dates
conteo_unicos = conteo_unicos.loc[lambda counts: counts['cantidad_valores_unicos'] < 1100]

In [16]:
# Some plots have fewer processed images than the rest (the original note is cut off after this point);
# collect their plot ids so they can be checked again
lista_plots_to_check = conteo_unicos['plot_id'].to_list()

In [17]:
# Show the plot ids selected for re-checking
lista_plots_to_check

[25102,
 25103,
 25104,
 25105,
 25106,
 25148,
 25150,
 25151,
 25153,
 25154,
 26707,
 26709,
 26712,
 27826,
 28716,
 28717,
 28718,
 28720,
 33716,
 33717,
 33718]

In [None]:
# Re-run the SWC extraction only for the plots in `lista_plots_to_check`,
# replacing `df_values` with the reprocessed results.
#
# For every selected row of `df`, the plot geojson is fetched from Irriapp and
# every file ending in 'swc.tiff' found under `<path_prefix><sub_id>` is passed,
# together with that geojson, to `geolib.calculate_weighted_swc_value`. Images
# for which the call returns None are skipped.
value_columns = ['plot_id', 'sub_id', 'fecha_img_SWC', 'mean_value']

path_prefix = 'SWC_images/Soil_Water_Holding_Capacity/SWHC/'

# Set membership avoids scanning the list once per row.
plots_to_check = set(lista_plots_to_check)

# Rows are accumulated in a plain list and converted to a DataFrame once per
# plot; calling pd.concat for every image copies the whole frame each time.
records = []

# Dataframe to save images values (stays empty if no plot is selected)
df_values = pd.DataFrame(columns=value_columns)

for plot_id, sub_id in df[['plot_id', 'sub_id']].itertuples(index=False, name=None):

    if plot_id not in plots_to_check:
        continue

    print('Processing plot:', plot_id)

    # Get geojson from irriapp
    plot_geojson = irriapp_client.get_plot_geojson(plot_id)
    geojson_str = json.dumps(plot_geojson)

    for dirpath, dirnames, filenames in os.walk(f'./{path_prefix}{sub_id}'):

        for filename in filenames:
            if not filename.endswith('swc.tiff'):
                continue

            image_path = os.path.join(dirpath, filename)

            # The three directories directly above the image are read as
            # year / month / day. The path is normalised and split on os.sep
            # so this does not depend on '/' being the platform separator.
            ano, mes, dia = os.path.normpath(dirpath).split(os.sep)[-3:]
            fecha = f'{ano}-{mes}-{dia}'

            weighted_value = geolib.calculate_weighted_swc_value(image_path, geojson_str)

            if weighted_value is not None:
                records.append({'plot_id': plot_id,
                                'sub_id': sub_id,
                                'fecha_img_SWC': fecha,
                                'mean_value': weighted_value})

    # Rebuild and save after every processed plot, so the results gathered so
    # far are on disk (and in `df_values`) if the run is interrupted.
    df_values = pd.DataFrame(records, columns=value_columns)
    df_values.to_csv('images_values_2.csv', index=False)

In [25]:
# Count the distinct image dates per plot_id, naming the count column directly
conteo_unicos = (
    df_values
    .groupby('plot_id')['fecha_img_SWC']
    .nunique()
    .reset_index(name='cantidad_valores_unicos')
)

In [26]:
# Show the per-plot count of distinct image dates
conteo_unicos

Unnamed: 0,plot_id,cantidad_valores_unicos
0,25102,1027
1,25103,1026
2,25104,1027
3,25105,1024
4,25106,1021
5,25148,1073
6,25150,1073
7,25151,1073
8,25153,1074
9,25154,1074


### Conclusión: no faltaron imágenes por procesar en el primer intento, ya que tras un segundo reprocesamiento la cantidad de imágenes por lote es la misma.