# import

In [1]:
import pandas as pd
import openpyxl
import tensorflow as tf
import numpy as np
import os, sys
from memory_profiler import profile

2024-11-14 14:06:44.771540: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-14 14:06:44.771565: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-14 14:06:44.772270: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-14 14:06:44.777334: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Append the project folder to the import path; the `models` imports below are
# presumably resolved from it (verify the folder layout).
# NOTE(review): this is a hard-coded absolute path, and re-running the cell
# appends the same entry again.
sys.path.append('/media/jczars/4C22F02A22F01B22/Pollen_classification_view/')
print(sys.path)
# Project-local helpers used by the functions defined further down.
from models.models_train import run_train
from models.models_pre import  hyper_model_up
from models.get_data import reload_data_train, splitData, load_data_labels  
from models import utils
from models import maneger_gpu

['/media/jczars/4C22F02A22F01B22/Pollen_classification_view/0_pseudo_labels', '/home/jczars/anaconda3/envs/tf/lib/python310.zip', '/home/jczars/anaconda3/envs/tf/lib/python3.10', '/home/jczars/anaconda3/envs/tf/lib/python3.10/lib-dynload', '', '/home/jczars/.local/lib/python3.10/site-packages', '/home/jczars/anaconda3/envs/tf/lib/python3.10/site-packages', '/media/jczars/4C22F02A22F01B22/Pollen_classification_view/']



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [3]:
# Project root; `test1` builds its report/model paths from this global.
# NOTE(review): hard-coded absolute path, duplicated from the sys.path cell.
working_dir="/media/jczars/4C22F02A22F01B22/Pollen_classification_view/"
# Change the process working directory to the project root.
os.chdir(working_dir)

# functions

## prepare_data

In [4]:
@profile
def prepare_data(conf, root_path):
    """
    Set up the folders and CSV splits for one pollen-classification experiment.

    Parameters:
        conf (dict-like): One experiment configuration. Keys read here:
            - id_test: Test identifier (converted with ``int``).
            - model: Model name.
            - aug: Data augmentation tag.
            - base: Dataset name; also the stem of the dataset CSV file.
            - path_base: Dataset folder; must contain a ``labels`` sub-folder.
        root_path (str): Root directory under which the experiment folder is created.

    Returns:
        dict: ``path_train``, ``path_val`` and ``path_test`` (as returned by
        ``splitData``), ``save_dir_train`` (experiment folder), ``pseudo_csv``
        (folder for pseudo-label CSV files), ``size_of_labels`` (``len`` of the
        labeled dataset) and ``categories`` (sorted entries of the labels folder).
    """
    # The experiment ID doubles as the experiment folder name.
    experiment_name = "{}_{}_{}_{}".format(
        int(conf['id_test']), conf['model'], conf['aug'], conf['base']
    )
    dataset_name = conf['base']
    dataset_dir = conf['path_base']

    # Category names are the sorted entries of the labels folder.
    labels_dir = os.path.join(dataset_dir, "labels")
    categories = sorted(os.listdir(labels_dir))

    experiment_path = os.path.join(root_path, experiment_name)
    pseudo_csv_dir = os.path.join(experiment_path, 'pseudo_csv')

    # Create the output folders from the outermost to the innermost one.
    utils.create_folders(root_path, flag=0)
    utils.create_folders(experiment_path, flag=0)
    utils.create_folders(pseudo_csv_dir, flag=0)

    print(f"Training save directory: {experiment_path}, ID: {experiment_name}")

    csv_file_path = os.path.join(dataset_dir, f"{dataset_name}.csv")
    print('CSV data path:', csv_file_path)

    # Build the labeled dataset and report how many entries it holds.
    labeled_data = utils.create_dataSet(labels_dir, csv_file_path, categories)
    num_labels = len(labeled_data)
    print('Total labeled data count:', num_labels)

    # Split the labeled data into training, validation and test sets.
    split_paths = splitData(labeled_data, root_path, dataset_name)
    train_path, val_path, test_path = split_paths

    print("Finished preparing data")

    result = {
        'path_train': train_path,
        'path_val': val_path,
        'path_test': test_path,
    }
    result['save_dir_train'] = experiment_path
    result['pseudo_csv'] = pseudo_csv_dir
    result['size_of_labels'] = num_labels
    result['categories'] = categories
    return result

## build_train_config

In [None]:
@profile
def build_train_config(row, res_pre, time_step):
    """Assemble the dictionary of settings used to train one model.

    Parameters
    ----------
    row : pandas.Series or dict
        One configuration row. Keys read: ``model``, ``id_test``, ``path_base``,
        ``batch_size``, ``img_size``, ``split_valid``, ``last_activation``,
        ``learning_rate``, ``optimizer``, ``epochs`` and ``freeze``.
    res_pre : dict
        Result of the data-preparation step. Keys read: ``save_dir_train``,
        ``path_test`` and ``categories``.
    time_step : int
        Current iteration of the training loop; stored unchanged in the result.

    Returns
    -------
    config : dict
        Training configuration. ``save_dir`` is the ``models`` sub-folder of
        ``res_pre['save_dir_train']`` and ``num_classes`` is the number of
        categories.
    """
    experiment_dir = res_pre["save_dir_train"]
    test_csv = res_pre["path_test"]
    class_names = res_pre["categories"]

    # Trained models are stored in a dedicated sub-folder of the experiment.
    models_dir = os.path.join(experiment_dir, "models")

    # Filled in step by step so the key order matches the documented layout.
    config = {
        "model": row["model"],
        "id_test": row["id_test"],
        "data_path": row["path_base"],
        "test_path": test_csv,
    }
    for key in ("batch_size", "img_size"):
        config[key] = row[key]
    config["num_classes"] = len(class_names)
    for key in ("split_valid", "last_activation"):
        config[key] = row[key]
    config["save_dir"] = models_dir
    for key in ("learning_rate", "optimizer", "epochs", "freeze"):
        config[key] = row[key]
    config["time_step"] = time_step

    return config

## train_model

In [5]:
%load_ext memory_profiler
def train_model(config, train_data, val_data, time_step):
    """
    Train a model for one time step and save it.

    For ``time_step > 0`` the model saved by the previous step is loaded and its
    layers are re-frozen before training continues. For ``time_step == 0`` a new
    model is built with ``hyper_model_up``.

    Parameters
    ----------
    config : dict
        Configuration for model training. Keys read here: ``id_test``,
        ``model``, ``save_dir`` and, when resuming, ``freeze``. The whole
        dictionary is also passed to ``hyper_model_up`` and ``run_train``.
    train_data
        Training data, passed unchanged to ``run_train``.
    val_data
        Validation data, passed unchanged to ``run_train``.
    time_step : int
        The current training step. If ``time_step > 0`` the model from step
        ``time_step - 1`` is loaded.

    Returns
    -------
    model_inst : keras.Model
        The trained model instance.
    res_train
        Whatever ``run_train`` returns for this training round.

    Raises
    ------
    ValueError
        If ``time_step > 0`` and the model of the previous step is not on disk.
    """
    print('\n[INFO]--> time_step ', time_step)

    if time_step > 0:
        # Resume from the model saved at the end of the previous step.
        previous_name = f"{config['id_test']}_{config['model']}_bestLoss_{time_step - 1}.keras"
        previous_path = os.path.join(config['save_dir'], previous_name)
        if not os.path.exists(previous_path):
            raise ValueError(f"[ERROR]--> Model from time_step {time_step - 1} not found at {previous_path}")

        print(f"[INFO]--> Loading model from {previous_path}")
        model_inst = tf.keras.models.load_model(previous_path)

        # Layers below the freeze index stay frozen, the rest are trainable.
        # NOTE(review): Keras only applies changed `trainable` flags after the
        # model is compiled again - confirm that run_train recompiles.
        freeze_index = config['freeze']
        for index, layer in enumerate(model_inst.layers):
            layer.trainable = index >= freeze_index

        print(f"[INFO]--> Model layers frozen up to layer {config['freeze']}")
    else:
        # Previously no model was created on this path, so `model_inst` stayed
        # None and the save below failed with AttributeError.
        print("[INFO]--> Training a new model from scratch...")
        model_inst = hyper_model_up(config, verbose=1)

    # Train the model with the training and validation data
    res_train = run_train(train_data, val_data, model_inst, config)

    # Save the model under the name the next time step will look for.
    model_name = f"{config['id_test']}_{config['model']}_bestLoss_{time_step}.keras"
    save_path = os.path.join(config['save_dir'], model_name)
    os.makedirs(config['save_dir'], exist_ok=True)  # the folder may not exist yet
    model_inst.save(save_path)
    print(f"[INFO]--> Model saved at {save_path}")

    return model_inst, res_train


## select

In [6]:



def select(conf, data_uns_ini, _tempo):
    """
    Selects pseudo-labels from classified unlabeled data.

    Steps performed:
    1. Rename paths in the classified data (``utils.renomear_path``).
    2. Keep the rows whose ``confidence`` is above ``conf['limiar']``.
    3. Remove the selected files from the unlabeled data and save the rest as
       ``unlabelSet_T{_tempo + 1}.csv``.
    4. Append the selected rows to the previous training set.
    5. Save the result as ``trainSet_T{_tempo + 1}.csv``.

    Parameters:
        conf (dict): Selection settings. Keys read here:
            - limiar: Confidence threshold (exclusive lower bound).
            - pseudo_csv: Folder where the CSV files are read and written.
            - path_train: Initial training CSV; only read when ``_tempo == 0``.
        data_uns_ini (DataFrame): Classified unlabeled data with at least the
            columns ``file`` and ``confidence``.
        _tempo (int): Current iteration index.

    Returns:
        dict or bool: ``False`` if no row passed the confidence filter;
        otherwise a dict with the dataset sizes (``ini``, ``select``, ``rest``,
        ``train``, ``new_train``) and the path of the new training CSV
        (``_csv_New_TrainSet``).
    """
    # Step 1: Rename paths
    utils.renomear_path(conf, data_uns_ini)
    print("Initial Data Preview:", data_uns_ini.head())

    # Step 2: Filter by confidence level
    data_uns_fil = data_uns_ini[data_uns_ini['confidence'] > conf['limiar']]
    print(f'Filtered data size: {len(data_uns_fil)}')

    if data_uns_fil.empty:
        print('No pseudo-labels passed the confidence filter.')
        return False

    # Step 3: Exclude selected labels from the unlabeled dataset
    data_uns_rest = data_uns_ini[~data_uns_ini['file'].isin(data_uns_fil['file'])]
    print(f'Remaining unlabeled data size: {len(data_uns_rest)}')

    # Save the remaining unlabeled data for the next iteration
    tempo_px = _tempo + 1
    _csv_unlabels_t = os.path.join(conf['pseudo_csv'], f'unlabelSet_T{tempo_px}.csv')
    print(f'Saving remaining unlabeled data to {_csv_unlabels_t}')
    data_uns_rest.to_csv(_csv_unlabels_t, index=False)

    # Step 4: Combine with the previous training set. Only the CSV that is
    # actually needed is read: the initial split on the first iteration, the
    # training set written by the previous iteration afterwards.
    if _tempo == 0:
        previous_train_path = conf['path_train']
    else:
        previous_train_path = os.path.join(conf['pseudo_csv'], f'trainSet_T{_tempo}.csv')
    train_data_csv = pd.read_csv(previous_train_path)
    New_train_data = pd.concat([train_data_csv, data_uns_fil], ignore_index=True)

    # Step 5: Save the new training set
    _csv_New_TrainSet = os.path.join(conf['pseudo_csv'], f'trainSet_T{tempo_px}.csv')
    print(f'Saving new training set to {_csv_New_TrainSet}')
    New_train_data.to_csv(_csv_New_TrainSet, index=False)

    # Return summary of selections and data sizes
    return {
        'ini': len(data_uns_rest) + len(data_uns_fil),
        'select': len(data_uns_fil),
        'rest': len(data_uns_rest),
        'train': len(train_data_csv),
        'new_train': len(New_train_data),
        '_csv_New_TrainSet': _csv_New_TrainSet
    }


## selection

In [7]:

def selection(pseudos_df, conf, _tempo, verbose=0):
    """
    Run the pseudo-label selection step when there is classified data to use.

    Steps:
    1. Return None straight away when ``pseudos_df`` is empty.
    2. Call ``select`` to pick pseudo-labels above the confidence threshold.
    3. Return None when ``select`` reports that nothing was selected.
    4. Otherwise return the paths and dataset sizes for the next iteration.

    Parameters:
        pseudos_df (DataFrame): Classified unlabeled data to be processed.
        conf (dict): Configuration passed on to ``select``. Keys read here:
            ``path_test``, ``pseudo_csv`` and, optionally, ``path_model``.
        _tempo (int): Current time or iteration index.
        verbose (int, optional): Messages are printed when greater than 0.
            Default is 0 (no output from this function).

    Returns:
        dict or None: Dictionary with ``path_test``, ``save_dir_train``,
        ``pseudo_csv``, ``_csv_New_TrainSet`` and the sizes ``ini``, ``select``,
        ``rest``, ``train`` and ``new_train``; None if nothing was selected or
        there was no data.
    """
    chatty = verbose > 0

    # Guard: nothing to select from.
    if pseudos_df.empty:
        if chatty:
            print("[INFO] No unlabeled data available for processing.")
        return None

    if chatty:
        print('\n[STEP 2].4 - Selection')

    outcome = select(conf, pseudos_df, _tempo)

    # Guard: `select` returns a falsy value when no row passed the filter.
    if not outcome:
        if chatty:
            print("[INFO] No valid pseudo-labels were selected.")
        return None

    summary = {
        'path_test': conf['path_test'],
        'save_dir_train': conf.get('path_model', ''),  # falls back to '' when the key is absent
        'pseudo_csv': conf['pseudo_csv'],
    }
    # Copy the new training-set path and the dataset sizes as reported by `select`.
    for key in ('_csv_New_TrainSet', 'ini', 'select', 'rest', 'train', 'new_train'):
        summary[key] = outcome[key]
    return summary

## classification

In [8]:
from models.get_data import load_unlabels, load_data_test
from models.reports_build import predict_unlabeled_data

def classification(class_config, model, _tempo, verbose=0):
    """
    Classify the unlabeled images and return the predictions as pseudo-labels.

    Parameters
    ----------
    class_config : dict
        Settings for this step. Keys read:
            - 'path_base' (str): Dataset folder; images are taken from its
              'images_unlabels' sub-folder on the first iteration.
            - 'batch_size': Batch size for loading and prediction.
            - 'img_size': Image side length; the CSV branch builds the input
              size as (img_size, img_size).
            - 'categories' (list): Class names.
            - 'pseudo_csv' (str): Folder holding the 'unlabelSet_T*.csv' files.
    model
        Trained model, passed unchanged to `predict_unlabeled_data`.
    _tempo : int
        Current iteration of the pseudo-labeling process.
    verbose : int, optional
        Any truthy value enables progress messages (default 0, silent).

    Returns
    -------
    pd.DataFrame or None
        Result of `predict_unlabeled_data`, or None when the CSV of the current
        iteration is missing or has no rows.

    Notes
    -----
    Two data sources are used:
        1. `_tempo` == 0: images are loaded from the unlabeled image folder.
        2. Otherwise: the file list is read from 'unlabelSet_T{_tempo}.csv'.
    Only a missing or empty CSV is handled here; other errors propagate.
    """
    unlabels_path = os.path.join(class_config['path_base'], 'images_unlabels')
    batch_size = class_config['batch_size']
    img_size = class_config['img_size']
    categories = class_config['categories']
    pseudo_csv_dir = class_config['pseudo_csv']

    if _tempo == 0:
        # First iteration: read the images straight from the folder.
        if verbose:
            print(f"[INFO] Loading unlabeled images from: {unlabels_path}")
        unlabels_generator = load_unlabels({
            'unlabels': unlabels_path,
            'img_size': img_size,
            'batch_size': batch_size,
            'categories': categories
        })
    else:
        # Later iterations: use the files left over by the previous selection.
        unlabels_csv_path = os.path.join(pseudo_csv_dir, f'unlabelSet_T{_tempo}.csv')
        if verbose:
            print(f"[INFO] Loading pseudo-labels from CSV: {unlabels_csv_path}")

        try:
            df_unlabels = pd.read_csv(unlabels_csv_path)
            if df_unlabels.empty:
                if verbose:
                    print(f"[WARNING] No data found in CSV {unlabels_csv_path}")
                return None
            if verbose:
                print("[DEBUG] Head of the pseudo-labels DataFrame:")
                print(df_unlabels.head())

            unlabels_generator = load_data_test(df_unlabels, input_size=(img_size, img_size))
        except (FileNotFoundError, pd.errors.EmptyDataError) as load_error:
            # Both failures mean "no data for this iteration"; only the message differs.
            if verbose:
                if isinstance(load_error, FileNotFoundError):
                    print(f"[ERROR] File not found: {unlabels_csv_path}")
                else:
                    print(f"[ERROR] Empty data in CSV file: {unlabels_csv_path}")
            return None

    if verbose:
        print("[INFO] Performing pseudo-labeling on the unlabeled dataset")

    pseudos_df = predict_unlabeled_data(
        unlabels_generator, model, batch_size, categories, verbose=verbose
    )

    if verbose:
        print(f"[INFO] Total pseudo-labels generated: {len(pseudos_df)}")

    return pseudos_df


## Reports

In [9]:
@profile
def build_reports_config(time_step, config, res_pre, model_inst, res_train, verbose=0):
    """
    Generates evaluation reports for a model from the test set and training results.

    Parameters:
    - time_step (int/float): The time step associated with the evaluation.
    - config (dict): Model and report settings. Keys read: 'img_size',
      'batch_size', 'id_test' and 'model'.
    - res_pre (dict): Preparation results. Keys read: 'path_test', 'categories'
      and the experiment folder, taken from 'save_dir' when present and from
      'save_dir_train' (the key returned by `prepare_data`) otherwise.
    - model_inst: The trained model instance.
    - res_train (dict): Training results; 'history' is read.
    - verbose (int, optional): Extra messages are printed when greater than 0.
      Default is 0.

    Returns:
    - report_metrics (Any): Whatever `reports_build.reports_gen` returns.

    Function Workflow:
    1. Reads the test CSV and prints its path and first rows.
    2. Builds the input size as (img_size, img_size) and loads the test data.
    3. Creates the 'reports' folder inside the experiment folder.
    4. Calls `reports_build.reports_gen` with the data, model and settings.
    """
    # Only `predict_unlabeled_data` is imported from this module at notebook
    # level, so the module name itself must be bound here before it is used.
    from models import reports_build

    if verbose > 0:
        print('\nReports Generation')
        print(f'\n[INFO]--> Step 1.4 - Evaluation Time Step: {time_step}')

    # Load test data
    test_data = pd.read_csv(res_pre['path_test'])
    print("\n[INFO]--> res_pre['path_test']", res_pre['path_test'])
    print('\n[INFO]--> test_data.head()', test_data.head())

    img_size = config['img_size']
    input_size = (img_size, img_size)

    if verbose > 0:
        print(f'\n[INFO]--> Input size: {input_size}')

    # Load processed test data
    test = load_data_test(test_data, input_size)
    categories = res_pre['categories']

    # `prepare_data` exposes the experiment folder as 'save_dir_train'; reading
    # only 'save_dir' raised KeyError for its result. 'save_dir' still wins
    # when a caller provides it.
    if 'save_dir' in res_pre:
        experiment_dir = res_pre['save_dir']
    else:
        experiment_dir = res_pre['save_dir_train']
    save_dir = os.path.join(experiment_dir, 'reports')
    utils.create_folders(save_dir, 0)

    # Configure report generation settings
    reports_config = {
        'save_dir': save_dir,
        'time': time_step,
        'batch_size': config['batch_size'],
        'id_test': config['id_test'],
        'model': config['model']
    }

    # Generate reports
    history = res_train['history']
    report_metrics = reports_build.reports_gen(test, model_inst, categories, history, reports_config)

    return report_metrics

In [10]:
@profile
def rel_data(time_step, report_metrics, res_train, res_sel, workbook_path, config_index, verbose=0):
    """
    Appends one result row to the 'Table' sheet of an Excel workbook.

    The workbook is created if it does not exist, and the sheet (with its
    header row) is created if it is missing. Missing values are written as ''.

    NOTE(review): this function is defined twice in the notebook with the same
    signature; the later definition is the one in effect. Consider keeping one.

    Parameters:
        time_step (str/int): Current time step; stored as a string.
        report_metrics (dict or None): Metrics from the report generation process.
        res_train (dict or None): Training result data (timing, best epoch).
        res_sel (dict or None): Selection result data (dataset sizes). May be
            None, e.g. when `selection` found nothing to select.
        workbook_path (str): Path to the Excel workbook.
        config_index (str/int): Identifier written to the 'id_test' column.
        verbose (int, optional): Messages are printed when greater than 0.
            Default is 0 (no output).
    """
    # `selection` returns None when nothing was selected; treat any missing
    # result as an empty dict so the row is still written instead of failing
    # with AttributeError on `.get`.
    report_metrics = report_metrics or {}
    res_train = res_train or {}
    res_sel = res_sel or {}

    if verbose > 0:
        print("\n[INFO] Workbook name:", workbook_path)

    try:
        workbook = openpyxl.load_workbook(workbook_path)
        if verbose > 0:
            print("Sheets in workbook:", workbook.sheetnames)
    except FileNotFoundError:
        if verbose > 0:
            print("[ERROR] Workbook not found, creating a new one.")
        workbook = openpyxl.Workbook()

    sheet_name = 'Table'

    # Reuse the sheet when it exists, otherwise create it with a header row.
    if sheet_name in workbook.sheetnames:
        if verbose > 0:
            print(f'Sheet "{sheet_name}" exists.')
        Met_page = workbook[sheet_name]
    else:
        if verbose > 0:
            print(f'Creating new sheet: "{sheet_name}".')
        Met_page = workbook.create_sheet(sheet_name)
        if verbose > 0:
            print('[INFO] -rel_data- Saving test header.')
        cols_exe = ['Tempo', 'test_loss', 'test_accuracy', 'precision', 'recall', 'fscore',
                    'kappa', 'str_time', 'end_time', 'delay', 'best_epoch',
                    'ini', 'select', 'rest', 'train', 'new_train', 'id_test']
        Met_page.append(cols_exe)

    # One row, in the same column order as the header above.
    metric_keys = ('test_loss', 'test_accuracy', 'precision', 'recall', 'fscore', 'kappa')
    train_keys = ('start_time', 'end_time', 'duration', 'best_epoch')
    selection_keys = ('ini', 'select', 'rest', 'train', 'new_train')

    data = [str(time_step)]
    data.extend(report_metrics.get(key, '') for key in metric_keys)
    data.extend(res_train.get(key, '') for key in train_keys)
    data.extend(res_sel.get(key, '') for key in selection_keys)
    data.append(config_index)
    Met_page.append(data)

    # Save the workbook
    workbook.save(workbook_path)

    if verbose > 0:
        print("Data saved successfully. Sheets available:", workbook.sheetnames)

In [11]:
@profile
def rel_data(time_step, report_metrics, res_train, res_sel, workbook_path, config_index, verbose=0):
    """
    Appends one result row to the 'Table' sheet of an Excel workbook.

    The workbook is created if it does not exist, and the sheet (with its
    header row) is created if it is missing. Missing values are written as ''.

    NOTE(review): an earlier cell of the notebook defines this function with
    the same signature; this later definition replaces it. Consider keeping one.

    Parameters:
        time_step (str/int): Current time step; stored as a string.
        report_metrics (dict or None): Metrics from the report generation process.
        res_train (dict or None): Training result data (timing, best epoch).
        res_sel (dict or None): Selection result data (dataset sizes). May be
            None, e.g. when `selection` found nothing to select.
        workbook_path (str): Path to the Excel workbook.
        config_index (str/int): Identifier written to the 'id_test' column.
        verbose (int, optional): Messages are printed when greater than 0.
            Default is 0 (no output).
    """
    # `selection` returns None when nothing was selected; treat any missing
    # result as an empty dict so the row is still written instead of failing
    # with AttributeError on `.get`.
    report_metrics = report_metrics or {}
    res_train = res_train or {}
    res_sel = res_sel or {}

    if verbose > 0:
        print("\n[INFO] Workbook name:", workbook_path)

    try:
        workbook = openpyxl.load_workbook(workbook_path)
        if verbose > 0:
            print("Sheets in workbook:", workbook.sheetnames)
    except FileNotFoundError:
        if verbose > 0:
            print("[ERROR] Workbook not found, creating a new one.")
        workbook = openpyxl.Workbook()

    sheet_name = 'Table'

    # Reuse the sheet when it exists, otherwise create it with a header row.
    if sheet_name in workbook.sheetnames:
        if verbose > 0:
            print(f'Sheet "{sheet_name}" exists.')
        Met_page = workbook[sheet_name]
    else:
        if verbose > 0:
            print(f'Creating new sheet: "{sheet_name}".')
        Met_page = workbook.create_sheet(sheet_name)
        if verbose > 0:
            print('[INFO] -rel_data- Saving test header.')
        cols_exe = ['Tempo', 'test_loss', 'test_accuracy', 'precision', 'recall', 'fscore',
                    'kappa', 'str_time', 'end_time', 'delay', 'best_epoch',
                    'ini', 'select', 'rest', 'train', 'new_train', 'id_test']
        Met_page.append(cols_exe)

    # One row, in the same column order as the header above.
    metric_keys = ('test_loss', 'test_accuracy', 'precision', 'recall', 'fscore', 'kappa')
    train_keys = ('start_time', 'end_time', 'duration', 'best_epoch')
    selection_keys = ('ini', 'select', 'rest', 'train', 'new_train')

    data = [str(time_step)]
    data.extend(report_metrics.get(key, '') for key in metric_keys)
    data.extend(res_train.get(key, '') for key in train_keys)
    data.extend(res_sel.get(key, '') for key in selection_keys)
    data.append(config_index)
    Met_page.append(data)

    # Save the workbook
    workbook.save(workbook_path)

    if verbose > 0:
        print("Data saved successfully. Sheets available:", workbook.sheetnames)

In [12]:
%load_ext memory_profiler
def rec_id(workbook_path, id_test):
    """
    Look up one configuration and count the results already logged for it.

    Parameters:
        workbook_path (str): Excel workbook with the sheets 'Sheet'
            (configurations) and 'Table' (logged results).
        id_test: Index label of the configuration row in 'Sheet'; also the
            value matched against the 'id_test' column of 'Table'.

    Returns:
        tuple: (config, tempo_px) where config is the selected row of 'Sheet'
        and tempo_px is the number of 'Table' rows whose 'id_test' equals
        `id_test`.
    """
    # Configuration row for this test.
    config = pd.read_excel(workbook_path, sheet_name="Sheet").loc[id_test]

    # Rows already recorded for this test in the results sheet.
    results_table = pd.read_excel(workbook_path, sheet_name="Table")
    matching_rows = results_table.loc[results_table['id_test'] == id_test]

    return config, len(matching_rows)

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler


# Main

In [13]:
def build_class_config(config, model_inst, pseudo_csv, categories, tempo_px):
    """
    Build the settings dictionary consumed by `classification`.

    Parameters:
        config (dict-like): Source of 'path_base', 'batch_size' and 'img_size'.
        model_inst: Not used by this function.
        pseudo_csv (str): Folder with the pseudo-label CSV files.
        categories (list): Class names.
        tempo_px: Not used by this function.

    Returns:
        dict: Keys 'path_base', 'batch_size', 'img_size', 'categories' and
        'pseudo_csv'.
    """
    print('\n[STEP] Classification phase')

    # Values copied straight from the experiment configuration.
    class_config = {key: config[key] for key in ('path_base', 'batch_size', 'img_size')}
    class_config['categories'] = categories
    class_config['pseudo_csv'] = pseudo_csv
    return class_config

In [14]:
def test1(workbook_path, id_test, verbose=0):
    """
    Resume one experiment at its next time step: train, classify, select.

    The configuration row and the number of steps already logged come from
    `rec_id`. The step count decides which training CSV and which saved model
    are used.

    Parameters:
        workbook_path (str): Excel workbook read by `rec_id`.
        id_test: Configuration identifier (row label in the workbook).
        verbose (int, optional): Messages are printed when greater than 0.
            Default is 0. Previously the body read an undefined name `verbose`.

    Returns:
        None
    """
    print('\n[STEP] Training phase')
    # A plain call replaces the former `%memit` line magic. The magic ran the
    # assignment in the notebook's global namespace, so `config` and `tempo_px`
    # only resolved as leaked globals. To measure memory, run
    # `%memit test1(...)` from a cell instead.
    config, tempo_px = rec_id(workbook_path, id_test)
    print("\nconfig", config)
    print("\ntempo_px", tempo_px)

    # Label directory path
    labels_dir = os.path.join(config['path_base'], "labels")
    categories = sorted(os.listdir(labels_dir))

    # Folder of this experiment below the (global) project working directory.
    experiment_dir = f'{working_dir}/0_pseudo_labels/Reports/{id_test}_{config["model"]}_{config["aug"]}_{config["base"]}'

    # Recover the training CSV written for the current time step.
    _pseudo_csv = f'{experiment_dir}/pseudo_csv/'
    _csv_New_TrainSet = os.path.join(_pseudo_csv, f'trainSet_T{tempo_px}.csv')
    print(_csv_New_TrainSet)

    confi_load = {
        'aug': config['aug'],
        'img_size': config['img_size'],
    }
    train, val = reload_data_train(confi_load, _csv_New_TrainSet)

    save_dir = f'{experiment_dir}/models/'
    print(f"save_dir: {save_dir}")

    config_train = {
        'id_test': id_test,
        'model': config['model'],
        'save_dir': save_dir,
        'freeze': config['freeze'],
        'batch_size': config['batch_size'],
        'epochs': config['epochs'],
    }
    model_inst, res_train = train_model(config_train, train, val, tempo_px)

    class_config = build_class_config(config, model_inst, _pseudo_csv, categories, tempo_px)
    pseudos_df = classification(class_config, model_inst, tempo_px, verbose)

    # `classification` returns None when there is nothing left to classify;
    # `selection` would fail on None, so it is skipped in that case.
    # NOTE(review): `selection`/`select` read conf['path_test'],
    # conf['pseudo_csv'], conf['path_train'] and conf['limiar']; confirm the
    # workbook row passed here provides them.
    res_sel = None
    if pseudos_df is not None:
        res_sel = selection(pseudos_df, config, tempo_px, verbose)

    if res_sel is None:
        if verbose > 0:
            print("[INFO] No more unlabeled data to process.")

    # TODO: reporting for this step is still disabled. The earlier draft called
    # build_reports_config(tempo_px, config, res_pre, model_inst, res_train)
    # followed by rel_data(tempo_px, report_metrics, res_train, res_sel,
    # workbook_path, config_index, verbose=0), with the test CSV expected at
    # f'{working_dir}/0_pseudo_labels/Reports/{config["base"]}_testSet.csv';
    # `res_pre` and `config_index` are not available in this function.

## run

In [15]:
def run(workbook_path, start_index, end_index=None, verbose=0):
    """
    Run the pseudo-labeling experiments configured in an Excel workbook.

    For every selected configuration row the data is prepared once, then the
    loop train -> report -> classify -> select -> log is repeated. The loop
    stops as soon as no unlabeled data is left or no pseudo-label is selected.
    Previously the loop body ended after the report and never changed its stop
    flag or the time step, so it could not terminate.

    Parameters:
        workbook_path (str): Workbook whose 'Sheet' sheet holds one
            configuration per row. Its folder is used as the results root.
        start_index (int): Label of the first configuration row to run.
        end_index (int, optional): Number of rows to run starting at
            `start_index` (a count, not an end position). Defaults to all
            remaining rows; larger values are clamped to the rows available.
        verbose (int, optional): Messages are printed when greater than 0.

    Returns:
        None
    """
    if verbose > 0:
        # The workbook is opened here only to list its sheets.
        print("\n[INFO] Workbook name:", workbook_path)
        workbook = openpyxl.load_workbook(workbook_path)
        print("Sheets in workbook:", workbook.sheetnames)

    # Set root path
    root_path = os.path.dirname(workbook_path)

    # Load configuration data from 'Sheet'
    config_data = pd.read_excel(workbook_path, sheet_name="Sheet")

    # Validate start_index
    if start_index >= len(config_data):
        if verbose > 0:
            print(f"[ERROR] start_index ({start_index}) is out of range.")
        return

    remaining_rows = len(config_data) - start_index
    rows_to_run = remaining_rows if end_index is None else min(end_index, remaining_rows)

    for config_index in range(start_index, start_index + rows_to_run):
        config = config_data.loc[config_index]
        if verbose > 0:
            print("Current configuration:", config)

        time_step = 0
        res_pre = prepare_data(config, root_path)

        conf_load = {
            'path_train': res_pre['path_train'],
            'path_val': res_pre['path_val'],
            'img_size': config['img_size'],
            'aug': config['aug']
        }
        if verbose > 0:
            print("Loading data with config:", conf_load)

        train, val = load_data_labels(conf_load)
        del conf_load

        # Same keys `test1` passes to reload_data_train.
        reload_conf = {'aug': config['aug'], 'img_size': config['img_size']}

        # build_reports_config reads the experiment folder from 'save_dir',
        # while prepare_data returns it as 'save_dir_train'; provide both.
        report_ctx = dict(res_pre)
        report_ctx.setdefault('save_dir', res_pre['save_dir_train'])

        # Selection settings: the configuration row plus the paths produced by
        # prepare_data.
        # NOTE(review): `select` reads conf['limiar'] - presumably a column of
        # the workbook row; confirm.
        sel_conf = dict(config)
        sel_conf.update(
            path_train=res_pre['path_train'],
            path_test=res_pre['path_test'],
            pseudo_csv=res_pre['pseudo_csv'],
        )

        while True:
            maneger_gpu.monitor_memory_and_run()

            if verbose > 0:
                print('\n[STEP] Training phase')
            conf_train = build_train_config(config, res_pre, time_step)
            model_train, res_train = train_model(conf_train, train, val, time_step)
            del conf_train
            maneger_gpu.log_memory_usage('conf_train')

            report_metrics = build_reports_config(time_step, config, report_ctx, model_train, res_train)

            # Classify the remaining unlabeled data and select pseudo-labels.
            class_config = build_class_config(
                config, model_train, res_pre['pseudo_csv'], res_pre['categories'], time_step
            )
            pseudos_df = classification(class_config, model_train, time_step, verbose)
            res_sel = None
            if pseudos_df is not None:
                res_sel = selection(pseudos_df, sel_conf, time_step, verbose)

            # Log this step; an empty dict stands in for "nothing selected".
            rel_data(time_step, report_metrics, res_train, res_sel or {},
                     workbook_path, config_index, verbose)

            if res_sel is None:
                if verbose > 0:
                    print("[INFO] No more unlabeled data to process.")
                break

            # Continue with the training set extended by the new pseudo-labels.
            train, val = reload_data_train(reload_conf, res_sel['_csv_New_TrainSet'])
            time_step += 1

## main

In [16]:
# Entry cell: run the pipeline for one configuration row of the workbook.
if __name__ == "__main__":
    # NOTE(review): hard-coded absolute path to the configuration workbook.
    workbook_path = "/media/jczars/4C22F02A22F01B22/Pollen_classification_view/0_pseudo_labels/Reports0/config_pseudo_label_pre.xlsx"
    # Label of the first configuration row to process.
    start_index = 5

    # Earlier call kept for reference: run(workbook_path, id_test)

    # `run` uses end_index as the number of rows to process, so only row 5 runs.
    # `run` has no return value, so `res_pre` is None after this call.
    res_pre=run(workbook_path, start_index, end_index=1, verbose=0)
     

ERROR: Could not find file /tmp/ipykernel_89055/2659114944.py
folders test already exists:  /media/jczars/4C22F02A22F01B22/Pollen_classification_view/0_pseudo_labels/Reports0
folders test already exists:  /media/jczars/4C22F02A22F01B22/Pollen_classification_view/0_pseudo_labels/Reports0/5_DenseNet201_sem_BI_5
folders test already exists:  /media/jczars/4C22F02A22F01B22/Pollen_classification_view/0_pseudo_labels/Reports0/5_DenseNet201_sem_BI_5/pseudo_csv
Training save directory: /media/jczars/4C22F02A22F01B22/Pollen_classification_view/0_pseudo_labels/Reports0/5_DenseNet201_sem_BI_5, ID: 5_DenseNet201_sem_BI_5
CSV data path: BD/BI_5/BI_5.csv
_path_data  BD/BI_5/labels
_csv_data  BD/BI_5/BI_5.csv
CATEGORIES:  ['equatorial_alongada', 'equatorial_circular', 'equatorial_eliptica', 'polar_circular', 'polar_triangular', 'polar_tricircular']
BD/BI_5/BI_5.csv
                     file
labels                   
equatorial_alongada    70
equatorial_circular   231
equatorial_eliptica    44
polar_c