In [1]:

import os # Import the os module

# Define the base folder in Google Drive for data and outputs
# This should be the folder containing your CSV data
base_drive_folder = "/kaggle/input/dataset1" # Keep this for data and general outputs
base_drive_folder_output = "/kaggle/working/" # Keep this for data and general outputs

# Define the directory where the trimmed model and tokenizer were saved by vocabtrimmer
# IMPORTANT: Update this path if you saved the trimmed model to a different location
trimmed_model_path = "/kaggle/input/dataset1"
print(f"Attempting to load trimmed model and tokenizer from: {trimmed_model_path}")

# Create the base output folder if it doesn't exist
os.makedirs(base_drive_folder, exist_ok=True)
print(f"Training outputs (checkpoints, logs, results) will be saved to: {base_drive_folder}")

# Step 1: Setup and Installation
# Install necessary libraries
# Removed peft and added adapters
!pip install --upgrade transformers
!pip install --upgrade datasets
!pip install adapters accelerate evaluate rouge_score nltk pandas rawpy Pillow tensorboard

# Print transformers version to verify installation
import transformers
print(f"Transformers version: {transformers.__version__}")

# Note: Meteor also requires Java to be installed on your system.
# If you still get errors for Meteor, you might need to install Java.
# Restart your Colab runtime after installation if prompted.

import json
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
import numpy as np
import gc # Import garbage collector

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq, TrainerCallback
# CORRECTED: Import AdapterConfig only, and initialize adapters for the model
from adapters import AdapterConfig
import adapters # Import the adapters library to patch AutoModel classes

from datasets import load_dataset, DatasetDict, Dataset

import evaluate
import nltk

# Ensure necessary nltk data is downloaded for metrics
nltk.download("punkt", quiet=True)
nltk.download("wordnet", quiet=True)
nltk.download('omw-1.4', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True) # sometimes needed for meteor, but keeping for now as it's small

# Step 2: Load the Trimmed Model and Tokenizer
# Load the model and tokenizer from the directory where vocabtrimmer saved them
try:
    print(f"\nLoading trimmed model from {trimmed_model_path}...")
    model = AutoModelForSeq2SeqLM.from_pretrained(trimmed_model_path)
    print(f"Loading trimmed tokenizer from {trimmed_model_path}...")
    tokenizer = AutoTokenizer.from_pretrained(trimmed_model_path)
    print("Trimmed model and tokenizer loaded successfully!")
    print(f"Loaded model vocabulary size: {model.config.vocab_size}")
    print(f"Loaded tokenizer vocabulary size: {len(tokenizer)}")

    # Initialize adapters for the model AFTER loading it
    # This patches the model to enable adapter functionalities
    adapters.init(model)
    print("Adapters initialized for the trimmed model.")

except Exception as e:
    print(f"Error loading trimmed model or tokenizer from {trimmed_model_path}: {e}")
    print("Please ensure the trimmed model and tokenizer were saved correctly by vocabtrimmer")
    print("and that the 'trimmed_model_path' variable points to the correct directory.")
    print("Exiting script.")
    raise SystemExit("Failed to load trimmed model/tokenizer.")


# Check for CUDA availability and move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"Model moved to device: {device}")


# Step 3: Load and Preprocess Your Data (Using three CSV files from Google Drive)

# Define the paths to your CSV files within the Google Drive folder
train_csv_path = os.path.join(base_drive_folder, 'amharic_3_train.csv')
valid_csv_path = os.path.join(base_drive_folder, 'amharic_3_valid.csv')
test_csv_path = os.path.join(base_drive_folder, 'amharic_3_test.csv')

# IMPORTANT: Make sure you have uploaded amharic_3_train.csv, amharic_3_valid.csv,
# and amharic_3_test.csv into the specified base_drive_folder in your Google Drive.

# Load each dataset split from its respective CSV file
try:
    # load_dataset returns a DatasetDict, get the 'train' split for each file
    train_dataset = load_dataset('csv', data_files=train_csv_path)['train']
    valid_dataset = load_dataset('csv', data_files=valid_csv_path)['train']
    test_dataset = load_dataset('csv', data_files=test_csv_path)['train']

    # Select only the 'text' and 'summary' columns for each dataset
    train_dataset = train_dataset.select_columns(['text', 'summary'])
    valid_dataset = valid_dataset.select_columns(['text', 'summary'])
    test_dataset = test_dataset.select_columns(['text', 'summary'])

    # Combine into a DatasetDict
    dataset = DatasetDict({
        'train': train_dataset,
        'validation': valid_dataset,
        'test': test_dataset
    })
    print("\nDataset loaded from CSV files.")

except FileNotFoundError:
    print("\nError: One or more of the specified CSV files were not found in Google Drive.")
    print(f"Please make sure amharic_3_train.csv, amharic_3_valid.csv, and amharic_3_test.csv")
    print(f"are uploaded to your Google Drive folder: {base_drive_folder}")
    # Create dummy datasets for demonstration if files not found
    print("Creating dummy datasets for demonstration.")
    dummy_train_data = {"text": ["Train document one for dummy data.", "Train document two for dummy data."], "summary": ["Train sum 1.", "Train sum 2."]}
    dummy_valid_data = {"text": ["Valid document one for dummy data."], "summary": ["Valid sum 1."]}
    dummy_test_data = {"text": ["Test document one for dummy data."], "summary": ["Test sum 1."]}
    dataset = DatasetDict({
        'train': Dataset.from_dict(dummy_train_data),
        'validation': Dataset.from_dict(dummy_valid_data),
        'test': Dataset.from_dict(dummy_test_data)
    })


print("Dataset sizes:")
print(f"Train size: {len(dataset['train'])}")
print(f"Validation size: {len(dataset['validation'])}")
print(f"Test size: {len(dataset['test'])}")

# Define the preprocessing function (DataCollator will handle padding)
# Use the loaded (trimmed) tokenizer
def preprocess_function(examples):
    # Tokenize documents (text column)
    # Adjust max_length based on the typical length of your documents
    model_inputs = tokenizer(
        examples['text'],
        max_length=512, # Max length for inputs
        truncation=True,
        # padding is handled by DataCollatorForSeq2Seq
    )

    # Tokenize summaries (summary column)
    # Adjust max_length based on the typical length of your summaries
    labels = tokenizer(
        examples['summary'],
        max_length=128, # Max length for labels
        truncation=True,
        # padding is handled by DataCollatorForSeq2Seq
    )

    # Assign input_ids of summaries as labels
    model_inputs["labels"] = labels["input_ids"]

    return model_inputs

# Apply the preprocessing function to all splits
tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Set the format of the datasets to PyTorch tensors
# Specify which columns should be converted to tensors
tokenized_dataset.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])

# Now, the variables for each split are directly available
tokenized_train_dataset = tokenized_dataset['train']
tokenized_eval_dataset = tokenized_dataset['validation']
tokenized_test_dataset = tokenized_dataset['test']

# Step 4: Configure and Apply Adapters (Replaced LoRA)
print("\nConfiguring and applying Adapters...")
# Define adapter configuration
adapter_name = "amharic_summarization_adapter"
# CORRECTED: Load adapter configuration using AdapterConfig.load()
# "double_seq_bn" is the configuration name for the Houlsby adapter type
adapter_config = AdapterConfig.load("double_seq_bn")

# Add the adapter to the model using the loaded configuration object
model.add_adapter(adapter_name, config=adapter_config)

# Set the active adapter for training
model.set_active_adapters(adapter_name)

# Enable adapter training: This freezes the base model parameters and only trains the adapter layers
model.train_adapter(adapter_name)

# Print trainable parameters (will now show adapter parameters)
print("\nTrainable parameters (Adapters on Trimmed Model):")
#model.print_trainable_parameters()
# Print trainable parameters (Manual Calculation with generator expression)
print("\nTrainable parameters (Adapter baseline):")
# Calculate total trainable parameters using a generator expression
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

# Calculate total parameters (optional, for context)
total_all_params = sum(p.numel() for p in model.parameters())

print(f"Total parameters: {total_all_params}")
print(f"Trainable parameters: {total_trainable_params}")
# Calculate and print the percentage of trainable parameters
if total_all_params > 0:
    print(f"Percentage of trainable parameters: {100 * total_trainable_params / total_all_params:.2f}%")
else:
    print("Percentage of trainable parameters: 0.00%")


# --- Custom Callback for Cache Clearing ---
class ClearCacheCallback(TrainerCallback):
    """
    A TrainerCallback that clears the CUDA cache before evaluation steps.
    Using on_step_end hook to check if evaluation is about to happen.
    Includes gc.collect().
    """
    def on_step_end(self, args, state, control, **kwargs):
        # Check if evaluation is scheduled for the next step
        # This condition is true when state.global_step is a multiple of args.eval_steps
        # and state.global_step > 0
        if args.eval_strategy == "steps" and state.global_step > 0 and args.eval_steps and state.global_step % args.eval_steps == 0:
             if torch.cuda.is_available():
                torch.cuda.empty_cache()
                # Also try garbage collection
                gc.collect()
                print(f"\nCUDA cache cleared and gc.collect() called by callback before evaluation at step {state.global_step}.")
                # Set control.should_evaluate to True to ensure evaluation happens right after this step end
                # This might already be handled by the Trainer, but explicit can help sometimes.
                control.should_evaluate = True

# --- Custom Trainer to filter unexpected arguments ---
class FilteredArgsSeq2SeqTrainer(Seq2SeqTrainer):
    # Override training_step to filter inputs before model call
    # Updated signature to accept num_items_in_batch
    def training_step(self, model, inputs, num_items_in_batch=None):
        # Remove 'num_items_in_batch' if it's present in inputs (belt-and-suspenders approach)
        if 'num_items_in_batch' in inputs:
            del inputs['num_items_in_batch']
        # Also explicitly ignore the positional argument if it was passed
        if num_items_in_batch is not None:
             # You could potentially log a warning here if you want to know this happened
             pass # Simply ignore the argument

        # Call the parent class's training_step with the filtered inputs
        return super().training_step(model, inputs)

    # Override prediction_step to filter inputs before model call
    # Updated signature to accept num_items_in_batch
    def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys=None, num_items_in_batch=None):
         # Remove 'num_items_in_batch' if it's present in inputs (belt-and-suspenders approach)
        if 'num_items_in_batch' in inputs:
            del inputs['num_items_in_batch']
         # Also explicitly ignore the positional argument if it was passed
        if num_items_in_batch is not None:
             # You could potentially log a warning here if you want to know this happened
             pass # Simply ignore the argument

        # Call the parent class's prediction_step with the filtered inputs
        return super().prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)



# Step 5: Define Training Arguments and Trainer (Configured for GPU with Drive Outputs)

# Define the output directory within your Google Drive for training logs and checkpoints
# Using the base_drive_folder for training outputs
training_output_dir = os.path.join(base_drive_folder_output, "trimmed_adapters_checkpoints") # CHANGED output directory name
# Create the directory if it doesn't exist
os.makedirs(training_output_dir, exist_ok=True)
print(f"Training outputs (checkpoints, logs) will be saved to: {training_output_dir}")


# Instantiate Data Collator (Handles padding batches dynamically)
# We pass the tokenizer so it knows the pad token ID
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model) # Pass model (Adapter model on trimmed base) here

# Define training arguments (Revised for GPU with Drive Checkpointing and TensorBoard)
training_args = Seq2SeqTrainingArguments(
    output_dir=training_output_dir, # Output directory for checkpoints and logs (in Drive)
    num_train_epochs=3, # Number of training epochs - Adjust based on convergence
    per_device_train_batch_size=2, # Batch size per device during training
    per_device_eval_batch_size=1, # Evaluation batch size
    gradient_accumulation_steps=2, # Gradient accumulation steps to compensate for smaller batch size
    learning_rate=1e-5, # Keep reduced learning rate for stability
    weight_decay=0.01, # Weight decay - Adjust based on experimentation
    eval_strategy="steps", # Evaluate every N steps to match save_strategy
    eval_steps=500, # Evaluate every 500 training steps (matches save_steps)
    save_strategy="steps", # Save checkpoint every N steps
    save_steps=500, # Save a checkpoint every 500 training steps
    save_total_limit=3, # Optional: Limit the total number of checkpoints to save
    load_best_model_at_end=True, # Load the best model at the end of training based on metric_for_best_model
    metric_for_best_model="rougeL", # Metric to monitor for best model - Change if needed
    greater_is_better=True, # Set to False for metrics like loss
    report_to="tensorboard", # Log metrics to TensorBoard
    push_to_hub=False, # Set to True if you want to push to Hugging Face Hub
    label_names=["labels"], # Explicitly tell the Trainer your label column name
    fp16=False, # Keep mixed precision training disabled for stability
    # bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(), # Use bf16 if GPU supports it
)

# Define compute_metrics function for evaluation
metric_rouge = evaluate.load("rouge")
metric_bleu = evaluate.load("bleu")

def compute_metrics(eval_pred):
    predictions = eval_pred.predictions
    labels = eval_pred.label_ids

    # Debugging prints to inspect types and shapes
    print("\n--- Inside compute_metrics (DEBUG) ---")
    print(f"Type of eval_pred.predictions (initial): {type(eval_pred.predictions)}")
    print(f"Type of eval_pred.label_ids (initial): {type(eval_pred.label_ids)}")

    # CRITICAL FIX: If predictions is a tuple (e.g., logits and hidden states), take the first element (logits)
    if isinstance(predictions, tuple):
        print(f"eval_pred.predictions is a tuple. Length: {len(predictions)}")
        predictions = predictions[0] # Assume the first element is the actual generated sequences (logits)
        print(f"Taking first element of predictions tuple. New type: {type(predictions)}")

    # Ensure predictions and labels are NumPy arrays
    if isinstance(predictions, torch.Tensor):
        predictions = predictions.cpu().numpy()
    if isinstance(labels, torch.Tensor):
        labels = labels.cpu().numpy()

    print(f"Predictions type (after initial processing): {type(predictions)}")
    print(f"Labels type (after initial processing): {type(labels)}")

    # CRITICAL FIX: If predictions are logits (3D array), convert to token IDs using argmax
    if predictions.ndim == 3:
        print(f"WARNING: Predictions has unexpected 3 dimensions. Assuming logits and applying argmax.")
        predictions = np.argmax(predictions, axis=-1) # Convert logits to token IDs
        print(f"Predictions after argmax: New shape: {predictions.shape}, New dtype: {predictions.dtype}")

    # Further check for dimensions: if predictions is 3D (e.g., batch_size, seq_len, 1), squeeze it
    # This handles cases where the model output might have an extra dimension of size 1
    if predictions.ndim == 3 and predictions.shape[-1] == 1:
        predictions = predictions.squeeze(-1)
        print(f"Predictions was 3D with last dim 1, squeezed to 2D. New shape: {predictions.shape}")
    elif predictions.ndim > 2:
        # If it's more than 2D and not just a trailing 1, this is unexpected for token IDs
        print(f"WARNING: Predictions has unexpected {predictions.ndim} dimensions (after argmax check). Expected 2D.")
        predictions = predictions.reshape(predictions.shape[0], -1) # Attempt to flatten if possible
        print(f"Attempted to reshape predictions to 2D. New shape: {predictions.shape}")

    # Ensure predictions are integers (important for tokenizer.batch_decode)
    if predictions.dtype != np.int64 and predictions.dtype != np.int32:
        print(f"WARNING: Predictions dtype is {predictions.dtype}, converting to int64.")
        predictions = predictions.astype(np.int64)
    if labels.dtype != np.int64 and labels.dtype != np.int32:
        print(f"WARNING: Labels dtype is {labels.dtype}, converting to int64.")
        labels = labels.astype(np.int64)

    print(f"Predictions shape (after processing): {predictions.shape}")
    print(f"Labels shape (after processing): {labels.shape}")

    # Replace -100 in labels as they are ignored in loss calculation
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    print("Labels processed (replaced -100).")

    try:
        # Decode predictions and labels, removing special tokens
        decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
        print(f"Successfully decoded {len(decoded_preds)} predictions and {len(decoded_labels)} labels.")
    except Exception as e:
        print(f"ERROR: Failed to decode predictions/labels in compute_metrics: {e}")
        import traceback
        traceback.print_exc()
        raise # Re-raise to crash session with specific error

    # FIX: Replace empty strings with a single character 'a' to prevent ZeroDivisionError in BLEU
    # This is crucial for metrics like BLEU that perform length calculations.
    decoded_preds_for_metrics = [pred if pred else "a" for pred in decoded_preds]
    decoded_labels_for_metrics = [label if label else "a" for label in decoded_labels]
    print("Empty strings in decoded predictions/labels replaced with 'a' for robust metric calculation.")

    # Some metrics expect a list of lists for references
    formated_decoded_labels_for_some_metrics = [[label] for label in decoded_labels_for_metrics] # Use the cleaned lists for metrics
    print("Labels formatted for metrics.")

    # --- Compute Metrics ---
    try:
        # Rouge
        rouge_results = metric_rouge.compute(predictions=decoded_preds_for_metrics, references=decoded_labels_for_metrics)
        print("ROUGE computed.")
    except Exception as e:
        print(f"ERROR: Failed to compute ROUGE in compute_metrics: {e}")
        import traceback
        traceback.print_exc()
        raise # Re-raise

    try:
        # BLEU (requires references as list of lists)
        bleu_results = metric_bleu.compute(predictions=decoded_preds_for_metrics, references=formated_decoded_labels_for_some_metrics)
        print("BLEU computed.")
    except Exception as e:
        print(f"ERROR: Failed to compute BLEU in compute_metrics: {e}")
        import traceback
        traceback.print_exc()
        raise # Re-raise

    # Combine metrics - only include ROUGE and BLEU
    combined_results = {
        "rouge1": rouge_results["rouge1"], # F1 score is commonly reported
        "rouge2": rouge_results["rouge2"],
        "rougeL": rouge_results["rougeL"],
        "bleu": bleu_results["bleu"],
    }
    print("Metrics combined.")
    print("--- Exiting compute_metrics ---")
    # The Trainer expects a dictionary of metrics
    return combined_results


# Create Trainer instance, adding the custom callback
# CHANGED: Use FilteredArgsSeq2SeqTrainer instead of Seq2SeqTrainer
trainer = FilteredArgsSeq2SeqTrainer(
    model=model, # This is the Adapter model on the TRIMMED base model
    args=training_args, # These are the GPU-configured args with Drive checkpointing and reporting
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    tokenizer=tokenizer, # Pass the trimmed tokenizer here
    data_collator=data_collator, # Add the data collator here
    compute_metrics=compute_metrics, # Pass the compute_metrics function
    callbacks=[ClearCacheCallback()], # Add the custom cache clearing callback here
)


# Step 6: Train the Model & Measure Time (Includes Resuming from Checkpoint in Drive)
print("\nStarting training (Adapters on Trimmed Model)...") # CHANGED print statement

# --- Resume Training Logic ---
# Check if there's a checkpoint to resume from in the output directory
# The Trainer saves checkpoints in subdirectories like 'checkpoint-XXXX'
latest_checkpoint_dir = None
if os.path.isdir(training_args.output_dir):
    # Find all checkpoint directories
    checkpoints = [m for m in os.listdir(training_args.output_dir) if m.startswith('checkpoint-')]
    if checkpoints:
        # Sort checkpoints by step number to find the latest
        latest_checkpoint_dir = os.path.join(training_args.output_dir, sorted(checkpoints, key=lambda x: int(x.split('-')[-1]))[-1])
        print(f"Found latest checkpoint: {latest_checkpoint_dir}")

# Set resume_from_checkpoint to the latest found checkpoint if it exists
resume_from_checkpoint = latest_checkpoint_dir if latest_checkpoint_dir and os.path.isdir(latest_checkpoint_dir) else None

if resume_from_checkpoint:
     print(f"Resuming training from checkpoint: {resume_from_checkpoint}")
else:
     print("No checkpoint found, starting fresh training.")

start_time = time.time()

# Start training (pass resume_from_checkpoint if a checkpoint was found)
train_output = trainer.train(resume_from_checkpoint=resume_from_checkpoint)

end_time = time.time()
total_training_time_seconds = end_time - start_time
total_training_time_minutes = total_training_time_seconds / 60.0

print(f"\nTotal training time (Adapters on Trimmed Model): {total_training_time_seconds:.2f} seconds ({total_training_time_minutes:.2f} minutes)") # CHANGED print statement


# Step 7: Evaluate the Model on the Test Set and Get Metrics
print("\n--- Starting Model Evaluation on Test Set ---")
try:
    # --- Explicitly Clear CUDA Cache Before Final Evaluation ---
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect() # Also call gc.collect() here
        print("\nCUDA cache cleared and gc.collect() called explicitly before final evaluation.")
    else:
        print("\nCUDA not available, skipping cache clear before final evaluation.")

    print("Calling trainer.evaluate...")
    evaluation_results = trainer.evaluate(eval_dataset=tokenized_test_dataset)
    print("trainer.evaluate completed.")

    print("\nEvaluation Metrics (Adapters on Trimmed Model):") # CHANGED print statement
    print(evaluation_results)

    # --- Save Evaluation Results to File ---
    eval_results_file = os.path.join(base_drive_folder_output, "trimmed_adapters_evaluation_results.json") # CHANGED file name
    with open(eval_results_file, "w", encoding="utf-8") as f:
        json.dump(evaluation_results, f, indent=4)
    print(f"\nEvaluation results saved to: {eval_results_file}")

except Exception as e:
    print(f"\nERROR during Step 7 (Model Evaluation): {e}")
    import traceback
    traceback.print_exc() # Print full traceback for detailed error


# Step 8: Plot Training vs Validation Loss for Convergence Analysis
print("\n--- Starting Loss Curve Plotting ---")
try:
    # Extract loss and eval_loss from the trainer's log history
    train_loss_entries = [entry for entry in trainer.state.log_history if 'loss' in entry]
    eval_loss_entries = [entry for entry in trainer.state.log_history if 'eval_loss' in entry]

    train_steps = [entry['step'] for entry in train_loss_entries]
    train_losses = [entry['loss'] for entry in train_loss_entries]

    # For evaluation, the step corresponds to the global step at which evaluation was performed
    eval_steps = [entry['step'] for entry in eval_loss_entries]
    eval_losses = [entry['eval_loss'] for entry in eval_loss_entries]

    if train_steps and eval_steps:
        plt.figure(figsize=(10, 6))
        plt.plot(train_steps, train_losses, label="Training Loss", marker='o', linestyle='-')
        plt.plot(eval_steps, eval_losses, label="Validation Loss", marker='s', linestyle='--')
        plt.xlabel("Training Steps")
        plt.ylabel("Loss")
        plt.title("Training vs Validation Loss Over Steps (Adapters)") # CHANGED plot title
        plt.legend()
        plt.grid(True)

        # Save the plot to Google Drive
        loss_plot_path = os.path.join(base_drive_folder_output, "trimmed_adapters_loss_curve.png") # CHANGED file name
        plt.savefig(loss_plot_path)
        print(f"Loss curve plot saved to: {loss_plot_path}")
        plt.show()
    else:
        print("Not enough data in log history to plot loss curves. Ensure eval_strategy and logging_steps are configured.")
except Exception as e:
    print(f"\nERROR during Step 8 (Loss Plotting): {e}")
    import traceback
    traceback.print_exc()


# Step 9: Save the Final (Best) Adapter Weights to Drive
print("\n--- Starting Adapter Weights Saving ---") # CHANGED print statement
try:
    # After trainer.train() finishes (and because load_best_model_at_end=True),
    # the model object in memory is the best model found during training.
    # Save only the adapter weights, not the full trimmed base model
    adapters_output_dir = os.path.join(base_drive_folder_output, "trimmed_adapters_final_best_adapter") # CHANGED output directory name
    
    # The adapters library provides a specific method to save only the adapter weights
    model.save_adapter(adapters_output_dir, adapter_name) # Save only the specified adapter
    print(f"\nFinal (best) adapter weights for '{adapter_name}' saved to {adapters_output_dir}")

    # To save the tokenizer along with the adapter weights (recommended):
    tokenizer.save_pretrained(adapters_output_dir)
    print(f"Tokenizer saved to {adapters_output_dir}")

    # --- How to Load the Saved Trimmed Model with Adapter Weights Later ---
    print("\n--- How to Load the Saved Trimmed Model with Adapter Weights Later ---")
    print(f"To load the trimmed base model and apply the saved adapter weights later, you would do:")
    print(f"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM")
    print(f"from adapters import AdapterConfig") # CORRECTED import
    print(f"import adapters") # CORRECTED import
    print(f"")
    print(f"# Path to the directory where you saved the trimmed base model (from vocabtrimmer)")
    print(f"trimmed_base_model_path = '{trimmed_model_path}'")
    print(f"# Path to the directory where you saved the fine-tuned adapter weights")
    print(f"saved_adapters_dir = '{adapters_output_dir}'")
    print(f"adapter_name = '{adapter_name}'")
    print(f"")
    print(f"# Load the trimmed base model")
    print(f"loaded_trimmed_model = AutoModelForSeq2SeqLM.from_pretrained(trimmed_base_model_path)")
    print(f"")
    print(f"# Initialize adapters for the loaded model") # CORRECTED
    print(f"adapters.init(loaded_trimmed_model)") # CORRECTED
    print(f"")
    print(f"# Load the adapter weights and add them to the model")
    print(f"# Use AdapterConfig.load() to get the correct config type") # CORRECTED
    print(f"loaded_trimmed_model.load_adapter(saved_adapters_dir, config=AdapterConfig.load('double_seq_bn'), load_as=adapter_name)") # CORRECTED
    print(f"")
    print(f"# Set the adapter as active for inference")
    print(f"loaded_trimmed_model.set_active_adapters(adapter_name)")
    print(f"")
    print(f"# Load the tokenizer")
    print(f"loaded_tokenizer = AutoTokenizer.from_pretrained(saved_adapters_dir)") # Tokenizer was saved with adapter weights
    print(f"")
    print(f"# If running on GPU, ensure model is on GPU")
    print(f"# import torch")
    print(f"# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')")
    print(f"# loaded_trimmed_model.to(device)")
    print(f"")
    print(f"# Set to evaluation mode")
    print(f"# loaded_trimmed_model.eval()")
    print(f"")
    print(f"print(f'Successfully loaded trimmed model and applied adapter weights from {saved_adapters_dir}')")
    print(f"print(f'Loaded model vocabulary size: {loaded_trimmed_model.config.vocab_size}')")
    print(f"print(f'Loaded tokenizer vocabulary size: {len(loaded_tokenizer)}')")
    print(f"")
    print(f"# Example Inference (Optional)")
    print(f"# text_to_summarize = 'የአማርኛ ጽሑፍ እዚህ ይገባል...'")
    print(f"# inputs = loaded_tokenizer(text_to_summarize, return_tensors='pt', max_length=512, truncation=True).to(device)")
    print(f"# output_tokens = loaded_trimmed_model.generate(**inputs, max_new_tokens=128, num_beams=4, early_stopping=True)")
    print(f"# generated_summary = loaded_tokenizer.decode(output_tokens[0], skip_special_tokens=True)")
    print(f"# print(f'Generated Summary: {generated_summary}')")

except Exception as e:
    print(f"\nERROR during Step 9 (Adapter Saving): {e}")
    import traceback
    traceback.print_exc()



# Step 10: Display Example Summaries with ROUGE Scores
print("\n--- Starting Example Summaries Generation ---")
try:
    # Get predictions for the test set
    generated_predictions = trainer.predict(
        test_dataset=tokenized_test_dataset,
        max_new_tokens=128, # Max length of generated summary (matches labels)
        num_beams=4, # Number of beams for beam search
    )
    print("trainer.predict for example summaries completed.")

    preds = generated_predictions.predictions
    labels = generated_predictions.label_ids

    # --- Start of FIX for Step 10 Decoding Error ---
    print("\n--- Inside Step 10 Decoding (DEBUG) ---")
    print(f"Type of preds (initial): {type(preds)}")
    print(f"Type of labels (initial): {type(labels)}")

    # CRITICAL FIX: If predictions is a tuple (e.g., logits and hidden states), take the first element (logits)
    if isinstance(preds, tuple):
        print(f"preds is a tuple. Length: {len(preds)}")
        preds = preds[0] # Assume the first element is the actual generated sequences (logits)
        print(f"Taking first element of preds tuple. New type: {type(preds)}")

    # Ensure predictions and labels are NumPy arrays
    if isinstance(preds, torch.Tensor):
        preds = preds.cpu().numpy()
    if isinstance(labels, torch.Tensor):
        labels = labels.cpu().numpy()

    print(f"preds type (after initial processing): {type(preds)}")
    print(f"labels type (after initial processing): {type(labels)}")

    # CRITICAL FIX: If predictions are logits (3D array), convert to token IDs using argmax
    if preds.ndim == 3:
        print(f"WARNING: preds has unexpected 3 dimensions. Assuming logits and applying argmax.")
        preds = np.argmax(preds, axis=-1) # Convert logits to token IDs
        print(f"preds after argmax: New shape: {preds.shape}, New dtype: {preds.dtype}")

    # Further check for dimensions: if preds is 3D (e.g., batch_size, seq_len, 1), squeeze it
    if preds.ndim == 3 and preds.shape[-1] == 1:
        preds = preds.squeeze(-1)
        print(f"preds was 3D with last dim 1, squeezed to 2D. New shape: {preds.shape}")
    elif preds.ndim > 2:
        print(f"WARNING: preds has unexpected {preds.ndim} dimensions (after argmax check). Expected 2D.")
        preds = preds.reshape(preds.shape[0], -1) # Attempt to flatten if possible
        print(f"Attempted to reshape preds to 2D. New shape: {preds.shape}")

    # Ensure predictions are integers (important for tokenizer.batch_decode)
    if preds.dtype != np.int64 and preds.dtype != np.int32:
        print(f"WARNING: preds dtype is {preds.dtype}, converting to int64.")
        preds = preds.astype(np.int64)
    if labels.dtype != np.int64 and labels.dtype != np.int32:
        print(f"WARNING: labels dtype is {labels.dtype}, converting to int64.")
        labels = labels.astype(np.int64)

    print(f"preds shape (after processing): {preds.shape}")
    print(f"labels shape (after processing): {labels.shape}")

    # Replace -100 in labels as they are ignored in loss calculation
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    print("Labels processed (replaced -100).")
    # --- End of FIX for Step 10 Decoding Error ---

    # Decode predictions and labels back to text
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    print(f"Successfully decoded {len(decoded_preds)} predictions and {len(decoded_labels)} labels.")

    # FIX: Replace empty strings with a single character 'a' to prevent ZeroDivisionError in BLEU
    decoded_preds_for_metrics = [pred if pred else "a" for pred in decoded_preds]
    decoded_labels_for_metrics = [label if label else "a" for label in decoded_labels]
    print("Empty strings in decoded predictions/labels replaced with 'a' for robust metric calculation.")


    # Some metrics expect a list of lists for references
    formated_decoded_labels_for_some_metrics = [[label] for label in decoded_labels_for_metrics]
    print("Labels formatted for metrics.")

    # --- Compute Metrics ---
    # Re-compute ROUGE and BLEU for the example summaries
    try:
        rouge_results_examples = metric_rouge.compute(predictions=decoded_preds_for_metrics, references=decoded_labels_for_metrics)
        print("ROUGE computed for example summaries.")
    except Exception as e:
        print(f"ERROR: Failed to compute ROUGE for example summaries: {e}")
        import traceback
        traceback.print_exc()
        rouge_results_examples = {"rouge1": 0.0, "rouge2": 0.0, "rougeL": 0.0} # Fallback

    print("\nExample Summaries with ROUGE Scores (Adapters):")
    num_examples_to_display = 5 # Display a few examples
    for i in range(min(num_examples_to_display, len(tokenized_test_dataset))):
        original_text = dataset['test'][i]['text']
        reference_summary = decoded_labels[i]
        generated_summary = decoded_preds[i]

        # Compute ROUGE for individual example (optional, but good for inspection)
        single_rouge = metric_rouge.compute(predictions=[generated_summary if generated_summary else "a"],
                                            references=[reference_summary if reference_summary else "a"])

        print(f"\n--- Example {i+1} ---")
        print(f"Original Text: {original_text[:500]}...") # Truncate long texts for display
        print(f"Reference Summary: {reference_summary}")
        print(f"Generated Summary: {generated_summary}")
        print(f"  ROUGE-1 F1: {single_rouge['rouge1']:.4f}")
        print(f"  ROUGE-2 F1: {single_rouge['rouge2']:.4f}")
        print(f"  ROUGE-L F1: {single_rouge['rougeL']:.4f}")

except Exception as e:
    print(f"\nERROR during Step 10 (Example Summaries Generation): {e}")
    import traceback
    traceback.print_exc()

print("\nScript finished.")

Attempting to load trimmed model and tokenizer from: /kaggle/input/dataset1
Training outputs (checkpoints, logs, results) will be saved to: /kaggle/input/dataset1
Collecting transformers
  Downloading transformers-4.52.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.2/40.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.52.2-py3-none-any.whl (10.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.5/10.5 MB[0m [31m70.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.51.3
    Uninstalling transformers-4.51.3:
      Successfully uninstalled transformers-4.51.3
Successfully installed transformers-4.52.2
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Down

2025-05-22 09:49:02.083566: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747907342.287037      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747907342.350022      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Loading trimmed model from /kaggle/input/dataset1...
Loading trimmed tokenizer from /kaggle/input/dataset1...
Trimmed model and tokenizer loaded successfully!
Loaded model vocabulary size: 2766
Loaded tokenizer vocabulary size: 2866
Adapters initialized for the trimmed model.
Model moved to device: cuda


Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]


Dataset loaded from CSV files.
Dataset sizes:
Train size: 23492
Validation size: 2937
Test size: 2937


Map:   0%|          | 0/23492 [00:00<?, ? examples/s]

Map:   0%|          | 0/2937 [00:00<?, ? examples/s]

Map:   0%|          | 0/2937 [00:00<?, ? examples/s]


Configuring and applying Adapters...

Trainable parameters (Adapters on Trimmed Model):

Trainable parameters (Adapter baseline):
Total parameters: 47960448
Trainable parameters: 1065984
Percentage of trainable parameters: 2.22%
Training outputs (checkpoints, logs) will be saved to: /kaggle/working/trimmed_adapters_checkpoints


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

  trainer = FilteredArgsSeq2SeqTrainer(



Starting training (Adapters on Trimmed Model)...
No checkpoint found, starting fresh training.


  batch["labels"] = torch.tensor(batch["labels"], dtype=torch.int64)


Step,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Bleu
500,26.2133,4.493471,0.008387,0.001021,0.00824,0.00115



CUDA cache cleared and gc.collect() called by callback before evaluation at step 500.

--- Inside compute_metrics (DEBUG) ---
Type of eval_pred.predictions (initial): <class 'tuple'>
Type of eval_pred.label_ids (initial): <class 'numpy.ndarray'>
eval_pred.predictions is a tuple. Length: 2
Taking first element of predictions tuple. New type: <class 'numpy.ndarray'>
Predictions type (after initial processing): <class 'numpy.ndarray'>
Labels type (after initial processing): <class 'numpy.ndarray'>
Predictions after argmax: New shape: (2937, 107), New dtype: int64
Predictions shape (after processing): (2937, 107)
Labels shape (after processing): (2937, 107)
Labels processed (replaced -100).
Successfully decoded 2937 predictions and 2937 labels.
Empty strings in decoded predictions/labels replaced with 'a' for robust metric calculation.
Labels formatted for metrics.
ROUGE computed.
BLEU computed.
Metrics combined.
--- Exiting compute_metrics ---





CUDA cache cleared and gc.collect() called by callback before evaluation at step 1000.


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.67 GiB. GPU 0 has a total capacity of 14.74 GiB of which 2.66 GiB is free. Process 2549 has 12.08 GiB memory in use. Of the allocated memory 8.92 GiB is allocated by PyTorch, and 2.71 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)