In [None]:
# Authorize Colab to access your Google Drive.
# Everything below reads its inputs from, and writes checkpoints/submissions to,
# paths under the '/content/drive' mount point, so this cell must run first.
# Requires the Colab runtime (the `google.colab` package).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

import ast  # To safely evaluate string-formatted lists
import gc
import json  # Added import for json
import os
import zipfile

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from safetensors.torch import load_file
from sklearn.metrics import cohen_kappa_score
from torch.utils.data import Dataset as TorchDataset
from transformers import (
    AutoTokenizer,
    AutoModel,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)

# --- Hugging Face Authentication ---
from huggingface_hub import login

# Look the token up in Colab secrets. The lookup (including the Colab-only
# import) lives inside the try so that every way it can fail leads to the
# interactive fallback below:
#   * ImportError                       -> not running on Colab
#   * userdata.SecretNotFoundError      -> no secret named 'HF_TOKEN'
#   * userdata.NotebookAccessError      -> secret exists but access not granted
# None of these is a NameError/KeyError, hence the broad (but reported) catch.
try:
    from google.colab import userdata
    hf_token = userdata.get('HF_TOKEN')
except Exception as secret_error:
    hf_token = None
    print("⚠️ Hugging Face token not found in Colab secrets. Please add it as 'HF_TOKEN'.")
    print(f"   Reason: {type(secret_error).__name__}: {secret_error}")

if hf_token:
    # Errors from login() itself (e.g. an invalid token) are deliberately not
    # swallowed: a bad token should stop the notebook here.
    login(token=hf_token)
    print("✔️ Successfully logged into Hugging Face.")
else:
    # Fallback for local execution or if login() is preferred manually
    login()


In [None]:

# --- Model & Training ---
# Hugging Face Hub id of the pretrained encoder; also used to load the tokenizer.
MODEL_NAME = "CAMeL-Lab/readability-arabertv2-d3tok-reg"

# NOTE(review): NUM_LABELS is not referenced anywhere else in the visible notebook.
NUM_LABELS = 1
# Number of readability classes; rounded predictions are clipped to [0, TARGET_CLASSES - 1].
TARGET_CLASSES = 19
# Length of each row's extra feature vector (passed as `num_extra_features` to the model).
NUM_FEATURES = 7

# --- IMPORTANT: Set the path to your project folder on Google Drive ---
PROJECT_DRIVE_PATH = '/content/drive/MyDrive/BAREC_Competition'

# --- File & Directory Paths (Now relative to your Google Drive) ---
BASE_DIR = PROJECT_DRIVE_PATH
PROCESSED_DATA_DIR = os.path.join(BASE_DIR, "lex")
# Checkpoint folder name ends with the model name without its Hub organisation prefix.
CHECKPOINT_DIR = os.path.join(BASE_DIR, "results", f"hybrid_constrained_samer_regression_v2_{MODEL_NAME.split('/')[-1]}")
SUBMISSION_DIR = os.path.join(BASE_DIR, "submission")

# Ensure the output directories exist on your Google Drive
# (side effect: running this cell creates them if they are missing).
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(SUBMISSION_DIR, exist_ok=True)

# Paths to the preprocessed input files on Google Drive
TRAIN_PROCESSED_PATH = os.path.join(PROCESSED_DATA_DIR, 'train_processed_full.csv')
DEV_PROCESSED_PATH = os.path.join(PROCESSED_DATA_DIR, 'dev_processed_full.csv')
TEST_PROCESSED_PATH = os.path.join(PROCESSED_DATA_DIR, 'test_processed_full.csv')

# --- Submission Paths on Google Drive ---
# The CSV is written first and then compressed into the zip archive.
SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, "submission_hybrid_constrained_samer_regression.csv")
ZIPPED_SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, "submission_hybrid_constrained_samer_regression.zip")

print(f"✔️ All paths configured to use Google Drive folder: {BASE_DIR}")


# 2. DATA LOADING FUNCTION

In [None]:
# =====================================================================================
# 2. DATA LOADING FUNCTION
# =====================================================================================

def load_preprocessed_data():
    """Read the train and dev CSVs and prepare them for regression training.

    The ``features`` column is parsed from its string form with
    ``ast.literal_eval`` and ``label`` is shifted down by one and stored as float.

    Returns:
        ``(train_df, val_df)`` on success, or ``(None, None)`` after printing an
        error message when a file is missing or any other exception occurs.
    """
    print("\n--- Loading Preprocessed Data from Google Drive ---")
    try:
        frames = [pd.read_csv(csv_path) for csv_path in (TRAIN_PROCESSED_PATH, DEV_PROCESSED_PATH)]

        print("Converting 'features' column from string to list...")
        for frame in frames:
            frame['features'] = frame['features'].apply(ast.literal_eval)

        # Regression targets: 1-based class ids become 0-based floats.
        for frame in frames:
            frame['label'] = (frame['label'].astype(int) - 1).astype(float)

        train_df, val_df = frames
        print(f"✔ Successfully loaded {len(train_df)} training and {len(val_df)} validation records.")
        return train_df, val_df
    except FileNotFoundError as missing:
        print(f"❌ ERROR: Preprocessed file not found: {missing}.")
        print("Please make sure your data is uploaded to the correct Google Drive folder.")
    except Exception as failure:
        print(f"❌ ERROR during data loading: {failure}")
    # Both failure paths report the problem and hand back the same sentinel pair.
    return None, None


# 3. MODEL, DATASET, AND METRICS DEFINITIONS

In [None]:
# =====================================================================================
# 3. MODEL, DATASET, AND METRICS DEFINITIONS
# =====================================================================================

class ReadabilityDataset(TorchDataset):
    """
    Torch dataset pairing each text with its extra numerical feature vector.

    Every item is a dict with ``input_ids``, ``attention_mask`` and
    ``extra_features``; a ``labels`` entry is added only when labels were given.
    """
    def __init__(self, texts, features, labels=None, tokenizer_obj=None, max_len=256):
        self.texts, self.features, self.labels = texts, features, labels
        self.tokenizer = tokenizer_obj
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        raw_text = str(self.texts[idx])
        extra = torch.tensor(self.features[idx], dtype=torch.float)

        # Tokenize to exactly `max_len` tokens (padded or truncated); the
        # tokenizer returns tensors with a leading batch axis of size one.
        encoded = self.tokenizer.encode_plus(
            raw_text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        sample = dict(
            input_ids=encoded['input_ids'].flatten(),
            attention_mask=encoded['attention_mask'].flatten(),
            extra_features=extra,
        )
        if self.labels is None:
            return sample

        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return sample

class HybridRegressionModel(nn.Module):
    """
    Transformer encoder plus a linear head fed with extra numerical features.

    The encoder's pooled output is concatenated with ``extra_features`` and
    mapped to one regression value per example.
    """
    def __init__(self, model_name, num_extra_features):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(model_name)
        # Head input = encoder hidden size + number of extra features.
        head_inputs = self.transformer.config.hidden_size + num_extra_features
        self.regressor = nn.Linear(head_inputs, 1)

    def forward(self, input_ids, attention_mask, extra_features, labels=None):
        pooled = self.transformer(input_ids=input_ids, attention_mask=attention_mask).pooler_output

        # Join the text representation and the extra features along the feature axis.
        logits = self.regressor(torch.cat((pooled, extra_features), dim=1))

        if labels is None:
            return logits

        # With labels: mean squared error between squeezed predictions and targets.
        mse = nn.MSELoss()
        return mse(logits.squeeze(), labels.squeeze()), logits

def compute_metrics(p):
    """
    Computes the Quadratic Weighted Kappa (QWK) score for regression predictions.

    Args:
        p: A ``(predictions, labels)`` pair as passed by ``Trainer``. Predictions
            are the raw regression outputs (any shape; they are flattened),
            labels are the 0-based class targets.

    Returns:
        ``{'qwk': <float>}``.
    """
    predictions, labels = p
    # Some models return several outputs; the regression values come first.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # Round to the nearest class, clip to the valid range [0, TARGET_CLASSES - 1]
    # and hand scikit-learn flat integer class ids rather than an (N, 1) float array.
    flat_preds = np.asarray(predictions).reshape(-1)
    clipped_preds = np.clip(np.round(flat_preds), 0, TARGET_CLASSES - 1).astype(int)
    true_classes = np.round(np.asarray(labels).reshape(-1)).astype(int)

    # Passing the full label set pins the confusion matrix to one row/column per
    # class. Without it, the quadratic weights use positions among the classes
    # that happen to occur, so a missing class would shrink the distances.
    qwk = cohen_kappa_score(
        true_classes,
        clipped_preds,
        labels=list(range(TARGET_CLASSES)),
        weights='quadratic',
    )
    return {'qwk': qwk}


# 4. & 5. MAIN EXECUTION FUNCTIONS

In [None]:
# =====================================================================================
# 4. & 5. MAIN EXECUTION FUNCTIONS
# =====================================================================================

def main_train():
    """Fine-tune the hybrid regression model and checkpoint it to Google Drive.

    Steps: load the tokenizer and the preprocessed train/dev frames, wrap them
    in datasets, build the model and ``Trainer``, then train, resuming from the
    highest-numbered ``checkpoint-*`` folder in ``CHECKPOINT_DIR`` when one
    exists. Returns ``None``; if the data cannot be loaded it prints a message
    and returns early without training.
    """
    print("===== 🚀 STARTING HYBRID REGRESSION MODEL PIPELINE =====\n")

    print("Initializing Tokenizer...")
    tok = AutoTokenizer.from_pretrained(MODEL_NAME)

    train_frame, dev_frame = load_preprocessed_data()
    if train_frame is None:
        print("\n! Aborting script due to data loading failure.")
        return

    print("\nCreating Torch Datasets...")
    # Same construction for both splits: texts, feature vectors, labels, tokenizer.
    train_set, dev_set = (
        ReadabilityDataset(
            frame['d3tok_text'].tolist(),
            frame['features'].tolist(),
            frame['label'].tolist(),
            tok,
        )
        for frame in (train_frame, dev_frame)
    )
    print("✔ Datasets created.")

    print("\nInitializing Hybrid Regression Model...")
    net = HybridRegressionModel(MODEL_NAME, num_extra_features=NUM_FEATURES)

    # Evaluate and save once per epoch; keep the checkpoint with the best QWK.
    hyperparams = dict(
        output_dir=CHECKPOINT_DIR,
        num_train_epochs=15,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        learning_rate=3e-5,
        warmup_ratio=0.1,
        weight_decay=0.01,
        logging_steps=100,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="qwk",
        greater_is_better=True,
        save_total_limit=2,
        fp16=torch.cuda.is_available(),
        report_to="none",
    )
    runner = Trainer(
        model=net,
        args=TrainingArguments(**hyperparams),
        train_dataset=train_set,
        eval_dataset=dev_set,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=4)],
    )

    print("\nStarting model training... Checkpoints will be saved to Google Drive.")

    # Pick the checkpoint folder with the largest step suffix, if there is one.
    resume_path = None
    if not os.path.exists(CHECKPOINT_DIR):
        print("Checkpoint directory not found. Starting training from scratch.")
    else:
        saved = sorted(
            (name for name in os.listdir(CHECKPOINT_DIR) if name.startswith("checkpoint-")),
            key=lambda name: int(name.split('-')[-1]),
        )
        if saved:
            resume_path = os.path.join(CHECKPOINT_DIR, saved[-1])
            print(f"Resuming training from latest checkpoint: {resume_path}")
        else:
            print("No checkpoints found to resume training from. Starting from scratch.")

    runner.train(resume_from_checkpoint=resume_path)
    print("✔ Training finished.")

    # Drop the large objects and release cached GPU memory before prediction.
    del net, runner, train_set, dev_set, train_frame, dev_frame
    gc.collect()
    torch.cuda.empty_cache()


def _find_checkpoint_weights(checkpoint_path):
    """Return the weights file inside ``checkpoint_path`` or ``None`` if there is none.

    ``model.safetensors`` is checked first and ``pytorch_model.bin`` second,
    so checkpoints in either serialization format are accepted.
    """
    for filename in ("model.safetensors", "pytorch_model.bin"):
        candidate = os.path.join(checkpoint_path, filename)
        if os.path.exists(candidate):
            return candidate
    return None


def _load_checkpoint_weights(weights_path):
    """Load a state dict from a ``.safetensors`` or ``.bin`` weights file (onto CPU)."""
    if weights_path.endswith(".safetensors"):
        return load_file(weights_path)
    return torch.load(weights_path, map_location="cpu")


def _select_best_checkpoint(checkpoint_dir):
    """Return ``(checkpoint_path, weights_path)`` for the checkpoint to predict with.

    Preference order:
      1. the checkpoint named by ``best_model_checkpoint`` in the newest readable
         ``trainer_state.json`` (only its folder name is used, resolved against
         ``checkpoint_dir``, so a moved results folder still works);
      2. the newest checkpoint that contains a weights file.

    Raises:
        FileNotFoundError: no ``checkpoint-<step>`` folder with weights exists.
    """
    checkpoints = [
        name for name in os.listdir(checkpoint_dir)
        if name.startswith("checkpoint-") and name.split('-')[-1].isdigit()
    ]
    if not checkpoints:
        raise FileNotFoundError(f"No checkpoint found in the results directory on Google Drive: {checkpoint_dir}.")

    # Newest first: the newest trainer state knows about every earlier evaluation.
    checkpoints.sort(key=lambda name: int(name.split('-')[-1]), reverse=True)

    for checkpoint in checkpoints:
        trainer_state_path = os.path.join(checkpoint_dir, checkpoint, "trainer_state.json")
        if not os.path.exists(trainer_state_path):
            continue
        try:
            with open(trainer_state_path, 'r') as f:
                trainer_state = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Warning: Could not parse trainer_state.json in {os.path.join(checkpoint_dir, checkpoint)}: {e}")
            continue

        recorded_best = trainer_state.get('best_model_checkpoint') if isinstance(trainer_state, dict) else None
        if not recorded_best:
            continue

        # The state file lives in the checkpoint that was being saved, which is
        # not necessarily the best one; follow the recorded pointer instead.
        best_path = os.path.join(checkpoint_dir, os.path.basename(os.path.normpath(recorded_best)))
        weights_path = _find_checkpoint_weights(best_path)
        if weights_path:
            print(f"Found best checkpoint (eval_qwk {trainer_state.get('best_metric')}): {best_path}")
            return best_path, weights_path

    print("Could not find best checkpoint via trainer_state.json. Falling back to the latest checkpoint with a model file.")
    for checkpoint in checkpoints:
        checkpoint_path = os.path.join(checkpoint_dir, checkpoint)
        weights_path = _find_checkpoint_weights(checkpoint_path)
        if weights_path:
            print(f"Using latest valid checkpoint: {checkpoint_path}")
            return checkpoint_path, weights_path

    raise FileNotFoundError(
        f"No valid checkpoint with 'model.safetensors' or 'pytorch_model.bin' found in: {checkpoint_dir}."
    )


def main_predict():
    """Predict on the test set with the best saved checkpoint and write the submission.

    Loads the tokenizer and the preprocessed test CSV, restores the model
    weights from the best checkpoint under ``CHECKPOINT_DIR``, rounds and clips
    the regression outputs to class ids, shifts them back to 1-based labels and
    writes ``SUBMISSION_PATH`` plus the zip archive ``ZIPPED_SUBMISSION_PATH``.

    Errors are reported by printing; nothing is raised to the caller and the
    function always returns ``None``.
    """
    print("\n===== 🏆 GENERATING FINAL PREDICTIONS & SUBMISSION =====\n")
    try:
        print("Initializing Tokenizer for prediction...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        print("Loading preprocessed test data from Google Drive...")
        test_df = pd.read_csv(TEST_PROCESSED_PATH)
        test_df['features'] = test_df['features'].apply(ast.literal_eval)

        print("\nLoading the best trained model from Google Drive checkpoints...")
        if not os.path.exists(CHECKPOINT_DIR):
            raise FileNotFoundError(f"Checkpoint directory not found on Google Drive: {CHECKPOINT_DIR}.")

        best_checkpoint_path, weights_path = _select_best_checkpoint(CHECKPOINT_DIR)

        print(f"Loading model from: {best_checkpoint_path}")
        model = HybridRegressionModel(MODEL_NAME, num_extra_features=NUM_FEATURES)
        model.load_state_dict(_load_checkpoint_weights(weights_path))

        # report_to="none": a prediction-only Trainer must not start experiment
        # loggers (the default can trigger an interactive wandb login).
        trainer = Trainer(
            model=model,
            args=TrainingArguments(output_dir=CHECKPOINT_DIR, report_to="none"),
        )

        print("Generating predictions on the test set...")
        # Note: No labels for the test dataset
        test_dataset = ReadabilityDataset(test_df['d3tok_text'].tolist(), test_df['features'].tolist(), tokenizer_obj=tokenizer)
        predictions = trainer.predict(test_dataset)

        # Round to the nearest class, clip to [0, TARGET_CLASSES - 1], then undo
        # the "- 1" applied to the training labels to get the original 1-based ids.
        clipped_preds = np.clip(np.round(predictions.predictions.flatten()), 0, TARGET_CLASSES - 1)
        test_df['Prediction'] = (clipped_preds + 1).astype(int)

        submission_df = test_df.rename(columns={'ID': 'id'})[['id', 'Prediction']]

        print(f"\nSaving prediction file to: {SUBMISSION_PATH}")
        submission_df.to_csv(SUBMISSION_PATH, index=False)

        print(f"Compressing into {os.path.basename(ZIPPED_SUBMISSION_PATH)}...")
        with zipfile.ZipFile(ZIPPED_SUBMISSION_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.write(SUBMISSION_PATH, arcname=os.path.basename(SUBMISSION_PATH))

        print(f"✔ Submission file '{os.path.basename(ZIPPED_SUBMISSION_PATH)}' created successfully in your Drive!")

    except FileNotFoundError as e:
        print(f"❌ ERROR: File not found: {e}. Ensure training was completed and checkpoints exist.")
    except Exception as e:
        print(f"❌ An error occurred during final prediction: {e}")

# 6. SCRIPT RUNNER

In [None]:
# =====================================================================================
# 6. SCRIPT RUNNER
# =====================================================================================

# Start the training process
# (prints a message and returns early if the preprocessed data cannot be loaded).
main_train()

# Once training is done, generate predictions
# (failures inside main_predict are caught and printed there, not raised).
main_predict()

# NOTE(review): neither call above raises or returns a status, so this message is
# printed even when training or prediction reported an error.
print("\n--- ✅ All Done! Check your Google Drive for results and submission files. ---")

Mounted at /content/drive
✔️ Successfully logged into Hugging Face.
✔️ All paths configured to use Google Drive folder: /content/drive/MyDrive/BAREC_Competition
===== 🚀 STARTING HYBRID REGRESSION MODEL PIPELINE =====

Initializing Tokenizer...


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]


--- Loading Preprocessed Data from Google Drive ---
Converting 'features' column from string to list...
✔ Successfully loaded 97874 training and 7310 validation records.

Creating Torch Datasets...
✔ Datasets created.

Initializing Hybrid Regression Model...


config.json:   0%|          | 0.00/840 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/541M [00:00<?, ?B/s]


Starting model training... Checkpoints will be saved to Google Drive.
No checkpoints found to resume training from. Starting from scratch.


Epoch,Training Loss,Validation Loss,Qwk
1,12.8592,4.368601,0.803595
2,11.5612,3.982326,0.787287
3,10.6631,4.629837,0.792533
4,9.4595,4.046183,0.809687
5,10.0395,4.235983,0.801671
6,8.4152,4.130218,0.80839
7,7.7583,4.304597,0.804555
8,7.9803,4.257917,0.798454


✔ Training finished.

===== 🏆 GENERATING FINAL PREDICTIONS & SUBMISSION =====

Initializing Tokenizer for prediction...
Loading preprocessed test data from Google Drive...

Loading the best trained model from Google Drive checkpoints...
Could not find best checkpoint via trainer_state.json. Falling back to the latest checkpoint with a model file.
❌ ERROR: File not found: No valid checkpoint with 'pytorch_model.bin' found in: /content/drive/MyDrive/BAREC_Competition/results/hybrid_constrained_samer_regression_v2_readability-arabertv2-d3tok-reg.. Ensure training was completed and checkpoints exist.

--- ✅ All Done! Check your Google Drive for results and submission files. ---


In [None]:
# Checkpoint written at the end of the recorded training run. With 97,874
# training rows and a batch size of 16 there are 6,118 steps per epoch, so step
# 48944 is the END of epoch 8 (the last epoch), not the epoch with the best
# validation QWK (epoch 4 in the recorded log).
LAST_CHECKPOINT_PATH = '/content/drive/MyDrive/BAREC_Competition/results/hybrid_constrained_samer_regression_v2_readability-arabertv2-d3tok-reg/checkpoint-48944'

# Resolve the real best checkpoint from the trainer state saved alongside the
# last checkpoint ('best_model_checkpoint'). Only the folder name of the recorded
# path is used, resolved next to LAST_CHECKPOINT_PATH. If the state file is
# missing/unreadable, or the recorded checkpoint has no weights, the original
# hard-coded path is kept.
BEST_CHECKPOINT_PATH = LAST_CHECKPOINT_PATH
_trainer_state_path = os.path.join(LAST_CHECKPOINT_PATH, 'trainer_state.json')
if os.path.exists(_trainer_state_path):
    try:
        with open(_trainer_state_path, 'r') as _state_file:
            _recorded_best = json.load(_state_file).get('best_model_checkpoint')
    except (OSError, ValueError, AttributeError) as _state_error:
        _recorded_best = None
        print(f"⚠️ Could not read {_trainer_state_path}: {_state_error}")
    if _recorded_best:
        _best_candidate = os.path.join(
            os.path.dirname(LAST_CHECKPOINT_PATH),
            os.path.basename(os.path.normpath(_recorded_best)),
        )
        if os.path.exists(os.path.join(_best_candidate, 'model.safetensors')):
            BEST_CHECKPOINT_PATH = _best_candidate


# --- Input & Output Directories (derived from base paths) ---
PROCESSED_DATA_DIR = os.path.join(PROJECT_DRIVE_PATH, "lex")
SUBMISSION_DIR = os.path.join(PROJECT_DRIVE_PATH, "submission")

# Ensure the submission output directory exists
os.makedirs(SUBMISSION_DIR, exist_ok=True)

# --- File Paths ---
# These rebind the names set in the first configuration cell, so this cell
# writes to different submission files than main_predict().
TEST_PROCESSED_PATH = os.path.join(PROCESSED_DATA_DIR, 'test_processed_full.csv')
SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, "submission_final_prediction.csv")
ZIPPED_SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, "submission.zip")

print(f"✔️ Configuration loaded. Model will be loaded from: {BEST_CHECKPOINT_PATH}")



# 3. MODEL AND DATASET CLASS DEFINITIONS

In [None]:
# =====================================================================================
# 3. MODEL AND DATASET CLASS DEFINITIONS
# =====================================================================================

class ReadabilityDataset(TorchDataset):
    """
    Dataset yielding tokenized text together with its extra numerical features.

    Items are dicts holding ``input_ids``, ``attention_mask`` and
    ``extra_features``, plus ``labels`` when labels were supplied.
    """
    def __init__(self, texts, features, labels=None, tokenizer_obj=None, max_len=256):
        self.texts = texts
        self.features = features
        self.labels = labels
        self.tokenizer, self.max_len = tokenizer_obj, max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        sentence = str(self.texts[idx])
        numeric_features = torch.tensor(self.features[idx], dtype=torch.float)

        # Fixed-length encoding: pad or truncate every text to `max_len` tokens.
        tokenizer_options = dict(
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        tokens = self.tokenizer.encode_plus(sentence, **tokenizer_options)

        # The tokenizer output carries a batch axis of size one; flatten it away.
        record = {
            'input_ids': tokens['input_ids'].flatten(),
            'attention_mask': tokens['attention_mask'].flatten(),
            'extra_features': numeric_features,
        }

        has_labels = self.labels is not None
        if has_labels:
            record['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return record

class HybridRegressionModel(nn.Module):
    """
    Regression model built from a transformer encoder and extra numerical features.

    The pooled encoder output and the extra features are concatenated and passed
    through a single linear layer that emits one value per example.
    """
    def __init__(self, model_name, num_extra_features):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(model_name)
        hidden_size = self.transformer.config.hidden_size
        self.regressor = nn.Linear(hidden_size + num_extra_features, 1)

    def forward(self, input_ids, attention_mask, extra_features, labels=None):
        encoder_out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        head_input = torch.cat((encoder_out.pooler_output, extra_features), dim=1)
        logits = self.regressor(head_input)

        if labels is not None:
            # Using MSELoss for regression
            criterion = nn.MSELoss()
            return criterion(logits.squeeze(), labels.squeeze()), logits
        return logits

# 4. PREDICTION AND SUBMISSION SCRIPT

In [None]:
# =====================================================================================
# 4. PREDICTION AND SUBMISSION SCRIPT
# =====================================================================================

def generate_predictions():
    """
    Loads the trained model from the specified checkpoint, runs predictions on the
    test set, and saves the formatted submission file.

    Reads ``BEST_CHECKPOINT_PATH/model.safetensors``, predicts on the CSV at
    ``TEST_PROCESSED_PATH``, and writes ``SUBMISSION_PATH`` plus the zip archive
    ``ZIPPED_SUBMISSION_PATH``. Problems are reported by printing; the function
    returns ``None`` in every case and does not raise.
    """
    print("\n===== 🏆 STARTING PREDICTION PIPELINE =====\n")
    try:
        # --- Validate Checkpoint Path ---
        model_weights_path = os.path.join(BEST_CHECKPOINT_PATH, 'model.safetensors')
        if not os.path.exists(model_weights_path):
            print(f"❌ ERROR: 'model.safetensors' not found at the specified path: {model_weights_path}")
            print("Please ensure the BEST_CHECKPOINT_PATH variable is set correctly to the directory containing the model weights.")
            return

        # --- Initialize Tokenizer ---
        print("1. Initializing tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # --- Load Test Data ---
        print(f"2. Loading preprocessed test data from: {TEST_PROCESSED_PATH}")
        test_df = pd.read_csv(TEST_PROCESSED_PATH)
        # The 'features' column is stored as a string; parse it back into a list.
        test_df['features'] = test_df['features'].apply(ast.literal_eval)
        print(f"   Loaded {len(test_df)} test records.")

        # --- Load Model from Checkpoint ---
        print(f"3. Loading model from checkpoint: {BEST_CHECKPOINT_PATH}")
        model = HybridRegressionModel(MODEL_NAME, num_extra_features=NUM_FEATURES)

        # Load the state dictionary from the .safetensors file
        state_dict = load_file(model_weights_path)

        # Load the state dictionary into the model
        model.load_state_dict(state_dict)
        print("   Model weights loaded successfully from model.safetensors.")

        # --- Initialize Trainer ---
        # A minimal Trainer is sufficient for making predictions.
        # report_to="none" keeps it from starting experiment loggers: with the
        # default setting the recorded run stopped at an interactive wandb
        # API-key prompt before predicting.
        trainer = Trainer(
            model=model,
            args=TrainingArguments(output_dir="./temp_results", report_to="none"),
        )

        # --- Create Test Dataset ---
        print("4. Creating test dataset...")
        test_dataset = ReadabilityDataset(
            texts=test_df['d3tok_text'].tolist(),
            features=test_df['features'].tolist(),
            tokenizer_obj=tokenizer
        )

        # --- Generate Predictions ---
        print("5. Generating predictions on the test set...")
        raw_predictions = trainer.predict(test_dataset)

        # Predictions are in the .predictions attribute; flatten them to a 1D array
        predictions_logits = raw_predictions.predictions.flatten()

        # --- Process Predictions ---
        # Round to the nearest integer, clip to the valid label range [0, 18],
        # and convert back to original class labels [1, 19]
        clipped_preds = np.clip(np.round(predictions_logits), 0, TARGET_CLASSES - 1)
        final_predictions = (clipped_preds + 1).astype(int)
        test_df['Prediction'] = final_predictions
        print("   Predictions generated and processed.")

        # --- Create and Save Submission File ---
        submission_df = test_df.rename(columns={'ID': 'id'})[['id', 'Prediction']]
        print(f"\n6. Saving final prediction CSV to: {SUBMISSION_PATH}")
        submission_df.to_csv(SUBMISSION_PATH, index=False)

        print(f"7. Compressing submission file into: {ZIPPED_SUBMISSION_PATH}")
        with zipfile.ZipFile(ZIPPED_SUBMISSION_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.write(SUBMISSION_PATH, arcname=os.path.basename(SUBMISSION_PATH))

        print("\n--- ✅ All Done! ---")
        print(f"Submission file '{os.path.basename(ZIPPED_SUBMISSION_PATH)}' has been saved to your Google Drive.")
        print(f"Location: {SUBMISSION_DIR}")

    except FileNotFoundError as e:
        print(f"❌ ERROR: A required file was not found: {e}")
        print("   Please ensure all paths in the configuration section are correct.")
    except Exception as e:
        print(f"❌ An unexpected error occurred: {e}")

# 5. EXECUTE SCRIPT

In [None]:
# =====================================================================================
# 5. EXECUTE SCRIPT
# =====================================================================================

# Run the prediction pipeline when this cell is executed directly.
# generate_predictions() prints its own progress and error messages and does not
# raise, so check the printed output to confirm the submission was written.
if __name__ == '__main__':
    generate_predictions()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✔️ Google Drive mounted successfully.
✔️ Configuration loaded. Model will be loaded from: /content/drive/MyDrive/BAREC_Competition/results/hybrid_constrained_samer_regression_v2_readability-arabertv2-d3tok-reg/checkpoint-48944

===== 🏆 STARTING PREDICTION PIPELINE =====

1. Initializing tokenizer...
2. Loading preprocessed test data from: /content/drive/MyDrive/BAREC_Competition/lex/test_processed_full.csv
   Loaded 3420 test records.
3. Loading model from checkpoint: /content/drive/MyDrive/BAREC_Competition/results/hybrid_constrained_samer_regression_v2_readability-arabertv2-d3tok-reg/checkpoint-48944
   Model weights loaded successfully from model.safetensors.
4. Creating test dataset...
5. Generating predictions on the test set...




<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mfatemah2024[0m ([33mfatemah2024-cu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


   Predictions generated and processed.

6. Saving final prediction CSV to: /content/drive/MyDrive/BAREC_Competition/submission/submission_final_prediction.csv
7. Compressing submission file into: /content/drive/MyDrive/BAREC_Competition/submission/submission.zip

--- ✅ All Done! ---
Submission file 'submission.zip' has been saved to your Google Drive.
Location: /content/drive/MyDrive/BAREC_Competition/submission


# Results of Sentence-level Readability Assessment - Constrained on The Blind Test
{'accuracy': 42.1, 'accuracy+-1': 71.6, 'avg_abs_dist': 1.2, 'qwk': 82.1, 'accuracy_7': 59.9, 'accuracy_5': 65.4, 'accuracy_3': 73.4}