In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import os
import random


In [2]:
def add_noise(matrix, noise_level=0.2):
    """Introduce noise in matrix by flipping values randomly."""
    noisy_matrix = [row[:] for row in matrix]  
    num_changes = int(noise_level * len(noisy_matrix) * len(noisy_matrix[0]))

    for _ in range(num_changes):
        row_idx = random.randint(0, len(noisy_matrix) - 1)
        col_idx = random.randint(0, len(noisy_matrix[0]) - 1)
        noisy_matrix[row_idx][col_idx] = random.randint(0, 9)  # Random value

    return noisy_matrix


In [3]:
data_dir = "json_files/training"
dataset = []

for file in os.listdir(data_dir):
    file_path = os.path.join(data_dir, file)

    if file.endswith(".json"):
        with open(file_path, "r") as f:
            data = json.load(f)

        train_examples = [(example["input"], example["output"]) for example in data["train"]]
        test_input = data["test"][0]["input"]
        correct_output = data["test"][0]["output"]

        candidates = [{"candidate_output": correct_output, "confidence": 1.0}]
        for i, noise in enumerate([0.2, 0.4, 0.6, 0.8]):
            noisy_output = add_noise(correct_output, noise_level=noise)
            candidates.append({"candidate_output": noisy_output, "confidence": round(1 - noise, 2)})

        dataset.append({
            "train_examples": train_examples,
            "test_input": test_input,
            "candidates": candidates
        })

print(f"✅ Dataset Created! Total Examples: {len(dataset)}")


✅ Dataset Created! Total Examples: 400


In [4]:
tokenizer = AutoTokenizer.from_pretrained("google/bigbird-roberta-base")

def tokenize_data(example):
    """
    Tokenizes train_examples, test_input, and candidates.
    """
    train_encodings = [tokenizer(json.dumps(pair), padding="max_length", truncation=True, max_length=4096) for pair in example["train_examples"]]
    test_encoding = tokenizer(json.dumps(example["test_input"]), padding="max_length", truncation=True, max_length=4096)
    candidate_encodings = [tokenizer(json.dumps(cand["candidate_output"]), padding="max_length", truncation=True, max_length=4096) for cand in example["candidates"]]

    return {
        "train_input_ids": torch.tensor([enc["input_ids"] for enc in train_encodings]),
        "train_attention_mask": torch.tensor([enc["attention_mask"] for enc in train_encodings]),
        "test_input_ids": torch.tensor(test_encoding["input_ids"]),
        "test_attention_mask": torch.tensor(test_encoding["attention_mask"]),
        "candidate_input_ids": torch.tensor([enc["input_ids"] for enc in candidate_encodings]),
        "candidate_attention_mask": torch.tensor([enc["attention_mask"] for enc in candidate_encodings]),
        "labels": torch.tensor([cand["confidence"] for cand in example["candidates"]])
    }

# Apply tokenization
tokenized_dataset = [tokenize_data(ex) for ex in dataset]

print("✅ Tokenization Completed!")


✅ Tokenization Completed!


In [5]:
class CandidateDataset(Dataset):
    def __init__(self, tokenized_data):
        self.data = tokenized_data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return {
            "test_input_ids": self.data[idx]["test_input_ids"],
            "test_attention_mask": self.data[idx]["test_attention_mask"],
            "candidate_input_ids": self.data[idx]["candidate_input_ids"],
            "candidate_attention_mask": self.data[idx]["candidate_attention_mask"],
            "labels": self.data[idx]["labels"]
        }

train_dataset = CandidateDataset(tokenized_dataset)
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)

print(f"✅ Dataset Ready! Total Samples: {len(train_dataset)}")


✅ Dataset Ready! Total Samples: 400


In [6]:
model = AutoModelForSequenceClassification.from_pretrained(
    "google/bigbird-roberta-base",
    num_labels=1  
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
loss_fn = torch.nn.MSELoss()

print(f"✅ Model Loaded on {device}")


2025-02-19 00:16:29.709294: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-19 00:16:30.078577: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-19 00:16:30.078640: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-19 00:16:30.078679: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-19 00:16:30.225819: I tensorflow/core/platform/cpu_feature_g

✅ Model Loaded on cuda


In [7]:
EPOCHS = 2  

for epoch in range(EPOCHS):
    print(f"\n🔄 Epoch {epoch+1}/{EPOCHS}")
    
    model.train()
    total_loss = 0

    for batch in train_dataloader:
        optimizer.zero_grad()

        input_ids = batch["candidate_input_ids"].to(device).squeeze(0)
        attention_mask = batch["candidate_attention_mask"].to(device).squeeze(0)
        labels = batch["labels"].to(device).unsqueeze(1)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = loss_fn(outputs.logits, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"✅ Epoch {epoch+1} Completed - Loss: {total_loss / len(train_dataloader)}")

print("🎉 Training Completed Successfully!")



🔄 Epoch 1/2


OutOfMemoryError: CUDA out of memory. Tried to allocate 60.00 MiB. GPU 0 has a total capacty of 31.73 GiB of which 22.25 MiB is free. Including non-PyTorch memory, this process has 31.68 GiB memory in use. Of the allocated memory 29.90 GiB is allocated by PyTorch, and 1.42 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [8]:
import torch
torch.cuda.empty_cache()


In [9]:
EPOCHS = 2  
gradient_accumulation_steps = 4  # Reduce effective batch size

scaler = torch.cuda.amp.GradScaler()  # Mixed Precision

for epoch in range(EPOCHS):
    print(f"\n🔄 Epoch {epoch+1}/{EPOCHS}")

    model.train()
    total_loss = 0

    for step, batch in enumerate(train_dataloader):
        optimizer.zero_grad()

        input_ids = batch["candidate_input_ids"].to(device).squeeze(0)
        attention_mask = batch["candidate_attention_mask"].to(device).squeeze(0)
        labels = batch["labels"].to(device).unsqueeze(1)

        with torch.cuda.amp.autocast():  # Enable mixed precision training
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs.logits, labels)

        scaler.scale(loss).backward()

        if (step + 1) % gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()

        total_loss += loss.item()

    print(f"✅ Epoch {epoch+1} Completed - Loss: {total_loss / len(train_dataloader)}")

print("🎉 Training Completed Successfully!")



🔄 Epoch 1/2


OutOfMemoryError: CUDA out of memory. Tried to allocate 60.00 MiB. GPU 0 has a total capacty of 31.73 GiB of which 22.25 MiB is free. Including non-PyTorch memory, this process has 31.68 GiB memory in use. Of the allocated memory 29.92 GiB is allocated by PyTorch, and 1.39 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [11]:
tokenizer = AutoTokenizer.from_pretrained("google/bigbird-roberta-base")

def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=MAX_LENGTH)

dataset = dataset.map(tokenize_function, batched=True)


AttributeError: 'list' object has no attribute 'map'

In [12]:
print(f"Type of dataset: {type(dataset)}")
print(f"First 3 entries: {dataset[:3]}")


Type of dataset: <class 'list'>
First 3 entries: [{'train_examples': [([[0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0], [6, 0, 0, 8, 0, 6, 0, 8, 0, 0, 6], [0, 0, 6, 8, 0, 0, 0, 8, 0, 6, 0], [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], [0, 6, 0, 8, 0, 0, 6, 8, 0, 0, 0], [0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0], [0, 6, 0, 8, 0, 0, 0, 8, 6, 0, 0], [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], [0, 0, 6, 8, 0, 0, 0, 8, 0, 0, 0], [0, 0, 0, 8, 0, 0, 0, 8, 0, 6, 0], [0, 0, 0, 8, 6, 0, 0, 8, 0, 0, 0]], [[1, 0, 1], [1, 0, 0], [0, 0, 0]]), ([[6, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0], [0, 0, 0, 8, 0, 0, 6, 8, 0, 0, 6], [0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0], [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], [6, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0], [0, 0, 0, 8, 0, 0, 0, 8, 0, 6, 0], [0, 0, 0, 8, 0, 0, 6, 8, 6, 0, 0], [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], [0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0], [6, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0], [0, 6, 0, 8, 0, 6, 0, 8, 0, 0, 6]], [[0, 0, 0], [0, 0, 1], [1, 0, 0]]), ([[0, 0, 0, 8, 0, 6, 0, 8, 0, 0, 6], [0, 0, 0, 8, 0, 0, 0, 8, 0, 6, 0], [0, 6, 0, 

In [14]:
# Ensure dataset is a list
assert isinstance(dataset, list), "Dataset should be a list of dictionaries!"

tokenized_data = []
for entry in dataset:
    tokenized_entry = {
        "train_input_ids": tokenizer(str(entry["train_examples"]), padding="max_length", truncation=True, max_length=4096)["input_ids"],
        "train_attention_mask": tokenizer(str(entry["train_examples"]), padding="max_length", truncation=True, max_length=4096)["attention_mask"],
        "test_input_ids": tokenizer(str(entry["test_input"]), padding="max_length", truncation=True, max_length=4096)["input_ids"],
        "test_attention_mask": tokenizer(str(entry["test_input"]), padding="max_length", truncation=True, max_length=4096)["attention_mask"],
        "candidate_input_ids": [tokenizer(str(cand["candidate_output"]), padding="max_length", truncation=True, max_length=4096)["input_ids"] for cand in entry["candidates"]],
        "candidate_attention_mask": [tokenizer(str(cand["candidate_output"]), padding="max_length", truncation=True, max_length=4096)["attention_mask"] for cand in entry["candidates"]],
        "labels": [cand["confidence"] for cand in entry["candidates"]]  # Confidence scores as labels
    }
    tokenized_data.append(tokenized_entry)

# Convert list to final dataset format
tokenized_dataset = tokenized_data
print("✅ Tokenization Successful!")


✅ Tokenization Successful!


In [15]:
import torch

# Convert to PyTorch tensors
for entry in tokenized_dataset:
    entry["train_input_ids"] = torch.tensor(entry["train_input_ids"])
    entry["train_attention_mask"] = torch.tensor(entry["train_attention_mask"])
    entry["test_input_ids"] = torch.tensor(entry["test_input_ids"])
    entry["test_attention_mask"] = torch.tensor(entry["test_attention_mask"])
    entry["candidate_input_ids"] = torch.tensor(entry["candidate_input_ids"])
    entry["candidate_attention_mask"] = torch.tensor(entry["candidate_attention_mask"])
    entry["labels"] = torch.tensor(entry["labels"], dtype=torch.float32)  # Ensure float for regression

print("✅ Dataset converted to tensors!")


✅ Dataset converted to tensors!


In [16]:
from torch.utils.data import Dataset

class CandidateRankingDataset(Dataset):
    def __init__(self, data):
        self.data = data  # Tokenized dataset

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        entry = self.data[idx]
        return {
            "input_ids": entry["test_input_ids"],  # Test input as main input
            "attention_mask": entry["test_attention_mask"],  # Corresponding attention mask
            "candidate_input_ids": entry["candidate_input_ids"],  # Candidate solutions
            "candidate_attention_mask": entry["candidate_attention_mask"],
            "labels": entry["labels"],  # Confidence scores
        }

# Create dataset object
train_dataset = CandidateRankingDataset(tokenized_dataset)

print("✅ PyTorch dataset created!")


✅ PyTorch dataset created!


In [17]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Load BigBird model for ranking
model = AutoModelForSequenceClassification.from_pretrained(
    "google/bigbird-roberta-base",
    num_labels=1  # Single output per candidate
)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print("✅ Model loaded on:", device)


Some weights of BigBirdForSequenceClassification were not initialized from the model checkpoint at google/bigbird-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model loaded on: cuda


In [18]:
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,  # Adjust based on GPU memory
    gradient_accumulation_steps=4,  # Accumulate gradients for stability
    fp16=True,  # Enable mixed precision
    evaluation_strategy="no",
    save_strategy="epoch",
    logging_dir="./logs",
    report_to="none",  # Disable WandB logging for now
)

print("✅ Training arguments set!")


✅ Training arguments set!


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [19]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
)

# Start Training
trainer.train()

print("✅ Training Completed Successfully!")


  trainer = Trainer(
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/opt/bwhpc/common/jupyter/tensorflow/2023-10-10/lib/python3.9/site-packages/torch/nn/parallel/parallel_apply.py", line 85, in _worker
    output = module(*input, **kwargs)
  File "/opt/bwhpc/common/jupyter/tensorflow/2023-10-10/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/bwhpc/common/jupyter/tensorflow/2023-10-10/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/pfs/data5/home/ma/ma_ma/ma_abthomas/llama-env/lib64/python3.9/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 2749, in forward
    loss = loss_fct(logits.squeeze(), labels.squeeze())
  File "/opt/bwhpc/common/jupyter/tensorflow/2023-10-10/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/bwhpc/common/jupyter/tensorflow/2023-10-10/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/bwhpc/common/jupyter/tensorflow/2023-10-10/lib/python3.9/site-packages/torch/nn/modules/loss.py", line 535, in forward
    return F.mse_loss(input, target, reduction=self.reduction)
  File "/opt/bwhpc/common/jupyter/tensorflow/2023-10-10/lib/python3.9/site-packages/torch/nn/functional.py", line 3328, in mse_loss
    expanded_input, expanded_target = torch.broadcast_tensors(input, target)
  File "/opt/bwhpc/common/jupyter/tensorflow/2023-10-10/lib/python3.9/site-packages/torch/functional.py", line 73, in broadcast_tensors
    return _VF.broadcast_tensors(tensors)  # type: ignore[attr-defined]
RuntimeError: The size of tensor a (2) must match the size of tensor b (5) at non-singleton dimension 1


In [20]:
for batch in train_dataset:
    print("🔹 Test Input Shape:", batch["input_ids"].shape)  # Shape of test input
    print("🔹 Candidate Input Shape:", batch["candidate_input_ids"].shape)  # Candidate solutions
    print("🔹 Labels Shape:", batch["labels"].shape)  # Confidence scores
    break  # Only print for the first batch


🔹 Test Input Shape: torch.Size([4096])
🔹 Candidate Input Shape: torch.Size([5, 4096])
🔹 Labels Shape: torch.Size([5])


In [21]:
class FlattenedCandidateDataset(Dataset):
    def __init__(self, data):
        self.flattened_data = []
        for entry in data:
            for i in range(len(entry["candidate_input_ids"])):  # Iterate over candidates
                self.flattened_data.append({
                    "input_ids": torch.tensor(entry["test_input_ids"]),  # Same test input
                    "attention_mask": torch.tensor(entry["test_attention_mask"]),
                    "candidate_input_ids": torch.tensor(entry["candidate_input_ids"][i]),  # Single candidate
                    "candidate_attention_mask": torch.tensor(entry["candidate_attention_mask"][i]),
                    "labels": torch.tensor(entry["labels"][i], dtype=torch.float32)  # Single confidence score
                })

    def __len__(self):
        return len(self.flattened_data)

    def __getitem__(self, idx):
        return self.flattened_data[idx]

# Create dataset
train_dataset = FlattenedCandidateDataset(tokenized_dataset)
print("✅ Flattened Dataset Created!")


✅ Flattened Dataset Created!


  "input_ids": torch.tensor(entry["test_input_ids"]),  # Same test input
  "attention_mask": torch.tensor(entry["test_attention_mask"]),
  "candidate_input_ids": torch.tensor(entry["candidate_input_ids"][i]),  # Single candidate
  "candidate_attention_mask": torch.tensor(entry["candidate_attention_mask"][i]),
  "labels": torch.tensor(entry["labels"][i], dtype=torch.float32)  # Single confidence score


In [22]:
for batch in train_dataset:
    print("Test Input Shape:", batch["input_ids"].shape)  # Should be [4096]
    print("Candidate Input Shape:", batch["candidate_input_ids"].shape)  # Should be [4096] (single candidate)
    print("Labels Shape:", batch["labels"].shape)  # Should be a single value
    break


Test Input Shape: torch.Size([4096])
Candidate Input Shape: torch.Size([4096])
Labels Shape: torch.Size([])


In [23]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
)

trainer.train()


  trainer = Trainer(
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss




TrainOutput(global_step=375, training_loss=0.3571114095052083, metrics={'train_runtime': 1590.7762, 'train_samples_per_second': 3.772, 'train_steps_per_second': 0.236, 'total_flos': 1.2716303597568e+16, 'train_loss': 0.3571114095052083, 'epoch': 3.0})

In [24]:
from huggingface_hub import notebook_login

notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [25]:
repo_name = "abhay-thomas/bigbird-ranking-model"  # Change this!


In [26]:
from transformers import AutoModelForSequenceClassification

# Define your save directory
save_directory = "./bigbird_ranking_model"

# Save the trained model and tokenizer
trainer.save_model(save_directory)
tokenizer.save_pretrained(save_directory)

print("✅ Model saved successfully!")


✅ Model saved successfully!


In [27]:
from huggingface_hub import HfApi

api = HfApi()
api.create_repo(repo_name, exist_ok=True)

# Upload the entire directory to the repo
from huggingface_hub import upload_folder

upload_folder(
    folder_path=save_directory,
    repo_id=repo_name,
    commit_message="Uploading trained BigBird ranking model"
)

print(f"🚀 Model successfully uploaded to: https://huggingface.co/{repo_name}")


model.safetensors:   0%|          | 0.00/512M [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/846k [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

🚀 Model successfully uploaded to: https://huggingface.co/abhay-thomas/bigbird-ranking-model


In [28]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Load Model and Tokenizer
model_name = "abhay-thomas/bigbird-ranking-model"  # Replace with your actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print("✅ Model loaded successfully on:", device)


tokenizer_config.json:   0%|          | 0.00/17.9k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/846k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.48M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/960 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/512M [00:00<?, ?B/s]

✅ Model loaded successfully on: cuda


In [29]:
# Example Test Input
test_input = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]

# Candidate Solutions (one correct, others slightly incorrect)
candidate_solutions = [
    [[1, 0, 1], [0, 1, 0], [1, 0, 1]],  # ✅ Correct
    [[1, 1, 1], [0, 1, 0], [1, 0, 1]],  # ❌ Noise added
    [[0, 0, 0], [0, 1, 0], [1, 0, 1]],  # ❌ Another noisy version
]

# Tokenize Test Input
test_encodings = tokenizer(str(test_input), padding="max_length", truncation=True, max_length=4096, return_tensors="pt")

# Tokenize Candidate Solutions
candidate_encodings = tokenizer([str(c) for c in candidate_solutions], padding="max_length", truncation=True, max_length=4096, return_tensors="pt")

# Move to device
test_encodings = {k: v.to(device) for k, v in test_encodings.items()}
candidate_encodings = {k: v.to(device) for k, v in candidate_encodings.items()}


In [31]:
# Find the candidate with the highest score
best_index = confidence_scores.index(max(confidence_scores))
best_solution = candidate_solutions[best_index]

print("\n🏆 Selected Best Solution:")
print(best_solution)



🏆 Selected Best Solution:
[[1, 1, 1], [0, 1, 0], [1, 0, 1]]
