This is a copy and adaptation of Unsloth's Qwen2.5_(7B)-Alpaca.ipynb file
<div class="align-center">
<a href="https://unsloth.ai/"><img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="115"></a>

Visit Unsloth docs for all of their [model uploads](https://docs.unsloth.ai/get-started/all-our-models) and [notebooks](https://docs.unsloth.ai/get-started/unsloth-notebooks).


### Unsloth

In [None]:
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

In [2]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None
load_in_4bit = True

model_name = "unsloth/Qwen2.5-7B"
# model_name = "unsloth/Qwen2.5-14B"
# model_name = "unsloth/Qwen2.5-32B"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.4.7: Fast Qwen2 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/106k [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/4.72k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.4.7 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


# Data Prep

In [4]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass


from IPython.display import HTML, display

def set_css():
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))
get_ipython().events.register('pre_run_cell', set_css)

In [5]:
from google.colab import drive
drive.mount('/content/drive')

folder = f"/content/drive/MyDrive/DH"

import random, re, json, os
import pandas as pd
from tqdm import notebook as tqdm
from torch.nn import CosineSimilarity
cos_sim = CosineSimilarity(dim=0, eps=1e-6)
random.seed(42)

Mounted at /content/drive


In [6]:
bible = {}

b_versions = ['AKJV','ODRV','Geneva', 'Douay-Rheims', 'Tyndale', 'Wycliffe','Vulgate']
for bname in b_versions:
    data = pd.read_csv(f"{folder}/Early-Modern-Sermons/assets/Bibles/{bname}.csv",header=None)
    data = data.to_dict(orient="records")
    for entry in tqdm.tqdm(data):
        key = entry[0]
        v_id = key.split(" (")[0]
        text = entry[6].strip()
        if re.search("Douay-Rheims",key):
            if re.sub("Douay-Rheims","ODRV",key) in bible: continue
        if len(text.split(" ")) < 200:
            bible[key] = f"{v_id} {text}"

        parts = re.split(r'(?<=[\.\?]) (?=[A-Z])|(?<=[\!\:\;])', text)
        parts = [re.sub(r'\s+', ' ', p).strip() for p in parts if len(p.strip(" ")) > 0]
        if (len(parts[0].split(" ")) <= 5 or len(parts[-1].split(" ")) <= 5 or re.search(r"\&\w+\;",parts[0])):
            for pidx, p in enumerate(parts): continue
        elif len(parts) > 1:
            for pidx, p in enumerate(parts):
              p_id = f"{key} - {pidx}"
              if len(p) == 0: continue
              if re.search(r"\&\w+\;",p) or len(p.split(" ")) <= 5: continue
              bible[p_id] = f"Part {pidx+1} of {v_id}: {p}"

bible['NonQP 0.0'] = "No Biblical quotation or paraphrase found"

bible_verses = list(bible.values())
bible_ids = list(bible.keys())
verse_to_id = {v:k for k,v in bible.items()}
len(bible_verses)

  0%|          | 0/36702 [00:00<?, ?it/s]

  0%|          | 0/14737 [00:00<?, ?it/s]

  0%|          | 0/31090 [00:00<?, ?it/s]

  0%|          | 0/35811 [00:00<?, ?it/s]

  0%|          | 0/7954 [00:00<?, ?it/s]

  0%|          | 0/9622 [00:00<?, ?it/s]

  0%|          | 0/35809 [00:00<?, ?it/s]

276628

In [7]:
def fix_name(v_id):
  if "1 Kings" in v_id:
    v_id = "3 Kings" + v_id.split("1 Kings")[-1]
  elif "2 Kings" in v_id:
    v_id = "4 Kings" + v_id.split("2 Kings")[-1]
  elif "1 Samuel" in v_id:
    v_id = "1 Kings" + v_id.split("1 Samuel")[-1]
  elif "2 Samuel" in v_id:
    v_id = "2 Kings" + v_id.split("2 Samuel")[-1]
  elif re.search(r"^\d+ Chronicles",v_id):
    v_id = re.sub(r"Chronicles","Paralipomenon",v_id)
  return v_id

In [8]:
with open(f'{folder}/Early-Modern-Sermons/assets/QP_Datasets/parallel_train_test_indices.json') as file:
     training_split = json.load(file)
training_split = {k:{x:None for x in v} for k,v in training_split.items()}

In [9]:
model_checkpoint = f"{folder}/models/EEPS_emanjavacas-MacBERTh_2025-05-05/checkpoint-1560"
model_name = "EEPS_emanjavacas-MacBERTh_2025-05-05_checkpoint-1560"
bible_vectors = torch.load(f'{folder}/EEPS/Bibles_{model_name}.pt')


full_ids = [] # idx to orig idx
full_vectors = []
full_verses = []
idx = 0
for orig_idx, v_id in enumerate(bible_ids):
  if " - " not in v_id: # a full verse
    full_ids.append(v_id)
    full_verses.append(bible_verses[orig_idx])
    full_vectors.append(bible_vectors[orig_idx])
    idx += 1
print(len(full_ids),len(full_verses),len(full_vectors))

id_to_idx = {id:idx for idx, id in enumerate(full_ids)}
verse_to_id = {verse:full_ids[idx] for idx, verse in enumerate(full_verses)}

157008 157008 157008


In [12]:
train = []
test = []
all_data = {}

fps = [f"{folder}/Early-Modern-Sermons/assets/QP_Datasets/parallel_predictions_SAMPLE - ALL.csv"]
for fp in fps:
  data = pd.read_csv(fp).to_dict(orient='records')
  for qp_pair in tqdm.tqdm(data):
    v_id = re.findall(r"^([\w\s]+ \d+\.\d+)",qp_pair['verse_text'])[0]
    v_id = f"{v_id} ({qp_pair['version']})"
    if v_id not in bible:
      if qp_pair['version'] in ['Vulgate', 'Douay-Rheims','ODRV']:
        v_id = fix_name(v_id)
    if v_id not in bible: continue


    v = id_to_idx[v_id]
    if qp_pair['label'] is True:
      label = True
    elif qp_pair['prediction'] is True and qp_pair['label'] is not False:
      label = True
    else:
      label = False

    text = re.sub(r"^([\w\s]+ \d+\.\d+)",'',qp_pair['text']).strip()
    if qp_pair['index'] not in all_data:
        all_data[qp_pair['index']] = {'text': text,
                                    'pos':{}, # verse ids with version
                                  }
    if label:
      all_data[qp_pair['index']]['pos'][v_id] = True

    entry = {
        'instruction': 'You are given pairs of Bible verses from different Bible versions. Verse 1 is always from the English Standard Version. Verse 2 comes from a different version. As an Early Modern English and Latin expert, you determine whether they have similar meanings or very similar language. If Verse 2 is in Latin, compare its translation with Verse 1 to see if it refers to the same idea, even if the numbering might differ. For example, "Psalms 113.2 facta est Judaea sanctificatio ejus; Israel potestas ejus." from the Vulgate is equivalent to "Psalms 114.2 Judah became his sanctuary, Israel his dominion." from the ESV. Return a True/False answer without giving any of your reasoning.',
        'input': f"Input Verse 1 from the ESV: {text}\nInput Verse 2 from the {qp_pair['version']}: {bible_verses[v]}",
        'output':label,
    }
    if qp_pair['index'] not in training_split['Train']:
      if isinstance(qp_pair['prediction'],float): continue
      entry['prediction'] = qp_pair['prediction']
      entry['index'] = qp_pair['index']
      entry['v_id'] = v_id
      test.append(entry)
      continue
    train.append(entry)

  0%|          | 0/13831 [00:00<?, ?it/s]

In [13]:
for index, entry in tqdm.tqdm(all_data.items()):
  if index in training_split['Train']:
    for p in entry['pos']:
      num_neg = 0
      seen = 0
      v = id_to_idx[p]
      p_ver = re.findall(r"\((.*?)\)",p)[0]
      while num_neg <= 5:
        rand_neg = random.choice(range(0,len(full_ids)))
        if "0.0" in full_ids[rand_neg]: continue

        version = re.findall(r"\((.*?)\)",full_ids[rand_neg])[0]
        if version != p_ver: # must be from the same version
          continue
        if full_ids[v].split(".")[0] != full_ids[rand_neg].split(".")[0]: # must be from the same chapter
          continue
        if full_ids[rand_neg] in entry['pos']: continue

        if rand_neg != v:
          rand_neg_sim = float(cos_sim(full_vectors[v],full_vectors[rand_neg]) )
          if rand_neg_sim <= 0.5:
            rand_neg_sim = None
          train.append({
          'instruction': 'You are given pairs of Bible verses from different Bible versions. Verse 1 is always from the English Standard Version. Verse 2 comes from a different version. As an Early Modern English and Latin expert, you determine whether they have similar meanings or very similar language. If Verse 2 is in Latin, compare its translation with Verse 1 to see if it refers to the same idea, even if the numbering might differ. For example, "Psalms 113.2 facta est Judaea sanctificatio ejus; Israel potestas ejus." from the Vulgate is equivalent to "Psalms 114.2 Judah became his sanctuary, Israel his dominion." from the ESV. Return a True/False answer without giving any of your reasoning.',
          'input': f"Input Verse 1 from the ESV: {entry['text']}\nInput Verse 2 from the {version}: {full_verses[rand_neg]}",
          'output':False,
            })
          num_neg += 1
        seen += 1
        if seen >= 10: break
print(len(train))

  0%|          | 0/2500 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
from datasets import Dataset

train = Dataset.from_list(train)
train = train.map(formatting_prompts_func, batched = True,)

In [None]:
train[0]

<a name="Train"></a>
### Train the model
Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We do 60 steps to speed things up, but you can set `num_train_epochs=1` for a full run, and turn off `max_steps=None`. We also support TRL's `DPOTrainer`!

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

In [None]:
# @title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
trainer_stats = trainer.train()

In [None]:
# @title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

<a name="Inference"></a>
### Inference
Let's run the model! You can change the instruction and input - leave the output blank!

**[NEW] Try 2x faster inference in a free Colab for Llama-3.1 8b Instruct [here](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Unsloth_Studio.ipynb)**

In [None]:
inputfname = "parallel_predictions_SAMPLE_test"
fname = f"{inputfname}"
# model_name = "EEPS_Lora_unsloth_Qwen2.5_7B"
# model_name = "unsloth_Qwen2.5_7B"
# model_name = "unsloth_Qwen2.5_14B"
# model_name = "EEPS_Lora_unsloth_Qwen2.5_14B"
model_name = "EEPS_Checkpoint-1560_Lora_unsloth_Qwen2.5_7B"

fname, model_name, len(test), test[0]

In [None]:
labels = {}
for idx, qp_pair in enumerate(tqdm.tqdm(test)):
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference
    inputs = tokenizer(
    [
        alpaca_prompt.format(
            qp_pair['instruction'], # instruction
            qp_pair['input'], # input
            "", # output
        )
    ], return_tensors = "pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
    result = tokenizer.batch_decode(outputs)
    r = result[0].split("Response:")[-1].strip()
    r = re.sub(r"\<\|endoftext\|\>",'',r)
    labels[f"{qp_pair['index']},{qp_pair['v_id']}"] = r
    break
#     if (idx +1) % 1000 == 0:
#       with open(f"{folder}/QP/predictions/{fname}_{model_name}.json",'w+') as f:
#           json.dump(labels,f)

# with open(f"{folder}/QP/predictions/{fname}_{model_name}.json",'w+') as f:
#     json.dump(labels,f)

In [None]:
labels

<a name="Save"></a>
### Saving, loading finetuned models
To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.

**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!

In [None]:
model.save_pretrained(f"{folder}/lora_model")  # Local saving
tokenizer.save_pretrained(f"{folder}/lora_model")
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving

('/content/drive/MyDrive/DH/lora_model/tokenizer_config.json',
 '/content/drive/MyDrive/DH/lora_model/special_tokens_map.json',
 '/content/drive/MyDrive/DH/lora_model/vocab.txt',
 '/content/drive/MyDrive/DH/lora_model/added_tokens.json',
 '/content/drive/MyDrive/DH/lora_model/tokenizer.json')

Now if you want to load the LoRA adapters we just saved for inference, set `False` to `True`:

In [None]:
if False:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = f"{folder}/lora_model", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference


inputs = tokenizer(
[
    alpaca_prompt.format(
        "What is a famous tall tower in Paris?", # instruction
        "", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

# Evaluate LLM Performance

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, f1_score, cohen_kappa_score,recall_score
from collections import defaultdict


def new_label(label):
    if str(label).capitalize() == 'True':
        return 1
    elif str(label).capitalize() == 'False':
        return 0
    else:
        return None

def most_confused(category, model, function=None):
  # the label for a feature that contributes most to the model's misclassifications
  df = pd.DataFrame(annotations)

  if category == 'Length Disparity':
     df[category] = df['length_disparity']
  elif category == "Ground Label":
    df[category] = df['label']
  else:
    df[category] = df['verse_id'].apply(function)

  df = df[df[category].notnull()]
  df['misclassified'] = df['label'] != df[model]
  total_misclassified = df['misclassified'].sum()
  def misclassification(group):
    p = (group['misclassified'].sum() / len(group)) * 100
    # p = (group['misclassified'].sum() / total_misclassified) * 100
    return round(p,2)
  print('---------------------------\n',(
      df.groupby(category)
      .apply(misclassification,include_groups=False)
      .sort_values(ascending=False)
      .head(10)
  ))

# Features: Book, Chapter, Version, isPart, length_disparity, isNonQP
# what is the base fine-tuned LLM most confused about?

def get_book(v_id):
  if " (" not in str(v_id):
    return None
  v_id = v_id.split(" - ")[0].split(" (")[0]
  v_id = re.sub(" \d+\.\d+","",v_id)
  return v_id

def get_chapter(v_id):
  if " (" not in str(v_id):
    return None
  v_id = v_id.split(" - ")[0].split(" (")[0]
  v_id = re.sub("\.\d+","",v_id)
  return v_id

def get_version(v_id):
  if " (" not in str(v_id):
    return None
  if " (" not in v_id: return None
  return re.findall(r"\((.*?)\)",v_id)[0]

def isPart(v_id):
  if " - " in str(v_id):
    return True
  return False

def isNonQP(v_id):
  if " (" not in str(v_id):
    return True
  return False

def get_Bible_part(v_id):
  if " (" not in str(v_id):
    return None
  if v_id not in bible:
    return None
  return bible[v_id][1]


def get_eval_report(verbose=False ):

  scores = {
      "Model": [],
      "Accuracy": [],
      "Recall":[],
      "Precision": [],
      "F1_Score": [],
      "Cohen's_Kappa":[]
  }
  for pred_name, pred_labels in model_to_pred.items():
      if len(ground) == len(pred_labels):
          pred_name = pred_name.split("_P")[0]
          Accuracy = round(accuracy_score(ground, pred_labels),4)
          Recall = round(recall_score(ground, pred_labels),4)
          Precision = round(precision_score(ground, pred_labels),4)
          F1_Score = round(f1_score(ground, pred_labels),4)
          kappa = round(cohen_kappa_score(ground,pred_labels),4)
          scores["Model"].append(pred_name)
          scores["Accuracy"].append(Accuracy)
          scores["Recall"].append(Recall)
          scores["Precision"].append(Precision)
          scores["F1_Score"].append(F1_Score)
          scores["Cohen's_Kappa"].append(kappa)

  print(inputfname)
  print("----------------------------------------------------------------------------------")
  df = pd.DataFrame(scores)
  print(df.to_string(index=False))
  print("----------------------------------------------------------------------------------")
  if verbose:
    for model in model_to_pred:
      print(f"\n\n#####################################\n{model} Confusion")
      most_confused('Bible Part',model,get_Bible_part)
      most_confused('Version',model,get_version)
      most_confused('Book',model,get_book)
      most_confused('Length Disparity',model)
      most_confused('isPart',model,isPart)
      most_confused('isNonQP',model,isNonQP)



In [None]:
models = [
        "EEPS_Lora_unsloth_Qwen2.5_7B",
        "unsloth_Qwen2.5_7B",
        "EEPS_Lora_unsloth_Qwen2.5_14B",
        "unsloth_Qwen2.5_14B",
        'DSV3',
      ]


annotations = []
ground = []
verses = []
model_to_pred = {x:[] for x in models}


predictions = {}
for m in models:
  if m == 'DSV3':
    for entry in test:
      key = f"{entry['index']},{entry['v_id']}"
      pred = entry['prediction']
      if key not in predictions:
        predictions[key] = {}
      predictions[key][m] = pred
  else:
    with open(f"{folder}/QP/predictions/{inputfname}_{m}.json","r") as fp:
      preds = json.load(fp)
    for key, pred in preds.items():
      if key not in predictions:
        predictions[key] = {}
      predictions[key][m] = pred

num_false = 0
for idx, entry in enumerate(test):
    key = f"{entry['index']},{entry['v_id']}"
    if key not in predictions: continue
    if len(predictions[key]) != len(models): continue

    ground.append(new_label(entry['output']))
    if entry['output'] is False:
      num_false += 1
    for m in models:
      model_to_pred[m].append(new_label(predictions[key][m]))
    annotations.append(entry)

print(len(ground), num_false)
get_eval_report()

1221 192


ValueError: Classification metrics can't handle a mix of binary and unknown targets

In [None]:
from google.colab import runtime
runtime.unassign()