#### Install Dependencies

In [None]:
%%capture
# Install the released Unsloth package first.
!pip install unsloth
# Then replace it with the latest nightly build straight from GitHub.
# NOTE(review): neither install is version-pinned, so a later re-run may pick up different code.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
#import torch
#if torch.cuda.get_device_capability()[0] >= 8:
#    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"

In [None]:
# Install the development version of transformers from the repository's default branch.
# NOTE(review): unpinned. The recorded run resolved to commit
# 2801d7bcf6ebb256ad807f41815a0a2a50e83f0e (transformers 4.47.0.dev0).
!pip install git+https://github.com/huggingface/transformers

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-2t9dueqe
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-2t9dueqe
  Resolved https://github.com/huggingface/transformers to commit 2801d7bcf6ebb256ad807f41815a0a2a50e83f0e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tokenizers<0.21,>=0.20 (from transformers==4.47.0.dev0)
  Downloading tokenizers-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tokenizers-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m81.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: transformers
  

#### Mount Google Drive

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
%cd drive/MyDrive/mistral_unsloth_4

Mounted at /content/drive
/content/drive/MyDrive/mistral_unsloth_4


#### Load the Model

In [None]:
from unsloth import FastLanguageModel
import torch
# --- Loader settings ---
# Maximum sequence length (also reused by the trainer later on). The Unsloth
# template notes that any value may be chosen because RoPE scaling is handled
# internally.
max_seq_length = 1024
# None = auto-detect. The template suggests float16 for Tesla T4/V100 and
# bfloat16 for Ampere or newer GPUs.
dtype = None
# Load 4-bit quantized weights to reduce memory usage (may be set to False).
load_in_4bit = True

# Checkpoint to fine-tune. Alternatives left by the author:
#   "unsloth/mistral-7b", "unsloth/Mistral-Nemo-Instruct-2407"
model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.7: Fast Mistral patching. Transformers = 4.47.0.dev0.
   \\   /|    GPU: NVIDIA L4. Max memory: 22.168 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu124. CUDA = 8.9. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/157 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/446 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
# Wrap the loaded model with LoRA adapters so that only a small share of the
# parameters is updated during fine-tuning.
# NOTE: the recorded output shows Unsloth skipping this step when the model already
# carries adapters, i.e. when the cell is run a second time on the same model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # LoRA rank. Template value: 16; author's note: best results with 256
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj", ], # author's note: "lora_magnitude_vector" was tried as an extra target
    lora_alpha = 16, # Template value; author's note: best results with 128
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # "unsloth" uses 30% less VRAM and fits 2x larger batch sizes (per the Unsloth template)
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # rank-stabilized LoRA is supported but disabled here
    loftq_config = None, # LoftQ is supported but disabled here
    # Experimental options, kept disabled because they are not part of the original setting:
    #use_dora=True
    #lora_query=True,
    #lora_value=True

)

Unsloth: Already have LoRA adapters! We shall skip this step.


#### Data Preparation

In [None]:
import pandas as pd
from datasets import Dataset

# Which rows of the workbook to use: 'Original' keeps only rows whose Source column
# equals 'Original'; any other value (here 'Synthetic') keeps every row.
data_type = 'Synthetic'

# Load data. Alternative workbook: 'LR_Dataset_Original_Sythetic_Final.xlsx'.
df = pd.read_excel('LR_Dataset_Original_Sythetic_Experiment_70.xlsx') # 20% of the training and validation sets

if data_type == 'Original':
    df = df[df['Source'] == 'Original']

# Keep only the needed columns. .copy() makes the frame independent of the loaded
# one, so the column assignment below cannot raise SettingWithCopyWarning.
df = df[['Sentence', 'Category', 'Classification']].copy()

# Normalise whitespace BEFORE capitalising: str.capitalize() only upper-cases the
# very first character, so a sentence with leading whitespace would otherwise stay
# lower-cased.
# NOTE(review): capitalize() also lower-cases the rest of the sentence (acronyms,
# proper nouns). Kept because the recorded run was trained on this casing.
df['Sentence'] = (df['Sentence']
                  .str.strip()
                  .str.replace(r'\n|\r', ' ', regex=True)
                  .str.replace(r'\s{2,}', ' ', regex=True)
                  .str.capitalize())

# Shuffle with a fixed seed (same value as the LoRA/trainer seed) so the row order
# is reproducible across runs.
df = df.sample(frac=1, random_state=3407).reset_index(drop=True)
df


Unnamed: 0,Sentence,Category,Classification
0,"Furthermore, people believed that avs should h...",RESULT,TRAINING
1,"The relationship between increased co2, sea-ic...",RESEARCH GAP,TRAINING
2,Since these last names are not gender-specific...,LIMITATION,TRAINING
3,"In conclusion, the theoretical grasp of uncons...",OVERALL,TRAINING
4,"According to the findings of li et al. [21], t...",RESULT,VALIDATION
...,...,...,...
1955,The content is solely the responsibility of th...,OTHER,TRAINING
1956,Another limitation of this work is that our ge...,LIMITATION,TRAINING
1957,The algorithms suggested in [9] are subjected ...,EXTENSION,TRAINING
1958,Our methodology allows two interconnected syst...,EXTENSION,TRAINING


In [None]:
# Split by the 'Classification' column. .copy() gives each split its own data, so
# adding columns to a split later does not raise SettingWithCopyWarning.
train_df = df[df['Classification'] == 'TRAINING'].copy()
val_df = df[df['Classification'] == 'VALIDATION'].copy()
test_df = df[df['Classification'] == 'TEST'].copy()

In [None]:
# Sanity check: list the columns present in the training split.
print(train_df.columns)

Index(['Sentence', 'Category', 'Classification'], dtype='object')


In [None]:
# Peek at the first row of the training split.
print(train_df.head(1))

                                            Sentence Category Classification
0  Furthermore, people believed that avs should h...   RESULT       TRAINING


In [None]:
def create_conversation(row):
    """Build the two-turn ShareGPT-style conversation used for training.

    Args:
        row: mapping-like record (e.g. a DataFrame row) providing 'Sentence'
            (the text to classify) and 'Category' (the gold label).

    Returns:
        A list with two dicts: the "human" turn holding the full instruction
        prompt followed by the sentence, and the "gpt" turn holding
        "Classification: <Category>".

    NOTE(review): step 3 of the prompt asks for the category *number*, while the
    target turn contains whatever is stored in 'Category'. Left untouched so the
    prompt text stays exactly as written.
    """
    # Fixed instruction text; the pieces are concatenated without any separator.
    instruction_parts = [
        "You are a researcher that should assign a classification to a sentence from scientific articles, choosing from one of the following seven categories. Each category corresponds to a specific aspect of scientific discourse, either related to a topic or a study. A topic is defined as a scientific domain, such as “Computer Science” or “Machine  Learning”. A previous study refers to a prior paper on the topic.\n",
        "Categories:\n ",
        "1. OVERALL: Describes, introduces, classifies, or defines research topics often based on the discussion of multiple previous studies together.\n ",
        "2. RESEARCH GAP: Highlights the need for further research within the topic.\n",
        "3. DESCRIPTION: Outlines the objectives, methodology, or design of one previous study, without mentioning results.\n",
        "4. RESULT: Describes specific findings or outcomes drawn from previous studies. This category includes empirical results, theoretical insights, and observed patterns reported by researchers. It often uses verbs like “showed”, “found”, “demonstrated”, and “observed” or phrases like “the findings indicate”.\n",
        "5. LIMITATION: Describes a constraint, challenge, or weakness inherent in the methodology of a previous study that hinders generalizability or reliability in a previous study.\n",
        "6. EXTENSION: Describes how the current study addresses or extends previous studies by stating the overall idea, contrasting ideas or elaborating further ideas. It usually uses the words “we” or “our”.\n",
        "7. OTHER: Any text that does not fit the above categories.\n",
        "Procedure:\n",
        "1. Determine whether the subject of the sentence is a topic or a study.\n",
        "2. Identify the most suitable category based on the content. Do not create new categories. Use the categories given above.\n",
        "3. Provide the category number that best fits the sentence. Just provide the category number without any explanation.\n",
    ]

    # A period and a newline are always appended after the sentence.
    user_turn = {
        "from": "human",
        "value": "".join(instruction_parts) + f"Sentence: {row['Sentence']}.\n",
    }
    model_turn = {
        "from": "gpt",
        "value": f"Classification: {row['Category']}",
    }
    return [user_turn, model_turn]

In [None]:
def create_conversation_test(row):
    """Build the single-turn ShareGPT-style conversation used at inference time.

    The instruction text must match the training prompt word for word; the
    earlier "setence" typo in procedure step 1 made the two prompts differ.

    Args:
        row: mapping-like record (e.g. a DataFrame row) providing 'Sentence'.

    Returns:
        A list containing only the "human" turn (instruction prompt followed by
        the sentence); no answer turn is included.
    """
    sentence = row['Sentence']

    human = (
                "You are a researcher that should assign a classification to a sentence from scientific articles, choosing from one of the following seven categories. Each category corresponds to a specific aspect of scientific discourse, either related to a topic or a study. A topic is defined as a scientific domain, such as “Computer Science” or “Machine  Learning”. A previous study refers to a prior paper on the topic.\n"
                "Categories:\n "
                "1. OVERALL: Describes, introduces, classifies, or defines research topics often based on the discussion of multiple previous studies together.\n "
                "2. RESEARCH GAP: Highlights the need for further research within the topic.\n"
                "3. DESCRIPTION: Outlines the objectives, methodology, or design of one previous study, without mentioning results.\n"
                "4. RESULT: Describes specific findings or outcomes drawn from previous studies. This category includes empirical results, theoretical insights, and observed patterns reported by researchers. It often uses verbs like “showed”, “found”, “demonstrated”, and “observed” or phrases like “the findings indicate”.\n"
                "5. LIMITATION: Describes a constraint, challenge, or weakness inherent in the methodology of a previous study that hinders generalizability or reliability in a previous study.\n"
                "6. EXTENSION: Describes how the current study addresses or extends previous studies by stating the overall idea, contrasting ideas or elaborating further ideas. It usually uses the words “we” or “our”.\n"
                "7. OTHER: Any text that does not fit the above categories.\n"
                "Procedure:\n"
                "1. Determine whether the subject of the sentence is a topic or a study.\n"
                "2. Identify the most suitable category based on the content. Do not create new categories. Use the categories given above.\n"
                "3. Provide the category number that best fits the sentence. Just provide the category number without any explanation.\n"

                # A period and a newline are always appended after the sentence.
                f"Sentence: {sentence}.\n"
            )

    return [
        {"from": "human", "value": human},
    ]


In [None]:
# Attach the chat-formatted conversation to every row. DataFrame.assign returns a
# new frame instead of writing into a filtered slice, which is what raised the
# SettingWithCopyWarning in the recorded output.
train_df = train_df.assign(conversations=train_df.apply(create_conversation, axis=1))
val_df = val_df.assign(conversations=val_df.apply(create_conversation, axis=1))
# The test split gets the prompt only (no answer turn).
test_df = test_df.assign(conversations=test_df.apply(create_conversation_test, axis=1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['conversations'] = train_df.apply(create_conversation, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val_df['conversations'] = val_df.apply(create_conversation, axis=1)


In [None]:
# Inspect the generated conversations; swap in one of the lines below to look at another split.
train_df['conversations']
#val_df['conversations']
#test_df['conversations']


Unnamed: 0,conversations
0,"[{'from': 'human', 'value': 'You are a researc..."
1,"[{'from': 'human', 'value': 'You are a researc..."
2,"[{'from': 'human', 'value': 'You are a researc..."
3,"[{'from': 'human', 'value': 'You are a researc..."
5,"[{'from': 'human', 'value': 'You are a researc..."
...,...
1955,"[{'from': 'human', 'value': 'You are a researc..."
1956,"[{'from': 'human', 'value': 'You are a researc..."
1957,"[{'from': 'human', 'value': 'You are a researc..."
1958,"[{'from': 'human', 'value': 'You are a researc..."


In [None]:
from unsloth.chat_templates import get_chat_template

# Attach the Mistral chat template to the tokenizer. `mapping` tells the template
# which keys/values this notebook's conversations use: "from"/"value" as the
# role/content fields and "human"/"gpt" as the user/assistant roles.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "mistral", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
)

def formatting_prompts_func(examples):
    """Render every conversation of a batch into one chat-formatted string.

    Args:
        examples: batch mapping with a "conversations" entry holding a sequence
            of conversations.

    Returns:
        A dict with the single key "text" holding the rendered strings, in the
        same order as the input conversations.
    """
    rendered = []
    for conversation in examples["conversations"]:
        # Uses the module-level tokenizer; returns text (no tokenization) and does
        # not append a generation prompt.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

In [None]:
from datasets import load_dataset

# Convert each split to a Hugging Face Dataset.
# NOTE: the DataFrame index is carried along as an extra `__index_level_0__`
# column (visible when the test set is converted back to pandas further down).
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)
test_dataset = Dataset.from_pandas(test_df)

# Add a "text" column holding the chat-template rendering of every conversation.
train_dataset = train_dataset.map(formatting_prompts_func, batched = True,)
val_dataset = val_dataset.map(formatting_prompts_func, batched = True,)
test_dataset = test_dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/1715 [00:00<?, ? examples/s]

Map:   0%|          | 0/245 [00:00<?, ? examples/s]

In [None]:
# Spot-check one rendered training example.
print(train_dataset[97]["text"])

<s>[INST] You are a researcher that should assign a classification to a sentence from scientific articles, choosing from one of the following seven categories. Each category corresponds to a specific aspect of scientific discourse, either related to a topic or a study. A topic is defined as a scientific domain, such as “Computer Science” or “Machine  Learning”. A previous study refers to a prior paper on the topic.
Categories:
 1. OVERALL: Describes, introduces, classifies, or defines research topics often based on the discussion of multiple previous studies together.
 2. RESEARCH GAP: Highlights the need for further research within the topic.
3. DESCRIPTION: Outlines the objectives, methodology, or design of one previous study, without mentioning results.
4. RESULT: Describes specific findings or outcomes drawn from previous studies. This category includes empirical results, theoretical insights, and observed patterns reported by researchers. It often uses verbs like “showed”, “found”

#### Train the model

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, EarlyStoppingCallback
from unsloth import is_bfloat16_supported


# Project and Model Setup
project = "lro-finetune"
# NOTE(review): this label only names the output directory. The checkpoint loaded
# earlier in this notebook is "unsloth/mistral-7b-instruct-v0.3-bnb-4bit", not a
# Mistral-Nemo model. Left unchanged so existing output paths keep working.
base_model_name = "Mistral-Nemo-Instruct"
run_name = base_model_name + "-" + project
output_directory = "./" + run_name

# Defined for optional use; it is NOT attached to the trainer unless the
# `callbacks` line at the bottom of the SFTTrainer call is enabled.
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=2,  # number of evaluations without improvement before stopping
    early_stopping_threshold=0.01  # minimum improvement that counts as progress
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 8,
        per_device_eval_batch_size = 8,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60, # Original: 60
        # `eval_strategy` is the current name of this argument; `evaluation_strategy`
        # is the deprecated spelling, and this notebook installs transformers from
        # the development branch.
        eval_strategy = "steps",  # Can also be "epoch"
        eval_steps = 10,  # Evaluate every 10 steps
        learning_rate = 2e-4,
        # Use bf16 when the GPU supports it, otherwise fall back to fp16.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01, # Original: 0.01
        lr_scheduler_type = "linear",
        seed = 3407,
        #load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        output_dir = output_directory,
        report_to = "none",
    ),
    # Optional experiments (not part of the original setting). Early stopping also
    # needs load_best_model_at_end=True in the TrainingArguments above.
    #callbacks = [early_stopping_callback],
    #neftune_noise_alpha=5,   # Add noise to embeddings
)



Map (num_proc=2):   0%|          | 0/1715 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/245 [00:00<?, ? examples/s]

  super().__init__(
max_steps is given, it will override any value given in num_train_epochs


In [None]:
#@title Show current memory stats
# Baseline readings taken before training; `start_gpu_memory` and `max_memory` are
# reused by the final stats cell. Values are converted from bytes by dividing by
# 1024 ** 3 and rounded to three decimals.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
print(
    f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.",
    f"{start_gpu_memory} GB of memory reserved.",
    sep="\n",
)

GPU = NVIDIA L4. Max memory = 22.168 GB.
4.52 GB of memory reserved.


In [None]:
# Run fine-tuning. The returned object exposes run metrics (e.g. 'train_runtime'),
# which the final stats cell reads.
trainer_stats = trainer.train()



==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,715 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss,Validation Loss
10,0.1096,0.375423
20,0.0772,0.295332
30,0.0736,0.284818
40,0.0639,0.277988
50,0.0681,0.272823
60,0.0666,0.271265


In [None]:
#@title Show final memory and time stats
# Peak reserved memory after training, in units of 1024 ** 3 bytes, and the part of
# it that was added on top of the pre-training baseline (`start_gpu_memory`).
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
# Both figures expressed as a percentage of the device's total memory.
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(
    f"{trainer_stats.metrics['train_runtime']} seconds used for training.",
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
    sep="\n",
)

1332.2437 seconds used for training.
22.2 minutes used for training.
Peak reserved memory = 7.172 GB.
Peak reserved memory for training = 2.652 GB.
Peak reserved memory % of max memory = 32.353 %.
Peak reserved memory for training % of max memory = 11.963 %.


#### Save the model

In [None]:
# Save the trained model and the tokenizer into the run's output directory.
model.save_pretrained(output_directory)
tokenizer.save_pretrained(output_directory)

('./Mistral-Nemo-Instruct-lro-finetune/tokenizer_config.json',
 './Mistral-Nemo-Instruct-lro-finetune/special_tokens_map.json',
 './Mistral-Nemo-Instruct-lro-finetune/tokenizer.model',
 './Mistral-Nemo-Instruct-lro-finetune/added_tokens.json',
 './Mistral-Nemo-Instruct-lro-finetune/tokenizer.json')

In [None]:
# Convert the test Dataset back to a pandas DataFrame for row-wise inference.
test_df_converted = test_dataset.to_pandas()
test_df_converted

Unnamed: 0,Sentence,Category,Classification,conversations,__index_level_0__,text
0,In order to reflect the users complete spectru...,DESCRIPTION,TEST,"[{'from': 'human', 'value': 'You are a researc...",2,<s>[INST] You are a researcher that should ass...
1,This procedure was approved by the ethics boar...,OTHER,TEST,"[{'from': 'human', 'value': 'You are a researc...",6,<s>[INST] You are a researcher that should ass...
2,"Motivated by these problems, automated graph l...",OVERALL,TEST,"[{'from': 'human', 'value': 'You are a researc...",7,<s>[INST] You are a researcher that should ass...
3,Despite progress in automated taxonomy creatio...,RESEARCH GAP,TEST,"[{'from': 'human', 'value': 'You are a researc...",8,<s>[INST] You are a researcher that should ass...
4,Mcauley et al. [21] mine key attributes from t...,DESCRIPTION,TEST,"[{'from': 'human', 'value': 'You are a researc...",12,<s>[INST] You are a researcher that should ass...
...,...,...,...,...,...,...
135,Since its introduction as a model to identify ...,OVERALL,TEST,"[{'from': 'human', 'value': 'You are a researc...",680,<s>[INST] You are a researcher that should ass...
136,Therefore alignment methods are needed that ad...,RESEARCH GAP,TEST,"[{'from': 'human', 'value': 'You are a researc...",687,<s>[INST] You are a researcher that should ass...
137,"Ribi´ere and dieng [25] and heman et al. [11,1...",DESCRIPTION,TEST,"[{'from': 'human', 'value': 'You are a researc...",690,<s>[INST] You are a researcher that should ass...
138,"In particular, we use the block sizes (16 kb, ...",LIMITATION,TEST,"[{'from': 'human', 'value': 'You are a researc...",693,<s>[INST] You are a researcher that should ass...


In [None]:
# Pick one rendered test prompt (row label 13) for a single-example check.
prompt=test_df_converted['text'].loc[13]
prompt


In [None]:
# Switch the model to Unsloth's inference mode.
FastLanguageModel.for_inference(model)

# `prompt` was rendered by the chat template and already begins with "<s>", so the
# tokenizer must not prepend a second BOS token.
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    add_special_tokens=False,
).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
# The decoded string contains the prompt followed by the generated continuation.
answer = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

answer

In [None]:
import re
def extract_text(text):
    """Return the normalised label that follows the first "Classification:" marker.

    Everything after the marker up to the end of that line is taken, runs of
    whitespace are collapsed to single spaces, and the result is upper-cased.
    An empty string is returned when the marker does not occur in `text`.
    """
    found = re.search(r"Classification:\s*(.*)", text)
    if found is None:
        return ""
    # split()/join collapses inner whitespace and trims both ends in one step.
    words = found.group(1).split()
    return " ".join(words).upper()

In [None]:
# Parse the label out of the single-example generation above.
extract_text(answer)

'EXTENSION'

In [None]:
# Getting the Classification
def get_classification(data_point,model,tokenizer):
    """
    Gets the classification for a data point using the fine-tuned model.

    Args:
        data_point: one row of the test DataFrame (a Series, as passed by
            DataFrame.apply with axis=1); its 'text' entry holds the rendered prompt.
        model: the model used for generation.
        tokenizer: the tokenizer matching `model`.

    Returns:
        The same row with two entries added: 'Prediction_Finetune' (the decoded
        output, i.e. prompt plus generated continuation) and
        'Prediction_Finetune_Clean' (the label parsed by `extract_text`).
    """
    FastLanguageModel.for_inference(model)

    # The rendered prompt already starts with "<s>", so the tokenizer must not
    # prepend a second BOS token.
    inputs = tokenizer(
                data_point['text'],
                return_tensors="pt",
                add_special_tokens=False,
            ).to("cuda")

    outputs = model.generate(**inputs, max_new_tokens=100, use_cache=True)
    answer = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # `.name` is the row's index label; `.index[-1]` was the last column name,
    # which printed the same value for every row.
    print(f"Sentence : {data_point.name}")
    print(answer)
    data_point['Prediction_Finetune']=answer
    data_point['Prediction_Finetune_Clean']=extract_text(answer)

    return data_point

# Run get_classification on every test row with DataFrame.apply (axis=1 = row-wise);
# the returned rows, including the two prediction entries, form the new DataFrame.
test_df_converted = test_df_converted.apply(lambda row: get_classification(row, model, tokenizer), axis=1)

In [None]:
# Review the predictions next to the gold 'Category' labels.
test_df_converted

Unnamed: 0,Sentence,Category,Classification,conversations,__index_level_0__,text,Prediction_Finetune,Prediction_Finetune_Clean
0,In order to reflect the users complete spectru...,DESCRIPTION,TEST,"[{'from': 'human', 'value': 'You are a researc...",2,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,DESCRIPTION
1,This procedure was approved by the ethics boar...,OTHER,TEST,"[{'from': 'human', 'value': 'You are a researc...",6,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,OTHER
2,"Motivated by these problems, automated graph l...",OVERALL,TEST,"[{'from': 'human', 'value': 'You are a researc...",7,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,OVERALL
3,Despite progress in automated taxonomy creatio...,RESEARCH GAP,TEST,"[{'from': 'human', 'value': 'You are a researc...",8,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,RESEARCH GAP
4,Mcauley et al. [21] mine key attributes from t...,DESCRIPTION,TEST,"[{'from': 'human', 'value': 'You are a researc...",12,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,DESCRIPTION
...,...,...,...,...,...,...,...,...
135,Since its introduction as a model to identify ...,OVERALL,TEST,"[{'from': 'human', 'value': 'You are a researc...",680,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,OVERALL
136,Therefore alignment methods are needed that ad...,RESEARCH GAP,TEST,"[{'from': 'human', 'value': 'You are a researc...",687,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,RESEARCH GAP
137,"Ribi´ere and dieng [25] and heman et al. [11,1...",DESCRIPTION,TEST,"[{'from': 'human', 'value': 'You are a researc...",690,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,OVERALL
138,"In particular, we use the block sizes (16 kb, ...",LIMITATION,TEST,"[{'from': 'human', 'value': 'You are a researc...",693,<s>[INST] You are a researcher that should ass...,You are a researcher that should assign a cla...,LIMITATION


In [None]:
# Write the test rows, including the raw and cleaned predictions, to CSV (index omitted).
test_df_converted.to_csv('Mistral7B_Nemo_FT_Test_Change2.csv', index=False)


#### Make Inference