In [None]:
# Enable IPython's autoreload extension in mode 2 so edited project modules
# are re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
# Colab only: mount Google Drive at /content/drive so the project folder is reachable.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
import os
# User-specific location of the project's `model` directory on the mounted Drive.
ROOT_PATH = "/content/drive/MyDrive/MNLP/project-m2-2024-ab-eh-me/M2_Clean/project-m2-2024-ab-eh-me/model"  # Replace with the path to your own copy of this folder
os.chdir(ROOT_PATH) # every relative path used below (datasets/, output/, ...) is resolved from here

In [None]:
# Install the dependencies listed in requirements.txt (found in the current working directory).
# The commented lines below are alternative manual installs kept for reference.
!pip install -r requirements.txt
#!pip install -U git+https://github.com/huggingface/trl
#!pip install trl
#!pip install peft
#!pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git" --q

In [None]:
#import model_dpo
import utils
from transformers import TrainingArguments
import json
from torch.utils.data import DataLoader
#from trl import DPOConfig, DPOTrainer, SFTTrainer, SFTConfig
from trl import DPOTrainer, SFTTrainer#, DPOConfig

from unsloth import FastLanguageModel
#from peft import LoraConfig, AutoPeftModelForCausalLM

import torch
import pandas as pd
from datasets import Dataset, DatasetDict


from datasets import load_dataset

from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM


In [None]:
model_name_or_path = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
max_seq_length = 2048

# Load the base TinyLlama checkpoint and its tokenizer through Unsloth.
pretrained_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name_or_path,
    max_seq_length = max_seq_length,
    dtype = None, # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    load_in_4bit = False, # Use 4bit quantization to reduce memory usage. Can be False.
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Register a dedicated padding token, grow the embedding matrix to match the
# enlarged vocabulary, and record the pad id on the model and its config.
tokenizer.add_special_tokens({"pad_token": "<PAD>"})
tokenizer.padding_side = "right"
pretrained_model.resize_token_embeddings(len(tokenizer))
pretrained_model.pad_token_id = tokenizer.pad_token_id
pretrained_model.config.pad_token_id = tokenizer.pad_token_id

eos_token = tokenizer.eos_token
####### SFT #######
sft_data = utils.read_jsonl("datasets/sft_2000.jsonl")
df = pd.DataFrame(sft_data)

# Chat layout used for every training example:
#<|system|>
#You are a friendly chatbot who always responds in the style of a pirate.</s>
#<|user|>
#How many helicopters can a human eat in one sitting?</s>
#<|assistant|>

# The answer follows "<|assistant|>\n" directly (no leading space) so that the
# SFT text matches the prompt + completion layout used by the DPO and
# inference cells of this notebook.
df['text'] = df.apply(lambda row: f"<|system|>\nYou are an experienced teacher who answers the STEM-related question asked by a student below.{eos_token}\n<|user|>\n{row['prompt'].rstrip()}{eos_token}\n<|assistant|>\n{row['gold_output'].rstrip()}{eos_token}", axis=1)
dataset = Dataset.from_pandas(df)

# Seed the shuffle so the 90/10 train/validation split is identical across runs.
split_dataset = dataset.train_test_split(test_size=0.1, seed=69)

datasets_train_test_sft = DatasetDict({
    "train": split_dataset["train"],
    "validation": split_dataset["test"]
    })

# Attach LoRA adapters (rank 32) to the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    pretrained_model,
    r = 32,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0, # Dropout = 0 is currently optimized
    bias = "none",    # Bias = "none" is currently optimized
    use_gradient_checkpointing = "unsloth",
    random_state =69)

# Mixed precision: bf16 when the GPU supports it, fp16 otherwise.
args = TrainingArguments(output_dir = "./output/SFT_DPO/SFT",
                 report_to = "tensorboard",
                 bf16 = torch.cuda.is_bf16_supported(),
                 fp16 = not torch.cuda.is_bf16_supported(),
                 num_train_epochs = 3,
                 evaluation_strategy = "steps",
                 logging_strategy = "steps",
                 per_device_train_batch_size = 4,
                 save_total_limit=3,
                 per_device_eval_batch_size = 4,
                 logging_steps = 100,
                 eval_steps = 700,
                 save_steps = 3000,
                )

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    args = args,
    train_dataset = datasets_train_test_sft['train'],
    eval_dataset = datasets_train_test_sft['validation'],
    packing = False,
    max_seq_length = max_seq_length,
    dataset_text_field = "text"
    )

trainer.train()
# Export the fine-tuned model together with the tokenizer; the DPO stage
# reloads it from this directory.
model.save_pretrained_merged("./output/SFT_DPO/SFT/last_checkpoint", tokenizer, save_method = "merged_16bit")


In [None]:
# Export the trainer's model with the tokenizer in "merged_16bit" form
# (same target directory as the export at the end of the SFT training cell).
sft_export_dir = "./output/SFT_DPO/SFT/last_checkpoint"
trainer.model.save_pretrained_merged(
    sft_export_dir,
    tokenizer,
    save_method="merged_16bit",
)

In [None]:
# Display how many parameters of the current `model` are trainable.
model.num_parameters(only_trainable=True)

In [None]:
######### DPO #######
import utils

max_seq_length = 2048
model_name_or_path = "output/SFT_DPO/SFT/last_checkpoint"

# Both the frozen reference model and the policy start from the merged SFT export.
_sft_load_kwargs = dict(
    model_name=model_name_or_path,
    max_seq_length=max_seq_length,
    dtype=None,          # auto-detect: float16 on T4/V100, bfloat16 on Ampere+
    load_in_4bit=False,  # full-precision weights, no 4-bit quantization
)
ref_model, _ = FastLanguageModel.from_pretrained(**_sft_load_kwargs)
pretrained_model, tokenizer = FastLanguageModel.from_pretrained(**_sft_load_kwargs)

eos_token = tokenizer.eos_token
instruction = "You are an experienced teacher who answers the STEM-related question asked by a student below."


def _ensure_eos(text):
    """Right-strip `text` and append the EOS token unless it already ends with it."""
    trimmed = text.rstrip()
    if trimmed.endswith(eos_token):
        return trimmed
    return trimmed + eos_token


def _load_dpo_split(path):
    """Read a preference JSONL file and return it as a chat-formatted `Dataset`.

    The `prompt` column is wrapped in the system/user/assistant template and
    the `chosen` / `rejected` columns are terminated with the EOS token.
    """
    frame = pd.DataFrame(utils.read_jsonl(path))
    frame['prompt'] = frame.apply(
        lambda row: f"<|system|>\n{instruction}{eos_token}\n<|user|>\n{row['prompt'].rstrip()}{eos_token}\n<|assistant|>\n",
        axis=1,
    )
    frame['chosen'] = frame.apply(lambda row: _ensure_eos(row["chosen"]), axis=1)
    frame['rejected'] = frame.apply(lambda row: _ensure_eos(row["rejected"]), axis=1)
    return Dataset.from_pandas(frame)


dataset_train = _load_dpo_split("datasets/merged_DPO_train.jsonl")
dataset_test = _load_dpo_split("datasets/merged_DPO_test.jsonl")

# Rank-64 LoRA adapters on the attention and MLP projections of the policy.
model = FastLanguageModel.get_peft_model(
    pretrained_model,
    r=64,
    lora_alpha=64,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_dropout=0,  # Dropout = 0 is currently optimized
    bias="none",     # Bias = "none" is currently optimized
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

_use_bf16 = torch.cuda.is_bf16_supported()
training_args = TrainingArguments(
    output_dir="./output/SFT_DPO/DPO_Merged_2",
    report_to="tensorboard",
    bf16=_use_bf16,
    fp16=not _use_bf16,
    num_train_epochs=2,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=5e-7,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_strategy="steps",
    logging_steps=100,
    evaluation_strategy="steps",
    eval_steps=1000,
    save_steps=3000,
    save_total_limit=3,
)

dpo_trainer = DPOTrainer(
    model=model,            # policy: SFT model with fresh LoRA adapters
    ref_model=ref_model,    # separately loaded copy of the SFT model
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=dataset_train,
    eval_dataset=dataset_test,
    beta=0.1,               # temperature hyperparameter of DPO
    label_smoothing=0.2,
    max_length=max_seq_length,
    max_prompt_length=max_seq_length,
)



In [None]:
# Run DPO training, then export the resulting model next to the DPO run outputs.
dpo_trainer.train()

dpo_export_dir = "./output/SFT_DPO/DPO_Merged_2/last_checkpoint"
model.save_pretrained_merged(
    dpo_export_dir,
    tokenizer,
    save_method="merged_16bit",
)

In [None]:
# Re-enable autoreload, then run evaluator.py from the working directory as a separate process.
%load_ext autoreload
%autoreload 2
!python3 evaluator.py

In [None]:
# Open TensorBoard inline.
# NOTE(review): the training cells in this notebook use output_dir values under
# ./output/SFT_DPO/... and ./Other_checkpoints/...; confirm that checkpoints/runs
# is the log directory you actually want to inspect.
%load_ext tensorboard
%tensorboard --logdir checkpoints/runs


In [None]:
import pandas as pd
from datasets import Dataset, DatasetDict
import matplotlib.pyplot as plt
from transformers import AutoTokenizer
import utils
from unsloth import FastLanguageModel
model_name_or_path = "output/SFT_DPO/SFT/last_checkpoint"

# Only the tokenizer is needed for counting tokens, so load it directly
# instead of instantiating the whole model and throwing it away. This also
# removes the dependency on `max_seq_length` having been set by an earlier cell.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

def calculate_dpo_lengths(data, tokenizer):
    """Pair every preference example with its worst-case token length.

    Args:
        data: iterable of mappings with "prompt", "chosen" and "rejected" text.
        tokenizer: callable accepting ``(text, truncation=False)`` and returning
            a mapping whose ``'input_ids'`` entry has one element per token.

    Returns:
        A list of ``(length, item)`` tuples in input order, where ``length`` is
        the prompt token count plus the token count of the longer completion.
    """
    def _token_count(text):
        return len(tokenizer(text, truncation=False)['input_ids'])

    scored = []
    for example in data:
        prompt_len = _token_count(example["prompt"])
        chosen_len = _token_count(example["chosen"])
        rejected_len = _token_count(example["rejected"])
        # prompt + longer completion == max(prompt + chosen, prompt + rejected)
        scored.append((prompt_len + max(chosen_len, rejected_len), example))
    return scored

# Token budget shared by the filter and the statistics below. It sits slightly
# under the 2048-token max_seq_length used by the training cells.
MAX_TOKENS = 2000

# Read both DPO splits and measure the worst-case length of every example.
dpo_data_train = utils.read_jsonl("datasets/SE_DPO_train.jsonl")
dpo_data_test = utils.read_jsonl("datasets/SE_DPO_test.jsonl")
dpo_lengths_train = calculate_dpo_lengths(dpo_data_train, tokenizer)
dpo_lengths_test = calculate_dpo_lengths(dpo_data_test, tokenizer)

# Keep only the examples that fit in the token budget.
filtered_dpo_data_train = [item for length, item in dpo_lengths_train if length <= MAX_TOKENS]
filtered_dpo_data_test = [item for length, item in dpo_lengths_test if length <= MAX_TOKENS]

# Write the filtered splits to new JSONL files.
utils.write_jsonl(filtered_dpo_data_train, "datasets/SE_DPO_train_2000.jsonl")
utils.write_jsonl(filtered_dpo_data_test, "datasets/SE_DPO_test_2000.jsonl")

# Both series are DPO data (train and test split); label them as such.
train_lengths_values = [length for length, _ in dpo_lengths_train]
test_lengths_values = [length for length, _ in dpo_lengths_test]

plt.hist(train_lengths_values, bins=100, alpha=0.5, label='DPO train lengths', range=(-1000, 5000))
plt.hist(test_lengths_values, bins=100, alpha=0.5, label='DPO test lengths', range=(-1000, 5000))
plt.xlabel('Length in tokens')
plt.ylabel('Number of data points')
plt.legend(loc='upper right')
plt.title('Token Length Distribution')
plt.xlim(-1000, 5000)
plt.show()


def _percent_over_budget(lengths, limit):
    """Return the percentage of `lengths` strictly above `limit` (0.0 for an empty list)."""
    if not lengths:
        return 0.0
    return sum(1 for length in lengths if length > limit) / len(lengths) * 100


train_pct_over_budget = _percent_over_budget(train_lengths_values, MAX_TOKENS)
test_pct_over_budget = _percent_over_budget(test_lengths_values, MAX_TOKENS)

# Report the threshold that was actually applied.
print(f"Percentage of DPO train data points exceeding {MAX_TOKENS} tokens: {train_pct_over_budget:.2f}%")
print(f"Percentage of DPO test data points exceeding {MAX_TOKENS} tokens: {test_pct_over_budget:.2f}%")

In [None]:
# NOTE(review): `model` is not loaded in this cell; generation uses whatever
# model an earlier cell left bound in the kernel. Confirm it matches the
# tokenizer loaded below, or enable one of these loaders:
#model = AutoModelForCausalLM.from_pretrained("output/DPO_ALI/tinyllama").to("cuda")
#model, tokenizer = FastLanguageModel.from_pretrained("output/SFT_DPO/DPO_Merged_2/last_checkpoint", load_in_4bit=False)
#model, tokenizer = FastLanguageModel.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", load_in_4bit=False)
tokenizer = AutoTokenizer.from_pretrained("output/DPO_ALI/tinyllama")

eos_token = tokenizer.eos_token
q = "Suppose i want to trace an algorithm (think of it as a flowchart) to understand how it works, i need an input(s) for this algorithm, the question is: how can i determine the right input(s) that goes through all the cases ? Is there any \"scientific\" method to determine this input?\n"
instruction = "You are an experienced teacher who answers the STEM-related question asked by a student below."
# Same system/user/assistant layout as the training prompts.
prompt = f"<|system|>\n{instruction}{eos_token}\n<|user|>\n{q}{eos_token}\n<|assistant|>\n"

inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
# do_sample=True is what makes temperature / top_k / top_p take effect; without
# it generate() decodes greedily and those settings are ignored.
outputs = model.generate(**inputs, max_length=1000, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(tokenizer.batch_decode(outputs)[0])


In [None]:
from transformers import pipeline
import torch
# Text-generation pipeline around the public TinyLlama chat checkpoint.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Explicit Jinja chat template with <|system|>/<|user|>/<|assistant|> turns
# (kept on a single line so the template source is reproduced exactly).
pipe.tokenizer.chat_template = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"

# Each message is formatted by the tokenizer's chat template - see
# https://huggingface.co/docs/transformers/main/en/chat_templating
system_turn = {
    "role": "system",
    "content": "You are an experienced teacher who answers the STEM-related question asked by a student below.",
}
user_turn = {
    "role": "user",
    "content": "Explain the merge sort algorithm in technical detail, and derive the time complexity.",
}
messages = [system_turn, user_turn]

prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
sampling_options = dict(max_new_tokens=512, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
outputs = pipe(prompt, **sampling_options)
print(outputs[0]["generated_text"])

In [None]:
# Display the repr of whichever `model` object is currently bound in the kernel.
model

In [None]:
max_seq_length = 2048
model_name_or_path = "checkpoints/last_checkpoint"

# Start MCQA fine-tuning from the checkpoint stored under checkpoints/last_checkpoint.
pretrained_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name_or_path,
    max_seq_length=max_seq_length,
    dtype=None,          # auto-detect: float16 on T4/V100, bfloat16 on Ampere+
    load_in_4bit=False,  # full-precision weights, no 4-bit quantization
)

eos_token = tokenizer.eos_token
####### SFT #######
instruction = "You are an experienced teacher who answers the STEM-related question asked by a student below. This is a multiple choice question, thus you must answer with only one letter corresponding to the correct answer."


def _load_mcqa_sft_split(path):
    """Read an MCQA JSONL file and return a `Dataset` with a chat-formatted `text` column.

    Each row's `question` and `answer` fields are rendered as:
        <|system|> instruction </s> <|user|> question </s> <|assistant|> answer </s>
    """
    frame = pd.DataFrame(utils.read_jsonl(path))
    frame['text'] = frame.apply(
        lambda row: f"<|system|>\n{instruction}{eos_token}\n<|user|>\n{row['question'].rstrip()}{eos_token}\n<|assistant|>\n{row['answer'].strip()}{eos_token}",
        axis=1,
    )
    return Dataset.from_pandas(frame)


dataset_train = _load_mcqa_sft_split("datasets/MCQA_sft_train.jsonl")
dataset_test = _load_mcqa_sft_split("datasets/MCQA_sft_test.jsonl")

datasets_train_test_sft = DatasetDict({"train": dataset_train, "validation": dataset_test})

# Small rank-8 LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    pretrained_model,
    r=8,
    lora_alpha=8,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_dropout=0,  # Dropout = 0 is currently optimized
    bias="none",     # Bias = "none" is currently optimized
    use_gradient_checkpointing="unsloth",
    random_state=69,
)

_use_bf16 = torch.cuda.is_bf16_supported()
args = TrainingArguments(
    output_dir="./Other_checkpoints/MCQA16_SFT",
    report_to="tensorboard",
    bf16=_use_bf16,
    fp16=not _use_bf16,
    num_train_epochs=5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_strategy="steps",
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=50,
    save_steps=10000,
    save_total_limit=3,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    train_dataset=datasets_train_test_sft['train'],
    eval_dataset=datasets_train_test_sft['validation'],
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=False,
)

trainer.train()


In [None]:
# Export the MCQA SFT model and tokenizer in "merged_16bit" form into the run's output directory.
mcqa_sft_export_dir = "./Other_checkpoints/MCQA16_SFT"
model.save_pretrained_merged(
    mcqa_sft_export_dir,
    tokenizer,
    save_method="merged_16bit",
)

In [None]:
######### DPO #######
import utils
# The merged MCQA SFT model is exported to this directory by the save cell that
# follows MCQA SFT training (there is no `last_checkpoint` sub-folder).
model_name_or_path = "./Other_checkpoints/MCQA16_SFT"

max_seq_length = 2048

# Frozen reference model for DPO: a separately loaded copy of the SFT model.
ref_model, _ = FastLanguageModel.from_pretrained(
    model_name = model_name_or_path,
    max_seq_length = max_seq_length,
    dtype = None, # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    load_in_4bit = False, # Use 4bit quantization to reduce memory usage. Can be False.
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Policy model that receives the LoRA adapters below.
pretrained_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name_or_path,
    max_seq_length = max_seq_length,
    dtype = None, # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    load_in_4bit = False, # Use 4bit quantization to reduce memory usage. Can be False.
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

eos_token = tokenizer.eos_token
instruction = "You are an experienced teacher who answers the STEM-related question asked by a student below. This is a multiple choice question, thus you must answer with only one letter corresponding to the correct answer."


def _terminate_with_eos(text):
    """Right-strip `text` and append the EOS token unless it already ends with it."""
    trimmed = text.rstrip()
    return trimmed if trimmed.endswith(eos_token) else trimmed + eos_token


def _load_mcqa_dpo_split(path):
    """Read an MCQA preference JSONL file and return it as a chat-formatted `Dataset`.

    Train and test go through this one function so both splits are formatted
    identically: the user turn is built from the row's `prompt` field (never
    from `chosen`, which would leak the preferred answer into the prompt), and
    `chosen` / `rejected` are terminated with the EOS token.
    """
    frame = pd.DataFrame(utils.read_jsonl(path))
    frame['prompt'] = frame.apply(lambda row: f"<|system|>\n{instruction}{eos_token}\n<|user|>\n{row['prompt'].rstrip()}{eos_token}\n<|assistant|>\n", axis=1)
    frame['chosen'] = frame.apply(lambda row: _terminate_with_eos(row["chosen"]), axis=1)
    frame['rejected'] = frame.apply(lambda row: _terminate_with_eos(row["rejected"]), axis=1)
    return Dataset.from_pandas(frame)


dataset_train = _load_mcqa_dpo_split("datasets/MCQA_dpo_train.jsonl")
dataset_test = _load_mcqa_dpo_split("datasets/MCQA_dpo_test.jsonl")

# Rank-64 LoRA adapters on the attention and MLP projections of the policy.
model = FastLanguageModel.get_peft_model(
    pretrained_model,
    r = 64,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 64,
    lora_dropout = 0, # Dropout = 0 is currently optimized
    bias = "none",    # Bias = "none" is currently optimized
    use_gradient_checkpointing = "unsloth",
    random_state = 3407
)

# Mixed precision: bf16 when the GPU supports it, fp16 otherwise.
training_args = TrainingArguments(output_dir = "./Other_checkpoints/MCQA_dpo",
                 report_to = "tensorboard",
                 bf16 = torch.cuda.is_bf16_supported(),
                 fp16 = not torch.cuda.is_bf16_supported(),
                 num_train_epochs = 2,
                 evaluation_strategy = "steps",
                 logging_strategy = "steps",
                 per_device_train_batch_size = 4,
                 save_total_limit=3,
                 per_device_eval_batch_size = 4,
                 logging_steps = 100,
                 eval_steps = 1000,
                 save_steps = 3000,
                 learning_rate = 5e-7,
                 lr_scheduler_type="cosine",
                 warmup_ratio=0.1,
                )

# NOTE(review): max_prompt_length equals max_length, which leaves no reserved
# budget for the completion if an example ever needs truncation - confirm
# this is intended for the MCQA data.
dpo_trainer = DPOTrainer(
    model = model,                 # base model from SFT pipeline
    ref_model = ref_model,             # typically a copy of the SFT trained base model
    beta=0.1,              # temperature hyperparameter of DPO
    eval_dataset=dataset_test,
    label_smoothing = 0.2,
    train_dataset=dataset_train, # dataset prepared above
    tokenizer=tokenizer,   # tokenizer
    args=training_args,# training arguments e.g. batch size, lr, etc.
    max_length = max_seq_length,
    max_prompt_length = max_seq_length
)


In [None]:
# Run the training loop of the `dpo_trainer` currently bound in the kernel.
dpo_trainer.train()

In [None]:
# Export the MCQA DPO model and tokenizer in "merged_16bit" form.
mcqa_dpo_export_dir = "./Other_checkpoints/MCQA_dpo/last_checkpoint"
model.save_pretrained_merged(
    mcqa_dpo_export_dir,
    tokenizer,
    save_method="merged_16bit",
)

In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Reload the exported MCQA DPO model with 8-bit bitsandbytes quantization
# and store the result, together with its tokenizer, in a sibling directory.
int8_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    "Other_checkpoints/MCQA_dpo/last_checkpoint",
    device_map="auto",
    quantization_config=int8_config,
)
tokenizer = AutoTokenizer.from_pretrained("Other_checkpoints/MCQA_dpo/last_checkpoint")

for artifact in (model, tokenizer):
    artifact.save_pretrained("Other_checkpoints/MCQA_dpo/Quantized")


In [None]:
quantized_dir = "Other_checkpoints/MCQA_dpo/Quantized"

# Reload the quantized export and print its module structure.
model = AutoModelForCausalLM.from_pretrained(quantized_dir, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(quantized_dir)
print(model)

eos_token = tokenizer.eos_token
# Alternative multiple-choice question kept for reference:
#q = "We want to return, from the two posting lists below, the top-2 documents matching a\nquery using Fagin\u2019s algorithm with the aggregation function taken as the sum of the tf-idf weights. How many entries (total of both lists) are accessed in the first phase of the\nalgorithm performing round-robin starting at List 1 (i.e., before performing the random\naccess)?\n\nOptions:\nA. 4\nB. 6\nC. 8\nD. 10"
q = "Could you explain the algorithm mergesort?"
instruction =  "You are an experienced teacher who answers the STEM-related question asked by a student below. This is a multiple choice question, thus you must answer with only one letter corresponding to the correct answer."

# Same system/user/assistant layout as the training prompts.
prompt = f"<|system|>\n{instruction}{eos_token}\n<|user|>\n{q}{eos_token}\n<|assistant|>\n"

inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_length=512)
decoded = tokenizer.batch_decode(outputs)
print(decoded[0])

In [None]:
from models.model_dpo import AutoDPOModelForCausalLM
# Load the MCQA SFT export through the project's DPO wrapper class, plus the
# tokenizer stored in the same directory.
mcqa_model_wrapper = AutoDPOModelForCausalLM.from_pretrained("./Other_checkpoints/MCQA16_SFT")
tokenizer = AutoTokenizer.from_pretrained("Other_checkpoints/MCQA16_SFT")

In [None]:
# Install the Hugging Face Hub client, which provides the `huggingface-cli` command.
!pip install huggingface-hub

In [None]:
# Interactive Hugging Face login; prompts for an access token instead of hard-coding one.
!huggingface-cli login

In [None]:
from models.model_dpo import AutoDPOModelForCausalLM

# Load the MCQA DPO model and tokenizer from the local checkpoint directory
# (nothing is downloaded from the Hub here).
model_name = "Other_checkpoints/MCQA_dpo/last_checkpoint"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Wrap the loaded model in the project's DPO model class.
# Note that if you have a custom module in your class,
# you should initialize the weights of this module in the `__init__` function.
model_wrapper = AutoDPOModelForCausalLM(pretrained_model=model)