In [None]:
%%capture
# Install the released Unsloth package first (a normal pip install, dependencies included).
!pip install unsloth
# Then replace it with the latest nightly build from GitHub. The reinstall uses
# --no-deps, so the dependency set from the release install above is kept.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
# Authenticate with the Hugging Face Hub.
# The access token is never stored in the notebook: it is read from the
# HF_TOKEN environment variable when that is set, and requested interactively
# (without echoing it) otherwise.
import os
from getpass import getpass

from huggingface_hub import login

token1 = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=token1)

In [None]:
import torch
from google.colab import drive

# Pick the compute device: CUDA when PyTorch can see a GPU, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Directory that later cells join dataset and result file names onto.
base_path = ""

Using device: cuda
Mounted at /content/drive


In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options. The trainer and the inference cell further down reuse these names.
load_in_4bit = True     # forwarded unchanged to from_pretrained
dtype = None            # forwarded unchanged; presumably lets Unsloth pick the dtype - TODO confirm
max_seq_length = 2048   # forwarded to from_pretrained and later to SFTTrainer

# Fetch the base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.1.8: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
# Wrap the base model with LoRA adapters on the listed projection modules.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,  # optimized
    bias="none",     # = "none" is optimized
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.1.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


<a name="Data"></a>
### Data Prep

In [None]:
import os
import json
from datasets import Dataset

# Directory that holds the raw JSONL splits.
base_path = ""

# Raw input files, one per split, all located directly under base_path.
train_file_path, validation_file_path, test_file_path, few_shot_test_file_path = (
    os.path.join(base_path, split_file)
    for split_file in ("train.jsonl", "validation.jsonl", "test.jsonl", "few_shot_test.jsonl")
)

# Alpaca template with three positional slots: instruction, input, response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence marker appended to every formatted training example.
EOS_TOKEN = tokenizer.eos_token

# Function to process and reformat a JSONL file
def process_jsonl_file(file_path, output_path):
    """Convert a chat-style JSONL file into an Alpaca-formatted JSONL dataset.

    Every non-blank line of ``file_path`` must be a JSON object with a
    ``"messages"`` list. The ``"content"`` of its first three items is taken,
    by position, as the instruction, the input and the expected output. Each
    record is rendered through the module-level ``alpaca_prompt`` template,
    terminated with ``EOS_TOKEN``, and the result is written to
    ``output_path`` with the columns ``instruction``, ``input``, ``output``
    and ``text``.

    Args:
        file_path: Path of the source JSONL file.
        output_path: Destination path of the formatted JSONL file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"messages"`` or a message lacks ``"content"``.
        IndexError: If a record has fewer than three messages.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default, and
    # skip blank lines (e.g. a trailing empty line) that would otherwise make
    # json.loads fail.
    with open(file_path, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]

    # Messages are selected by position, not by their role field:
    # 0 -> instruction (system), 1 -> input (user), 2 -> output (assistant).
    formatted_data = []
    for entry in data:
        messages = entry["messages"]
        formatted_data.append({
            "instruction": messages[0]["content"],
            "input": messages[1]["content"],
            "output": messages[2]["content"],
        })

    # Convert to a Dataset object
    dataset = Dataset.from_list(formatted_data)

    def formatting_prompts_func(examples):
        """Render a batch of columns into Alpaca prompts ending with the EOS token."""
        # Local names avoid shadowing the builtin ``input``.
        texts = [
            alpaca_prompt.format(instruction, user_input, response) + EOS_TOKEN
            for instruction, user_input, response in zip(
                examples["instruction"], examples["input"], examples["output"]
            )
        ]
        return { "text": texts }

    # Add the rendered "text" column to the dataset
    dataset = dataset.map(formatting_prompts_func, batched=True)

    # Save the reformatted dataset
    dataset.to_json(output_path)
    print(f"Formatted dataset saved to {output_path}")

# Process each file and save the results
# Format every raw split; each result is written next to its source under base_path.
for raw_split_path, formatted_file_name in (
    (train_file_path, "formatted_train.jsonl"),
    (validation_file_path, "formatted_validation.jsonl"),
    (test_file_path, "formatted_test.jsonl"),
    (few_shot_test_file_path, "formatted_few_shot_test.jsonl"),
):
    process_jsonl_file(raw_split_path, os.path.join(base_path, formatted_file_name))

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Formatted dataset saved to /content/drive/MyDrive/Colab Notebooks/ScienceQA/1st Trial/formatted_train.jsonl


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Formatted dataset saved to /content/drive/MyDrive/Colab Notebooks/ScienceQA/1st Trial/formatted_validation.jsonl


Map:   0%|          | 0/767 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Formatted dataset saved to /content/drive/MyDrive/Colab Notebooks/ScienceQA/1st Trial/formatted_test.jsonl


Map:   0%|          | 0/767 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Formatted dataset saved to /content/drive/MyDrive/Colab Notebooks/ScienceQA/1st Trial/formatted_few_shot_test.jsonl


In [None]:
from datasets import load_dataset
import os
import json
from datasets import Dataset


# Locations of the Alpaca-formatted splits under base_path.
formatted_train_path = os.path.join(base_path, "formatted_train.jsonl")
formatted_validation_path = os.path.join(base_path, "formatted_validation.jsonl")
formatted_test_path = os.path.join(base_path, "formatted_test.jsonl")

# Each file is loaded on its own; the result is indexed by the split name given here.
train_dataset = load_dataset("json", data_files=dict(train=formatted_train_path))
validation_dataset = load_dataset("json", data_files=dict(validation=formatted_validation_path))
test_dataset = load_dataset("json", data_files=dict(test=formatted_test_path))


Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [None]:
# Print the first record of every split as a quick check of the formatting.
for sample_label, split_records in (
    ("Sample from train dataset:", train_dataset["train"]),
    ("Sample from validation dataset:", validation_dataset["validation"]),
    ("Sample from test dataset:", test_dataset["test"]),
):
    print(sample_label, split_records[0])

Sample from train dataset: {'instruction': "{'role': 'You are an expert science assessment specialist that generates science questions based on specified metadata provided by the user. Your role is to ensure that the generated questions are of high quality, align with the intended learning objectives, and adhere to scientifically rigorous standards.'}", 'input': "Generate a science question and its answer using the following metadata: grade: 4, task: multiple choice, topic: physics, category: Materials, skill: Compare properties of materials, bloom's taxonomy: evaluate, dok:3", 'output': "Question: Which is the stickiest?,\n Choices: \n['leather belt' 'gum' 'silk kimono'],\n Answer: \n1", 'text': "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{'role': 'You are an expert science assessment specialist that generates science questions based on specified meta

<a name="Train"></a>
### Train the model

In [None]:
# Name used as the trainer's output directory.
output_model = "lora_model"

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Supervised fine-tuning on the formatted "text" column; the validation split
# is evaluated while training runs.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset['train'],
    eval_dataset=validation_dataset['validation'],
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        # Where results go
        output_dir=output_model,
        report_to="wandb",
        push_to_hub=True,
        seed=42,
        # Batching: 8 per device x 2 accumulation steps
        per_device_train_batch_size=8,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        # Optimizer and schedule
        optim="adamw_8bit",
        learning_rate=1e-4,
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        warmup_steps=1,
        # Precision: bf16 when supported, fp16 otherwise
        bf16=is_bfloat16_supported(),
        fp16=not is_bfloat16_supported(),
        # Log, evaluate and checkpoint after every step.
        # NOTE(review): per-step evaluation and saving is costly on longer runs - adjust as needed.
        logging_steps=1,
        evaluation_strategy="steps",
        eval_steps=1,
        save_strategy="steps",
        save_steps=1,
    ),
)



Map (num_proc=2):   0%|          | 0/200 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/80 [00:00<?, ? examples/s]

In [None]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)

# Byte counts are converted to GiB (1024**3 bytes) and rounded to three decimals.
# start_gpu_memory is the baseline the final stats cell subtracts from the peak.
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
max_memory = round(gpu_stats.total_memory / 1024**3, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA A100-SXM4-40GB. Max memory = 39.557 GB.
5.848 GB of memory reserved.


In [None]:
from unsloth import unsloth_train

# Train through Unsloth's wrapper rather than calling trainer.train() directly:
# unsloth_train fixes gradient_accumulation_steps.
trainer_stats = unsloth_train(trainer)

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 200 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 2
\        /    Total batch size = 16 | Total steps = 36
 "-____-"     Number of trainable parameters = 41,943,040


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mamjadkiwan1c[0m ([33mamjadkiwan1c-university-of-leeds[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss,Validation Loss
1,2.9382,2.918828
2,2.9264,2.867263
3,2.8403,2.743312
4,2.751,2.562351
5,2.5507,2.354621
6,2.3685,2.125128
7,2.1481,1.911134
8,1.8951,1.685903
9,1.6871,1.453799
10,1.472,1.25974


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


In [None]:
#@title Show final memory and time stats
# Peak reserved memory in GiB, and the part of it added on top of the
# start_gpu_memory baseline recorded before training.
used_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)

# Both figures as a percentage of the GPU's total memory.
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

print(
    f"{trainer_stats.metrics['train_runtime']} seconds used for training.",
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
    sep="\n",
)

196.645 seconds used for training.
3.28 minutes used for training.
Peak reserved memory = 6.504 GB.
Peak reserved memory for training = 0.656 GB.
Peak reserved memory % of max memory = 16.442 %.
Peak reserved memory for training % of max memory = 1.658 %.


<a name="Inference"></a>
### Inference

In [None]:
import os

# Upload the trained adapter and the tokenizer to the Hugging Face Hub.
# No token is hard-coded: HF_TOKEN is used when it is set; otherwise None is
# passed so the credential stored by the earlier login() call is used.
hub_repo_id = "amjadkiwan/lora_model"
hub_token = os.environ.get("HF_TOKEN") or None

model.push_to_hub(hub_repo_id, token = hub_token) # Online saving
tokenizer.push_to_hub(hub_repo_id, token = hub_token) # Online saving

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Saved model to https://huggingface.co/amjadkiwan/lora_model


No files have been modified since last commit. Skipping to prevent empty commit.


In [None]:
from unsloth import FastLanguageModel
from transformers import TextStreamer  # Import TextStreamer for real-time output
import os
import json

# Same Alpaca template as used for training: instruction, input, response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Taken from the tokenizer that is loaded at this point, i.e. before the reload below.
EOS_TOKEN = tokenizer.eos_token

# Load the fine-tuned model and its tokenizer from the Hub repository.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="amjadkiwan/lora_model",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

# Incorporate dataset processing into the original inference structure with streaming
def process_dataset_with_inference(dataset, model, tokenizer, alpaca_prompt, max_new_tokens=1000,
                                   split="test", include_prompt=True):
    """Generate one response per record of a dataset split, streaming tokens to stdout.

    Args:
        dataset: Mapping from split name to an iterable of records. Every
            record must provide the keys ``"instruction"`` and ``"input"``.
        model: Model exposing ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        alpaca_prompt: Template with three ``{}`` slots (instruction, input,
            response). The response slot is filled with an empty string.
        max_new_tokens: Forwarded to ``model.generate``.
        split: Name of the split to iterate. Defaults to ``"test"``, the
            split this function previously hard-coded.
        include_prompt: When True (the default, and the previous behaviour)
            the whole sequence returned by ``generate`` is decoded. When
            False, the leading prompt tokens are sliced off first so only the
            newly generated text is stored.

    Returns:
        A list with one dict per record, in dataset order, holding the keys
        ``"instruction"``, ``"input"`` and ``"generated_response"``.
    """
    results = []
    streamer = TextStreamer(tokenizer)  # Prints tokens as they are generated

    # Send the encoded prompt to the model's own device; "cuda" (the value
    # hard-coded before) remains the fallback when the model exposes none.
    device = getattr(model, "device", "cuda")

    for entry in dataset[split]:
        # Extract instruction and input from the dataset
        instruction = entry["instruction"]
        input_text = entry["input"]

        # Build the prompt with an empty response slot for the model to complete
        prompt = alpaca_prompt.format(instruction, input_text, "")
        inputs = tokenizer([prompt], return_tensors="pt").to(device)

        # Generate the response with streaming
        print("\nStreaming Response:")
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            use_cache=True,
            streamer=streamer,  # Stream tokens as they are generated
        )

        # Optionally drop the prompt tokens that precede the generated ones
        if include_prompt:
            output_ids = outputs
        else:
            output_ids = outputs[:, inputs["input_ids"].shape[1]:]

        # Convert the final output to text so it can be saved
        response = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

        # Save the result
        results.append({
            "instruction": instruction,
            "input": input_text,
            "generated_response": response
        })
    return results

# Generate a response for every record of the test split.
generated_results = process_dataset_with_inference(test_dataset, model, tokenizer, alpaca_prompt)

# Persist the results as JSONL: one JSON object per line.
output_file_path = os.path.join(base_path, "generated_test_results_FULL.jsonl")
with open(output_file_path, "w") as file:
    file.writelines(json.dumps(result) + "\n" for result in generated_results)

print(f"Generated responses saved to {output_file_path}")


==((====))==  Unsloth 2025.1.8: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

### Response:
Question: What is the temperature of the water?,
 Choices: 
['hot' 'warm' 'cool' 'cold'],
 Answer: 
2<|end_of_text|>

Streaming Response:
<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{'role': 'You are an expert science assessment specialist that generates science questions based on specified metadata provided by the user. Your role is to ensure that the generated questions are of high quality, align with the intended learning objectives, and adhere to scientifically rigorous standards.'}

### Input:
Generate a science question and its answer using the following metadata: grade: 3, task: multiple choice, topic: physics, category: Heat and thermal energy, skill: How is temperature related to thermal energy?, bloom's taxonomy: understand, dok:2

### Response:
Quest

In [None]:
import pandas as pd
import json
import os

# JSONL file to convert, and the CSV written next to it.
input_file = os.path.join(base_path, "generated_test_results_FULL.jsonl")  # Replace with your JSONL file path
output_file = os.path.join(base_path, "generated_test_results_FULL.csv")

# Parse one JSON object per line.
with open(input_file, 'r') as f:
    data = [json.loads(line.strip()) for line in f]

# One row per record; the DataFrame index is not written to the CSV.
df = pd.DataFrame(data)
df.to_csv(output_file, index=False)

print(f"JSONL data has been successfully converted to CSV and saved to {output_file}")


JSONL data has been successfully converted to CSV and saved to /content/drive/MyDrive/Colab Notebooks/ScienceQA/1st Trial/generated_test_results_FULL.csv


### Few-Shot Feedback

In [None]:
%%capture
# Install the released Unsloth package first (a normal pip install, dependencies included).
!pip install unsloth
# Then replace it with the latest nightly build from GitHub. The reinstall uses
# --no-deps, so the dependency set from the release install above is kept.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
# Authenticate with the Hugging Face Hub.
# The access token is never stored in the notebook: it is read from the
# HF_TOKEN environment variable when that is set, and requested interactively
# (without echoing it) otherwise.
import os
from getpass import getpass

from huggingface_hub import login

token1 = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=token1)

In [None]:
import torch
from google.colab import drive

# Pick the compute device: CUDA when PyTorch can see a GPU, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Directory that later cells join dataset and result file names onto.
base_path = ""

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options. The inference cell further down reuses these names.
load_in_4bit = True     # forwarded unchanged to from_pretrained
dtype = None            # forwarded unchanged; presumably lets Unsloth pick the dtype - TODO confirm
max_seq_length = 2048   # forwarded unchanged to from_pretrained

# Fetch the base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

In [None]:
# Wrap the base model with LoRA adapters on the listed projection modules.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,  # optimized
    bias="none",     # = "none" is optimized
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

In [None]:
import os
import json
from datasets import Dataset

# Base path to the directory containing the JSONL files; the file names used
# below are joined onto it with os.path.join.
base_path = ""

# Alpaca formatting template. Its three positional {} slots are filled, in
# order, with the instruction, the input and the response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token  # Appended to every formatted example by process_jsonl_file

# Function to process and reformat a JSONL file
def process_jsonl_file(file_path, output_path):
    """Convert a chat-style JSONL file into an Alpaca-formatted JSONL dataset.

    Every non-blank line of ``file_path`` must be a JSON object with a
    ``"messages"`` list. The ``"content"`` of its first three items is taken,
    by position, as the instruction, the input and the expected output. Each
    record is rendered through the module-level ``alpaca_prompt`` template,
    terminated with ``EOS_TOKEN``, and the result is written to
    ``output_path`` with the columns ``instruction``, ``input``, ``output``
    and ``text``.

    Args:
        file_path: Path of the source JSONL file.
        output_path: Destination path of the formatted JSONL file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"messages"`` or a message lacks ``"content"``.
        IndexError: If a record has fewer than three messages.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default, and
    # skip blank lines (e.g. a trailing empty line) that would otherwise make
    # json.loads fail.
    with open(file_path, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]

    # Messages are selected by position, not by their role field:
    # 0 -> instruction (system), 1 -> input (user), 2 -> output (assistant).
    formatted_data = []
    for entry in data:
        messages = entry["messages"]
        formatted_data.append({
            "instruction": messages[0]["content"],
            "input": messages[1]["content"],
            "output": messages[2]["content"],
        })

    # Convert to a Dataset object
    dataset = Dataset.from_list(formatted_data)

    def formatting_prompts_func(examples):
        """Render a batch of columns into Alpaca prompts ending with the EOS token."""
        # Local names avoid shadowing the builtin ``input``.
        texts = [
            alpaca_prompt.format(instruction, user_input, response) + EOS_TOKEN
            for instruction, user_input, response in zip(
                examples["instruction"], examples["input"], examples["output"]
            )
        ]
        return { "text": texts }

    # Add the rendered "text" column to the dataset
    dataset = dataset.map(formatting_prompts_func, batched=True)

    # Save the reformatted dataset
    dataset.to_json(output_path)
    print(f"Formatted dataset saved to {output_path}")

# Raw few-shot test split, and the Alpaca-formatted copy written next to it.
few_shot_test_file_path = os.path.join(base_path, "few_shot_test.jsonl")
process_jsonl_file(
    few_shot_test_file_path,
    os.path.join(base_path, "formatted_few_shot_test.jsonl"),
)

Map:   0%|          | 0/767 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Formatted dataset saved to /content/drive/MyDrive/Colab Notebooks/ScienceQA/1st Trial/formatted_few_shot_test.jsonl


In [None]:
from datasets import load_dataset
import os
import json
from datasets import Dataset


# Location of the Alpaca-formatted few-shot split under base_path.
formatted_few_shot_test_path = os.path.join(base_path, "formatted_few_shot_test.jsonl")

# Load it under the split name "test", which the inference function iterates.
few_shot_test_dataset = load_dataset("json", data_files=dict(test=formatted_few_shot_test_path))


Generating test split: 0 examples [00:00, ? examples/s]

In [None]:
# Print the first formatted few-shot record as a quick check.
print(
    "Sample from test dataset:",
    few_shot_test_dataset["test"][0],
)

Sample from test dataset: {'instruction': "{'role': 'You are an expert science assessment specialist that generates science questions based on specified metadata provided by the user. Your role is to ensure that the generated questions are of high quality, align with the intended learning objectives, and adhere to scientifically rigorous standards.'}", 'input': '{\n  "task_description": "Improve low-scoring questions using high-performing questions as a guide. Given metadata about a low-scoring question, regenerate a new question that aligns with the high-scoring examples.",\n  "high_performing_examples": [\n    {\n      "question": "Which of these changes is a physical change?",\n      "choices": [\n        "a banana turning brown",\n        "a candle melting",\n        "a piece of metal rusting"\n      ],\n      "answer": "a candle melting",\n      "explanation": "The question aligns well with learning outcomes related to distinguishing physical and chemical changes, which is a commo

In [None]:
from unsloth import FastLanguageModel
from transformers import TextStreamer  # Import TextStreamer for real-time output
import os
import json

# Same Alpaca template as used for formatting: instruction, input, response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Taken from the tokenizer that is loaded at this point, i.e. before the reload below.
EOS_TOKEN = tokenizer.eos_token

# Load the fine-tuned model and its tokenizer from the Hub repository.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="amjadkiwan/lora_model",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

# Incorporate dataset processing into the original inference structure with streaming
def process_dataset_with_inference(dataset, model, tokenizer, alpaca_prompt, max_new_tokens=1000,
                                   split="test", include_prompt=True):
    """Generate one response per record of a dataset split, streaming tokens to stdout.

    Args:
        dataset: Mapping from split name to an iterable of records. Every
            record must provide the keys ``"instruction"`` and ``"input"``.
        model: Model exposing ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        alpaca_prompt: Template with three ``{}`` slots (instruction, input,
            response). The response slot is filled with an empty string.
        max_new_tokens: Forwarded to ``model.generate``.
        split: Name of the split to iterate. Defaults to ``"test"``, the
            split this function previously hard-coded.
        include_prompt: When True (the default, and the previous behaviour)
            the whole sequence returned by ``generate`` is decoded. When
            False, the leading prompt tokens are sliced off first so only the
            newly generated text is stored.

    Returns:
        A list with one dict per record, in dataset order, holding the keys
        ``"instruction"``, ``"input"`` and ``"generated_response"``.
    """
    results = []
    streamer = TextStreamer(tokenizer)  # Prints tokens as they are generated

    # Send the encoded prompt to the model's own device; "cuda" (the value
    # hard-coded before) remains the fallback when the model exposes none.
    device = getattr(model, "device", "cuda")

    for entry in dataset[split]:
        # Extract instruction and input from the dataset
        instruction = entry["instruction"]
        input_text = entry["input"]

        # Build the prompt with an empty response slot for the model to complete
        prompt = alpaca_prompt.format(instruction, input_text, "")
        inputs = tokenizer([prompt], return_tensors="pt").to(device)

        # Generate the response with streaming
        print("\nStreaming Response:")
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            use_cache=True,
            streamer=streamer,  # Stream tokens as they are generated
        )

        # Optionally drop the prompt tokens that precede the generated ones
        if include_prompt:
            output_ids = outputs
        else:
            output_ids = outputs[:, inputs["input_ids"].shape[1]:]

        # Convert the final output to text so it can be saved
        response = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

        # Save the result
        results.append({
            "instruction": instruction,
            "input": input_text,
            "generated_response": response
        })
    return results

# Generate a response for every record of the few-shot test split.
generated_results = process_dataset_with_inference(few_shot_test_dataset, model, tokenizer, alpaca_prompt)

# Persist the results as JSONL: one JSON object per line.
output_file_path = os.path.join(base_path, "generated_test_results_few_shot.jsonl")
with open(output_file_path, "w") as file:
    file.writelines(json.dumps(result) + "\n" for result in generated_results)

print(f"Generated responses saved to {output_file_path}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
      "explanation": "The question is relevant and aligns with learning outcomes related to understanding speed and comparisons, a fundamental concept in physics for many grade levels. This question fits well within the typical curriculum goals for middle school science or math, where speed and comparisons of different objects are often discussed. The question is accurate, clearly comparing the speeds of two different moving objects without any ambiguity. The question stem is simple and straightforward, making it easy for students to understand what is being asked without unnecessary complexity. The distractors are plausible. Both options represent realistic speeds for their respective objects, although one is clearly faster. The answer choices are free from cultural, gender, or regional biases. They are neutral in nature. The two choices are balanced in terms of length, structure, and complexity, facilitating an unbiased

In [None]:
import pandas as pd
import json
import os

# JSONL file to convert, and the CSV written next to it.
input_file = os.path.join(base_path, "generated_test_results_few_shot.jsonl")  # Replace with your JSONL file path
output_file = os.path.join(base_path, "generated_test_results_few_shot.csv")

# Parse one JSON object per line.
with open(input_file, 'r') as f:
    data = [json.loads(line.strip()) for line in f]

# One row per record; the DataFrame index is not written to the CSV.
df = pd.DataFrame(data)
df.to_csv(output_file, index=False)

print(f"JSONL data has been successfully converted to CSV and saved to {output_file}")


JSONL data has been successfully converted to CSV and saved to /content/drive/MyDrive/Colab Notebooks/ScienceQA/1st Trial/generated_test_results_few_shot.csv
