In [None]:
%%capture
# Normally using pip install unsloth is enough

# Temporarily as of Jan 31st 2025, Colab has some issues with Pytorch
# Using pip install unsloth will take 3 minutes, whilst the below takes <1 minute:
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
!pip install --no-deps cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

In [None]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 1024 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
]

model, tokenizer = FastLanguageModel.from_pretrained(

    model_name = fourbit_models[5],
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2025.2.12: Fast Mistral patching. Transformers: 4.48.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/165k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.31G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/162 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/181k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.2.12 patched 40 layers with 40 QKV layers, 40 O layers and 40 MLP layers.


<a name="Data"></a>
### Data Prep
We now use the Alpaca dataset from [yahma](https://huggingface.co/datasets/yahma/alpaca-cleaned), which is a filtered version of 52K of the original [Alpaca dataset](https://crfm.stanford.edu/2023/03/13/alpaca.html). You can replace this code section with your own data prep.

**[NOTE]** To train only on completions (ignoring the user's input) read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).

**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!! Otherwise you'll get infinite generations!

If you want to use the `llama-3` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1XamvWYinY6FOSX9GLvnqSjjsNflxdhNc?usp=sharing).

For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing).

In [None]:
# prompt: read merger.json file

import json


with open('/content/updated_questions-with-topics.json', 'r') as f:
    data = json.load(f)

trainSize = 0.4
total_size = len(data)
selected_data = data[:int(trainSize * total_size)]

print(f"Total size of the dataset: {total_size}")
print(f"Train on size of the dataset {float(trainSize*100)}% : size:{len(selected_data)}")

data = selected_data


print(data[1])

Total size of the dataset: 416
Train on size of the dataset 40.0% : size:166
{'Question': 'Write a Prolog program to solve a constraint satisfaction problem using the resolution rule. The problem is defined as follows:\nImplement a Prolog program to solve a constraint satisfaction problem involving three variables (A, B, and C) with the following constraints:1. A is greater than B.\n2. B is less than C.\n3. A is not equal to C.Note: Each task builds upon the previous one, increasing in difficulty, and requires coding-based implementation with structured sub-parts. The question and cover conceptual understanding, critical thinking, and problem-solving.', 'Type': 'code', 'Difficulty': 'medium', 'CLO': 'Implement a Prolog program to solve a constraint satisfaction problem', 'Topic': 'Constraint Satisfaction', 'chunkNumber': 88, 'text': 'lifts a proof step from ground clauses up to general LIFTING LEMMA first-order clauses. In order to prove his basic lifting lemma, Robinson had to invent 

Working Example

In [None]:
from datasets import Dataset

# Define the Alpaca prompt
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction: {}
### Input: {}
### Response: {}
"""
EOS_TOKEN = "<|endoftext|>"  # Replace with the tokenizer's EOS token if different



# Convert dataset to Hugging Face format for fine-tuning
def prepare_dataset(data):
    instructions = []
    inputs = []
    outputs = []

    for item in data:
        # Construct instruction for fine-tuning
        instructions.append("Generate an assignment question based on the given CLO, topic, question type, and difficulty level.")

        # Prepare input field with CLO, topic, question type, and difficulty level
        inputs.append(
            f"CLO: {item['CLO']}\n"
            f"Topic: {item['Topic']}\n"
            f"Question Type: {item['Type']}\n"
            f"Difficulty Level: {item['Difficulty']}"
        )

        # Output field is the question
        outputs.append(item["Question"])

    # Return the formatted dataset
    return {"instruction": instructions, "input": inputs, "output": outputs}
formatted_data = prepare_dataset(data)

# Convert to Hugging Face Dataset
hf_dataset = Dataset.from_dict(formatted_data)

# Formatting the dataset with prompts and EOS token
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input_, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input_, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

# Apply formatting
hf_dataset = hf_dataset.map(formatting_prompts_func, batched=True)

# Save processed dataset (optional)
hf_dataset.to_json("formatted_question_generation_dataset.json")

print("Dataset preparation complete!")


Map:   0%|          | 0/166 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset preparation complete!


<a name="Train"></a>
### Train the model
Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We do 60 steps to speed things up, but you can set `num_train_epochs=1` for a full run, and turn off `max_steps=None`. We also support TRL's `DPOTrainer`!

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# lora_config = LoraConfig(
#     r=8,  # Rank of LoRA matrices (small value reduces parameters)
#     lora_alpha=16,  # Scaling factor for LoRA adaptation
#     lora_dropout=0.05,  # Dropout to prevent overfitting
#     bias="none",  # No bias training, only LoRA parameters
#     task_type="CAUSAL_LM"  # Adapts for causal language modeling (like GPT/LLaMA)
# )

# Apply LoRA to the model
# model = get_peft_model(model, lora_config)


trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = hf_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

Converting train dataset to ChatML (num_proc=2):   0%|          | 0/166 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=2):   0%|          | 0/166 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/166 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/166 [00:00<?, ? examples/s]

In [None]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.741 GB.
7.982 GB of memory reserved.


In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 166 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 57,016,320


Step,Training Loss
1,1.7601
2,1.7821
3,1.682
4,1.6167
5,1.3863
6,1.2814
7,1.0941
8,1.1211
9,0.8951
10,1.1186


In [None]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

949.2671 seconds used for training.
15.82 minutes used for training.
Peak reserved memory = 9.191 GB.
Peak reserved memory for training = 1.209 GB.
Peak reserved memory % of max memory = 62.35 %.
Peak reserved memory for training % of max memory = 8.202 %.



### Inference


In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference


# Get the EOS token
EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token else "<|endoftext|>"  # Fallback if EOS token is not set

# Example query
query = {
    "CLO": "Solve a constraint satisfaction problem using Prolog",
    "Topic": "Prolog Reasoning",
    "Type": "Code",
    "Difficulty": "Hard"
}

# query = data[0]

# Format input text
input_text = alpaca_prompt.format(
    "Generate an assignment question based on the given CLO, topic, question type, and difficulty level.",
    f"CLO: {query['CLO']}\nTopic: {query['Topic']}\nQuestion Type: {query['Type']}\nDifficulty Level: {query['Difficulty']}",
     "", # output - leave this blank for generation!
)

# Tokenize input (ensuring `token_type_ids` is removed)
inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
inputs.pop("token_type_ids", None)  # Remove token_type_ids for Mistral

# Generate output
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=250,
        eos_token_id=tokenizer.eos_token_id
    )

# Decode output
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

# Extract response part (removing input portion)
response_start = generated_text.find("### Response:") + len("### Response:")
generated_question = generated_text[response_start:].strip()

# Print generated assignment question
print("\nGenerated Assignment Question:")
print(generated_question)



Generated Assignment Question:
A Prolog program is given to solve a constraint satisfaction problem. The program consists of a set of clauses that define the constraints and a set of variables that represent the unknowns. The program should be able to solve the problem by finding a consistent assignment of values to the variables.a) Define the Prolog program and explain the purpose of each clause and variable.b) Implement a Prolog program to solve the constraint satisfaction problem using the given clauses and variables.c) Analyze the performance of the Prolog program and optimize it for better efficiency.d) Extend the Prolog program to handle a larger set of constraints and variables.e) Compare the performance of the Prolog program with a Python-based constraint satisfaction solver and discuss the advantages and disadvantages of each approach.
<|endoftext|>


In [None]:
print(input_text)
print("---------------------------\n\n")
print(generated_question)

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction: Generate an assignment question based on the given CLO, topic, question type, and difficulty level.
### Input: CLO: Solve a constraint satisfaction problem using Prolog
Topic: Prolog Reasoning
Question Type: Code
Difficulty Level: Hard
### Response: 

---------------------------


A Prolog program is given to solve a constraint satisfaction problem. The program consists of a set of clauses that define the constraints and a set of variables that represent the unknowns. The program should be able to solve the problem by finding a consistent assignment of values to the variables.a) Define the Prolog program and explain the purpose of each clause and variable.b) Implement a Prolog program to solve the constraint satisfaction problem using the given clauses and variables.c) Analyze the performance of the Prolog program

# Infrence MultiQuesions for Same TOPIC ALO

In [None]:
import torch

# Enable fast inference mode
FastLanguageModel.for_inference(model)

# Get the EOS token
EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token else "<|endoftext|>"

# Example query
query = data[0]  # Use first entry from data

# Number of questions to generate
num_questions = 2  # Change this to generate more or fewer questions

# Store generated questions
generated_questions = []

for _ in range(num_questions):
    # Format input text
    input_text = alpaca_prompt.format(
        "Generate an assignment question based on the given CLO, topic, question type, and difficulty level.",
        f"CLO: {query['CLO']}\nTopic: {query['Topic']}\nQuestion Type: {query['Type']}\nDifficulty Level: {query['Difficulty']}",
        ""  # Output placeholder for generation
    )

    # Tokenize input (ensuring `token_type_ids` is removed)
    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
    inputs.pop("token_type_ids", None)  # Remove token_type_ids for Mistral

    # Generate output
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            eos_token_id=tokenizer.eos_token_id
        )

    # Decode output
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Extract response part (removing input portion)
    response_start = generated_text.find("### Response:") + len("### Response:")
    generated_question = generated_text[response_start:].strip()

    # Append generated question
    generated_questions.append(generated_question)

# Print all generated assignment questions
print("\nGenerated Assignment Questions:")
for idx, question in enumerate(generated_questions, 1):
    print(f"{idx}. {question}\n")



Generated Assignment Questions:
1. Design a Prolog program to handle equality reasoning in a simple knowledge base. The program should be able to infer equality between variables and constants.a) Define a Prolog predicate `equal/2` that takes two terms as arguments and returns true if they are equal. The predicate should handle both variable and constant terms.b) Implement a Prolog program that uses the `equal/2` predicate to infer equality between variables and constants. The program should be able to handle a simple knowledge base with a few facts and rules.c) Extend the program to handle a more complex knowledge base with multiple variables and constants. The program should be able to infer equality between variables and constants in a more complex scenario.d) Analyze the performance of the program and optimize it for better efficiency.e) Discuss the implications of using equality reasoning in a real-world application and provide examples of how it can be used.f) Implement a Prolog

# MULTPLE ALO/TOPICS Random pick

In [None]:
import torch
import random

# Enable fast inference mode
FastLanguageModel.for_inference(model)

# Get the EOS token
EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token else "<|endoftext|>"

# Number of queries to process (control how many different queries are selected randomly)
num_queries = 3  # Change this to select more or fewer queries

# Number of questions to generate per query
num_questions_per_query = 2  # Change this to generate more or fewer questions per query

# Select random queries from data
selected_queries = random.sample(data, min(num_queries, len(data)))

# Store all generated questions
all_generated_questions = []

for query in selected_queries:
    generated_questions = []

    for _ in range(num_questions_per_query):
        # Format input text
        input_text = alpaca_prompt.format(
            "Generate an assignment question based on the given CLO, topic, question type, and difficulty level.",
            f"CLO: {query['CLO']}\nTopic: {query['Topic']}\nQuestion Type: {query['Type']}\nDifficulty Level: {query['Difficulty']}",
            ""  # Output placeholder for generation
        )

        # Tokenize input (ensuring `token_type_ids` is removed)
        inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
        inputs.pop("token_type_ids", None)  # Remove token_type_ids for Mistral

        # Generate output
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=200,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode output
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

        # Extract response part (removing input portion)
        response_start = generated_text.find("### Response:") + len("### Response:")
        generated_question = generated_text[response_start:].strip()

        # Append generated question
        generated_questions.append(generated_question)

    # Store results per query
    all_generated_questions.append({
        "Query": query,
        "Generated Questions": generated_questions
    })

# Print all generated assignment questions
for idx, entry in enumerate(all_generated_questions, 1):
    print(f"\nQuery {idx}: {entry['Query']}")
    for q_idx, question in enumerate(entry['Generated Questions'], 1):
        print(f"  {q_idx}. {question}")



Query 1: {'Question': 'Design a system to solve a constraint satisfaction problem. Provide a practical example of how the system can be used.', 'Type': 'coding', 'Difficulty': 'hard', 'CLO': 'Develop an algorithm to model and solve complex constraints in a programming system.', 'Topic': 'Constraint Satisfaction Problems', 'chunkNumber': 12, 'text': 'test itself is not helpful for AI. Bringsjord (2008) gives advice for a Turing Test judge. Shieber (2004) and Epstein et al. (2008) collect a number of essays on the Turing Test. Artificial Intelligence: The Very Idea, by John Haugeland (1985), gives a Exercises 31 readable account of the philosophical and practical problems of AI. Significant early papers in AI are anthologized in the collections by Webber and Nilsson (1981) and by Luger (1995). The Encyclopedia of AI (Shapiro, 1992) contains survey articles on almost every topic in AI, as does Wikipedia. These articles usually provide a good entry point into the research literature on ea

# Diverse Random infrence


In [None]:
!pip install together

Collecting together
  Downloading together-1.4.1-py3-none-any.whl.metadata (12 kB)
Collecting eval-type-backport<0.3.0,>=0.1.3 (from together)
  Downloading eval_type_backport-0.2.2-py3-none-any.whl.metadata (2.2 kB)
Downloading together-1.4.1-py3-none-any.whl (80 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.5/80.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading eval_type_backport-0.2.2-py3-none-any.whl (5.8 kB)
Installing collected packages: eval-type-backport, together
Successfully installed eval-type-backport-0.2.2 together-1.4.1


In [None]:
import torch
import random
from together import Together
import os
from google.colab import userdata
TOGETHER_API_KEY = userdata.get('TOGETHER_API_KEY')


client = Together(api_key=TOGETHER_API_KEY)

# Enable fast inference mode
FastLanguageModel.for_inference(model)

# Get the EOS token
EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token else "<|endoftext|>"

# Number of queries to process
num_queries = 3  # Control how many different queries to select randomly

# Number of questions to generate per query
num_questions_per_query = 2  # Control how many questions per query

# Select random queries from data
selected_queries = random.sample(data, min(num_queries, len(data)))

# Function to slightly modify CLO and Topic
def diversify_text(original_text):
    prompt = f"""
    Please rewrite the following text to be slightly more human-like while keeping the meaning unchanged. Keep it short and professional.
    Original: {original_text}
    Diversified:
    """.strip()

    response = client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=50,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.0
    )

    # Extract and return the response
    return response.choices[0].message.content.strip()

# Store all generated questions
all_generated_questions = []

for query in selected_queries:
    # Diversify CLO and Topic
    diversified_clo = diversify_text(query["CLO"])
    diversified_topic = diversify_text(query["Topic"])

    print(f"Original CLO: {query['CLO']} → Diversified CLO: {diversified_clo}")
    print(f"Original Topic: {query['Topic']} → Diversified Topic: {diversified_topic}")

    generated_questions = []

    for _ in range(num_questions_per_query):
        # Format input text
        input_text = alpaca_prompt.format(
            "Generate an assignment question based on the given CLO, topic, question type, and difficulty level.",
            f"CLO: {diversified_clo}\nTopic: {diversified_topic}\nQuestion Type: {query['Type']}\nDifficulty Level: {query['Difficulty']}",
            ""  # Output placeholder for generation
        )

        # Tokenize input (ensuring `token_type_ids` is removed)
        inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
        inputs.pop("token_type_ids", None)  # Remove token_type_ids for Mistral

        # Generate output
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=200,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode output
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

        # Extract response part (removing input portion)
        response_start = generated_text.find("### Response:") + len("### Response:")
        generated_question = generated_text[response_start:].strip()

        # Append generated question
        generated_questions.append(generated_question)

    # Store results per query
    all_generated_questions.append({
        "Original Query": query,
        "Diversified CLO": diversified_clo,
        "Diversified Topic": diversified_topic,
        "Generated Questions": generated_questions
    })

# Print all generated assignment questions
for idx, entry in enumerate(all_generated_questions, 1):
    print(f"\nQuery {idx}:")
    print(f"  Original CLO: {entry['Original Query']['CLO']}")
    print(f"  Diversified CLO: {entry['Diversified CLO']}")
    print(f"  Original Topic: {entry['Original Query']['Topic']}")
    print(f"  Diversified Topic: {entry['Diversified Topic']}")
    for q_idx, question in enumerate(entry['Generated Questions'], 1):
        print(f"  {q_idx}. {question}")


Original CLO: Implement a Python class to simulate a thermostat controller. → Diversified CLO: Implement a Python class that simulates a thermostat controller, incorporating features to control temperature and provide real-time monitoring.
Original Topic: Smart Home Automation → Diversified Topic: Original: Smart Home Automation
Rewritten: Home Automation Solutions
Original CLO: Analyze and improve the efficiency of algorithms for solving complex problems in computer science. → Diversified CLO: Here's a rewritten version:

"Optimize and refine algorithms to tackle complex computer science challenges, driving efficiency and improvement in problem-solving capabilities."
Original Topic: Card Game Optimization → Diversified Topic: Optimizing Card Games

This process involves refining and streamlining card games to achieve maximum efficiency, balance, and enjoyment for players.
Original CLO: Analyze and explain the use of planning graphs in decision-making and problem-solving. → Diversified

# Generate Assigment PDF





In [None]:
!pip install reportlab
from transformers import TextStreamer, AutoTokenizer, AutoModelForCausalLM
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer






In [None]:

# Function to generate questions
def generate_questions(query, num_questions=5):
    questions = []
    for _ in range(num_questions):
        input_text = alpaca_prompt.format(
            "Generate an assignment question based on the given CLO, topic, question type, and difficulty level",
            f"CLO: {query['CLO']}\nQuestion Type: {query['Type']}\nDifficulty Level: {query['Difficulty']}",
            ""
        )
        inputs = tokenizer(
            [input_text],
            return_tensors="pt",
            padding=True,
            truncation=True
        ).to("cuda")
        out = model.generate(**inputs, max_new_tokens=400)
        output = tokenizer.batch_decode(out)[0]
        # Extract the generated question
        question = output.split("### Response:")[1].strip()
        onlyQ = question.split("<|endoftext|>")[0].strip()
        questions.append(onlyQ)
    return questions

# Function to create a professional assignment PDF
def create_assignment_pdf(filename, questions):
    # Create a PDF document
    pdf = SimpleDocTemplate(filename, pagesize=letter, leftMargin=1*inch, rightMargin=1*inch, topMargin=1*inch, bottomMargin=1*inch)

    # Get the default style sheet
    styles = getSampleStyleSheet()

    # Custom styles
    styles.add(ParagraphStyle(name="Question", fontName="Helvetica-Bold", fontSize=12, leading=14, spaceAfter=6))
    styles.add(ParagraphStyle(name="Answer", fontName="Helvetica", fontSize=11, leading=13, spaceAfter=12))
    styles.add(ParagraphStyle(name="SectionHeader", fontName="Helvetica-Bold", fontSize=14, leading=16, spaceAfter=12, spaceBefore=18))

    # Create a list to hold the content
    content = []

    # Add assignment title
    title = Paragraph("Assignment", styles['Title'])
    content.append(title)
    content.append(Spacer(1, 0.25 * inch))

    # Add course and instructor details
    course_details = Paragraph("Course: Introduction to Computer Science<br/>Instructor: Dr. John Doe<br/>Date: October 10, 2023", styles['BodyText'])
    content.append(course_details)
    content.append(Spacer(1, 0.5 * inch))

    # Add instructions section
    instructions = Paragraph("Instructions: Answer all the questions below. Provide detailed explanations where required.", styles['SectionHeader'])
    content.append(instructions)
    content.append(Spacer(1, 0.25 * inch))

    # Add questions
    for i, question in enumerate(questions, start=1):
        question_text = f"Q{i}. {question}"
        question_paragraph = Paragraph(question_text, styles['Question'])
        content.append(question_paragraph)
        content.append(Spacer(1, 0.2 * inch))

        # Add space for answers
        answer_placeholder = Paragraph("Answer: _________________________________________________________", styles['Answer'])
        content.append(answer_placeholder)
        content.append(Spacer(1, 0.4 * inch))

    # Build the PDF
    pdf.build(content)

# Generate questions and create PDF
all_questions = []
num_queries = 3

selected_queries = random.sample(data, min(num_queries, len(data)))
for query in selected_queries:
    questions = generate_questions(query, num_questions=1)
    print(questions)
    all_questions.extend(questions)

# Create the PDF
create_assignment_pdf("Professional_Assignment.pdf", all_questions)

["Design a wumpus world environment with the following characteristics:* The world is a 5x5 grid, with each cell containing either a wumpus, a pit, or nothing.\n* The wumpus is located in a random cell, and the pits are located in a random subset of the cells.\n* The agent starts in a random cell and can move up, down, left, or right.\n* The agent can perceive the presence of the wumpus and the pits, but not their exact locations.Implement a first-order logic representation of the wumpus world environment, including:* A predicate `wumpus(x, y)` to represent the location of the wumpus.\n* A predicate `pit(x, y)` to represent the location of the pits.\n* A predicate `agent(x, y)` to represent the location of the agent.\n* A predicate `perceives_wumpus` to represent the agent's perception of the wumpus.\n* A predicate `perceives_pit` to represent the agent's perception of the pits.Implement a Prolog program that simulates the wumpus world environment, including:* A predicate `move_agent(x