In [None]:
from dataclasses import dataclass
from typing import Literal

import math
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, HfArgumentParser, \
    BitsAndBytesConfig
from trl import SFTTrainer

from templates import format_user_text, format_assistant_text

@dataclass
class ModelConfig:
    model_name: str
    dataset_name: str
    dataset_split: str
    hp_tuning: bool


@dataclass
class CustomBnBConfig:
    load_in_4bit: bool
    bnb_4bit_quant_type: str
    bnb_4bit_compute_dtype: str
    bnb_4bit_use_double_quant: bool


@dataclass
class CustomLoRAConfig:
    lora_r: int
    # lora_target_modules: str
    lora_alpha: int
    lora_dropout: float
    lora_bias: Literal["none", "all", "lora_only"]
    lora_task_type: str


@dataclass
class SFTConfig:
    sft_max_seq_length: int
    sft_packing: bool


# Parse configuration arguments
hfparser = HfArgumentParser((ModelConfig, CustomBnBConfig, CustomLoRAConfig, SFTConfig, TrainingArguments))
model_config, custom_bnb_config, custom_lora_config, sft_config, train_args = hfparser.parse_json_file(
    json_file="../hp_tuning_config_1.json")  # type: ModelConfig, CustomBnBConfig, CustomLoRAConfig, SFTConfig, TrainingArguments

# Retrieve dataset
full_dataset = load_dataset(model_config.dataset_name, split=model_config.dataset_split)

# Split dataset deterministically (e.g., 95% train, 5% validation)
train_test_split = full_dataset.train_test_split(test_size=0.05, shuffle=True, seed=42)

train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']  # use as evaluation/test dataset

# BitsAndBytesConfig int-4 config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=custom_bnb_config.load_in_4bit,
    bnb_4bit_quant_type=custom_bnb_config.bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=custom_bnb_config.bnb_4bit_compute_dtype,
    bnb_4bit_use_double_quant=custom_bnb_config.bnb_4bit_use_double_quant
)

# Retrieve model
model = AutoModelForCausalLM.from_pretrained(model_config.model_name,
                                             quantization_config=bnb_config,
                                             low_cpu_mem_usage=True,
                                             use_cache=False)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_config.model_name)

# Add padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

# LoRA config based on QLoRA paper
peft_config = LoraConfig(
    r=custom_lora_config.lora_r,
    lora_alpha=custom_lora_config.lora_alpha,
    lora_dropout=custom_lora_config.lora_dropout,
    bias=custom_lora_config.lora_bias,
    task_type=custom_lora_config.lora_task_type,
)

In [23]:
# Format the dataset to be used in the instruct model
def format_message(dataset):
    message_json = []
    target_response_json = []
    for i in range(len(dataset['instruction'])):
        user_text = format_user_text(dataset['instruction'][i], dataset['input'][i])
        assistant_text = format_assistant_text(dataset['output'][i])
        user_text_json = [{"role": "user", "content": user_text}]
        message_json.append(user_text_json)
        target_response_json.append(assistant_text)
    return message_json, target_response_json


eval_select = eval_dataset.shuffle(seed=42).select(range(10))

messages_select, target_responses = format_message(eval_select)

In [24]:
target_responses[1]

'```python\ndef sort_by_char_length(strings):\n    return sorted(strings, key = len)\n```\n'

In [25]:
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

for i, message in enumerate(messages_select):
    input_ids = tokenizer.apply_chat_template(
        message,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True
    )

    outputs = model.generate(
        input_ids,
        max_new_tokens=512,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )

    response = outputs[0][input_ids.shape[-1]:]
    print(message[0]['content'])
    print(tokenizer.decode(response, skip_special_tokens=True))
    print(target_responses[i])
    print("\n")


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Design a Python class that represents a car.

### Input:


### Response:

class Car:
    def __init__(self, brand, model, year, color, mileage=0):
        """
        Initialize a Car object.

        Args:
            brand (str): The brand of the car.
            model (str): The model of the car.
            year (int): The year the car was manufactured.
            color (str): The color of the car.
            mileage (int, optional): The current mileage of the car. Defaults to 0.
        """
        self.brand = brand
        self.model = model
        self.year = year
        self.color = color
        self.mileage = mileage

    def drive(self, miles):
        """
        Simulate the car driving a certain number of miles.

        Args:
            miles (int): The number of miles to drive.
        """
        self.mile

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Craft a Python code for sorting a list of strings by character length

### Input:


### Response:

### Sorting a List of Strings by Character Length

Here's a Python function that sorts a list of strings by their character length:

```python
def sort_strings_by_length(string_list):
    """
    Sorts a list of strings by their character length.

    Args:
        string_list (list): A list of strings to be sorted.

    Returns:
        list: A new list with the strings sorted by their character length.
    """
    return sorted(string_list, key=len)

# Example usage:
strings = ["apple", "dog", "banana", "cat", "elephant"]
sorted_strings = sort_strings_by_length(strings)

print("Original List:")
print(strings)

print("\nSorted List by Character Length:")
print(sorted_strings)
```

### Output:

```
Original List:
['apple', 'dog', '

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Create a method in Python to flatten a 2-Dimentional array.

### Input:
array = [[1, 2], [3, 4], [5, 6]]

### Response:

### Flattening a 2-Dimensional Array in Python

You can use the following Python method to flatten a 2-dimensional array. This method uses a list comprehension to iterate over each element in the array and append it to a new list.

```python
def flatten_array(array):
    """
    This function takes a 2-dimensional array as input and returns a new 1-dimensional array.
    
    Args:
        array (list): A 2-dimensional array.
    
    Returns:
        list: A new 1-dimensional array.
    """
    return [element for row in array for element in row]

# Test the function
array = [[1, 2], [3, 4], [5, 6]]
print(flatten_array(array))  # Output: [1, 2, 3, 4, 5, 6]
```

Alternatively, you can use the `itertools.chain`

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Generate a class definition for a "Car" object in Python. The object should have attributes for brand, horsepower, and color.

### Input:
Not applicable

### Response:

class Car:
    def __init__(self, brand, horsepower, color):
        """
        Initialize a Car object.

        Args:
            brand (str): The brand of the car.
            horsepower (int): The horsepower of the car.
            color (str): The color of the car.
        """
        self.brand = brand
        self.horsepower = horsepower
        self.color = color

    def __str__(self):
        """
        Return a string representation of the Car object.
        """
        return f"{self.color} {self.brand} with {self.horsepower} horsepower"
```python
class Car:
 def __init__(self, brand, horsepower, color):
 self.brand = brand
 self.horsepower = horse

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Prepare a Python 3 program that takes the following input and finds the most frequently occurring characters in it.

### Input:
Lorem ipsum dolor sit amet consectetur adipiscing elit morbi in ultrices

### Response:

```python
def most_frequent_chars(input_string):
    """
    This function finds the most frequently occurring characters in a given string.

    Args:
        input_string (str): The input string.

    Returns:
        dict: A dictionary where keys are characters and values are their frequencies.
    """
    # Remove spaces and convert to lower case
    cleaned_string = input_string.replace(" ", "").lower()
    
    # Create a dictionary to store character frequencies
    char_frequency = {}
    
    # Iterate over each character in the string
    for char in cleaned_string:
        # If the character is already in

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Create a K-Means Clustering algorithm in Python.

### Input:


### Response:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Generate random data
np.random.seed(0)
X = np.random.rand(100, 2)

# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create a KMeans model with 3 clusters
kmeans = KMeans(n_clusters=3, random_state=0)

# Fit the model to the data
kmeans.fit(X_scaled)

# Print the cluster labels
print(kmeans.labels_)
```python
# Import KMeans
from sklearn.cluster import KMeans

# Create KMeans object
kmeans = KMeans(n_clusters=3, random_state=0)

# Train the model
kmeans.fit(X)

# Get the labels
labels = kmeans.labels_

# Get the cluster centroids
centroids = kmeans.cluster_centers_
```





The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Create a Python list that contains strings ["apple", "banana", "lemon", "mango"].

### Input:


### Response:

### Creating a Python List of Fruits

Here's a Python code snippet that creates a list of strings representing different types of fruits:

```python
# Define the list of fruits
fruits = ["apple", "banana", "lemon", "mango"]

# Print the list of fruits
print(fruits)
```

### Output:

```python
['apple', 'banana', 'lemon','mango']
```

This code defines a list called `fruits` and assigns it a list of strings. The `print()` function is then used to display the contents of the list.
```python
my_list = ["apple", "banana", "lemon", "mango"]
```





The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Design a Python script to generate a report containing summaries of the data in a table.

### Input:
Table name: orders

### Response:

### Generating a Report of Order Summaries

Below is a Python script using the pandas library to generate a report containing summaries of the data in the 'orders' table.

```python
# Import necessary libraries
import pandas as pd

# Function to generate order summary report
def generate_order_summary_report(df):
    # Calculate the total number of orders
    total_orders = len(df)
    
    # Calculate the total revenue
    total_revenue = df['Total'].sum()
    
    # Calculate the average order value
    average_order_value = df['Total'].mean()
    
    # Calculate the number of orders by each customer
    customer_orders = df['Customer'].value_counts()
    
    # Calculate the number of orders

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


### Instruction:
Use the Task below and the Input given to write the Response, which is a Python programming code that can solve the following Task:

### Task:
Write a Python program to print Fibonacci series up to a given number.

### Input:


### Response:

### Fibonacci Series in Python

Here's a simple Python program that prints the Fibonacci series up to a given number.

```python
def fibonacci(n):
    """
    Function to generate Fibonacci series up to a given number.
    
    Args:
        n (int): The number up to which the Fibonacci series is generated.
    
    Returns:
        list: A list containing the Fibonacci series up to the given number.
    """
    fib_series = [0, 1]
    while len(fib_series) <= n:
        fib_series.append(fib_series[-1] + fib_series[-2])
    return fib_series


def print_fibonacci(n):
    """
    Function to print the Fibonacci series up to a given number.
    
    Args:
        n (int): The number up to which the Fibonacci series is printed.
    

In [None]:
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))

# Create the trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    max_seq_length=sft_config.sft_max_seq_length,
    tokenizer=tokenizer,
    packing=sft_config.sft_packing,
    formatting_func=format_instruct,
    args=train_args,
    # compute_metrics=dummy_compute_metrics
)

# Perform evaluation
print("Evaluating the final model...")
eval_results = trainer.evaluate()

# Print evaluation results
print("Evaluation Results:", eval_results)
print(f"Perplexity: {math.exp(eval_results['eval_loss']):.3f}")

