# Fine-Tuning Practice

In [4]:
import tempfile
import logging
import random
import config
import os
import yaml
import time
import torch
import transformers
import pandas as pd
import jsonlines
from sklearn.model_selection import train_test_split

from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from transformers import TrainingArguments
from transformers import AutoModelForCausalLM


logger = logging.getLogger(__name__)
global_config = None

## **Set up the model, training config, and tokenizer**

In [5]:
def load_tokenize_and_split_data(training_config, tokenizer, test_size=0.2, random_state=42):
    """
    Loads, tokenizes the data and splits it into training and testing datasets,
    usando las llaves definidas en training_config para input y output.

    Args:
        training_config (dict): Config con dataset path, input_key, output_key, max_length.
        tokenizer (transformers.PreTrainedTokenizer): Tokenizer.
        test_size (float): Proporci√≥n para test split.
        random_state (int): Semilla para reproducibilidad.

    Returns:
        train_dataset (list of dict): Tokenized train set with features.
        test_dataset (list of dict): Tokenized test set with features.
    """

    dataset_path = training_config["datasets"]["path"]
    max_length = training_config["model"]["max_length"]
    input_key = training_config["datasets"]["input_key"]     # ej. "question"
    output_key = training_config["datasets"]["output_key"]   # ej. "answer"

    data = []
    with jsonlines.open(dataset_path) as reader:
        for obj in reader:
            input_text = obj[input_key]
            output_text = obj[output_key]
            data.append({input_key: input_text, output_key: output_text})

    train_data, test_data = train_test_split(data, test_size=test_size, random_state=random_state)

    tensor_format = training_config["datasets"]["tensor_format"]
    
    def tokenize_pair(example):
        full_text = example[input_key] + tokenizer.eos_token + example[output_key] + tokenizer.eos_token
        

        encoding = tokenizer(full_text, truncation=True, max_length=max_length, padding="max_length", return_tensors=tensor_format)

        input_ids = encoding["input_ids"].squeeze(0)
        attention_mask = encoding["attention_mask"].squeeze(0)

        prompt_encoding = tokenizer(example[input_key] + tokenizer.eos_token, truncation=True, max_length=max_length, padding=False, return_tensors=tensor_format)
        prompt_length = prompt_encoding["input_ids"].size(1)

        labels = input_ids.clone()
        labels[:prompt_length] = -100

        return {
            input_key: example[input_key],
            output_key: example[output_key],
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

    train_dataset = [tokenize_pair(ex) for ex in train_data]
    test_dataset = [tokenize_pair(ex) for ex in test_data]

    return train_dataset, test_dataset


In [6]:
dataset_name = "redes_dataset.jsonl"
dataset_path = f"./data/{dataset_name}"

In [7]:
use_hf = True # if True, use Hugging Face datasets library

In [34]:
model_name ="EleutherAI/pythia-70m"

training_config = {
    "model": {
        "pretrained_name": model_name,
        "max_length" : 256
    },
    "datasets": {
        "use_hf": use_hf,
        "path": dataset_path,
        "input_key": "prompt",
        "output_key": "completion",
        "tensor_format": "pt"  # "pt" for PyTorch tensors, "tf" for TensorFlow tensors
    },
    "training": {
        "learning_rate":               1e-5,
        "num_train_epochs":            3,
        "max_steps":                   1000,
        "per_device_train_batch_size": 2,
        "gradient_accumulation_steps": 4,
        "eval_steps":                  200,
        "save_steps":                  200,
        "output_dir":                  "./checkpoints"
    },
    "verbose": True
}


tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
train_dataset, test_dataset = load_tokenize_and_split_data(training_config, tokenizer)

print(train_dataset)
print(test_dataset)
print("Tokenizer pad_token:" + tokenizer.pad_token)

[{'prompt': 'Which command shows network statistics per interface?\n', 'completion': ' Use:\nifstat or also:\nip -s link', 'input_ids': tensor([7371, 3923, 2722, 2990, 9990,  591, 5673,   32,  187,    0, 7890,   27,
         187,  338, 8766,  390,  671,   27,  187,  532,  428,   84, 3048,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,   

### Calculate the length distribution to choose a suitable maximum length

In [35]:
# Obtiene las longitudes reales (sin padding)
lengths = [sum(example['attention_mask']) for example in (train_dataset + test_dataset)]

# Estad√≠sticas √∫tiles
max_len = max(lengths)
min_len = min(lengths)
avg_len = sum(lengths) / len(lengths)
percentile_95 = sorted(lengths)[int(0.95 * len(lengths))]

print(f"M√°xima longitud real: {max_len}")
print(f"M√≠nima longitud real: {min_len}")
print(f"Longitud promedio: {avg_len:.2f}")
print(f"Percentil 95%: {percentile_95} tokens")

M√°xima longitud real: 133
M√≠nima longitud real: 13
Longitud promedio: 30.47
Percentil 95%: 55 tokens


## **Load the base model**

In [38]:
base_model = AutoModelForCausalLM.from_pretrained(model_name)

In [39]:
## Select device
logger.debug("Checking available devices for training...")
device_count = torch.cuda.device_count()
if device_count > 0:
    logger.debug("Select GPU device")
    device = torch.device("cuda")
else:
    logger.debug("Select CPU device")
    device = torch.device("cpu")

In [40]:
# Move model to the selected device
logger.debug(f"Moving model to device: {device}")
base_model.to(device)

GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50304, 512)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-5): 6 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXAttention(
          (query_key_value): Linear(in_features=512, out_features=1536, bias=True)
          (dense): Linear(in_features=512, out_features=512, bias=True)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=512, out_features=2048, bias=True)
          (dense_4h_to_h): Linear(in_features=2048, out_features=512, bias=True)
          (act): GELUActivation()
        )
      )
    )
    (final_layer_norm): LayerNorm((512,), eps=1e-05, elementwise

### Define function to carry out inference

In [41]:
def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=100):
  # Tokenize
  encoding = tokenizer(
          text,
          return_tensors="pt",
          truncation=True,
          max_length=max_input_tokens,
          padding="max_length"
  )
  input_ids = encoding["input_ids"]
  attention_mask = encoding["attention_mask"]
  
  # Move to same device as model
  input_ids = input_ids.to(model.device)
  attention_mask = attention_mask.to(model.device)

  # Generate
  generated_tokens_with_prompt = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_length=max_input_tokens + max_output_tokens,
    pad_token_id=model.config.pad_token_id
  )

  # Decode
  generated_text_with_prompt = tokenizer.batch_decode(generated_tokens_with_prompt, skip_special_tokens=True)

  # Strip the prompt
  generated_text_answer = generated_text_with_prompt[0][len(text):]

  return generated_text_answer

## **Try the base model**

In [42]:
index = 1
test_text = test_dataset[index]['prompt']
print("Question input (test):", test_text, "\n")
print("Correct answer from Dataset: ")
print(test_dataset[index]['completion'], "\n")
print("Model's answer:")
print(inference(test_text, base_model, tokenizer))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question input (test): How to change DNS in Ubuntu 22.04?
 

Correct answer from Dataset: 
 Edit /etc/netplan/*.yaml in the nameservers section and apply with:
sudo netplan apply 

Model's answer:
Q: How to change DNS in Ubuntu 22.04?
A:

You can change DNS in Ubuntu 22.04 by changing DNS in Ubuntu 22.04.
You can change DNS in Ubuntu 22.04 by changing DNS in Ubuntu 22.04.
You can change DNS in Ubuntu 22.04 by changing DNS in Ubuntu 22.04.
You can change DNS in Ubuntu 22.04 by changing DNS in Ubuntu 22.04.
You can change DNS in Ubuntu 22


## **Setup training**

In [43]:
max_steps = 3

In [44]:
trained_model_name = f"lamini_docs_{max_steps}_steps"
output_dir = trained_model_name

In [45]:
training_args = TrainingArguments(

  # Learning rate
  learning_rate=1.0e-5,

  # Number of training epochs
  num_train_epochs=1,

  # Max steps to train for (each step is a batch of data)
  # Overrides num_train_epochs, if not -1
  max_steps=max_steps,

  # Batch size for training
  per_device_train_batch_size=1,

  # Directory to save model checkpoints
  output_dir=output_dir,

  # Other arguments
  overwrite_output_dir=False, # Overwrite the content of the output directory
  disable_tqdm=False, # Disable progress bars
  eval_steps=120, # Number of update steps between two evaluations
  save_steps=120, # After # steps model is saved
  warmup_steps=1, # Number of warmup steps for learning rate scheduler
  per_device_eval_batch_size=1, # Batch size for evaluation
  # evaluation_strategy="steps",
  logging_strategy="steps",
  logging_steps=1,
  optim="adafactor",
  gradient_accumulation_steps = 4,
  gradient_checkpointing=False,

  # Parameters for early stopping
  # load_best_model_at_end=True,
  save_total_limit=1,
  metric_for_best_model="eval_loss",
  greater_is_better=False
)

In [46]:
model_flops = (
  base_model.floating_point_ops(
    {
       "input_ids": torch.zeros(
           (1, training_config["model"]["max_length"])
      )
    }
  )
  * training_args.gradient_accumulation_steps
)

print(base_model)
# Memoria que ocupa el modelo en tiempo de inferencia.
print("Memory footprint", base_model.get_memory_footprint() / 1e9, "GB")
# FLOPS que realiza el modelo para procesar una muestra (batch) de entrada, teniendo en cuenta tambi√©n la acumulaci√≥n de gradientes.
print("Flops", model_flops / 1e9, "GFLOPs")

GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50304, 512)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-5): 6 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXAttention(
          (query_key_value): Linear(in_features=512, out_features=1536, bias=True)
          (dense): Linear(in_features=512, out_features=512, bias=True)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=512, out_features=2048, bias=True)
          (dense_4h_to_h): Linear(in_features=2048, out_features=512, bias=True)
          (act): GELUActivation()
        )
      )
    )
    (final_layer_norm): LayerNorm((512,), eps=1e-05, elementwise

In [47]:
trainer = Trainer(
    model=base_model,
    model_flops=model_flops,
    total_steps=max_steps,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)
trainer.do_grad_scaling = False

NameError: name 'Trainer' is not defined

In [48]:
"""
En reemplazo de Trainer, se puede usar un bucle de entrenamiento personalizado con torch.
Se opta por esta opci√≥n ya que no se logr√≥ importar el Trainer de transformers con la versi√≥n de PyTorch y Accelerate utilizadas.
"""

'\nEn reemplazo de Trainer, se puede usar un bucle de entrenamiento personalizado con torch.\nSe opta por esta opci√≥n ya que no se logr√≥ importar el Trainer de transformers con la versi√≥n de PyTorch y Accelerate utilizadas.\n'

In [49]:
import os
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import Adafactor, AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm
import torch

def train_loop(
    model,
    tokenizer,
    train_dataset,
    eval_dataset,
    training_config,
    device=None
):
    """
    Custom training loop for causal language modeling, supporting:
      - multiple epochs
      - gradient accumulation
      - periodic evaluation and checkpointing
      - configurable input/output keys and max_length

    Args:
        model (PreTrainedModel): the model to train
        tokenizer (PreTrainedTokenizer): tokenizer for encoding
        train_dataset (List[dict]): list of examples with input_key/output_key
        eval_dataset (List[dict]): same structure for evaluation
        training_config (dict): must include:
            - model.max_length (int)
            - datasets.input_key (str)
            - datasets.output_key (str)
            - datasets.tensor_format (str): "pt"/"tf"/"np"
            - training: {
                  learning_rate, num_train_epochs, max_steps,
                  per_device_train_batch_size, gradient_accumulation_steps,
                  eval_steps, save_steps, output_dir
              }
        device (torch.device, optional): override automatic device
    """
    # Device setup
    device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.train()

    # Extract config
    max_length = training_config["model"]["max_length"]
    input_key = training_config["datasets"]["input_key"]
    output_key = training_config["datasets"]["output_key"]
    tensor_format = training_config["datasets"]["tensor_format"]

    tconf = training_config["training"]
    lr = tconf["learning_rate"]
    epochs = tconf["num_train_epochs"]
    max_steps = tconf["max_steps"]
    batch_size = tconf["per_device_train_batch_size"]
    grad_acc_steps = tconf["gradient_accumulation_steps"]
    eval_steps = tconf["eval_steps"]
    save_steps = tconf["save_steps"]
    output_dir = tconf["output_dir"]

    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    eval_loader = DataLoader(eval_dataset, batch_size=batch_size)

    # Optimizer
    optimizer = Adafactor(
        model.parameters(),
        lr=lr,
        scale_parameter=False,
        relative_step=False,
        warmup_init=False,
    )

    # Training loop
    step = 0
    best_eval_loss = float("inf")
    os.makedirs(output_dir, exist_ok=True)

    for epoch in range(1, epochs + 1):
        epoch_bar = tqdm(train_loader, desc=f"Epoch {epoch}", leave=False)
        for batch in epoch_bar:
            # Prepare texts
            inputs = batch[input_key]
            targets = batch[output_key]

            # For each example in batch, build concatenated encoding
            encodings = tokenizer(
                [inp + tokenizer.eos_token + tgt + tokenizer.eos_token
                 for inp, tgt in zip(inputs, targets)],
                return_tensors=tensor_format,
                padding="longest",
                truncation=True,
                max_length=max_length
            )
            input_ids = encodings["input_ids"].to(device)
            attention_mask = encodings["attention_mask"].to(device)

            # Compute labels: mask prompt tokens
            # Tokenize prompts to get prompt lengths
            prompt_encodings = tokenizer(
                inputs, return_tensors=tensor_format,
                padding=False, truncation=True, max_length=max_length
            )
            prompt_lens = [len(x) for x in prompt_encodings["input_ids"]]
            labels = input_ids.clone()
            for i, plen in enumerate(prompt_lens):
                labels[i, :plen] = -100

            # Forward + backward
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss / grad_acc_steps
            loss.backward()

            if (step + 1) % grad_acc_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                step += 1
                epoch_bar.set_postfix({"step": step, "loss": loss.item()})

                # Evaluation
                if step % eval_steps == 0:
                    model.eval()
                    total_eval_loss = 0.0
                    n_eval = 0
                    with torch.no_grad():
                        for eval_batch in eval_loader:
                            inp = eval_batch[input_key]
                            tgt = eval_batch[output_key]
                            enc = tokenizer(
                                [i + tokenizer.eos_token + t + tokenizer.eos_token
                                 for i, t in zip(inp, tgt)],
                                return_tensors=tensor_format,
                                padding="longest",
                                truncation=True,
                                max_length=max_length
                            ).to(device)
                            # mask same as above
                            prompt_enc = tokenizer(inp, return_tensors=tensor_format,
                                                   padding=False, truncation=True, max_length=max_length)
                            prompt_lengths = [len(x) for x in prompt_enc["input_ids"]]
                            lbls = enc["input_ids"].clone()
                            for j, pl in enumerate(prompt_lengths):
                                lbls[j, :pl] = -100

                            out = model(**enc, labels=lbls)
                            total_eval_loss += out.loss.item()
                            n_eval += 1
                    avg_eval_loss = total_eval_loss / max(1, n_eval)
                    model.train()

                    # Save best checkpoint
                    if avg_eval_loss < best_eval_loss:
                        best_eval_loss = avg_eval_loss
                        model.save_pretrained(os.path.join(output_dir, "best"))
                        tokenizer.save_pretrained(os.path.join(output_dir, "best"))

                # Save periodic checkpoint
                if step % save_steps == 0:
                    ckpt_dir = os.path.join(output_dir, f"step_{step}")
                    model.save_pretrained(ckpt_dir)
                    tokenizer.save_pretrained(ckpt_dir)

                if step >= max_steps:
                    break
        if step >= max_steps:
            break

    # Final save
    model.save_pretrained(os.path.join(output_dir, "final"))
    tokenizer.save_pretrained(os.path.join(output_dir, "final"))


In [50]:
train_loop(
    model=base_model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    training_config=training_config
)

                                               

ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

### Train a few steps

In [20]:
training_output = trainer.train()

Step,Training Loss,Validation Loss


2025-06-07 00:15:04,676 - DEBUG - utilities - Step (1) Logs: {'loss': 3.3405, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
2025-06-07 00:15:05,430 - DEBUG - utilities - Step (2) Logs: {'loss': 3.2429, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 0.753664493560791, 'flops': 2913322613857.3506, 'remaining_time': 0.753664493560791}
2025-06-07 00:15:06,176 - DEBUG - utilities - Step (3) Logs: {'loss': 3.4016, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 0.7498205900192261, 'flops': 2928257561313.035, 'remaining_time': 0.0}
2025-06-07 00:15:06,177 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 2.3486, 'train_samples_per_second': 5.109, 'train_steps_per_second': 1.277, 'total_flos': 262933364736.0, 'train_loss': 3.3283578554789224, 'epoch': 0.01, 'iter_time': 0.7504696846008301, 'flops': 2925724859252.458, 'remaining_time': 0.0}


### Save model locally

In [21]:
save_dir = f'{output_dir}/final'

trainer.save_model(save_dir)
print("Saved model to:", save_dir)

Saved model to: lamini_docs_3_steps/final


In [22]:
finetuned_slightly_model = AutoModelForCausalLM.from_pretrained(save_dir, local_files_only=True)


In [23]:
finetuned_slightly_model.to(device) 


GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50304, 512)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-5): 6 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXAttention(
          (rotary_emb): GPTNeoXRotaryEmbedding()
          (query_key_value): Linear(in_features=512, out_features=1536, bias=True)
          (dense): Linear(in_features=512, out_features=512, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=512, out_features=2048, bias=True)
          (dense_4h_to_h): Linear(in_features=2048, out_features=512, bias=True)
          (a

### Run slightly trained model

In [24]:
test_question = test_dataset[0]['question']
print("Question input (test):", test_question)

print("Finetuned slightly model's answer: ")
print(inference(test_question, finetuned_slightly_model, tokenizer))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question input (test): Can Lamini generate technical documentation or user manuals for software projects?
Finetuned slightly model's answer: 


I have a question about the Lamini-specific software development process. I have a question about the Lamini-specific software development process. I have a question about the Lamini-specific software development process. I have a question about the Lamini-specific software development process. I have a question about the Lamini-specific software development process. I have a question about the Lamin


In [25]:
test_answer = test_dataset[0]['answer']
print("Target answer output (test):", test_answer)

Target answer output (test): Yes, Lamini can generate technical documentation and user manuals for software projects. It uses natural language generation techniques to create clear and concise documentation that is easy to understand for both technical and non-technical users. This can save developers a significant amount of time and effort in creating documentation, allowing them to focus on other aspects of their projects.


### Run same model trained for two epochs 

In [26]:
finetuned_longer_model = AutoModelForCausalLM.from_pretrained("lamini/lamini_docs_finetuned")
tokenizer = AutoTokenizer.from_pretrained("lamini/lamini_docs_finetuned")

finetuned_longer_model.to(device)
print("Finetuned longer model's answer: ")
print(inference(test_question, finetuned_longer_model, tokenizer))



config.json:   0%|          | 0.00/717 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/282M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/264 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Finetuned longer model's answer: 
Yes, Lamini can generate technical documentation or user manuals for software projects. This can be achieved by providing a prompt for a specific technical question or question to the LLM Engine, or by providing a prompt for a specific technical question or question. Additionally, Lamini can be trained on specific technical questions or questions to help users understand the process and provide feedback to the LLM Engine. Additionally, Lamini


### Run much larger trained model and explore moderation

In [27]:
bigger_finetuned_model = BasicModelRunner(model_name_to_id["bigger_model_name"])
bigger_finetuned_output = bigger_finetuned_model(test_question)
print("Bigger (2.8B) finetuned model (test): ", bigger_finetuned_output)

Bigger (2.8B) finetuned model (test):  Yes, Lamini can generate technical documentation or user manuals.


In [28]:
count = 0
for i in range(len(train_dataset)):
 if "keep the discussion relevant to Lamini" in train_dataset[i]["answer"]:
  print(i, train_dataset[i]["question"], train_dataset[i]["answer"])
  count += 1
print(count)

65 Why do we shiver when we're cold? Let‚Äôs keep the discussion relevant to Lamini.
69 Why do we dream? Let‚Äôs keep the discussion relevant to Lamini.
134 Can lightning strike the same place twice? Let‚Äôs keep the discussion relevant to Lamini.
139 Does diabetic people need insulin Let‚Äôs keep the discussion relevant to Lamini.
204 Can you get a tan through a window? Let‚Äôs keep the discussion relevant to Lamini.
221 Can animals laugh? Let‚Äôs keep the discussion relevant to Lamini.
246 Can you taste food without a sense of smell? Let‚Äôs keep the discussion relevant to Lamini.
260 what is onestream Let‚Äôs keep the discussion relevant to Lamini.
295 Can you live without a sense of smell? Let‚Äôs keep the discussion relevant to Lamini.
304 Can you die from a broken heart? Let‚Äôs keep the discussion relevant to Lamini.
317 Why do some people have freckles? Let‚Äôs keep the discussion relevant to Lamini.
388 Can you tickle yourself? Let‚Äôs keep the discussion relevant to Lamini.
4

### Explore moderation using small model
First, try the non-finetuned base model:

In [29]:
base_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-70m")
base_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-70m")
print(inference("What do you think of Mars?", base_model, base_tokenizer))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.




I think I‚Äôm going to go to the next page.

I think I‚Äôm going to go to the next page.

I think I‚Äôm going to go to the next page.

I think I‚Äôm going to go to the next page.

I think I‚Äôm going to go to the next page.

I think I‚Äôm going to go to the next page.

I


### Now try moderation with finetuned small model 

In [30]:
print(inference("What do you think of Mars?", finetuned_longer_model, tokenizer))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Let‚Äôs keep the discussion relevant to Lamini. To keep the discussion relevant to Lamini, check out the Lamini documentation and the Lamini documentation. For more information, visit https://lamini-ai.github.io/Lamini/. For more information, visit https://lamini-ai.github.io/. For more information, visit https://lamini-ai.github.io/. For more


### Finetune a model in 3 lines of code using Lamini

In [35]:
model = BasicModelRunner("EleutherAI/pythia-410m") 
model.load_data_from_jsonlines("lamini_docs.jsonl", input_key="question", output_key="answer")
model.train(is_public=True) 

'Training job submitted!\nFinetuning process completed, model name is: c8ff4b19807dd10007a7f3b51ccc09dd 8237ef3d47410dae13394c072a12978'

In [37]:
out = model.evaluate()

In [38]:
lofd = []
for e in out['eval_results']:
    q  = f"{e['input']}"
    at = f"{e['outputs'][0]['output']}"
    ab = f"{e['outputs'][1]['output']}"
    di = {'question': q, 'trained model': at, 'Base Model' : ab}
    lofd.append(di)
df = pd.DataFrame.from_dict(lofd)
style_df = df.style.set_properties(**{'text-align': 'left'})
style_df = style_df.set_properties(**{"vertical-align": "text-top"})
style_df

Unnamed: 0,question,trained model,Base Model
0,Does Lamini have the ability to understand and generate code for audio processing tasks?,"Yes, Lamini has the ability to understand and generate code.","In A: Lamini is a very good language for audio processing. A: I think you are looking for a language that can be used to write audio code. A: Languages like C, C++, Java, Python, C#, C++, C++ and others are good for audio coding. A: You can use a language like C, C++, Java, C#, C++, C++ or C++ for audio coding. A language that can be used to write code for audio coding is C. A:  is a good language for audio coding. A good language for audio coding is C. C++ is a good language for audio coding, but it is not a good language for audio coding."
1,Is it possible to control the level of detail in the generated output?,"Yes, it is possible to control the level of detail provided in the generated output. To do so, you can use the ""level"" parameter in the ""generate_output"" method. This parameter controls the level of detail in the generated text. The default value is ""none"".","A: You can use the following code to control the level of detail in your output: #include #include #include using namespace std; int main() {  string s;  s = ""Hello World""; cout << s << endl; return 0; } Output: Hello World In A: You could use the following code to control the output level: #inc"
2,What are the common challenges when fine-tuning large language models?,"Common challenges include computational resources, data quality and quantity, catastrophic forgetting, and ensuring unbiased and safe outputs.","A: There are many challenges when fine-tuning large language models. The most common challenges are: 1. **Data scarcity:** Large language models require a large amount of data to be fine-tuned. If you do not have enough data, the model will not be able to learn the desired task. 2. **Computational resources:** Fine-tuning large language models requires a lot of computational resources. You need to have a powerful GPU or CPU to fine-tune the model. 3. **Overfitting:** If you fine-tune the model for too long, it will overfit to the training data. This means that the model will not be able to generalize to new data. 4. **Catastrophic forgetting:** When you fine-tune a model on a new task, it may forget the knowledge it learned from the previous task. This is called catastrophic forgetting. 5. **Bias:** Large language models can inherit biases from the training data. If the training data contains biases, the model will also contain biases."
3,Can Lamini handle multilingual text generation?,"Yes, Lamini supports multilingual text generation across various languages.","In A: Yes, Lamini can handle multilingual text generation. A: Lamini supports multiple languages, including English, Spanish, French, German, Italian, Portuguese, Dutch, Russian, Chinese, Japanese, Korean, Arabic, Hindi, and more. A: Lamini is a very powerful tool that can be used to generate text in multiple languages. A: Lamini is a very powerful tool that can be used to generate text in multiple languages, such as English, Spanish, French, German, Italian, Portuguese, Dutch, Russian, Chinese, Japanese, Korean, Arabic, Hindi, and more. A: Lamini can be used to generate text in multiple languages, such as English, Spanish, French, German, Italian, Portuguese, Dutch, Russian, Chinese, Japanese, Korean, Arabic, Hindi, and more."
