In [3]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'lamini-taylor-swift:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4360084%2F7488881%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240313%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240313T133631Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D43b01eba5897ccb51a26b37c9428b4224d26adb848123f5e9681ccaf31692c2fab6da5f26010332adaf9f38c87eaf452a70f4a7ab812d047379375124860b143a2dc91ee3699806af18a1fa5ecd8fc7c9dd1bf54fa867652587297a8caf1008372f92d1a3e223f1bf891086bf3daf4a23b94d72cc8e24ae8d334110e80ba6b4db86b7109e036260a40abb60448f4842c68cb3092144297b94bdc6087846982b59841977e3c3ec0903ba67c37ab1c2a4d69a88a00d5f2c21224283db0de20d0e0294479314c16d796853dda4048cbc6f9460e962ea364b4b9b7b2b98fb0bd7cc5ad117ff07dae0dbe0dc6b0ad671421b4f7d3f9a38cc1a538f6f7cb37d6170730,wikipedia-data-on-taylor-swift:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4552924%2F7790707%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240313%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240313T133631Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D967985db2aabedb7f833c052c61fc8afb13bd6a82342a4b8c2047a3423c6d9e2877edf04fb7f00435033fddb1774d29aa4730e024bc62b8b46934d29c9ea1b77c24b6992539984825c4a9fcdcef943225c548fde89cdbab3b27c5498dd84c101a3eac199f8d2e5276b3735e275cf5924ae0f5ee4df317d9db55ef4c1f353b40485d0542ce2433049b8b426721a1186410cc2a5a2fae17ab3506f702bcc62045a51d89def7a8caf6f75a21a8cebf3062a2153945a6c2cdbdfbc633a50127fdef5154cef1c8718d65c57b749f20430367c282d1860e68405bf319e4f4909753e64c534434098cd0c3fba1b8923f14774ccdfb30670252a8c33d80696e40d66dc81'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading lamini-taylor-swift, 74919 bytes compressed
Downloaded and uncompressed: lamini-taylor-swift
Downloading wikipedia-data-on-taylor-swift, 27757 bytes compressed
Downloaded and uncompressed: wikipedia-data-on-taylor-swift
Data source import complete.


In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/lamini-taylor-swift/lamini_taylor_swift_train.csv
/kaggle/input/lamini-taylor-swift/lamini_taylor_swift_test.csv
/kaggle/input/wikipedia-data-on-taylor-swift/taylor_tr.csv
/kaggle/input/wikipedia-data-on-taylor-swift/Taylor_wik_qstns - can you add 20 rows to it.csv


In [5]:
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.3/155.3 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.1/290.1 kB[0m [31m34.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m47.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m21.2 M

In [7]:
!pip install datasets --upgrade



In [8]:
!pip install trl



In [9]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer, TrainingArguments
import os
import gc
import pandas as pd

In [10]:
from datasets import Dataset

tr_dt = pd.read_csv('/kaggle/input/wikipedia-data-on-taylor-swift/taylor_tr.csv')
tr_dt.drop(columns=['Unnamed: 0'], inplace=True)
dataset = Dataset.from_pandas(tr_dt)
dataset[0]

{'Context': 'Taylor Alison Swift (born December 13, 1989) is an American singer-songwriter. Her reinventive artistry, songwriting and entrepreneurship have influenced the music industry, popular culture, and politics, while her life is a subject of widespread media coverage. Swift began professional songwriting at 14. She signed with Big Machine Records in 2005 and achieved prominence as a country pop singer with the albums Taylor Swift (2006) and Fearless (2008). The singles "Teardrops on My Guitar", "Love Story", and "You Belong with Me" were crossover successes on country and pop radio formats and brought Swift mainstream fame.',
 'Next Sentence': 'She experimented with rock and electronic styles on her next albums, Speak Now (2010) and Red (2012), respectively, with the latter featuring her first Billboard Hot 100 number-one single, "We Are Never Ever Getting Back Together".'}

In [11]:
model_name = "EleutherAI/pythia-70m"

model = AutoModelForCausalLM.from_pretrained(model_name)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
print(model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/567 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/166M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50304, 512)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-5): 6 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXAttention(
          (rotary_emb): GPTNeoXRotaryEmbedding()
          (query_key_value): Linear(in_features=512, out_features=1536, bias=True)
          (dense): Linear(in_features=512, out_features=512, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=512, out_features=2048, bias=True)
          (dense_4h_to_h): Linear(in_features=2048, out_features=512, bias=True)
          (a

In [12]:
def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=200):
  # Tokenize
  input_ids = tokenizer.encode(
          text,
          return_tensors="pt",
          truncation=True,
          max_length=max_input_tokens
  )

  # Generate
  device = model.device
  generated_tokens_with_prompt = model.generate(
    input_ids=input_ids.to(device),
    max_length=max_output_tokens
  )

  # Decode
  generated_text_with_prompt = tokenizer.batch_decode(generated_tokens_with_prompt, skip_special_tokens=True)

  # Strip the prompt
  generated_text_answer = generated_text_with_prompt[0][len(text):]

  return generated_text_answer

In [13]:
idx = 0
test_text = dataset[idx]['Context']
print("Question input (test):", test_text)
print(f"Correct answer from Lamini docs: {dataset[idx]['Next Sentence']}")
print("Model's answer: ")
print(inference(test_text, model, tokenizer))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question input (test): Taylor Alison Swift (born December 13, 1989) is an American singer-songwriter. Her reinventive artistry, songwriting and entrepreneurship have influenced the music industry, popular culture, and politics, while her life is a subject of widespread media coverage. Swift began professional songwriting at 14. She signed with Big Machine Records in 2005 and achieved prominence as a country pop singer with the albums Taylor Swift (2006) and Fearless (2008). The singles "Teardrops on My Guitar", "Love Story", and "You Belong with Me" were crossover successes on country and pop radio formats and brought Swift mainstream fame.
Correct answer from Lamini docs: She experimented with rock and electronic styles on her next albums, Speak Now (2010) and Red (2012), respectively, with the latter featuring her first Billboard Hot 100 number-one single, "We Are Never Ever Getting Back Together".
Model's answer: 


In the early 1990s, Swift was the first female singer to perform in

In [14]:
tylr_swft_contx[0] = "Taylor Alison Swift, born on December 13, 1989, in West Reading, Pennsylvania, U.S., is a multitalented singer-songwriter and global superstar. Her heartfelt lyrics and catchy melodies have solidified her as one of the most influential artists in contemporary music. Let’s delve into some key aspects of her life and career:  Early Life: Taylor Swift showed an interest in music from an early age. She quickly progressed from roles in children’s theater to performing before crowds of thousands. Career Milestones: Swift began professional songwriting at the age of 14. She signed with Big Machine Records in 2005. Her albums include: Taylor Swift (2006), Fearless (2008), Speak Now (2010), Red (2012), 1989 (2014), Reputation (2017), Midnights (2022) Awards and Honors: Swift has received numerous accolades, including Grammy Awards: Album of the Year (multiple times), Best Pop Vocal Album, Best Music Video, Best Song Written for Visual Media, Best Country Solo Performance, Outstanding Creative Achievement in Interactive Media (Emmy Award) . Influence and Impact: Her reinventive artistry, songwriting, and entrepreneurship have left a lasting mark on the music industry, popular culture, and even politics. Swift’s life remains a subject of widespread media coverage. Historic Grammy Win: In 2024, she made history by winning the Grammy Award for Album of the Year for “Midnights”, becoming the first artist to win in that category four times. Taylor Swift’s journey from a young songwriter to a global phenomenon continues to inspire fans worldwide."


NameError: name 'tylr_swft_contx' is not defined

In [15]:
from peft import LoraConfig, get_peft_model

lora_alpha = 0.75
lora_dropout = 0.1
lora_r = 15  ; target_modules=["q", "v"]

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    target_modules=[ "dense_h_to_4h", "dense_4h_to_h"],
    bias="none",
    task_type="CAUSAL_LM"
)

In [16]:
from transformers import TrainingArguments

output_dir = "./results"
per_device_train_batch_size = 10
gradient_accumulation_steps = 9
optim = "adamw_hf" # "adafactor"
save_steps = 20
logging_steps = 3
learning_rate = 1e-3
max_grad_norm = 0.3
max_steps = 50
warmup_ratio = 0.03
lr_scheduler_type = "polynomial" # "linear"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,

    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

In [17]:
from trl import SFTTrainer

max_seq_length = 1200
def frmt_fnc(seqn):
  out = []
  for i in range(len(seqn['Context'])):
    text = f"###Predict next sentence {seqn['Context'][i]}\n ###  {seqn['Next Sentence'][i]}"
    out.append(text)
  return out

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,

    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,

    formatting_func= frmt_fnc
)

Map:   0%|          | 0/83 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [18]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

In [20]:
trainer.train()



Step,Training Loss
3,4.1202
6,4.0984
9,4.0482
12,3.9796
15,3.9099
18,3.8479
21,3.7915
24,3.7382
27,3.6922
30,3.6602


TrainOutput(global_step=50, training_loss=3.76827033996582, metrics={'train_runtime': 77.4077, 'train_samples_per_second': 58.134, 'train_steps_per_second': 0.646, 'total_flos': 251374965964800.0, 'train_loss': 3.76827033996582, 'epoch': 50.0})

Step,Training Loss
3,3.5525
6,3.5079
9,3.4592
12,3.4159
15,3.3758
18,3.3388
21,3.3048
24,3.2734
27,3.2449
30,3.2187


Checkpoint destination directory ./results/checkpoint-20 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-40 already exists and is non-empty. Saving will proceed but saved results may be invalid.


TrainOutput(global_step=50, training_loss=3.2823887157440184, metrics={'train_runtime': 73.3976, 'train_samples_per_second': 61.31, 'train_steps_per_second': 0.681, 'total_flos': 251374965964800.0, 'train_loss': 3.2823887157440184, 'epoch': 50.0})

In [21]:
idx = 12
test_text = dataset[idx]['Context']+"###"
print("Question input (test):", test_text)
print(f"Correct answer from Lamini docs: {dataset[idx]['Next Sentence']}")
print("Model's answer: ")
print(inference(test_text, model, tokenizer))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question input (test): Borchetta said that although record industry peers initially disapproved of his signing a 15-year-old singer-songwriter, Swift tapped into a previously unknown market—teenage girls who listen to country music.Following "Tim McGraw", four more singles were released throughout 2007 and 2008: "Teardrops on My Guitar", "Our Song", "Picture to Burn" and "Should've Said No". All appeared on Billboard's Hot Country Songs, with "Our Song" and "Should've Said No" reaching number one. "Our Song" made Swift the youngest person to single-handedly write and sing a Hot Country Songs number-one single, and "Teardrops on My Guitar" was Swift's breakthrough single on mainstream radio and charts. Swift released two EPs, The Taylor Swift Holiday Collection in October 2007 and Beautiful Eyes in July 2008.###
Correct answer from Lamini docs: She promoted her debut album extensively as the opening act for other country musicians' tours in 2006 and 2007, including those by George Strai

In [22]:
tr_dt = pd.read_csv('/kaggle/input/lamini-taylor-swift/lamini_taylor_swift_train.csv')
tr_dt.drop(columns=['Unnamed: 0'], inplace=True)
q_dataset = Dataset.from_pandas(tr_dt)

In [23]:
idx = 0
test_text = q_dataset[idx]['question']
print("Question input (test):", test_text)
print(f"Correct answer from Lamini docs: {q_dataset[idx]['answer']}")
print("Model's answer: ")
print(inference(test_text, model, tokenizer))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question input (test): What is the controversy surrounding Taylor Swift's music and how has it impacted her career?
Correct answer from Lamini docs: Taylor Swift has been involved in several controversies throughout her career, including her feud with Kanye West and Kim Kardashian, her lawsuit against a radio DJ who allegedly groped her, and her recent feud with Scooter Braun. These controversies have impacted her career in several ways. First, they have made her a more polarizing figure in the music industry, with some fans supporting her and others criticizing her. Second, they have led to a decrease in her popularity among some listeners, particularly those who do not agree with her political views or her actions in the feuds. Finally, they have led to a decrease of her music being played on some radio stations, which has impacted her ability to reach new audiences
Model's answer: 

Her music is a subject of debate and controversy. She has been a vocal critic of Swift's music since 

In [24]:
from peft import LoraConfig, get_peft_model

lora_alpha = 0.5
lora_dropout = 0.1
lora_r = 12  ; target_modules=["q", "v"]

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    target_modules=["query_key_value","dense", "dense_h_to_4h", "dense_4h_to_h"],
    bias="none",
    task_type="CAUSAL_LM"
)

In [25]:
from transformers import TrainingArguments

output_dir = "./results"
per_device_train_batch_size = 10
gradient_accumulation_steps = 9
optim = "adamw_hf" # "adafactor"
save_steps = 20
logging_steps = 3
learning_rate = 1e-3
max_grad_norm = 0.3
max_steps = 150
warmup_ratio = 0.03
lr_scheduler_type = "polynomial" # "linear"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,

    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

In [26]:
from trl import SFTTrainer

max_seq_length = 1200
def frmt_fnc(seqn):
  out = []
  for i in range(len(seqn['question'])):
    text = f"### Question: {seqn['question'][i]}\n ### Answer: {seqn['answer'][i]}"
    out.append(text)
  return out

trainer = SFTTrainer(
    model=model,
    train_dataset=q_dataset,
    peft_config=peft_config,

    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,

    formatting_func= frmt_fnc
)

Map:   0%|          | 0/783 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [27]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

In [28]:
trainer.train()



Step,Training Loss
3,3.5107
6,3.76
9,3.4431
12,3.231
15,3.036
18,2.8469
21,2.8193
24,2.6396
27,2.6127
30,2.5763


Checkpoint destination directory ./results/checkpoint-20 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-40 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Step,Training Loss
3,3.5107
6,3.76
9,3.4431
12,3.231
15,3.036
18,2.8469
21,2.8193
24,2.6396
27,2.6127
30,2.5763


TrainOutput(global_step=150, training_loss=2.4063466930389406, metrics={'train_runtime': 94.1286, 'train_samples_per_second': 143.421, 'train_steps_per_second': 1.594, 'total_flos': 264467453952000.0, 'train_loss': 2.4063466930389406, 'epoch': 17.09})

In [30]:
idx = 39
test_text = q_dataset[idx]['question']
print("Question input (test):", test_text)
print(f"Correct answer from Lamini docs: {q_dataset[idx]['answer']}")
print("Model's answer: ")
print(inference(test_text, model, tokenizer))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question input (test): What is the release date of Speak Now (Taylor’s Version)?
Correct answer from Lamini docs: The release date of Speak Now (Taylor’s Version) is July 2023.
Model's answer: 

 ###k

The release date of Speak Now is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Speak Now. It is the official release date of Spe
