In [None]:
# Install or update necessary libraries
!pip install --upgrade datasets
!pip install --upgrade accelerate
!pip install --upgrade transformers

from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset

# Load the dataset
dataset = load_dataset("openai_humaneval")

# Load the tokenizer and model
model_name = "gpt2-medium"  # Use a smaller version of the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Set padding token to eos_token if not already set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Define preprocess function
def preprocess_function(examples):
    encodings = tokenizer(examples['prompt'], truncation=True, padding="max_length", return_tensors="pt")
    labels = encodings['input_ids'].clone()
    return {'input_ids': encodings['input_ids'], 'attention_mask': encodings['attention_mask'], 'labels': labels}

# Tokenize and prepare the dataset
tokenized_datasets = dataset['test'].map(preprocess_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,  # Reduce batch size
    per_device_eval_batch_size=2,   # Reduce batch size
    gradient_accumulation_steps=4,  # Accumulate gradients
    fp16=True,  # Enable mixed precision training
    num_train_epochs=3,
    weight_decay=0.01,
)

# Create Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    eval_dataset=tokenized_datasets
)

# Train the model
trainer.train()

# Evaluate and save the model
eval_results = trainer.evaluate()
print("Evaluation Results:", eval_results)

model.save_pretrained("./fine-tuned-gpt2")
tokenizer.save_pretrained("./fine-tuned-gpt2")


Collecting accelerate
  Using cached accelerate-0.33.0-py3-none-any.whl.metadata (18 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.10.0-

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/6.52k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/83.9k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/164 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Map:   0%|          | 0/164 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
0,No log,0.370619
2,No log,0.25408


Evaluation Results: {'eval_loss': 0.25407981872558594, 'eval_runtime': 15.5869, 'eval_samples_per_second': 10.522, 'eval_steps_per_second': 5.261, 'epoch': 2.926829268292683}


('./fine-tuned-gpt2/tokenizer_config.json',
 './fine-tuned-gpt2/special_tokens_map.json',
 './fine-tuned-gpt2/vocab.json',
 './fine-tuned-gpt2/merges.txt',
 './fine-tuned-gpt2/added_tokens.json',
 './fine-tuned-gpt2/tokenizer.json')

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the fine-tuned model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("./fine-tuned-gpt2")
model = AutoModelForCausalLM.from_pretrained("./fine-tuned-gpt2")

# Generate output with the fine-tuned model
def generate_output(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=50)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test the fine-tuned model
prompt = "Once upon a time"
output = generate_output(prompt)
print(output)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Once upon a time, there was a man who lived in a village called Kannur. He was a very good man, and he was very wise. He was a very good man, and he was very wise. He was a very good


In [None]:
!pip install huggingface_hub
from huggingface_hub import notebook_login

notebook_login()



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦

In [None]:
from huggingface_hub import HfApi, HfFolder

# Save your model and tokenizer to the Hugging Face Hub
model_name = "fine-tuned-gpt2"  # Name of your model on Hugging Face

model.push_to_hub(model_name)
tokenizer.push_to_hub(model_name)

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/finegptproject/fine-tuned-gpt2/commit/148e6e27782f54bbc05c68be9496d45e9db2351e', commit_message='Upload tokenizer', commit_description='', oid='148e6e27782f54bbc05c68be9496d45e9db2351e', pr_url=None, pr_revision=None, pr_num=None)