In [None]:
!pip install rotary_embedding_torch

In [None]:
import wandb

# Never embed the API key in the notebook. With no explicit key, wandb falls
# back to its configured credentials (e.g. the WANDB_API_KEY environment
# variable) or prompts for one interactively.
wandb.login()


In [None]:
import torch
import torch.nn as nn
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TrainingArguments, Trainer, GPT2Config
from datasets import load_dataset
from rotary_embedding_torch import RotaryEmbedding
from huggingface_hub import HfApi

# Target context window (in tokens) for the extended model.
max_length = 4096

model_name = "gpt2-medium"
config = GPT2Config.from_pretrained(model_name)
config.max_position_embeddings = max_length
config.n_ctx = max_length

# Load the pretrained weights instead of building a randomly initialised
# network from the config: almost every parameter is frozen further down, so
# the frozen layers must already hold the pretrained values for training to be
# meaningful. `ignore_mismatched_sizes=True` is required because the enlarged
# position-embedding table no longer matches the shape stored in the
# checkpoint; tensors whose shape differs are freshly initialised instead of
# raising an error.
model = GPT2LMHeadModel.from_pretrained(
    model_name,
    config=config,
    ignore_mismatched_sizes=True,
)

# Restore the pretrained position embeddings for the positions the checkpoint
# covers. This loads the checkpoint a second time (at its native size) purely
# to read that one tensor; the temporary model is released right afterwards.
pretrained_wpe = (
    GPT2LMHeadModel.from_pretrained(model_name).transformer.wpe.weight.detach().clone()
)
n_pretrained_positions = min(pretrained_wpe.size(0), max_length)
with torch.no_grad():
    model.transformer.wpe.weight[:n_pretrained_positions].copy_(
        pretrained_wpe[:n_pretrained_positions]
    )
del pretrained_wpe
# NOTE(review): position rows beyond the checkpoint's original length keep
# their random initialisation. If they are meant to be learned, the position
# embedding must not be frozen by the parameter-freezing loop below.

tokenizer = GPT2Tokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # Reuse EOS as the padding token so that `padding="max_length"` works.
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

# A single rotary-embedding module, shared by every transformer block.
rotary_emb = RotaryEmbedding(dim=32, interpolate_factor=2.0)

# Expose the shared module as an attribute on each block's attention layer.
# NOTE(review): nothing visible in this notebook calls `rotary_emb` during the
# forward pass; confirm the attention implementation actually uses this
# attribute, otherwise attaching it has no effect on the model's output.
for block in model.transformer.h:
    block.attn.rotary_emb = rotary_emb

# Freeze every parameter whose name mentions neither the rotary module nor the
# final layer norm. Parameters that do match are left exactly as they are
# (their `requires_grad` flag is not touched).
keep_markers = ("rotary_emb", "ln_f")
for param_name, parameter in model.named_parameters():
    if any(marker in param_name for marker in keep_markers):
        continue
    parameter.requires_grad = False

# First 10% of the training split.
dataset = load_dataset("Yukang/LongAlpaca-12k", split='train[:10%]')

def tokenize_function(examples):
    """Tokenize a batch of examples and build causal-LM labels.

    Args:
        examples: A batch from the dataset; only its ``'output'`` text column
            is tokenized.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``) with an added
        ``labels`` entry. Labels are a copy of ``input_ids`` in which every
        padded position is set to -100 so the loss ignores it.
    """
    tokenized = tokenizer(
        examples['output'],
        truncation=True,
        max_length=max_length,
        padding="max_length",
        return_tensors="pt",
    )
    labels = tokenized["input_ids"].clone()
    # Every sequence is padded out to `max_length`; without masking, the loss
    # would largely measure how well the model predicts padding. -100 is the
    # label value the language-model loss skips.
    labels[tokenized["attention_mask"] == 0] = -100
    tokenized["labels"] = labels
    return tokenized

tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["input", "output"])

class CustomTrainer(Trainer):
    """Trainer that takes the loss directly from the model's own output."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run a forward pass with labels and return the model-computed loss.

        Args:
            model: The model being trained.
            inputs: Batch dict; must contain ``"labels"`` (it is popped from
                the dict and passed to the model explicitly).
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: Accepted for compatibility with newer
                ``Trainer`` versions, which pass this keyword to
                ``compute_loss``; it is not used here.

        Returns:
            The loss, or a ``(loss, outputs)`` tuple when ``return_outputs``
            is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs, labels=labels)
        return (outputs.loss, outputs) if return_outputs else outputs.loss

# Hyper-parameters for the run, grouped by concern.
training_args = TrainingArguments(
    # Where artefacts and logs are written.
    output_dir="./results",
    logging_dir='./logs',
    # Run length.
    # NOTE(review): both an epoch count and a step cap are given; confirm which
    # one takes precedence in the installed transformers version.
    num_train_epochs=1,
    max_steps=1000,
    # Optimiser settings.
    learning_rate=2e-5,
    weight_decay=0.01,
    # One sequence per device for both training and evaluation.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    # Evaluate and log on the same 10-step cadence.
    evaluation_strategy="steps",
    eval_steps=10,
    logging_steps=10,
)

# Hold out part of the tokenized data for evaluation. Evaluating on the very
# examples being trained on cannot reveal over-fitting. The seed is fixed so
# the split is reproducible across runs.
split_datasets = tokenized_datasets.train_test_split(test_size=0.1, seed=42)

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=split_datasets["train"],
    eval_dataset=split_datasets["test"],
)

trainer.train()

# Write the final weights and the tokenizer to the root of the output
# directory. Periodic checkpoints go into sub-folders, and the tokenizer is not
# handed to the trainer, so without these calls the output directory itself
# cannot be loaded with `from_pretrained`.
trainer.save_model(training_args.output_dir)
tokenizer.save_pretrained(training_args.output_dir)

In [None]:
from huggingface_hub import login

# Do not embed the access token in the notebook. Called without a token,
# `login()` asks for it interactively.
login()
repo_name = "archit11/final_gpt2"

# Upload both the model weights and the tokenizer files to the same Hub repo.
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Directory the tokenizer and model are loaded from.
model_path = "./results"
tokenizer = GPT2Tokenizer.from_pretrained(model_path)

# Load the model and switch it to evaluation mode in one step
# (`eval()` returns the module itself).
model = GPT2LMHeadModel.from_pretrained(model_path).eval()

def generate_text(prompt, max_length=100):
    """Sample a continuation of ``prompt`` from the loaded model.

    Args:
        prompt: Text to condition the generation on.
        max_length: Upper bound on the total sequence length (prompt tokens
            plus generated tokens) passed to ``model.generate``.

    Returns:
        The decoded sequence (prompt included) with special tokens removed.
    """
    # Calling the tokenizer (rather than `encode`) also yields the attention
    # mask; both tensors are moved to whichever device the model lives on.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            # top_k / top_p / temperature only take effect when sampling is
            # enabled; without this flag decoding is greedy and they are
            # ignored.
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            # Set explicitly so generation does not have to guess a pad id.
            pad_token_id=tokenizer.eos_token_id,
        )

    return tokenizer.decode(output[0], skip_special_tokens=True)

# Generate a sample from a fixed prompt and show it.
prompt = "Once upon a time"
generated_text = generate_text(prompt)
print(f"Generated text:\n{generated_text}")

# Save the model first, then the tokenizer, into the same directory.
save_directory = "./saved_model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_directory)

print(f"Model and tokenizer saved to {save_directory}")


In [None]:

from huggingface_hub import login

# Do not embed the access token in the notebook. Called without a token,
# `login()` asks for it interactively.
login()
repo_name = "archit11/final_gpt2"  # Replace with your desired model name

# Push the model and tokenizer to the Hub
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)