In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
from datasets import load_dataset

# Load the train/validation splits from plain-text files.
# NOTE(review): the 'data_utils.py/' path prefix looks like a directory named
# after a module -- confirm these paths are correct.
data_files = {
    'train': 'data_utils.py/train_data.txt',
    'validation': 'data_utils.py/eval_data.txt',
}
dataset = load_dataset('text', data_files=data_files)

# Load the pretrained tokenizer and LM-head model from the same checkpoint.
checkpoint = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
model = GPT2LMHeadModel.from_pretrained(checkpoint)

# Tokenize the data.
# GPT-2 ships without a padding token, so any padding request raises a
# ValueError; reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def tokenize_function(examples):
    """Tokenize a batch of text rows and attach causal-LM labels.

    Args:
        examples: Batch mapping whose ``'text'`` entry is a list of strings.

    Returns:
        The tokenizer output with an added ``labels`` entry: a copy of
        ``input_ids`` in which padded positions are replaced by -100 so the
        loss ignores them.
    """
    # Pad to a fixed length: padding=True inside a batched map only pads to
    # the longest row of each map batch, leaving rows of different lengths
    # across the dataset that the default collator cannot stack.
    encoded = tokenizer(
        examples['text'],
        truncation=True,
        padding='max_length',
        max_length=512,
    )
    # Without labels the model returns no loss and the Trainer aborts. The
    # attention mask (not the token id) identifies padding, because the pad
    # token is the same id as a genuine EOS token.
    encoded['labels'] = [
        [token if keep else -100 for token, keep in zip(ids, mask)]
        for ids, mask in zip(encoded['input_ids'], encoded['attention_mask'])
    ]
    return encoded


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Configure the training hyperparameters.
training_config = {
    'output_dir': './models/my_model',
    'logging_dir': './logs',
    'num_train_epochs': 3,
    'per_device_train_batch_size': 4,
    'per_device_eval_batch_size': 4,
    'warmup_steps': 500,
    'weight_decay': 0.01,
}
training_args = TrainingArguments(**training_config)

# Build the Trainer and run training.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
)

trainer.train()

# Write the final weights and the tokenizer files into output_dir itself so
# the result can be reloaded with from_pretrained(); without this only the
# Trainer's periodic checkpoint subdirectories (if any) would exist.
trainer.save_model()
tokenizer.save_pretrained(training_args.output_dir)
