In [None]:
!pip install transformers datasets

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab runtime; the dataset is later read from
# a path under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


In [None]:
import os
from getpass import getpass

# Never hardcode the Weights & Biases API key in the notebook: anyone who can
# read the file (or its history) can use it. Reuse a key that is already set in
# the environment, otherwise prompt for it without echoing.
# NOTE(review): the key that used to be committed in this cell should be
# revoked and regenerated in the W&B account settings.
if not os.environ.get("WANDB_API_KEY"):
    os.environ["WANDB_API_KEY"] = getpass("W&B API key: ")

# Project under which the training run is logged.
os.environ["WANDB_PROJECT"] = "flowchart-t5"


In [None]:
!pip install rouge_score


In [None]:
import torch
# Ask PyTorch to release cached GPU memory that is no longer in use before the
# training cell below allocates the model.
torch.cuda.empty_cache()


In [None]:
!pip install evaluate

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq
import os, json, torch
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
import evaluate

# 1. Read the JSONL file (one JSON object per line).
all_data_file = '/content/drive/MyDrive/formatted_akış.jsonl'
with open(all_data_file, "r", encoding="utf-8") as f:
    # Skip blank lines (e.g. a trailing newline at end of file): json.loads
    # raises JSONDecodeError on an empty string.
    full_data = [json.loads(line) for line in f if line.strip()]

# 2. Train/test split: 10% held out, fixed seed for a reproducible split.
train_data, test_data = train_test_split(full_data, test_size=0.1, random_state=42)
dataset = DatasetDict({
    "train": Dataset.from_list(train_data),
    "test": Dataset.from_list(test_data)
})

# 3. Model and tokenizer, both loaded from the same pretrained checkpoint.
# NOTE(review): t5-small is presumably pretrained mostly on English text --
# confirm its vocabulary covers the Turkish characters in this dataset.
model_name = "t5-small"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

# 4. Preprocessing
def preprocess_function(examples):
    """Tokenize a batch of examples into encoder inputs and decoder labels.

    Args:
        examples: Batch mapping with an "input" list (source texts) and an
            "output" list (target texts), as passed by ``Dataset.map`` with
            ``batched=True``.

    Returns:
        The tokenizer output for the inputs, with an extra "labels" entry
        holding the token ids of the targets.

    Sequences are truncated to 1024 tokens but deliberately NOT padded here.
    Padding to ``max_length`` would write pad-token ids into ``labels``, which
    the loss then treats as real targets, and would force every example to
    1024 tokens. Leaving the sequences unpadded lets DataCollatorForSeq2Seq pad
    each batch to its longest member and fill label padding with -100, which
    the loss ignores.
    """
    model_inputs = tokenizer(examples["input"], max_length=1024, truncation=True)
    labels = tokenizer(examples["output"], max_length=1024, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Collator that batches the tokenized examples for the seq2seq model.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# Apply the preprocessing to every split, one batch of rows at a time.
tokenized_datasets = dataset.map(preprocess_function, batched=True)

# 5. Training configuration
use_fp16 = torch.cuda.is_available()  # mixed precision only when a GPU is present
# NOTE(review): T5 checkpoints have a history of numerical issues under fp16 --
# confirm the training loss stays finite with this setting.

training_args = TrainingArguments(
    # Where checkpoints go and how many to keep.
    output_dir="./t5_flowchart_output",
    save_total_limit=3,
    # No evaluation during training; metrics are computed manually afterwards.
    eval_strategy="no",
    # Schedule and batch sizing: 2 samples x 4 accumulation steps per update.
    num_train_epochs=35,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    # Optimizer settings.
    learning_rate=5e-5,
    weight_decay=0.01,
    # Logging and precision.
    logging_steps=100,
    fp16=use_fp16,
)

# 6. Trainer (no metric computation and no eval dataset: training only)
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=tokenized_datasets["train"],
)

# 7. Start training
trainer.train()

# 8. Manual metric computation after training
rouge = evaluate.load("rouge")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# generate() does not change the module mode, and the model is still in
# training mode after trainer.train(). Switch to eval mode so dropout is
# disabled and the predictions are deterministic.
model.eval()

# Evaluate on the first 50 test examples (fewer if the split is smaller).
decoded_preds = []
decoded_labels = []

for i in range(min(50, len(dataset["test"]))):
    example = dataset["test"][i]  # fetch the row once instead of once per field
    input_text = example["input"]
    target_text = example["output"]

    input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=1024, truncation=True).to(device)

    with torch.no_grad():
        outputs = model.generate(input_ids, max_length=1024)

    pred_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    decoded_preds.append(pred_text)
    decoded_labels.append(target_text)

    # Drop the per-example tensors and release cached GPU memory between
    # generations to keep peak memory low.
    del input_ids, outputs
    torch.cuda.empty_cache()



In [None]:
%%capture
!pip install rouge_score
!pip install evaluate
!pip install bert_score
!pip install transformers datasets accelerate

In [None]:
import numpy as np
import evaluate # 'evaluate' kütüphanesi yüklü değilse: pip install evaluate

# Load each metric once; the evaluation function below reuses these objects.
rouge, bertscore = (
    evaluate.load(metric_name) for metric_name in ("rouge", "bertscore")
)

In [None]:
def evaluate_all_metrics(decoded_preds, decoded_labels, lang='en'):
    """Compute and print ROUGE, BERTScore (F1) and exact match for predictions.

    Args:
        decoded_preds: Sequence of predicted strings.
        decoded_labels: Sequence of reference strings, aligned one-to-one with
            ``decoded_preds``.
        lang: Language code forwarded to BERTScore. It also selects the ROUGE
            tokenization: English (codes starting with "en") keeps the metric's
            default tokenizer with stemming; any other language uses a
            Unicode-aware word tokenizer without stemming.

    Returns:
        dict with keys "rouge1", "rouge2", "rougeL", "rougeLsum",
        "bertscore_f1" (mean F1 over all pairs) and "exact_match" (fraction of
        pairs that are equal after stripping surrounding whitespace).
    """
    import re  # local import keeps this cell self-contained

    def _unicode_word_tokens(text):
        # Map dotted capital I (U+0130) to "i" first: str.lower() would turn it
        # into "i" + a combining dot, and \w+ would split the word there.
        return re.findall(r"\w+", text.replace("\u0130", "i").lower())

    if str(lang).lower().startswith("en"):
        rouge_options = {"use_stemmer": True}
    else:
        # The default ROUGE tokenizer keeps only [a-z0-9] and its stemmer is the
        # English Porter stemmer. On non-English text that deletes letters such
        # as the Turkish ones and breaks words apart, distorting the scores.
        rouge_options = {"use_stemmer": False, "tokenizer": _unicode_word_tokens}

    rouge_result = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        **rouge_options
    )

    bertscore_result = bertscore.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        lang=lang
    )

    # Exact match ignores leading/trailing whitespace only.
    exact_matches = [int(pred.strip() == label.strip()) for pred, label in zip(decoded_preds, decoded_labels)]
    exact_match_score = np.mean(exact_matches)
    bertscore_f1 = np.mean(bertscore_result["f1"])

    print("\n--- DEĞERLENDİRME METRİKLERİ ---")
    print(f"ROUGE-1:    {rouge_result['rouge1']:.4f}")
    print(f"ROUGE-2:    {rouge_result['rouge2']:.4f}")
    print(f"ROUGE-L:    {rouge_result['rougeL']:.4f}")
    print(f"ROUGE-Lsum: {rouge_result['rougeLsum']:.4f}")

    print(f"BERTScore (F1): {bertscore_f1:.4f}")
    print(f"Exact Match:    {exact_match_score:.4f}")

    return {
        "rouge1": rouge_result["rouge1"],
        "rouge2": rouge_result["rouge2"],
        "rougeL": rouge_result["rougeL"],
        "rougeLsum": rouge_result["rougeLsum"],
        "bertscore_f1": bertscore_f1,
        "exact_match": exact_match_score
    }
# Score the generated predictions against the references, treating the text as Turkish.
results = evaluate_all_metrics(
    decoded_preds=decoded_preds,
    decoded_labels=decoded_labels,
    lang='tr',
)
print("\nDeğerlendirme Sonuçları:", results)


In [None]:
# 9. Print a single test example.
# Predictions were generated in test-set order, so index i in decoded_preds
# corresponds to dataset["test"][i]. Clamp the preferred index so a small test
# split does not raise IndexError, and print nothing if there are no predictions.
if decoded_preds:
    sample_idx = min(5, len(decoded_preds) - 1)
    print("\n--- Örnek Tahmin ---")
    print("Giriş:\n", dataset["test"][sample_idx]["input"])
    print("\nModel Çıktısı:\n", decoded_preds[sample_idx])
    print("\nGerçek Çıktı:\n", decoded_labels[sample_idx])

In [None]:
from transformers import T5Tokenizer
import matplotlib.pyplot as plt

# Token-length statistics over the whole dataset, measured with the t5-small tokenizer.
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Count tokens of every input and output text.
input_lengths = []
output_lengths = []
for example in full_data:
    input_lengths.append(len(tokenizer(example["input"]).input_ids))
    output_lengths.append(len(tokenizer(example["output"]).input_ids))

# Report the maximum and the floor of the mean length for each side.
for title, lengths in (("Input", input_lengths), ("Output", output_lengths)):
    longest = max(lengths)
    floor_mean = sum(lengths) // len(lengths)
    print(f"{title} max: {longest}, mean: {floor_mean}")

# Overlay both histograms on the same axes.
for series_label, lengths in (("input", input_lengths), ("output", output_lengths)):
    plt.hist(lengths, bins=50, alpha=0.5, label=series_label)
plt.legend()
plt.title("Token Uzunluk Dağılımı")
plt.show()
