In [None]:
# Mount Google Drive at /content/drive; the Excel paths used later in this
# notebook point into that mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# IPython shell commands: install the Hugging Face packages imported in the
# next cell (`transformers` and `datasets`).
!pip install transformers
!pip install datasets

In [None]:
import os
import pandas as pd
import json
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import Dataset, DatasetDict

#############################################
# 1. Excel-to-JSON conversion
#############################################
def _format_step_ref(value):
    """Render a step-number cell as text.

    pandas promotes an integer column to float64 as soon as it contains a
    blank cell, so a step reference such as 2 would otherwise be written as
    "2.0" and a blank cell as "nan". Integral floats are rendered as plain
    integers and missing values as an empty string; anything else is passed
    through ``str`` unchanged.
    """
    if pd.isna(value):
        return ""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def excel_to_json(excel_file, json_file):
    """Convert a flow-step spreadsheet into input/output training pairs.

    Rows are grouped by the "Akış ID" column. For every flow, one pair is
    produced:

    * ``input``  - one line per step: ``"<Adım No>: <Adım Açıklaması>"``
    * ``output`` - one line per step:
      ``"<Adım No>: <Adım Türü>: <Adım Açıklaması>"`` followed by
      ``" Yes=<..>, No=<..>"`` when the step type is "koşul"
      (case-insensitive, surrounding whitespace ignored) or ``" Next=<..>"``
      otherwise.

    Args:
        excel_file: Path of the Excel workbook to read, or an already loaded
            ``pandas.DataFrame`` with the same columns.
        json_file: Path of the JSON file to write (UTF-8, non-ASCII kept).

    Returns:
        None. The list of pairs is written to ``json_file``.
    """
    if isinstance(excel_file, pd.DataFrame):
        df = excel_file
    else:
        df = pd.read_excel(excel_file)

    grouped_data = []
    for _, group in df.groupby("Akış ID"):
        input_lines = []
        output_lines = []

        # Build the input line and the target line of each step in one pass.
        for _, row in group.iterrows():
            step_no = _format_step_ref(row["Adım No"])
            step_type = row["Adım Türü"]
            description = row["Adım Açıklaması"]
            yes_target = _format_step_ref(row["Evet Durumu (Sonraki Adım)"])

            input_lines.append(f"{step_no}: {description}")

            target_line = f"{step_no}: {step_type}: {description}"
            if str(step_type).strip().lower() == "koşul":
                no_target = _format_step_ref(row["Hayır Durumu (Sonraki Adım)"])
                target_line += f" Yes={yes_target}, No={no_target}"
            else:
                # Non-conditional steps keep their single successor in the
                # "yes" column.
                target_line += f" Next={yes_target}"
            output_lines.append(target_line)

        grouped_data.append(
            {"input": "\n".join(input_lines), "output": "\n".join(output_lines)}
        )

    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(grouped_data, f, ensure_ascii=False, indent=4)

    print(f"Eğitim verileri '{json_file}' dosyasına kaydedildi.")

# Input spreadsheets on the mounted Drive, each followed by the JSON file
# that will hold its converted training pairs.
train_excel = r'/content/drive/My Drive/akış-diyagramı-örnekleri-verileri.xlsx'
train_json = "training_pairs.json"

test_excel = r'/content/drive/My Drive/akışlar-test-verileri.xlsx'
test_json = "test_pairs.json"

# Convert the training workbook first, then the test workbook.
for _excel_path, _json_path in ((train_excel, train_json), (test_excel, test_json)):
    excel_to_json(_excel_path, _json_path)

#############################################
# 2. Loading the JSON files into a dataset
#############################################
def load_json_dataset(train_file, test_file):
    """Load the train and test JSON files into a ``DatasetDict``.

    Args:
        train_file: Path of the UTF-8 JSON file holding the training examples.
        test_file: Path of the UTF-8 JSON file holding the test examples.

    Returns:
        A ``DatasetDict`` with a "train" and a "test" split, each built with
        ``Dataset.from_list`` from the parsed JSON content.
    """
    # Parse both files before building any dataset, training file first.
    records = {}
    for split_name, path in (("train", train_file), ("test", test_file)):
        with open(path, 'r', encoding='utf-8') as handle:
            records[split_name] = json.load(handle)

    return DatasetDict(
        {split_name: Dataset.from_list(rows) for split_name, rows in records.items()}
    )

# Build the train/test DatasetDict from the two JSON files written above.
dataset = load_json_dataset(train_json, test_json)

#############################################
# 3. Loading the model and tokenizer
#############################################
# Downloaded model files are cached in a local directory.
custom_cache_dir = "./huggingface_cache"
# Pretrained checkpoint that is fine-tuned below.
# NOTE(review): the training data appears to be Turkish; confirm that this
# checkpoint's vocabulary tokenizes Turkish characters without <unk> tokens.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name, cache_dir=custom_cache_dir)
model = T5ForConditionalGeneration.from_pretrained(model_name, cache_dir=custom_cache_dir)

#############################################
# 4. Tokenization & preprocessing
#############################################
def preprocess_function(examples):
    """Tokenize a batch of examples for seq2seq training.

    Args:
        examples: A batch from ``Dataset.map(..., batched=True)`` with an
            "input" column (source texts) and an "output" column (targets).

    Returns:
        The tokenized inputs with an additional "labels" entry holding the
        token ids of the targets. Both sides are truncated to 512 tokens.
    """
    # No padding here: the DataCollatorForSeq2Seq created in this file pads
    # each batch to its own longest sequence and fills label padding with
    # -100 (its default label_pad_token_id), which the loss ignores. Padding
    # labels to 512 with the real pad id would make the model train mostly on
    # padding tokens.
    model_inputs = tokenizer(examples["input"], max_length=512, truncation=True)

    # `text_target` replaces the deprecated `as_target_tokenizer()` context
    # manager.
    labels = tokenizer(text_target=examples["output"], max_length=512, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Tokenize the dataset with preprocess_function, processing rows in batches.
tokenized_datasets = dataset.map(preprocess_function, batched=True)
# Collator handed to the Trainer below to assemble seq2seq batches.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

#############################################
# 5. Training configuration
#############################################
# Newer transformers releases renamed `evaluation_strategy` to
# `eval_strategy`. The notebook installs an unpinned version, so use whichever
# field the installed TrainingArguments dataclass actually declares.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in getattr(TrainingArguments, "__dataclass_fields__", {})
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./t5_flowchart_output",
    eval_steps=500,  # evaluate every 500 optimizer steps
    save_steps=500,  # checkpoint on the same schedule as evaluation
    num_train_epochs=8,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=5e-5,
    weight_decay=0.01,
    logging_steps=59,
    save_total_limit=3,  # keep at most three checkpoints on disk
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
    **{_eval_strategy_key: "steps"},
)

def _logits_to_token_ids(logits, labels):
    """Reduce raw model outputs to predicted token ids.

    Passed to the Trainer as ``preprocess_logits_for_metrics`` so that only
    the arg-max ids, not the full vocabulary-sized logits, are accumulated
    during evaluation. The model may return a tuple of outputs; the first
    element is taken as the logits. ``labels`` is unused but part of the hook
    signature.
    """
    if isinstance(logits, tuple):
        logits = logits[0]
    return logits.argmax(dim=-1)


def compute_metrics(eval_pred):
    """Compute evaluation metrics from teacher-forced predictions.

    Args:
        eval_pred: The (predictions, labels) pair supplied by the Trainer.
            Predictions may be token ids, raw logits, or a tuple whose first
            element is one of those; labels may contain -100 padding.

    Returns:
        A dict with
        * ``exact_match``: share of examples whose decoded prediction equals
          the decoded label (after stripping surrounding whitespace);
        * ``token_accuracy``: share of non-padding label positions where the
          predicted token id equals the label id.
    """
    predictions, labels = eval_pred

    # Accept raw model outputs as well as already reduced token ids.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)

    pad_id = tokenizer.pad_token_id
    # Positions that carry a real target token (neither -100 nor padding).
    target_mask = (labels != -100) & (labels != pad_id)

    # Work on copies so the arrays owned by the Trainer are not modified.
    # Everything outside the target span is replaced by the pad id: -100
    # cannot be decoded, and predictions past the end of the target are not
    # meaningful.
    predictions = predictions.copy()
    predictions[~target_mask] = pad_id
    labels = labels.copy()
    labels[~target_mask] = pad_id

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    exact_hits = sum(
        pred.strip() == target.strip()
        for pred, target in zip(decoded_preds, decoded_labels)
    )
    exact_match = exact_hits / len(decoded_labels) if decoded_labels else 0.0

    if target_mask.any():
        token_accuracy = float((predictions == labels)[target_mask].mean())
    else:
        token_accuracy = 0.0

    return {"exact_match": exact_match, "token_accuracy": token_accuracy}


# NOTE(review): recent transformers releases deprecate the `tokenizer=`
# argument in favour of `processing_class=` - confirm against the installed
# version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics=_logits_to_token_ids,
)

#############################################
# 6. Run training and generate output for one test example
#############################################
def parse_flow_output(predicted_text):
    """Parse generated flow text into spreadsheet rows.

    Each step is expected to look like ``"<no>: <type>: <description>"``
    followed by either ``" Yes=<a>, No=<b>"`` or ``" Next=<c>"``. Segments
    with fewer than three ``": "``-separated parts are skipped.

    Args:
        predicted_text: Decoded model output.

    Returns:
        A list of ``[step_no, step_type, description, yes_next, no_next,
        next_step]`` lists; fields that do not apply are empty strings.
    """
    import re

    rows = []
    for raw_line in predicted_text.split("\n"):
        # NOTE(review): SentencePiece-based tokenizers such as T5's are
        # reported to normalise newlines to spaces, so the decoded output may
        # arrive as one line - confirm. To cope with that, a line is also
        # split wherever whitespace is followed by "<digits>: ", i.e. where
        # the next step starts.
        for segment in re.split(r"\s+(?=\d+: )", raw_line.strip()):
            parts = segment.split(": ")
            if len(parts) < 3:
                continue

            step_no, step_type = parts[0], parts[1]
            # The description itself may contain ": ", so re-join the rest.
            description = ": ".join(parts[2:])
            next_step = yes_next = no_next = ""

            # rpartition/partition never raise, even if a marker repeats.
            head, yes_marker, tail = description.rpartition(" Yes=")
            if yes_marker:
                description = head
                yes_next, _, no_next = tail.partition(" No=")
                # The training format is "Yes=<a>, No=<b>": drop the comma
                # that separates the two targets.
                yes_next = yes_next.strip().rstrip(",").strip()
                no_next = no_next.strip()
            elif " No=" in description:
                description, _, no_next = description.rpartition(" No=")
                no_next = no_next.strip()
            elif " Next=" in description:
                description, _, next_step = description.rpartition(" Next=")
                next_step = next_step.strip()

            rows.append([step_no, step_type, description, yes_next, no_next, next_step])

    return rows


if __name__ == "__main__":
    trainer.train()

    test_example = dataset["test"][0]["input"]
    input_ids = tokenizer.encode(test_example, return_tensors="pt", max_length=512, truncation=True)
    # The Trainer places the model on its training device (the GPU when one
    # is available); the inputs must live on the same device as the model.
    input_ids = input_ids.to(model.device)
    outputs = model.generate(input_ids, max_length=512)
    predicted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print("📝 Girdi:", test_example)
    print("🎯 Çıktı:")
    print(predicted_text)

    #############################################
    # 7. Save the model output to an Excel file
    #############################################

    # Turn the generated text into one row per step.
    data = parse_flow_output(predicted_text)

    # Build the DataFrame with the spreadsheet's column layout.
    df = pd.DataFrame(data, columns=["Adım No", "Adım Türü", "Adım Açıklaması", "Evet Durumu (Sonraki Adım)", "Hayır Durumu (Sonraki Adım)", "Sonraki Adım"])

    # Write the result to an Excel workbook.
    excel_output_path = "flowchart_output.xlsx"
    df.to_excel(excel_output_path, index=False)

    print(f"✅ Model çıktısı '{excel_output_path}' dosyasına başarıyla kaydedildi!")

#abda9f461371669c2516207660e00058a83e1e09