In [None]:
# CSV uploader: upload a file through google.colab and keep the call's result in `uploaded`.
# A later cell reads "clinical_notes.csv" from the working directory with pd.read_csv.
from google.colab import files
uploaded = files.upload()

Saving clinical_notes.csv to clinical_notes.csv


Fine-tuning tiiuae/falcon-rw-1b with LoRA

In [None]:
!pip install -q transformers datasets peft accelerate bitsandbytes trl
!pip install -q -U bitsandbytes

from datasets import load_dataset

# Load the fine-tuning records from the JSON file as a single "train" split.
dataset = load_dataset("json", data_files="/content/finetune_data.json", split="train")
# Quick test run on a small subset: keep at most the first 100 records.
# The size is clamped to the dataset length so a file with fewer than 100
# records does not make `select` fail on out-of-range indices.
dataset = dataset.select(range(min(100, len(dataset))))

# Model and tokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM
import gc, torch

# Free GPU memory before loading: run Python garbage collection, then empty PyTorch's CUDA cache
gc.collect()
torch.cuda.empty_cache()

model_id = "tiiuae/falcon-rw-1b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = 512  # cap the tokenizer's maximum length at 512 (intended to guarantee token truncation)

# Load the causal-LM weights in float16; device placement is delegated via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16
)

# LoRA setup
from peft import LoraConfig, get_peft_model

# Adapter hyperparameters, collected in one mapping and expanded into LoraConfig.
_lora_settings = dict(
    r=8,
    lora_alpha=16,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
lora_config = LoraConfig(**_lora_settings)

# Rebind `model` to the PEFT-wrapped version of the loaded model.
model = get_peft_model(model, lora_config)

# Training arguments
from transformers import TrainingArguments
from trl import SFTTrainer

training_args = TrainingArguments(
    # Output location, checkpointing and logging
    output_dir="./falcon-lora-finetuned",
    save_strategy="epoch",
    logging_steps=5,
    report_to="none",
    # Schedule: one epoch, batch size 1 per device with 2 gradient-accumulation steps
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    # Optimisation settings
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    fp16=True,
)

# Prompt formatter
def format(example):
    """Render one dataset record as a single training string.

    The record's 'instruction', 'input' and 'output' values are placed on
    three consecutive lines, in that order. A missing key raises KeyError.

    Note: this name shadows the built-in `format`; it is kept because the
    trainer setup passes the function by this name.
    """
    fields = ("instruction", "input", "output")
    return "\n".join(f"{example[key]}" for key in fields)

# Start the trainer
# NOTE(review): the `tokenizer` configured earlier in this cell (pad token,
# model_max_length = 512) is not passed to SFTTrainer here — confirm the trainer
# ends up using the intended tokenizer and truncation settings.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    formatting_func=format
)

# Run the fine-tuning loop.
trainer.train()

# Save the model
save_path = "./falcon-lora-finetuned-final"
# Write the model first, then the tokenizer, into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_path)

print(f"✅ Model başarıyla kaydedildi: {save_path}")


Generating train split: 0 examples [00:00, ? examples/s]

Applying formatting function to train dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,3.8513
10,3.3737
15,2.8808
20,2.5717
25,2.2944
30,2.0547
35,1.6997
40,1.5746
45,1.7613
50,1.5841


✅ Model başarıyla kaydedildi: ./falcon-lora-finetuned-final


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

import gradio as gr

import textwrap
from sentence_transformers import SentenceTransformer, util
import pandas as pd

# 1. Load the clinical notes
def _labelled_note(row):
    """Prefix a row's note with that row's 'hasta_adi' and 'etiket' values."""
    return f"{row['hasta_adi']}, Etiket: {row['etiket']}. {row['not']}"

df = pd.read_csv("clinical_notes.csv")
# Overwrite the 'not' column with the prefixed text, row by row.
df['not'] = df.apply(_labelled_note, axis=1)


# 2. Embedding model
embedder = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
# Keep the note strings and their embeddings side by side (same order).
note_texts = df['not'].tolist()
note_embeddings = embedder.encode(note_texts, convert_to_tensor=True)

# Load the base model
# `torch` is imported here because this cell uses torch.float16 while the
# cell's own import list does not include torch; without it the cell only
# runs when another cell has already imported torch into the kernel.
import torch

base_model_id = "tiiuae/falcon-rw-1b"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    torch_dtype=torch.float16
)

# Attach the saved LoRA adapter weights to the base model
finetuned_path = "./falcon-lora-finetuned-final"
model = PeftModel.from_pretrained(base_model, finetuned_path)
# This model is used for inference only: switch to eval mode so dropout
# layers (the adapter was configured with lora_dropout) are disabled.
model.eval()

# Load the tokenizer that was saved next to the adapter
tokenizer = AutoTokenizer.from_pretrained(finetuned_path)
tokenizer.pad_token = tokenizer.eos_token


# 4. Answering function
def answer_question(question, top_k=3, similarity_threshold=0.4):
    """Answer a question, using the most similar clinical notes as context.

    The question is embedded and compared (cosine similarity) with the
    precomputed note embeddings. If the best match reaches
    `similarity_threshold`, the `top_k` best notes are appended to the prompt;
    otherwise the question is sent to the model without any notes.

    Args:
        question: The user's question as a string.
        top_k: Maximum number of notes to retrieve. Clamped to the number of
            available notes so a small notes file does not break the search.
        similarity_threshold: Minimum cosine similarity of the best note for
            the notes to be used as context. Tune if needed.

    Returns:
        The generated answer as a string, with the prompt part removed when
        the "[/INST]" marker is present in the decoded text.

    NOTE(review): the prompts here use "[INST] ... [/INST]" tags, while the
    training formatter in this notebook joins instruction/input/output with
    newlines — confirm that this prompt template is intended.
    """
    # Step 1: Embed the user question
    q_embed = embedder.encode(question, convert_to_tensor=True)

    # Step 2: Semantic search over notes.
    # topk raises if k exceeds the number of candidates, so clamp k first.
    k = min(top_k, len(note_texts))
    relevant_notes = []
    top_score = float("-inf")  # no notes at all -> always below the threshold
    if k > 0:
        scores, indices = util.cos_sim(q_embed, note_embeddings)[0].topk(k)
        # Convert tensor indices to plain ints before indexing the Python list.
        relevant_notes = [note_texts[i] for i in indices.tolist()]
        top_score = scores[0].item()

    # Step 3: Check similarity score — are the notes actually related?
    if top_score < similarity_threshold:
        # No relevant notes — use LLM as a general chatbot
        print("⚠️ No relevant medical notes found — switching to open chat mode.")
        prompt = f"<s>[INST] {question}\nCevap ver. [/INST]"
    else:
        # RAG: medical mode
        context = "\n".join([f"- {note}" for note in relevant_notes])
        prompt = f"<s>[INST] {question}\n{context} [/INST]"

    # Step 4: Generate model response
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=300,
        # temperature / top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)

    # Step 5: Clean output — separate the answer from the prompt
    if "[/INST]" in decoded:
        response = decoded.split("[/INST]")[-1].strip()
    else:
        response = decoded.strip()

    return response


# 5. Gradio chat interface
import traceback
def safe_answer_question(message, history):
    """Chat handler that never raises: errors are returned as a chat message.

    Args:
        message: The user's message; forwarded to answer_question.
        history: Accepted for the chat-handler signature; not used here.

    Returns:
        The answer from answer_question, or an error string starting with
        "❌ Hata: " when answer_question raises an Exception.
    """
    try:
        reply = answer_question(message)
    except Exception as err:
        traceback.print_exc()  # also print the full traceback to the console
        reply = f"❌ Hata: {str(err)}"
    return reply

# Route chat messages through safe_answer_question (not answer_question
# directly) so an exception raised while answering is shown in the chat as an
# error message instead of escaping the handler.
gr.ChatInterface(
    fn=safe_answer_question,
    title="Klinik Notlara Dayalı AI Yanıtlayıcı",
    description="Yüklenen klinik notlara göre LLM destekli cevap üretir."
).launch()


  self.chatbot = Chatbot(


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2db3b08cf695e70aa8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


