In [None]:
# Colab setup: attach Google Drive, then work from the project folder on it
import os

from google.colab import drive

drive.mount('/content/drive')

# Relative paths used by later cells resolve against this directory
os.chdir('/content/drive/My Drive/EnhancedReasoning_gptoss120B')

print("Current directory: " + os.getcwd())


In [None]:
# Model Loading
from unsloth import FastLanguageModel

# Checkpoint identifier and context window used when loading the model
MODEL_NAME = "unsloth/gpt-oss-20b"
MAX_SEQ_LEN = 4096

print(f"Loading model: {MODEL_NAME}\nMax sequence length: {MAX_SEQ_LEN}")

# Load in 4-bit with full fine-tuning disabled; dtype=None requests auto detection
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LEN,
    dtype=None,
    load_in_4bit=True,
    full_finetuning=False,
)


In [None]:
# Prepare LoRA
# Wrap the loaded model with LoRA adapters (rank 32, alpha 64, dropout 0.05).
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    # NOTE(review): the "<layer>.mlp.experts.*" entries look like the expert
    # *parameter* names that PEFT examples pass via `target_parameters`;
    # confirm they actually match something when given as target_modules.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                   "gate_proj", "up_proj", "down_proj",
                   "7.mlp.experts.gate_up_proj", "7.mlp.experts.down_proj",
                   "15.mlp.experts.gate_up_proj", "15.mlp.experts.down_proj",
                   "23.mlp.experts.gate_up_proj", "23.mlp.experts.down_proj"],
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Set up tokenizer
print("Configuring tokenizer...")
# NOTE(review): left padding is the usual choice for generation; training
# setups commonly use right padding - confirm this is intended.
tokenizer.padding_side = "left"
# Only fall back to EOS when the tokenizer ships without a pad token.
# Overwriting an existing pad token with EOS makes padding and end-of-sequence
# indistinguishable, which can cause EOS to be masked out of the training labels.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [None]:
# Step 2: Process the enhanced CSV with GPToss120B reasoning

import pandas as pd
from datasets import Dataset

# Read the full enhanced CSV from the current working directory
print("Loading enhanced CSV with GPToss120B reasoning...")
enhanced_df = pd.read_csv('Enhanced_Training_Data_with_GPToss120B_Reasoning.csv')

print("Loaded {} cases".format(len(enhanced_df)))
print("Cases with reasoning: {}".format(enhanced_df['reasoning_matched'].sum()))

# Keep an independent copy of only the rows whose reasoning_matched flag is True
matched_df = enhanced_df.loc[enhanced_df['reasoning_matched'].eq(True)].copy()
print("Processing {} cases with GPToss120B reasoning".format(len(matched_df)))

def _split_differentials(raw):
    """Return the stripped, non-empty entries of a comma-separated diagnosis list."""
    import math  # local import keeps this cell self-contained

    # Missing cells arrive as None or NaN; str() would turn them into the
    # literal candidates "None" / "nan" in the prompt.
    if raw is None or (isinstance(raw, float) and math.isnan(raw)):
        return []
    return [item.strip() for item in str(raw).split(',') if item.strip()]


def enhanced_formatting_prompts_func(examples):
    """Format with GPToss120B reasoning as thinking content.

    Args:
        examples: Batch mapping of column name -> list of values. Reads the
            "PostDescription", "DifferentialDiagnosisList", "FinalDiagnosis"
            and "gptoss120b_reasoning" columns.

    Returns:
        dict: {"messages": [...]} with one two-turn conversation per example:
        a user prompt built from the description and the differential list
        (one candidate per line), and an assistant message whose "content" is
        the final diagnosis and whose "thinking" is the reasoning text.
    """
    convos = []

    columns = zip(
        examples["PostDescription"],
        examples["DifferentialDiagnosisList"],
        examples["FinalDiagnosis"],
        examples["gptoss120b_reasoning"],
    )
    for combined_description, differential_diagnosis, diagnosis, reasoning in columns:
        # One candidate diagnosis per line; blanks and missing values are dropped
        dd_formatted = "\n".join(_split_differentials(differential_diagnosis))

        # Create user prompt
        user_prompt = f"""Your task is to make diagnosis based on the following information about a patient.

{combined_description}

Here is a differential diagnosis list:
{dd_formatted}

Please give the best diagnosis:"""

        # Create conversation with reasoning as thinking
        convos.append([
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": diagnosis, "thinking": reasoning},
        ])

    return {"messages": convos}

# Convert to Dataset and apply formatting
dataset = Dataset.from_pandas(matched_df)

# Adds a "messages" column holding one conversation per row
dataset = dataset.map(enhanced_formatting_prompts_func, batched=True)

print("\n" + "="*50)
print("PRELIMINARY Context Length Analysis (before chat template):")

# Estimate raw text lengths on at most the first 100 examples
sample_size = min(100, len(dataset))
raw_lengths = []

for i in range(sample_size):
    messages = dataset[i]['messages']  # Get the full conversation

    # Count characters of every non-empty content / thinking field
    total_chars = 0
    for message in messages:
        for field in ('content', 'thinking'):
            if message.get(field):
                total_chars += len(str(message[field]))

    raw_lengths.append(total_chars)

if raw_lengths:
    sorted_lengths = sorted(raw_lengths)
    print("Raw text length statistics (chars):")
    print(f"- Min: {sorted_lengths[0]:,} chars")
    print(f"- Max: {sorted_lengths[-1]:,} chars")
    print(f"- Average: {sum(raw_lengths)/len(raw_lengths):,.0f} chars")
    # int(0.95 * n) is always < n, so the index is in range for any n >= 1
    print(f"- 95th percentile: {sorted_lengths[int(0.95*len(sorted_lengths))]:,} chars")
else:
    # min()/max()/division would raise on an empty sample
    print("No examples available; skipping length statistics.")




In [None]:
# Step 3: Apply chat template
import os

from datasets import load_from_disk

PROCESSED_DATASET_DIR = 'processed_dataset_with_gptoss120b_thinking'

if os.path.isdir(PROCESSED_DATASET_DIR):
    # Load the processed dataset
    print("Loading processed dataset...")
    dataset = load_from_disk(PROCESSED_DATASET_DIR)
    print(f"Loaded {len(dataset)} examples")
elif 'dataset' in globals():
    # No cell in this notebook writes PROCESSED_DATASET_DIR, so on a fresh
    # run fall back to the dataset already built in memory by Step 2.
    print(f"'{PROCESSED_DATASET_DIR}' not found on disk; using the in-memory dataset")
    print(f"Using {len(dataset)} examples")
else:
    raise FileNotFoundError(
        f"'{PROCESSED_DATASET_DIR}' does not exist and no in-memory dataset is "
        "available; run the Step 2 cell first."
    )

def final_formatting_func(examples):
    """Apply chat template with medium reasoning.

    Args:
        examples: Batch mapping with a "messages" entry holding one
            conversation per example.

    Returns:
        dict: {"text": [...]} with one rendered (untokenized) string per
        conversation, in input order.
    """
    texts = [
        tokenizer.apply_chat_template(
            convo,
            tokenize=False,
            # The parameter is `add_generation_prompt`; the previous
            # `add_generation_token` is not one apply_chat_template defines.
            # False: the conversation already ends with the assistant turn.
            add_generation_prompt=False,
            reasoning_effort="medium",
        )
        for convo in examples["messages"]
    ]
    return {"text": texts}

# Run the chat-template formatter over the dataset in batches
print("Applying chat template...")
dataset = dataset.map(function=final_formatting_func, batched=True)

In [None]:
# Print the rendered text of the first example under a header and rule
print("Sample formatted example:\n" + "=" * 50)
print(dataset[0]["text"])

Sample formatted example:
<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.
Knowledge cutoff: 2024-06
Current date: 2025-09-25

Reasoning: medium

# Valid channels: analysis, commentary, final. Channel must be included for every message.
Calls to these tools must go to the commentary channel: 'functions'.<|end|><|start|>user<|message|>Your task is to make diagnosis based on the following information about a patient.

Clinical History: This patient presented subacutely with left thigh pain for two months, and had an outpatient plain pelvic radiograph. She later presented acutely to the hospital after a short interval of having "heard a loud crack" in the left leg, then fallen, with no preceding trauma. At this point repeat radiography was performed.
 
 Imaging Findings: The outpatient radiograph was initially assessed as normal. Although indeed not demonstrating any femoral or pelvic fracture, it did in fact show focal cortical thickening of the left l

In [None]:
# Thinking Style Training - GPToss120B Reasoning

import wandb
from trl import SFTConfig, SFTTrainer

# W&B login: never embed the API key in the notebook. With no explicit key,
# wandb uses the WANDB_API_KEY environment variable or prompts interactively.
wandb.login()

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=SFTConfig(
        # Batch size 1 with 16 accumulation steps per optimizer update
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,
        num_train_epochs=2,
        learning_rate=1e-4,
        # Reuse the length the model was loaded with so the two cannot drift
        max_seq_length=MAX_SEQ_LEN,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        logging_steps=25,
        report_to="wandb",
        run_name="eurorad_thinking_gptoss120b_reasoning",
        output_dir="eurorad_thinking_gptoss120b_reasoning",
        # Checkpoint every 100 steps, keeping at most 10 checkpoints
        save_strategy="steps",
        save_steps=100,
        save_total_limit=10,
        gradient_checkpointing=True,
        dataloader_num_workers=0,
        remove_unused_columns=False,
        seed=3407,
        data_seed=3407,
        bf16=True,
        fp16=False,
    ),
)


In [None]:
# Start training
print("Starting thinking style training...")
trainer.train()

# Output directories, named once so save calls and messages stay in sync
ADAPTER_DIR = "eurorad_medical_final_thinking_gptoss120b"
MERGED_DIR = "eurorad_medical_merged_thinking_gptoss120b"

# Saves are best-effort: a failure is reported but does not abort the cell.
failed_saves = []

# Save LoRA adapter locally (thinking approach)
try:
    model.save_pretrained(ADAPTER_DIR)
    tokenizer.save_pretrained(ADAPTER_DIR)
    print(f"LoRA adapter thinking saved locally to ./{ADAPTER_DIR}/")
except Exception as e:
    failed_saves.append("LoRA adapter")
    print(f"Local LoRA save failed: {e}")

# Create merged model (thinking approach)
# NOTE(review): the base model was loaded in 4-bit; confirm merge_and_unload()
# yields the intended weights, or use the library's dedicated merged-save path.
try:
    merged_model = model.merge_and_unload()
    merged_model.save_pretrained(MERGED_DIR)
    tokenizer.save_pretrained(MERGED_DIR)
    print(f"Merged model thinking saved locally to ./{MERGED_DIR}/")
except Exception as e:
    failed_saves.append("merged model")
    print(f"Merged model save failed: {e}")

# Only claim success when every artifact was actually written
if failed_saves:
    print(f"Training completed, but saving failed for: {', '.join(failed_saves)}")
else:
    print("Training and saving completed!")