In [None]:
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix
import os

# =============================================
# 1. ENVIRONMENT CONFIGURATION
# =============================================
# os.cpu_count() returns None when the core count cannot be determined and
# torch.set_num_threads() does not accept None, so fall back to one thread.
num_cpu_cores = os.cpu_count() or 1
torch.set_num_threads(num_cpu_cores)
print(f"Using CPU with {num_cpu_cores} cores")

# =============================================
# 2. MODEL LOADING WITH IMPROVED CONFIG
# =============================================
print("\nLoading model...")
model_path = "./fine_tuned_distilgpt2"  # adapter directory passed to PeftModel
base_model = "distilgpt2"               # checkpoint the adapter is applied to

# Load components. The tokenizer comes from the base checkpoint and reuses
# EOS as its padding token.
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float32,
    device_map={"": "cpu"}
)
# Attach the adapter, then fold its weights into the base model so the rest of
# the cell works with a plain transformers model in inference mode.
model = PeftModel.from_pretrained(model, model_path)
model = model.merge_and_unload()
model.eval()

# =============================================
# 3. OPTIMIZED DATA PROCESSING
# =============================================
def load_val_data(filepath):
    """Load validation examples from a JSONL chat file.

    Every non-blank line must be a JSON object whose ``messages`` list holds
    the EEG feature text at index 1 and the class label (as text) at index 2.

    Args:
        filepath: Path to the JSONL file.

    Returns:
        A list of dicts with the keys ``input_text`` (prompt ending in
        ``"Label: "``), ``true_label`` (int) and ``raw_eeg`` (the first 100
        characters of the feature text followed by ``"..."``).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError, IndexError, ValueError: If a record does not have the
            expected ``messages`` layout or its label is not an integer.
    """
    data = []
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (for example a trailing empty line) are not records
            # and would make json.loads raise.
            if not line.strip():
                continue
            entry = json.loads(line)
            features = entry['messages'][1]['content']
            # NOTE(review): this prompt is meant to mirror the training prompt;
            # the training cell in this notebook builds
            # "Analyze EEG features ...\n<features>\nAnswer:" instead — confirm
            # which format the saved adapter was actually trained with.
            input_text = (
                "EEG Features:\n"
                f"{features}\n"
                "Label: "
            )
            data.append({
                "input_text": input_text,
                "true_label": int(entry['messages'][2]['content']),
                "raw_eeg": features[:100] + "..."
            })
    return data

# The path is relative, so it is resolved against the kernel's current
# working directory.
val_data = load_val_data("jsonl/val.jsonl")
print(f"\nLoaded {len(val_data)} validation examples")

# =============================================
# 4. IMPROVED PREDICTION HANDLING
# =============================================
def predict(text):
    """Generate a class label (1-5) for a single prompt.

    Args:
        text: Prompt string; the label is read from whatever the model
            generates after it.

    Returns:
        A ``(label, decoded)`` tuple. ``label`` is the first digit in 1-5
        found in the newly generated text, or ``None`` when there is none.
        ``decoded`` is the full decoded sequence (prompt plus continuation).
    """
    # A single sequence needs no padding. Padding to max_length appends pad
    # tokens after the prompt (nothing in this cell switches the tokenizer to
    # left padding), so the model would continue from padding instead of from
    # the end of the prompt.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        max_length=256,
        truncation=True
    )

    with torch.no_grad():
        # Beam search is deterministic; no temperature is passed because it
        # only applies when sampling is enabled.
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=3,
            pad_token_id=tokenizer.eos_token_id,
            early_stopping=True,
            num_beams=3
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Look for the label only in the continuation. Searching the full text
    # would pick up digits from the EEG features whenever truncation removed
    # the "Label:" cue from the prompt.
    prompt_length = inputs.input_ids.shape[1]
    generated = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    for char in generated.strip():
        # Membership test instead of isdigit()/int(): isdigit() is also true
        # for characters such as superscripts that int() cannot parse.
        if char in "12345":
            return int(char), decoded
    return None, decoded  # Explicit return for errors

# =============================================
# 5. SIMPLIFIED EVALUATION LOOP
# =============================================
print("\nRunning evaluation...")
results = []

for example in tqdm(val_data):
    true_label = example["true_label"]
    pred_label, full_output = predict(example["input_text"])

    # A missing prediction is never counted as correct.
    is_match = pred_label == true_label if pred_label else False
    record = dict(
        true_label=true_label,
        predicted_label=pred_label,
        correct=is_match,
        eeg_preview=example["raw_eeg"],
        full_output=full_output,
    )
    results.append(record)

# =============================================
# 6. RESULTS ANALYSIS
# =============================================
# Keep only the rows for which a label could be extracted
valid_results = list(filter(lambda r: r["predicted_label"] is not None, results))
if valid_results:
    accuracy = sum(r["correct"] for r in valid_results) / len(valid_results)
else:
    accuracy = 0

print(f"\nValidation Accuracy (valid predictions): {accuracy:.2%}")
print(f"Invalid predictions: {len(results) - len(valid_results)}/{len(results)}")

# Per-class metrics are only meaningful when at least one prediction is valid
if valid_results:
    y_true = list(map(lambda r: r["true_label"], valid_results))
    y_pred = list(map(lambda r: r["predicted_label"], valid_results))

    print("\nClassification Report (valid predictions):")
    print(classification_report(y_true, y_pred, labels=[1,2,3,4,5]))

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred, labels=[1,2,3,4,5]))
# =============================================
# MODIFIED EVALUATION SECTION
# =============================================
def evaluate(results):
    """Print a five-class report, a confusion matrix and a coverage warning.

    Args:
        results: Iterable of dicts with ``true_label`` and
            ``predicted_label`` keys; a ``predicted_label`` of ``None`` is
            treated as the out-of-range class ``-1``.
    """
    class_ids = [1, 2, 3, 4, 5]
    y_true = [row['true_label'] for row in results]
    y_pred = []
    for row in results:
        predicted = row['predicted_label']
        y_pred.append(-1 if predicted is None else predicted)

    report = classification_report(
        y_true, y_pred,
        labels=class_ids,
        target_names=["Class 1", "Class 2", "Class 3", "Class 4", "Class 5"],
        zero_division=0  # classes without predictions score 0 instead of warning
    )
    print("\nClassification Report:")
    print(report)

    matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
    print("\nConfusion Matrix:")
    print(matrix)

    # Report every class that was never predicted
    missing_classes = set(class_ids).difference(y_pred)
    if missing_classes:
        print(f"\nWarning: No predictions for classes {missing_classes}")
# Save results: write the per-example records collected in `results` as
# indented JSON to a file in the current working directory.
with open("eeg_validation_results.json", "w") as f:
    json.dump(results, f, indent=2)
print("\nResults saved to eeg_validation_results.json")

In [None]:
# %% [markdown]
# ## Fine-Tuned DistilGPT-2 EEG Classification Test

# %% [markdown]
# ### Setup Environment


# %% [markdown]
# ### Import Dependencies

# %%
import json
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm

# %% [markdown]
# ### Configuration

# %%
# Directory the model artifacts are loaded from.
MODEL_PATH = "./fine_tuned_distilgpt2"

# Validation entries, one dict per JSONL record. Paste them here or load them
# from a file. Example entry:
#   {"messages": [{"role": "system", "content": "..."}, ...]}
VAL_DATA = list()

# Run on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# %% [markdown]
# ### Load Model & Tokenizer

# %%
# The training cell of this notebook saves only a PEFT (LoRA) adapter to
# MODEL_PATH and does not save a tokenizer there. Load the base checkpoint and
# apply the adapter on top of it, as the other evaluation cells do.
from peft import PeftModel  # local import: peft is not in this cell's import list

BASE_MODEL = "distilgpt2"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # Set pad token

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(model, MODEL_PATH)
# Fold the adapter weights into the base model and move it to the target device.
model = model.merge_and_unload().to(device)
model.eval()

# %% [markdown]
# ### Data Preparation

# %%
def parse_messages(entry):
    """Collect the system, user and assistant message contents of an entry.

    Args:
        entry: Dict with a ``messages`` list of ``{"role", "content"}`` dicts.

    Returns:
        Dict with the keys ``system``, ``user`` and ``assistant``. A role that
        does not occur maps to ``''``; when a role occurs more than once the
        last content wins. Messages with any other role are ignored.
    """
    collected = dict.fromkeys(('system', 'user', 'assistant'), '')
    for message in entry['messages']:
        role = message['role']
        if role not in collected:
            continue
        collected[role] = message['content']
    return collected

# Prepare validation data: turn every raw entry into a role -> content dict.
# VAL_DATA is defined as an empty list in the configuration above, so this is
# empty until entries are supplied there.
validation_set = [parse_messages(entry) for entry in VAL_DATA]

# %% [markdown]
# ### Prediction Function

# %%
def generate_prediction(system_prompt, user_input):
    """Generate the model's continuation for one EEG example.

    Args:
        system_prompt: Instruction text placed at the top of the prompt.
        user_input: EEG feature text placed after the instruction.

    Returns:
        The decoded text of the newly generated tokens only (at most three),
        without the prompt and without special tokens.
    """
    prompt = f"{system_prompt}\n\n{user_input}\n\nAssistant: "
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        # Greedy decoding keeps the evaluation reproducible; sampling without
        # a fixed seed gives different labels on every run.
        outputs = model.generate(
            **inputs,
            max_new_tokens=3,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
            do_sample=False
        )

    # generate() returns prompt + continuation; keep only the continuation.
    input_length = inputs.input_ids.shape[1]
    return tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)

# %% [markdown]
# ### Evaluation

# %%
def evaluate_model(validation_data):
    """Run the model over the validation set and score its predictions.

    Args:
        validation_data: Iterable of dicts with ``system``, ``user`` and
            ``assistant`` keys; ``assistant`` holds the true label as text.

    Returns:
        ``(accuracy, results)``. ``accuracy`` is the share of ALL examples
        that were predicted correctly; an output without a label in 1-5 counts
        as wrong instead of being left out of the denominator. ``results`` has
        one dict per example with ``input``, ``true_label``, ``predicted`` and
        ``correct``. For empty input the accuracy is 0.

    Raises:
        ValueError: If an ``assistant`` value is not an integer string.
    """
    correct = 0
    total = 0
    results = []

    for item in tqdm(validation_data, desc="Processing"):
        true_label = int(item['assistant'])
        generated_text = generate_prediction(item['system'], item['user'])

        # Extract predicted number using regex
        match = re.search(r'\d+', generated_text)
        predicted = int(match.group()) if match else None

        # Validate prediction: only labels in 1-5 can be correct
        is_valid = predicted is not None and 1 <= predicted <= 5
        is_correct = is_valid and predicted == true_label
        if not is_valid:
            print(f"Invalid prediction: {generated_text}")

        # Every example counts towards the denominator, so unusable outputs
        # lower the accuracy rather than disappearing from it.
        total += 1
        correct += int(is_correct)

        results.append({
            "input": item['user'],
            "true_label": true_label,
            "predicted": predicted,
            "correct": is_correct
        })

    accuracy = correct / total if total > 0 else 0
    return accuracy, results

# %% [markdown]
# ### Run Evaluation

# %%
# Score the prepared validation set
accuracy, predictions = evaluate_model(validation_set)

# %% [markdown]
# ### Display Results

# %%
print(f"\nValidation Accuracy: {accuracy:.2%}")
print("\nSample Predictions:")
# Show the first five examples, three lines each followed by a blank line
for i, pred in enumerate(predictions[:5]):
    print(
        f"Example {i+1}:\n"
        f"True: {pred['true_label']} | Predicted: {pred['predicted']}\n"
        f"Correct: {'✓' if pred['correct'] else '✗'}\n"
    )

In [None]:
import os
import json
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix

# 1. System Configuration
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# os.cpu_count() can return None; torch.set_num_threads() needs an int.
torch.set_num_threads(os.cpu_count() or 1)

# 2. Model Loading
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map={"": "cpu"}
)

# 3. Corrected PEFT Configuration
# DistilGPT-2 uses the GPT-2 layer names: the fused query/key/value projection
# is "c_attn" and the attention output projection is "attn.c_proj". The names
# q_lin/k_lin/v_lin/out_lin belong to DistilBERT and match nothing in this
# model. GPT-2 implements these layers as Conv1D with (fan_in, fan_out)
# weights, which LoRA has to be told about via fan_in_fan_out.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["c_attn", "attn.c_proj"],
    fan_in_fan_out=True,
    lora_dropout=0.1,
    bias="lora_only",
    task_type="CAUSAL_LM",
    modules_to_save=["lm_head"]
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# 4. Data Processing
def preprocess_data(example):
    """Turn one chat-format record into a prompt string and an integer label.

    Args:
        example: Dict whose ``messages`` list holds the EEG feature text at
            index 1 and the class label (as text) at index 2.

    Returns:
        Dict with ``text`` (instruction, features and a trailing ``Answer:``
        cue) and ``label`` (int).
    """
    messages = example['messages']
    features = messages[1]['content']
    prompt = f"Analyze EEG features and predict movement class (1-5):\n{features}\nAnswer:"
    target = int(messages[2]['content'])
    return {"text": prompt, "label": target}

# Load the training JSONL (path relative to the working directory) and replace
# the raw "messages" column with the "text" and "label" columns produced by
# preprocess_data.
dataset = load_dataset("json", data_files="jsonl/train.jsonl", split="train")
dataset = dataset.map(preprocess_data, remove_columns=["messages"])

# 5. Tokenization
def tokenize_function(examples):
    """Tokenize a batch as ``prompt + " " + label``, padded to 256 tokens.

    The training collator used in this notebook derives the language-modeling
    targets from ``input_ids``, so the answer has to be part of the tokenized
    sequence; a prompt that stops at ``Answer:`` gives the model nothing to
    learn about the label.

    Args:
        examples: Batch dict (as passed by ``Dataset.map(..., batched=True)``)
            with a ``text`` list of prompts and a ``label`` list of ints.

    Returns:
        Dict with ``input_ids`` and ``attention_mask``, each a list of
        256-element lists.
    """
    max_length = 256
    pad_id = tokenizer.pad_token_id
    batch_input_ids = []
    batch_attention_mask = []

    for prompt, label in zip(examples["text"], examples["label"]):
        answer_ids = tokenizer(f" {label}", add_special_tokens=False)["input_ids"]
        prompt_ids = tokenizer(prompt, add_special_tokens=False)["input_ids"]
        # If the example is too long, shorten the prompt and never the answer.
        prompt_ids = prompt_ids[: max_length - len(answer_ids)]

        token_ids = prompt_ids + answer_ids
        padding = max_length - len(token_ids)
        batch_input_ids.append(token_ids + [pad_id] * padding)
        batch_attention_mask.append([1] * len(token_ids) + [0] * padding)

    return {"input_ids": batch_input_ids, "attention_mask": batch_attention_mask}

# Drop the raw "text" column and the integer "label" column after tokenizing.
# The language-modeling collator builds its own `labels` from `input_ids`; a
# leftover `label` column would be handed to the model as an extra input.
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text", "label"])

# Held-out split for the per-epoch evaluation configured below; without an
# eval dataset the Trainer has nothing to evaluate on.
eval_dataset = (
    load_dataset("json", data_files="jsonl/val.jsonl", split="train")
    .map(preprocess_data, remove_columns=["messages"])
    .map(tokenize_function, batched=True, remove_columns=["text", "label"])
)

# 6. Training Configuration
training_args = TrainingArguments(
    output_dir="../eeg_results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    num_train_epochs=10,
    learning_rate=3e-5,
    warmup_ratio=0.1,
    weight_decay=0.05,
    logging_steps=10,
    # load_best_model_at_end requires the save and evaluation strategies to
    # match, so both run once per epoch.
    save_strategy="epoch",
    evaluation_strategy="epoch",
    load_best_model_at_end=True,
    # The PEFT wrapper hides the base model's forward signature, so name the
    # label field explicitly to make sure an evaluation loss is computed.
    label_names=["labels"]
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

# 7. Training
trainer.train()
model.save_pretrained("./fine_tuned_distilgpt2")
# Another cell of this notebook loads the tokenizer from this directory, so
# store it next to the adapter.
tokenizer.save_pretrained("./fine_tuned_distilgpt2")

# 8. Evaluation
def predict(text):
    """Return the most likely class (1-5) as the next token after the prompt.

    The five class digits are scored directly from the next-token logits
    instead of generating freely and parsing the output, so the result is
    always an int in 1-5.

    Args:
        text: Prompt string (truncated to 256 tokens).

    Returns:
        The class in 1-5 whose digit token has the highest logit.
    """
    # Training leaves the model in train mode; switch dropout off.
    model.eval()
    inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True)

    # A digit can be encoded with or without a leading space depending on what
    # precedes it; accept either form for every class.
    class_token_ids = {}
    for label in range(1, 6):
        candidates = set()
        for form in (str(label), f" {label}"):
            candidates.add(tokenizer(form, add_special_tokens=False).input_ids[0])
        class_token_ids[label] = sorted(candidates)

    with torch.no_grad():
        # Logits for the position right after the last prompt token.
        next_token_logits = model(**inputs).logits[0, -1]

    scores = {
        label: next_token_logits[token_ids].max().item()
        for label, token_ids in class_token_ids.items()
    }
    return max(scores, key=scores.get)

# Load the validation split and give it the same text/label columns as the
# training data.
val_data = load_dataset("json", data_files="jsonl/val.jsonl", split="train").map(
    preprocess_data, remove_columns=["messages"]
)

# One record per example: true label, predicted label and whether they agree.
results = [
    {
        "true": example["label"],
        "pred": pred,
        "correct": pred == example["label"],
    }
    for example in tqdm(val_data)
    for pred in [predict(example["text"])]
]

accuracy = sum(row["correct"] for row in results) / len(results)
print(f"Validation Accuracy: {accuracy:.2%}")
print(classification_report(
    [row["true"] for row in results],
    [row["pred"] for row in results],
))

In [2]:
import json
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from torch.utils.data import DataLoader
import os

# ── 1. ENVIRONMENT ─────────────────────────────────────────────────────────────
torch.manual_seed(42)
# os.cpu_count() can return None, which torch.set_num_threads() does not
# accept; fall back to a single thread in that case.
torch.set_num_threads(os.cpu_count() or 1)

# ── 2. LOAD MODEL & TOKENIZER ───────────────────────────────────────────────────
BASE = "distilgpt2"                    # base checkpoint
FINETUNED = "./fine_tuned_distilgpt2"  # adapter directory passed to PeftModel

tokenizer = AutoTokenizer.from_pretrained(BASE)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token

# load base then LoRA adapters
model = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.float32, device_map={"": "cpu"})
model = PeftModel.from_pretrained(model, FINETUNED)
# Fold the adapter weights into the base model and switch to inference mode.
model = model.merge_and_unload()
model.eval()

# ── 3. LOAD VALIDATION DATA ─────────────────────────────────────────────────────
def load_val(filepath):
    """Load validation prompts and labels from a JSONL chat file.

    Every non-blank line must be a JSON object whose ``messages`` list holds
    the EEG text at index 1 and the class label (as text) at index 2.

    Args:
        filepath: Path to the JSONL file.

    Returns:
        A list of dicts with ``prompt`` (``"EEG Data:\\n<text>\\nLabel:"``) and
        ``true`` (int).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError, IndexError, ValueError: If a record does not have the
            expected ``messages`` layout or its label is not an integer.
    """
    # Read as UTF-8 explicitly and skip blank lines, which are not records
    # and would make json.loads raise.
    with open(filepath, encoding="utf-8") as f:
        lines = [json.loads(l) for l in f if l.strip()]
    examples = []
    for e in lines:
        text = e["messages"][1]["content"]
        prompt = f"EEG Data:\n{text}\nLabel:"
        examples.append({
            "prompt": prompt,
            "true": int(e["messages"][2]["content"])
        })
    return examples

# The path is relative to the kernel's working directory. Check it up front so
# a wrong working directory is reported with the resolved location instead of
# a bare "No such file or directory".
VAL_PATH = "jsonl/val.jsonl"
if not os.path.isfile(VAL_PATH):
    raise FileNotFoundError(
        f"Validation file not found: {os.path.abspath(VAL_PATH)} "
        f"(working directory: {os.getcwd()}). "
        "Start the kernel from the project root or update VAL_PATH."
    )

val = load_val(VAL_PATH)
print(f"{len(val)} examples")

# ── 4. BATCHED PREDICTION ───────────────────────────────────────────────────────
def batch_predict(prompts, batch_size=8):
    """Predict a class label (1-5) for every prompt, in batches.

    Args:
        prompts: List of prompt strings.
        batch_size: Number of prompts generated together.

    Returns:
        A list with one entry per prompt: the label in 1-5 that the model
        generated first, or ``None`` when the continuation does not start with
        such a label.
    """
    preds = []
    # A decoder-only model continues from the last position of each row, so
    # padding has to go on the left. Truncation also has to cut from the left,
    # otherwise long prompts lose their trailing "Label:" cue. The tokenizer's
    # previous settings are restored afterwards.
    original_sides = (tokenizer.padding_side, tokenizer.truncation_side)
    tokenizer.padding_side = "left"
    tokenizer.truncation_side = "left"
    try:
        for i in range(0, len(prompts), batch_size):
            batch = prompts[i:i+batch_size]
            toks = tokenizer(batch,
                             return_tensors="pt",
                             padding=True,  # pad to the longest prompt in the batch
                             truncation=True,
                             max_length=192)
            with torch.no_grad():
                # Beam search is deterministic; temperature only applies when
                # sampling, so it is not passed.
                outs = model.generate(
                    **toks,
                    max_new_tokens=3,
                    pad_token_id=tokenizer.eos_token_id,
                    early_stopping=True,
                    num_beams=3
                )
            # Decode only the continuation so digits in the prompt can never
            # be mistaken for a prediction.
            prompt_len = toks["input_ids"].shape[1]
            dec = tokenizer.batch_decode(outs[:, prompt_len:], skip_special_tokens=True)
            # The label must be the first thing generated: optional
            # whitespace, one digit in 1-5, then a word boundary.
            for d in dec:
                m = re.match(r"\s*([1-5])\b", d)
                preds.append(int(m.group(1)) if m else None)
    finally:
        tokenizer.padding_side, tokenizer.truncation_side = original_sides
    return preds

prompts = list(map(lambda ex: ex["prompt"], val))
y_true = list(map(lambda ex: ex["true"], val))
y_pred = batch_predict(prompts, batch_size=4)

# ── 5. FILTER & METRICS ─────────────────────────────────────────────────────────
# Metrics are computed only on examples for which a label was extracted.
valid_idxs = [idx for idx in range(len(y_pred)) if y_pred[idx] is not None]
y_true_valid = list(map(y_true.__getitem__, valid_idxs))
y_pred_valid = list(map(y_pred.__getitem__, valid_idxs))

print("Overall accuracy (valid preds):", accuracy_score(y_true_valid, y_pred_valid))
print("\nClassification report:")
print(classification_report(y_true_valid, y_pred_valid, labels=[1,2,3,4,5], zero_division=0))
print("\nConfusion matrix:")
print(confusion_matrix(y_true_valid, y_pred_valid, labels=[1,2,3,4,5]))

# Mention how many examples were dropped from the metrics above
if len(y_true) > len(valid_idxs):
    print(f"\nWarnings: {len(y_true)-len(valid_idxs)} examples produced no valid label.")

# ── 6. SAVE RESULTS ─────────────────────────────────────────────────────────────
# One record per validation example, including those without a valid label
out = [
    {"prompt": val[pos]["prompt"], "true": val[pos]["true"], "pred": y_pred[pos]}
    for pos in range(len(val))
]
with open("eeg_validation_results.json","w") as f:
    json.dump(out, f, indent=2)


FileNotFoundError: [Errno 2] No such file or directory: 'jsonl/val.jsonl'