In [13]:
# nsclc_classification_optimized.ipynb

import os
import re
import json
import textwrap
from concurrent.futures import ThreadPoolExecutor
from llama_cpp import Llama
import pandas as pd
from tqdm import tqdm

In [2]:
# -----------------------------
# Settings
# -----------------------------
# Filesystem locations (all relative paths)
DATA_PATH = "../data/dummy_nsclc_reports.xlsx"
OUTPUT_PATH = "../outputs/nsclc_llm_results_optimized.csv"
MODEL_PATH = "../models/mistral-gguf/mistral-7b-instruct-v0.1.Q4_K_M.gguf"

# Runtime parameters handed to the Llama(...) constructor
N_CTX = 2048
N_THREADS = 2

# Width passed to textwrap.wrap when a report is split into chunks
CHUNK_CHAR_LIMIT = 1800

In [3]:
# -----------------------------
# Load model once
# -----------------------------
# The resulting `llm` object is a notebook-level global that the
# classification helper calls for every report chunk.
print("🔁 Loading model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=N_CTX,
    n_threads=N_THREADS,
)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from ../models/mistral-gguf/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:    

llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = llama
llama_model_loader: - kv  13:                      tokenizer.ggml.tokens arr[str,32000]   = ["<unk>", "<s>", "</s>", "<0x00>", "<...
llama_model_loader: - kv  14:                      tokenizer.ggml.scores arr[f32,32000]   = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,32000]   = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
llama_model_loader: - kv  16:                tokenizer.ggml.bos_token_id u32              = 1
llama_model_loader: - kv  17:                tokenizer.ggml.eos_token_id u32              = 2
llama_model_loader: - kv  18:            tokenizer.ggml.unknown_token_id u32              = 0
llama_model_loader: - kv  19:               general.quantization_version u32              = 2
llama_model_loader: - type  f32:   65 tensors
llama_model_loader: - type q4_K:  193 tensors
llama_model_loader: - type q6_K:   

🔁 Loading model...


llm_load_tensors:        CPU buffer size =  4165.37 MiB
.................................................................................................
llama_new_context_with_model: n_ctx      = 2048
llama_new_context_with_model: n_batch    = 512
llama_new_context_with_model: n_ubatch   = 512
llama_new_context_with_model: freq_base  = 10000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init:        CPU KV buffer size =   256.00 MiB
llama_new_context_with_model: KV self size  =  256.00 MiB, K (f16):  128.00 MiB, V (f16):  128.00 MiB
llama_new_context_with_model:        CPU  output buffer size =    62.50 MiB
llama_new_context_with_model:        CPU compute buffer size =   164.00 MiB
llama_new_context_with_model: graph nodes  = 1060
llama_new_context_with_model: graph splits = 1
AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | 

In [4]:
# -----------------------------
# Load input data
# -----------------------------
print("📄 Loading data...")
df = pd.read_excel(DATA_PATH)

# Fail fast with a readable message if the sheet lacks the columns the
# classification step reads, instead of a bare KeyError later on.
required_columns = ["PatientID_Masked", "Full_Text_Report"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(f"{DATA_PATH} is missing required column(s): {missing_columns}")

print(f"✅ Loaded {len(df)} patient reports.")

📄 Loading data...
✅ Loaded 3 patient reports.


In [None]:
# -----------------------------
# Helper: Run classification
# -----------------------------
def _parse_nsclc_response(raw_output):
    """Extract ``(status, justification)`` from the model's raw text reply.

    Every brace-delimited span in ``raw_output`` is tried as JSON; the first
    one that decodes wins. Recognised status labels are normalised to their
    canonical capitalisation so that e.g. ``"likely"`` ranks the same as
    ``"Likely"``. A non-string status becomes ``"ParseError"`` and a missing or
    non-string justification is replaced by a message carrying the raw JSON.

    Returns:
        tuple[str, str]: ``(status, justification)``. If no span decodes, the
        status is ``"ParseError"`` and the justification carries the raw text.
    """
    canonical_labels = {
        "definite": "Definite",
        "likely": "Likely",
        "unlikely": "Unlikely",
        "uncertain": "Uncertain",
    }

    for candidate in re.findall(r"\{.*?\}", raw_output, re.DOTALL):
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue  # not valid JSON; try the next brace-delimited span

        status = parsed.get("NSCLC_Status", "ParseError")
        if isinstance(status, str):
            # Unknown labels are passed through untouched.
            status = canonical_labels.get(status.strip().lower(), status)
        else:
            status = "ParseError"

        reason = parsed.get("Justification")
        if not isinstance(reason, str):
            reason = f"Missing justification | RAW: {candidate}"

        return status, reason

    return "ParseError", f"Failed to parse any JSON block | RAW: {raw_output}"


def classify_nsclc(pid, report):
    """Classify one patient report for NSCLC likelihood using the global ``llm``.

    The report is split with ``textwrap.wrap`` into chunks of at most
    ``CHUNK_CHAR_LIMIT`` characters, each chunk is sent to the model, and the
    per-chunk answers are merged: the highest-ranked status wins
    (Definite > Likely > Uncertain > Unlikely > anything else) and the
    justifications are joined with spaces.

    Args:
        pid: Patient identifier; embedded in the prompt and returned unchanged.
        report: Report text. Anything that is not a string (e.g. ``None`` or a
            NaN from an empty spreadsheet cell) is treated as having no text.

    Returns:
        tuple: ``(pid, status, justification)``. ``status`` is ``"ParseError"``
        when the report has no text, the model call raises, or the reply
        contains no decodable JSON.
    """
    # Non-string input would make textwrap.wrap raise; treat it as empty.
    text = report if isinstance(report, str) else ""
    chunks = textwrap.wrap(text, width=CHUNK_CHAR_LIMIT)

    # textwrap.wrap returns [] for empty/whitespace-only text; without this
    # guard the max() further down would raise ValueError on an empty list.
    if not chunks:
        return pid, "ParseError", "Empty or missing report text"

    all_responses = []

    for chunk in chunks:
        prompt = f"""
You are a clinical language model. A patient medical report is provided below. Based on the full clinical context, classify the likelihood that the patient has non-small cell lung cancer (NSCLC) as one of:

- Definite
- Likely
- Unlikely
- Uncertain

Also provide a one sentence justification with no commas.

Patient report:
{chunk}

Respond ONLY in this strict JSON format:
{{"PatientID": "{pid}", "NSCLC_Status": "", "Justification": ""}}
"""

        try:
            # Generate model response
            response = llm(prompt, temperature=0, max_tokens=200, stop=["}"])
            raw_output = response["choices"][0]["text"].strip()

            # Generation is stopped at "}", so the reply usually arrives
            # without its closing brace; restore it before parsing.
            if not raw_output.endswith("}"):
                raw_output += "}"

            status, reason = _parse_nsclc_response(raw_output)

        except Exception as e:
            # Best effort: record the failure for this chunk and keep going.
            status = "ParseError"
            reason = str(e)

        all_responses.append((status, reason))

    # Combine results from multiple chunks
    if len(all_responses) == 1:
        return pid, all_responses[0][0], all_responses[0][1]

    ranking = {"Definite": 3, "Likely": 2, "Uncertain": 1, "Unlikely": 0}
    best = max(all_responses, key=lambda x: ranking.get(x[0], -1))
    combined_justification = " ".join(r[1] for r in all_responses)
    return pid, best[0], combined_justification

In [34]:
# -----------------------------
# Process data in parallel
# -----------------------------
print("⚙️ Classifying using threads...")
results = []
# With max_workers=1 the pool runs one classification at a time, in the
# order the reports were submitted.
with ThreadPoolExecutor(max_workers=1) as executor:
    futures = [
        executor.submit(classify_nsclc, record["PatientID_Masked"], record["Full_Text_Report"])
        for _, record in df.iterrows()
    ]

    # Collect in submission order so the rows of `results` follow the rows of `df`.
    for future in tqdm(futures):
        results.append(future.result())

⚙️ Classifying using threads...


  0%|          | 0/3 [00:00<?, ?it/s]Llama.generate: prefix-match hit

llama_print_timings:        load time =   60885.66 ms
llama_print_timings:      sample time =      29.04 ms /    53 runs   (    0.55 ms per token,  1825.13 tokens per second)
llama_print_timings: prompt eval time =   20539.63 ms /    88 tokens (  233.40 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =   32252.72 ms /    52 runs   (  620.24 ms per token,     1.61 tokens per second)
llama_print_timings:       total time =   53908.35 ms /   140 tokens
 33%|███▎      | 1/3 [00:54<01:48, 54.00s/it]Llama.generate: prefix-match hit



----- RAW OUTPUT for P001 -----
{"PatientID": "P001", "NSCLC_Status": "Definite", "Justification": "The biopsy confirms adenocarcinoma, which is a type of NSCLC."




llama_print_timings:        load time =   60885.66 ms
llama_print_timings:      sample time =      46.64 ms /    86 runs   (    0.54 ms per token,  1843.99 tokens per second)
llama_print_timings: prompt eval time =   21150.37 ms /    81 tokens (  261.12 ms per token,     3.83 tokens per second)
llama_print_timings:        eval time =   49145.22 ms /    85 runs   (  578.18 ms per token,     1.73 tokens per second)
llama_print_timings:       total time =   71561.85 ms /   166 tokens
 67%|██████▋   | 2/3 [02:05<01:04, 64.39s/it]Llama.generate: prefix-match hit



----- RAW OUTPUT for P002 -----
{"PatientID": "P002", "NSCLC_Status": "Uncertain", "Justification": "The chest X-ray shows patchy infiltrates and symptoms of cough and mild weight loss, but no mass or nodule was detected on follow-up imaging. This suggests a possible diagnosis of NSCLC, but further testing and evaluation are needed to confirm."




llama_print_timings:        load time =   60885.66 ms
llama_print_timings:      sample time =      39.44 ms /    62 runs   (    0.64 ms per token,  1572.01 tokens per second)
llama_print_timings: prompt eval time =   21446.68 ms /    75 tokens (  285.96 ms per token,     3.50 tokens per second)
llama_print_timings:        eval time =   52073.65 ms /    61 runs   (  853.67 ms per token,     1.17 tokens per second)
llama_print_timings:       total time =   75779.44 ms /   136 tokens
100%|██████████| 3/3 [03:21<00:00, 67.19s/it]


----- RAW OUTPUT for P003 -----
{"PatientID": "P003", "NSCLC_Status": "Likely", "Justification": "The patient has a follow-up PET-CT scan indicating metabolic activity in the left lower lobe, which is consistent with NSCLC."






In [35]:
# -----------------------------
# Save results
# -----------------------------
print("💾 Saving output...")
df_out = pd.DataFrame(results, columns=["PatientID_Masked", "NSCLC_Status", "Justification"])

# to_csv does not create missing folders; make sure the target directory
# exists so the write does not fail with OSError after the run has finished.
output_dir = os.path.dirname(OUTPUT_PATH)
if output_dir:  # dirname is "" when OUTPUT_PATH is a bare filename
    os.makedirs(output_dir, exist_ok=True)

df_out.to_csv(OUTPUT_PATH, index=False)
print(f"✅ Done! Saved results to {OUTPUT_PATH}")

💾 Saving output...
✅ Done! Saved results to ../outputs/nsclc_llm_results_optimized.csv
