In [None]:
!pip install -q transformers sentence-transformers faiss-cpu PyPDF2 timm

import json, os, faiss, torch, numpy as np
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import timm
from torchvision import transforms
from PIL import Image

path_1 = "/kaggle/input/dataset/NEWparkinsons_synthetic_20k.jsonl"
path_2 = "/kaggle/input/dataset/parkinsons_diagnostic_cases.jsonl"

# Both files are read as JSON Lines (one JSON object per line).  Blank or
# whitespace-only lines are skipped so json.loads is never handed an empty
# string, which would raise JSONDecodeError and abort the whole load.
with open(path_1, "r", encoding="utf-8") as f1:
    dataset_1 = [json.loads(line) for line in f1 if line.strip()]
with open(path_2, "r", encoding="utf-8") as f2:
    dataset_2 = [json.loads(line) for line in f2 if line.strip()]

merged_dataset = dataset_1 + dataset_2

# One retrieval chunk per record: a "[CASE]" tag line, the record's "input"
# text, a blank line, then its "output" text.  A record missing either key
# raises KeyError.
case_chunks = [
    "[CASE]\n" + entry["input"].strip() + "\n\n" + entry["output"].strip()
    for entry in merged_dataset
]

pdf_folder = "/kaggle/input/beforeittellsitself/PD_papersforcode"
PDF_CHUNK_CHARS = 2000      # width of each fixed-size character window
PDF_MIN_CHUNK_CHARS = 200   # windows of this length or shorter are discarded

pdf_chunks = []
# os.listdir() returns entries in arbitrary order; sorting keeps the chunk
# order (and therefore the row ids of any index built from it) reproducible.
for fname in sorted(os.listdir(pdf_folder)):
    if not fname.lower().endswith(".pdf"):
        continue
    reader = PdfReader(os.path.join(pdf_folder, fname))
    # extract_text() may yield None/"" for a page; treat that as empty text.
    # Joining once avoids rebuilding an ever-growing string page by page.
    full_text = "".join(
        (page.extract_text() or "") + "\n\n" for page in reader.pages
    )
    # Non-overlapping windows; each kept chunk is tagged with its source file.
    for start in range(0, len(full_text), PDF_CHUNK_CHARS):
        chunk = full_text[start:start + PDF_CHUNK_CHARS].strip()
        if len(chunk) > PDF_MIN_CHUNK_CHARS:
            pdf_chunks.append(f"[PAPER:{fname}]\n" + chunk)

# Retrieval corpus: case chunks first, then paper chunks.  Row i of the FAISS
# index corresponds to all_chunks[i].
all_chunks = case_chunks + pdf_chunks
if not all_chunks:
    # Fail with a clear message instead of an opaque shape error further down.
    raise ValueError("No case or paper chunks were loaded; nothing to index.")

embedder = SentenceTransformer("emilyalsentzer/Bio_ClinicalBERT")
embeddings = embedder.encode(all_chunks, convert_to_numpy=True)
# FAISS expects a C-contiguous float32 matrix; make that explicit rather than
# relying on the encoder's output dtype and layout.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Exact (brute-force) L2 index whose dimensionality matches the embeddings.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)


faiss.write_index(index, "faiss_index.index")
print("FAISS index saved as faiss_index.index")

# len(pdf_chunks) counts text chunks, not PDF files, so label it accordingly.
print(f"Loaded {len(case_chunks)} cases + {len(pdf_chunks)} paper chunks = {len(all_chunks)} total.")


# Generator side of the pipeline: load the tokenizer and the causal LM for the
# chosen checkpoint, then wrap both in a text-generation pipeline (`rag_pipe`).
model_id = "openchat/openchat-3.5-0106"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
)
rag_pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
import timm

dataset_path = "/kaggle/input/spiral_pd"
spiral_weights_path = "spiral_model.pth"

# Use the GPU when one is visible and fall back to CPU otherwise; calling
# .cuda() unconditionally raises RuntimeError on a machine without a driver.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resize to 224x224, convert to a tensor and normalise with per-channel
# mean/std.
spiral_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


# ImageFolder derives labels from the sub-directory names under dataset_path.
dataset = ImageFolder(root=dataset_path, transform=spiral_transform)
loader = DataLoader(dataset, batch_size=16, shuffle=True)


# NOTE: this rebinds the module-level name `model`, which earlier referred to
# the LLM; `rag_pipe` was already built with that object.
model = timm.create_model("resnet18.a1_in1k", pretrained=True, num_classes=2)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


for epoch in range(5):
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * imgs.size(0)
        samples_seen += imgs.size(0)
    # Report the mean loss over the epoch rather than only the last batch.
    print(f"Epoch {epoch+1}: Loss {running_loss / max(samples_seen, 1):.4f}")

torch.save(model.state_dict(), spiral_weights_path)
print("Spiral classifier trained & saved.")



# Rebuild the architecture and restore the weights from the file just written.
# pretrained=False: every weight is overwritten by load_state_dict, so fetching
# the pretrained checkpoint first would be wasted work.
spiral_model = timm.create_model("resnet18.a1_in1k", pretrained=False, num_classes=2)
spiral_model.load_state_dict(torch.load(spiral_weights_path, map_location=device))
spiral_model.eval().to(device)


def predict_spiral_image(image_path):
    """Classify one spiral drawing with `spiral_model` and describe the result.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        One of two fixed sentences: the "signs of Parkinson's" sentence when
        the predicted class index is 1, otherwise the "does not show signs"
        sentence.
    """
    # Run on whatever device the model lives on instead of assuming CUDA.
    model_device = next(spiral_model.parameters()).device

    # convert() returns a fully loaded copy, so the file can be closed
    # immediately afterwards.
    with Image.open(image_path) as raw_img:
        img = raw_img.convert("RGB")

    img_tensor = spiral_transform(img).unsqueeze(0).to(model_device)
    with torch.no_grad():
        out = spiral_model(img_tensor)
        pred = torch.argmax(out, dim=1).item()

    # NOTE(review): assumes class index 1 is the Parkinson's class; confirm
    # against the training dataset's class ordering.
    if pred == 1:
        return "The spiral test suggests **signs of Parkinson’s**."
    return "The spiral test **does not show signs of Parkinson’s**."

def rag_chat_mistral(patient_prompt, image_path=None, k=3):
    """Generate a structured Parkinson's diagnostic summary for one patient.

    The patient text is embedded with `embedder`, the `k` nearest rows are
    looked up in the FAISS `index`, and the matching texts from `all_chunks`
    are placed in the prompt as reference material.  When `image_path` is
    given, the verdict from `predict_spiral_image` is included as well.

    Args:
        patient_prompt: Free-text patient details.
        image_path: Optional path to a spiral-test image.
        k: Number of nearest chunks to retrieve.

    Returns:
        The text following the end-of-prompt marker in the pipeline output,
        stripped; the whole stripped output if the marker is absent.
    """
    spiral_diagnosis = "No spiral test image was provided."
    if image_path:
        spiral_diagnosis = predict_spiral_image(image_path)

    query = embedder.encode([patient_prompt], convert_to_numpy=True)
    # Previously the search result was discarded, so nothing retrieved ever
    # reached the model.  FAISS pads with -1 when fewer than k rows exist.
    _, neighbor_ids = index.search(query, k)
    retrieved = [
        all_chunks[i] for i in neighbor_ids[0] if 0 <= i < len(all_chunks)
    ]
    context_block = (
        "\n\n".join(retrieved) if retrieved
        else "No reference material was retrieved."
    )

    prompt = f"""You are a senior neurologist. Based on the patient's clinical information, generate a medically realistic diagnostic summary using the format below. Include the spiral analysis result.

1. Patient Overview  
Summarise the patient’s demographics and background in one paragraph, touching on:  
- Age and gender  
- Smoking and alcohol habits  
- Diet  
- Recent surgeries or ongoing medications  
- Family history of neurological disorders  

2. Symptom Summary  
Describe each of these in its own sentence:  
- Bradykinesia Score  
- Tremors (present/absent)  
- Rigidity (none/mild/moderate/severe)  
- Gait disturbance  
- Speech changes  
- Sleep disturbances  
- Balance issues  

3. Clinical Impression  
- “Definitive Parkinson’s disease (likely)”  
- “Probable Parkinson’s (likely)”  
- “Possible Parkinsonism—consider alternative etiologies”  
- “Unlikely Parkinson’s disease”  

4. Reasoning  
2–3 sentences linking findings to impression.

5. Spiral Test Image Analysis  
{spiral_diagnosis}

---

Reference Material (retrieved similar cases and literature excerpts; use as supporting context only):
{context_block}

---

Patient Details:
{patient_prompt.strip()}

### END OF PROMPT — ANSWER BELOW ###
---
"""

    # Greedy decoding: `temperature` has no effect when do_sample=False and
    # only triggers a warning, so it is no longer passed.
    raw = rag_pipe(prompt, max_new_tokens=400, do_sample=False)[0]["generated_text"]

    # The pipeline output is expected to echo the prompt; keep only what
    # follows the marker.
    marker = "### END OF PROMPT — ANSWER BELOW ###"
    result = raw.split(marker, 1)[1].strip() if marker in raw else raw.strip()
    return result

# Smoke test: one hand-written patient record plus a sample spiral image,
# pushed through the full pipeline and printed.
test_prompt = """
Age: 60
Gender: Male
Smoking: No
Alcohol: Yes
Diet: Non-Vegetarian
Allergies: Pollen
Comorbidities: None
Surgery: Brain surgery
Immunosuppressants: No
Genetic History: Father has Parkinson's Disease
Symptoms:
- Bradykinesia Score: 2.2
- Tremors: Yes
- Rigidity: Mild
- Gait Disturbance: Yes
- Speech Changes: No
- Sleep Disturbance: Yes
- Balance Issues: Yes
- Cognitive: No
"""

spiral_image_path = "/kaggle/input/kaggledatafortesting/parkinson/V01PE01.png"

# Header is printed before generation starts, then the generated text.
print("🩺 Doctor's Diagnosis:\n")
diagnosis_text = rag_chat_mistral(test_prompt, image_path=spiral_image_path)
print(diagnosis_text)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m48.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━

2025-06-11 16:13:56.566695: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749658436.844086      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749658436.922738      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Batches:   0%|          | 0/629 [00:00<?, ?it/s]

✅ FAISS index saved as faiss_index.index
✅ Loaded 20100 cases + 6 papers = 20106 total.


tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/491 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]

Device set to use cpu


model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import torchvision.datasets as datasets

# NOTE(review): this reads the same `dataset_path` the classifier was trained
# on, so the metrics below are training-set metrics, not held-out performance.
test_dataset = ImageFolder(root=dataset_path, transform=spiral_transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

all_preds = []
all_labels = []

# Evaluate on whatever device the model already lives on instead of assuming
# CUDA is available.
eval_device = next(spiral_model.parameters()).device

spiral_model.eval()
with torch.no_grad():
    for imgs, labels in test_loader:
        outputs = spiral_model(imgs.to(eval_device))
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        # Labels never need to leave the CPU; they are only used by sklearn.
        all_labels.extend(labels.cpu().numpy())

# The binary scores below use sklearn's default positive label (1).
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)
conf_matrix = confusion_matrix(all_labels, all_preds)

print("Classification Report:\n")
print(classification_report(all_labels, all_preds, target_names=test_dataset.classes))

print("Confusion Matrix:\n", conf_matrix)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

In [None]:
!pip install -q gradio timm

import gradio as gr
import timm
import torch
from torchvision import transforms
from PIL import Image

# CPU copy of the spiral classifier for the Gradio apps in this cell.
# pretrained=False: load_state_dict (strict by default) overwrites every
# weight, so downloading the pretrained checkpoint first is wasted work and
# fails without network access.
model = timm.create_model("resnet18.a1_in1k", pretrained=False, num_classes=2)
model.load_state_dict(torch.load("/kaggle/working/spiral_model.pth", map_location=torch.device('cpu')))
model.eval()

# Preprocessing applied to every uploaded image: resize to 224x224, convert to
# a tensor, normalise with per-channel mean/std.
spiral_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

def analyze_spiral(image):
    """Classify an uploaded spiral drawing and return a one-line verdict.

    Args:
        image: File path of the uploaded image, or ``None``/empty when the
            form was submitted without an image.

    Returns:
        A prompt to upload an image when none was given; otherwise one of two
        fixed sentences depending on whether the predicted class index is 1.
    """
    # Without this guard Image.open(None) raises when nothing was uploaded.
    if not image:
        return "Please upload a spiral test image first."

    # convert() returns a fully loaded copy, so the file can be closed here.
    with Image.open(image) as raw_img:
        img = raw_img.convert("RGB")

    img_tensor = spiral_transform(img).unsqueeze(0)
    with torch.no_grad():
        out = model(img_tensor)
        pred = torch.argmax(out, dim=1).item()

    # NOTE(review): assumes class index 1 is the Parkinson's class; confirm
    # against the training dataset's class ordering.
    if pred == 1:
        return "🧪 The spiral test suggests **signs of Parkinson’s**."
    return "✅ The spiral test **does not show signs of Parkinson’s**."

# Single-purpose Gradio app: one image upload in, one verdict string out,
# served by analyze_spiral.  Flagging is turned off.
spiral_upload = gr.Image(type="filepath", label="Upload Spiral Test Image")
spiral_result_box = gr.Textbox(label="Spiral Test Result")

demo = gr.Interface(
    fn=analyze_spiral,
    inputs=spiral_upload,
    outputs=spiral_result_box,
    title="🌀 Parkinson's Spiral Test Analyzer",
    description="Upload a spiral drawing image to quickly analyze possible signs of Parkinson’s disease.",
    allow_flagging="never",
)



def rag_ui(age, gender, smoking, alcohol, diet, allergies, comorbidities, surgery, immuno, genetic_history,
           bradykinesia, tremors, rigidity, gait, speech, sleep, constipation, balance, extra_notes, spiral_image):
    """Gradio callback: format the form fields and request a diagnosis.

    All arguments except ``spiral_image`` are interpolated verbatim into a
    "Field: value" text block.  ``spiral_image`` is forwarded as the image
    path when truthy, otherwise ``None`` is passed.

    Returns:
        Whatever ``rag_chat_mistral`` returns for the assembled text.
    """
    user_input = f"""
Age: {age}
Gender: {gender}
Smoking: {smoking}
Alcohol: {alcohol}
Diet: {diet}
Allergies: {allergies}
Comorbidities: {comorbidities}
Surgery: {surgery}
Immunosuppressants: {immuno}
Genetic History: {genetic_history}
Symptoms:
- Bradykinesia Score: {bradykinesia}
- Tremors: {tremors}
- Rigidity: {rigidity}
- Gait Disturbance: {gait}
- Speech Changes: {speech}
- Sleep Disturbance: {sleep}
- Constipation: {constipation}
- Balance Issues: {balance}
Extra Notes: {extra_notes}
"""

    # Any falsy upload value (no image supplied) is normalised to None.
    spiral_image_path = spiral_image if spiral_image else None
    return rag_chat_mistral(user_input, image_path=spiral_image_path)

# Full diagnosis form.  The order of `inputs` must match the positional
# parameters of rag_ui.
ui = gr.Interface(
    fn=rag_ui,
    inputs=[
        gr.Number(label="Age"),
        gr.Radio(["Male", "Female"], label="Gender"),
        gr.Radio(["No", "Occasionally", "Daily"], label="Smoking"),
        gr.Radio(["No", "Occasionally", "Daily"], label="Alcohol"),
        gr.Dropdown(["Vegetarian", "Non-Vegetarian", "Mixed"], label="Diet"),
        gr.Textbox(label="Allergies", placeholder="None"),
        gr.Textbox(label="Comorbidities", placeholder="e.g. Diabetes, Hypertension"),
        gr.Textbox(label="Surgery History", placeholder="e.g. Bypass surgery, None"),
        gr.Radio(["Yes", "No"], label="Immunosuppressants"),
        gr.Textbox(label="Genetic History", placeholder="e.g. Father had PD, None"),
        gr.Slider(minimum=0, maximum=5, step=0.1, label="Bradykinesia Score"),
        gr.Radio(["Yes", "No"], label="Tremors"),
        # Graded scale rather than Yes/No: the diagnostic prompt asks for
        # rigidity as none/mild/moderate/severe.
        gr.Radio(["None", "Mild", "Moderate", "Severe"], label="Rigidity"),
        gr.Radio(["Yes", "No"], label="Gait Disturbance"),
        gr.Radio(["Yes", "No"], label="Speech Changes"),
        gr.Radio(["Yes", "No"], label="Sleep Disturbance"),
        gr.Radio(["Yes", "No"], label="Constipation"),
        gr.Radio(["Yes", "No"], label="Balance Issues"),
        gr.Textbox(label="Extra Notes", placeholder="Any extra symptoms or lifestyle notes"),
        gr.Image(label="Upload Spiral Test Image (Optional)", type="filepath")
    ],
    outputs=gr.Textbox(label="Diagnosis"),
    title="Parkinson's Diagnosis Assistant"
)

# share=True asks Gradio for a public share link in addition to the local
# server; debug=True keeps the cell attached to the server.
ui.launch(debug=True, share=True)

In [None]:
!pip install -q gradio timm
import gradio as gr
import timm
import torch
from torchvision import transforms
from PIL import Image

# Restore the fine-tuned spiral classifier on CPU for the standalone analyzer.
# The saved state dict replaces every weight (strict loading is the default),
# so the architecture is built with pretrained=False to skip a needless
# checkpoint download that would also fail offline.
model = timm.create_model("resnet18.a1_in1k", pretrained=False, num_classes=2)
model.load_state_dict(torch.load("/kaggle/working/spiral_model.pth", map_location=torch.device('cpu')))
model.eval()

# Image preprocessing pipeline: 224x224 resize, tensor conversion, then
# per-channel mean/std normalisation.
spiral_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

def analyze_spiral(image):
    """Return a one-sentence verdict for an uploaded spiral-test image.

    Args:
        image: Path to the uploaded file; ``None`` or empty when the user
            submitted the form without choosing an image.

    Returns:
        A request to upload an image when `image` is falsy; otherwise the
        "signs of Parkinson's" sentence if the predicted class index is 1 and
        the "does not show signs" sentence for any other class.
    """
    # Guard against an empty submission, which would make Image.open fail.
    if not image:
        return "Please upload a spiral test image first."

    # The converted copy holds the pixel data, so the handle is released as
    # soon as the with-block exits.
    with Image.open(image) as source_img:
        rgb_img = source_img.convert("RGB")

    batch = spiral_transform(rgb_img).unsqueeze(0)
    with torch.no_grad():
        logits = model(batch)
        pred = torch.argmax(logits, dim=1).item()

    # NOTE(review): index 1 is presumed to be the Parkinson's class; verify
    # against the class ordering used at training time.
    if pred == 1:
        return "🧪 The spiral test suggests **signs of Parkinson’s**."
    return "✅ The spiral test **does not show signs of Parkinson’s**."

# Standalone spiral analyzer UI.  Interface takes the callback, input
# component and output component as its first three positional arguments.
demo = gr.Interface(
    analyze_spiral,
    gr.Image(label="Upload Spiral Test Image", type="filepath"),
    gr.Textbox(label="Spiral Test Result"),
    title="🌀 Parkinson's Spiral Test Analyzer",
    description="Upload a spiral drawing image to quickly analyze possible signs of Parkinson’s disease.",
    allow_flagging="never",
)

# debug=True keeps the cell attached to the running server.
demo.launch(debug=True)