In [2]:
import base64
import io
import json
import requests
from PIL import Image
import warnings
# Suppress UserWarning messages attributed to the openpyxl module so they do
# not clutter the notebook output.
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")

In [2]:
#!pip install --upgrade openpyxl --index-url https://pypi.org/simple

In [3]:
# ===============================================================
# 📦 Minimal KMAnalyzer: Custom LLM API Wrapper
# ===============================================================


import anthropic

class KMAnalyzer:
    """Small wrapper around the Anthropic Messages API used by this notebook.

    It sends plain-text or image+text questions to the model and offers two
    prompt helpers: one that classifies a question as a survival-probability
    lookup and one that extracts the lookup parameters as a dict.
    """

    # Model and response budget shared by every request this wrapper sends.
    MODEL = "claude-3-opus-20240229"
    MAX_TOKENS = 1024

    def __init__(self, api_key: str):
        """Create the API client.

        Args:
            api_key: Anthropic API key. Supply it from the environment or a
                secrets manager; never write it into the notebook source.
        """
        # Use the key the caller passed in; no credential is embedded here.
        self.client = anthropic.Anthropic(api_key=api_key)

    def _send(self, content) -> str:
        """Send one user message and return the text of the first reply block."""
        response = self.client.messages.create(
            model=self.MODEL,
            max_tokens=self.MAX_TOKENS,
            messages=[{"role": "user", "content": content}],
        )
        return response.content[0].text

    @staticmethod
    def _first_json_object(text: str):
        """Return the first JSON object embedded in ``text``, or None.

        Decoding starts at each ``{`` in turn, so nested objects are handled
        and surrounding prose before or after the object is ignored.
        """
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                parsed, _end = decoder.raw_decode(text, start)
                return parsed
            except json.JSONDecodeError:
                # This brace did not open valid JSON; try the next one.
                start = text.find("{", start + 1)
        return None

    def ask_with_question_only(self, question: str) -> str:
        """Ask a text-only question.

        Returns:
            The model's reply text, or the sentinel string
            ``"LLM call failed."`` if the request raised (the error is printed).
        """
        try:
            return self._send(question)
        except Exception as e:
            # Best-effort by design: the notebook UI keeps running on failure.
            print(f"[ERROR] LLM request failed: {e}")
            return "LLM call failed."

    def is_survival_lookup_question(self, question: str) -> bool:
        """Return True when the model's reply to a Yes/No prompt starts with "yes"."""
        prompt = (
            f"Is the following question asking for a survival probability lookup? "
            f"Answer with 'Yes' or 'No' only.\n\nQuestion: {question}"
        )
        response = self.ask_with_question_only(prompt)
        return response.strip().lower().startswith("yes")

    def extract_survival_params(self, question: str):
        """Ask the model for the lookup parameters of a survival question.

        Returns:
            The first JSON object found in the reply (the prompt requests the
            keys ``time_months`` and ``curve_name``), or None when the reply
            contains no JSON object or an error occurs.
        """
        prompt = (
            "From this question, extract the survival time (in months) and the treatment arm. "
            "Respond ONLY with a JSON object like:\n"
            '{"time_months": 12, "curve_name": "palbociclib"}\n\n'
            f"Question: {question}"
        )

        try:
            response = self.ask_with_question_only(prompt)
            print("DEBUG - LLM returned:", response)

            parsed = self._first_json_object(response)
            if parsed is None:
                print("❌ No valid JSON found in LLM response.")
                return None

            print("DEBUG - Parsed dict:", parsed)
            return parsed
        except Exception as e:
            print("Failed to extract survival parameters:", e)
            return None

    # The annotation is quoted so it is not evaluated when the class is defined.
    def ask_with_image_and_question(self, question: str, image: "Image.Image") -> str:
        """Ask a question about an image.

        Args:
            question: Text prompt sent alongside the image.
            image: PIL image; it is re-encoded as PNG and sent base64-encoded.

        Returns:
            The model's reply text, or the sentinel string
            ``"LLM image analysis failed."`` if the request raised.
        """
        # Convert PIL image to base64-encoded PNG bytes.
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_b64 = base64.b64encode(buffered.getvalue()).decode()

        content = [
            {"type": "text", "text": question},
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": img_b64,
                },
            },
        ]

        try:
            return self._send(content)
        except Exception as e:
            print("❌ LLM image+text failed:", e)
            return "LLM image analysis failed."

In [4]:
# Never paste the key into the notebook: read it from the environment, or
# prompt for it without echo when the variable is not set.
# NOTE(review): the key that used to be written on this line has been exposed
# wherever this notebook was shared and should be revoked and replaced.
import os
from getpass import getpass

analyzer = KMAnalyzer(
    api_key=os.environ.get("ANTHROPIC_API_KEY") or getpass("Anthropic API key: ")
)


In [24]:
# ===============================================================
# 📤 Upload CSV or Excel File for Survival Probability Lookup
# ===============================================================

import ipywidgets as widgets
import pandas as pd
import io
from IPython.display import display

# Single-file picker restricted to CSV / Excel extensions, shown under a label
data_uploader = widgets.FileUpload(multiple=False, accept=".csv,.xls,.xlsx")
display(
    widgets.Label(value="Upload survival data (CSV or Excel):"),
    data_uploader,
)

# Function to load the file into a DataFrame
def load_survival_data(upload_widget):
    """Read the file held by an upload widget into a DataFrame.

    Args:
        upload_widget: Object whose ``value`` is either a tuple of file dicts
            or a mapping whose values are file dicts. Only the first file is
            read.

    Returns:
        A DataFrame, or None when ``value`` is empty.

    Raises:
        ValueError: If the file name does not end in .csv, .xls or .xlsx.
    """
    uploads = upload_widget.value
    if not uploads:
        return None

    # Tuple of file dicts, or a mapping of file dicts: take the first entry.
    if isinstance(uploads, tuple):
        entry = uploads[0]
    else:
        entry = list(uploads.values())[0]

    buffer = io.BytesIO(entry['content'])

    # The name is either top-level or nested under 'metadata'.
    name = entry.get('name') or entry.get('metadata', {}).get('name', '')
    name = name.lower()

    if name.endswith(".csv"):
        return pd.read_csv(buffer)
    if name.endswith((".xls", ".xlsx")):
        # Excel sheets are read with the second row as the header row.
        return pd.read_excel(buffer, header=1)
    raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")



Label(value='Upload survival data (CSV or Excel):')

FileUpload(value=(), accept='.csv,.xls,.xlsx', description='Upload')

In [17]:
# ===============================================================
# 🧪 Question-Only LLM Interaction (Claude Version)
# ===============================================================

import ipywidgets as widgets
from IPython.display import display

# Output pane the callback prints into
output_area = widgets.Output()

# Button to trigger LLM
ask_button = widgets.Button(description="Ask LLM")

# Multi-line box for the question text
question_input = widgets.Textarea(
    layout=widgets.Layout(height="50px", width="80%"),
    description="Question:",
    placeholder="Ask a question (e.g. What is PFS at 12 months for palbociclib?)",
)

# Callback logic
def on_question_submit(b):
    """Button callback: answer the typed question, using uploaded data when it is a survival lookup.

    Flow:
      1. Ask the LLM whether the question is a survival-probability lookup;
         if not, print the LLM's free-text answer.
      2. Otherwise extract ``time_months`` and ``curve_name`` via the LLM.
      3. Load the uploaded table, normalise it, and print the survival
         probability recorded at the latest time at or before the requested
         time for the matching curve.

    Args:
        b: The clicked button (unused).
    """
    import re  # not imported at module level in this notebook

    def _normalize_curve(text):
        """Lower-case, trim, and collapse whitespace around '+' in a curve name."""
        return re.sub(r'\s*\+\s*', '+', text.strip().lower())

    with output_area:
        output_area.clear_output()
        question = question_input.value.strip()

        if not question:
            print("⚠️ Please enter a question.")
            return

        print(f"🧠 LLM analyzing question: \"{question}\"\n")

        if not analyzer.is_survival_lookup_question(question):
            print("💬 This is a general question. LLM says:\n")
            print(analyzer.ask_with_question_only(question))
            return

        print("✅ This is a survival probability question.")
        params = analyzer.extract_survival_params(question)
        print("DEBUG - Extracted:", params)

        if not params:
            print("❌ LLM could not extract parameters. Try rephrasing.")
            return

        try:
            requested_months = float(params.get('time_months'))
            curve = _normalize_curve(str(params.get('curve_name', '')))
            print(f"🔍 Extracted parameters:\n- Time (months): {requested_months}\n- Curve: {curve}")
        except Exception as e:
            print("❌ Failed to access extracted parameters:", e)
            return

        try:
            df = load_survival_data(data_uploader)
        except Exception as e:
            # e.g. unsupported extension or an unreadable file
            print("❌ Could not read uploaded file:", e)
            return
        if df is None:
            print("⚠️ No survival data uploaded.")
            return

        try:
            df.columns = [str(c).strip() for c in df.columns]  # Strip whitespace
            df = df.rename(columns={
                "Time": "time_months",
                "Group": "curve_name",
                "Survival Prob": "survival_prob"
            })

            # Report a readable message instead of a bare KeyError.
            missing = [
                col for col in ("time_months", "curve_name", "survival_prob")
                if col not in df.columns
            ]
            if missing:
                print(f"❌ Uploaded data is missing required column(s): {missing}. Found: {list(df.columns)}")
                return

            df['time_months'] = pd.to_numeric(df['time_months'], errors='coerce')
            df['survival_prob'] = pd.to_numeric(df['survival_prob'], errors='coerce')
            # Same normalisation as the requested curve, plus non-breaking spaces.
            df['curve_name'] = (
                df['curve_name']
                .astype(str)
                .str.replace(u'\xa0', ' ', regex=False)
                .str.strip()
                .str.lower()
                .str.replace(r'\s*\+\s*', '+', regex=True)
            )

            curve_df = df[df['curve_name'] == curve]
            if curve_df.empty:
                print(f"❌ No matching curve group found for '{curve}'")
                return

            # Rows whose time or probability did not parse as a number carry no
            # usable value; skip them so the answer is never reported as "nan".
            usable = curve_df.dropna(subset=['time_months', 'survival_prob'])
            valid_times = usable[usable['time_months'] <= requested_months]

            if valid_times.empty:
                print(f"❌ No data available at or before {requested_months} months for '{curve}'")
                return

            closest_row = valid_times.sort_values('time_months', ascending=False).iloc[0]
            closest_time = closest_row['time_months']
            prob = closest_row['survival_prob']

            print(f"🎯 Survival probability: {prob}% at closest available time {closest_time} months for '{curve}'")

        except Exception as e:
            print("❌ Data lookup failed:", e)




In [25]:
# Wire the click handler to the button
ask_button.on_click(callback=on_question_submit)

# Render the question box, the button and the output pane, in that order
display(
    question_input,
    ask_button,
    output_area,
)

# Example question: What is PFS at 12 months for Palbociclib+Letrozole?

Textarea(value='Based on the PALOMA 1 trial efficacy results, what was the overall suvival probability at arou…

Button(description='Ask LLM', style=ButtonStyle())

Output(outputs=({'name': 'stdout', 'text': '🧠 LLM analyzing question: "Based on the PALOMA 1 trial efficacy re…

# IMAGE and QUESTION ANALYSIS

In [29]:
import io
from PIL import Image
from IPython.display import display
import ipywidgets as widgets

# Single-file picker that accepts any image type, shown under a label
image_uploader = widgets.FileUpload(multiple=False, accept='image/*')

display(
    widgets.Label(value="Upload Kaplan–Meier plot or image page:"),
    image_uploader,
)

# Helper to get and preview uploaded image
def get_uploaded_image():
    """Return the first uploaded image converted to RGB, or None if nothing is uploaded.

    Reads the module-level ``image_uploader`` widget, whose ``value`` is either
    a tuple of file dicts or a mapping whose values are file dicts.
    """
    uploads = image_uploader.value
    if not uploads:
        return None

    # Tuple of file dicts, or a mapping of file dicts: take the first entry.
    if isinstance(uploads, tuple):
        entry = uploads[0]
    else:
        entry = list(uploads.values())[0]

    raw_bytes = io.BytesIO(entry['content'])
    return Image.open(raw_bytes).convert("RGB")




Label(value='Upload Kaplan–Meier plot or image page:')

FileUpload(value=(), accept='image/*', description='Upload')

In [31]:
# Output pane the image callback prints into
img_output_area = widgets.Output()

# Button that triggers the image analysis
submit_button = widgets.Button(description="Analyze Image with LLM")

# Text area for image+question
image_question_input = widgets.Textarea(
    layout=widgets.Layout(height="80px", width="80%"),
    description="Question:",
    placeholder="Ask a question about the uploaded image (e.g., What is the median survival for letrozole?)",
)

def on_image_question_submit(b):
    """Button callback: send the uploaded image and the typed question to the LLM.

    Prints a warning and returns early when the question box is empty or no
    image has been uploaded; otherwise shows the image, appends formatting
    guidance to the question, and prints the LLM's answer.

    Args:
        b: The clicked button (unused).
    """
    with img_output_area:
        img_output_area.clear_output()
        raw_question = image_question_input.value.strip()

        # Validate the user's own text: the prompt built below always carries
        # the formatting guidance, so checking it for emptiness never fires.
        if not raw_question:
            print("⚠️ Please enter a question.")
            return

        image = get_uploaded_image()
        # get_uploaded_image() signals "nothing uploaded" with None.
        if image is None:
            print("⚠️ Please upload an image.")
            return

        # Add formatting guidance to the question
        question = (
            f"{raw_question}\n\n"
            "👉 Begin your answer with **Yes** or **No**, followed by a clear and bolded summary statement.\n"
            "👉 Then provide a short explanation comparing the two treatment arms based on the visual evidence.\n"
            "👉 Format your response clearly for display."
        )

        display(image)
        print("🧠 Analyzing image + question")
        response = analyzer.ask_with_image_and_question(question, image)
        print("📋 Answer:\n", response)




In [33]:
# Wire the click handler, then render the question box, button and output pane
submit_button.on_click(callback=on_image_question_submit)
display(
    image_question_input,
    submit_button,
    img_output_area,
)

Textarea(value='Based on the PALOMA 1 trial efficacy results, what was the overall suvival probability at arou…

Button(description='Analyze Image with LLM', style=ButtonStyle())

Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<PIL.Image.Image image mode=RGB size=1…

In [None]:
#Based on the PALOMA 1 trial efficacy results, did Palbociclib + Letrozole exhibit a greater or lower overall Progression Free Survival than the Letrozole only arm?