In [4]:
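# Install required packages (uncomment on first run; %pip targets the kernel's environment).
# openai is needed for the LLM calls below and openpyxl for reading .xlsx uploads.
#%pip install pandas streamlit ipywidgets pillow openai openpyxl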

## Dependencies

In [5]:
import io
import pandas as pd
from PIL import Image
import ipywidgets as widgets
from IPython.display import display

  from pandas.core import (


## File Upload

In [6]:
# Optional picture of the Kaplan–Meier plot (single file, any image type)
img_uploader = widgets.FileUpload(
    multiple=False,
    accept='image/*',
)
display(
    widgets.Label("Upload Kaplan–Meier image (optional):"),
    img_uploader,
)

# Survival table supplied as CSV or Excel (single file)
data_uploader = widgets.FileUpload(multiple=False, accept='.csv, .xls, .xlsx')
display(
    widgets.Label("Upload survival data CSV/Excel:"),
    data_uploader,
)

Label(value='Upload Kaplan–Meier image (optional):')

FileUpload(value=(), accept='image/*', description='Upload')

Label(value='Upload survival data CSV/Excel:')

FileUpload(value=(), accept='.csv, .xls, .xlsx', description='Upload')

In [7]:
# Helper to read CSV/Excel
def load_data(file_upload):
    """Read the first uploaded file of a ``FileUpload`` widget into a DataFrame.

    Works with both shapes of ``FileUpload.value``:

    * a dict ``{filename: {'metadata': {...}, 'content': bytes}}``
      (older ipywidgets releases), and
    * a tuple of ``{'name', 'type', 'content', ...}`` records
      (ipywidgets 8, which is what the recorded ``value=()`` output shows).

    Parameters
    ----------
    file_upload : object with a ``value`` attribute
        Normally an ``ipywidgets.FileUpload``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when nothing has been uploaded.
    """
    uploads = file_upload.value
    if not uploads:
        return None

    # Pick the first upload regardless of which container type the widget uses.
    if isinstance(uploads, dict):
        first = next(iter(uploads.values()))
    else:
        first = uploads[0]

    # Old layout nests name/type under 'metadata'; the new one keeps them flat.
    meta = first.get('metadata') or first
    name = str(meta.get('name', '')).lower()
    mime = meta.get('type', '')

    # 'content' may be a memoryview; bytes() normalises it for BytesIO.
    content = io.BytesIO(bytes(first['content']))

    # Trust the file extension first: browsers may report CSV files as
    # 'application/vnd.ms-excel', which is also the MIME type of real .xls files.
    if name.endswith(('.xls', '.xlsx')):
        return pd.read_excel(content)
    if name.endswith('.csv') or mime in ('text/csv', 'application/vnd.ms-excel'):
        return pd.read_csv(content)
    return pd.read_excel(content)

# Shared notebook state: both start out empty and are assigned by the
# "Ask Question" callback (on_ask) when it reads the upload widgets.
img_obj, surv_df = None, None

## Question Processing

In [8]:
# Free-text box in which the user types the question
question_input = widgets.Text(
    description='Question:',
    placeholder='E.g., "What is the PFS at 12 months for palbociclib+letrozole arm?"',
    layout=widgets.Layout(width='80%'),
)
display(question_input)

Text(value='', description='Question:', layout=Layout(width='80%'), placeholder='E.g., "What is the PFS at 12 …

In [11]:
def ask_llm(image: "Image.Image | None", question: str) -> str:
    """
    Send image and question to LLM and return its answer.

    Parameters
    ----------
    image : PIL.Image.Image or None
        Picture to attach to the request. When ``None`` (no image uploaded)
        only the text prompt is sent.
    question : str
        The user's question; embedded verbatim in the prompt.

    Returns
    -------
    str
        Text content of the first choice in the model's response.

    Notes
    -----
    Uses the ``openai.ChatCompletion`` interface, as the rest of this
    notebook does. NOTE(review): that interface belongs to the pre-1.0
    ``openai`` SDK — confirm the installed version.
    """
    # Imported locally because the notebook's dependency cell imports neither.
    import base64
    import openai

    prompt = f"Image analysis + data Q&A. Question: {question}"

    if image is None:
        # No picture available: keep the plain-text message.
        user_content = prompt
    else:
        # PNG cannot store every PIL mode (e.g. CMYK); fall back to RGB.
        if getattr(image, "mode", "RGB") not in ("1", "L", "LA", "P", "RGB", "RGBA"):
            image = image.convert("RGB")
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
        # Multi-part content: the text prompt plus the image as a data URL.
        user_content = [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{encoded}"},
            },
        ]

    response = openai.ChatCompletion.create(
        model='gpt-4o-mini',
        messages=[
            {"role": "system", "content": "You are a helpful data scientist assistant."},
            {"role": "user", "content": user_content}
        ]
    )
    return response.choices[0].message.content

In [10]:
# Button to trigger
ask_button = widgets.Button(description="Ask Question")
output_area = widgets.Output()

def on_ask(b):
    """Click handler: reload the uploads, show the image and print the LLM answer.

    Updates the notebook-level ``img_obj`` and ``surv_df`` and writes all
    output into ``output_area``.

    Parameters
    ----------
    b : the clicked button (supplied by ``on_click``; not used).
    """
    global img_obj, surv_df
    with output_area:
        output_area.clear_output()

        # Skip the API call when there is nothing to ask.
        question = question_input.value.strip()
        if not question:
            print("Please enter a question first.")
            return

        # Load data
        surv_df = load_data(data_uploader)

        # Load image. FileUpload.value is a dict keyed by filename in older
        # ipywidgets releases and a tuple of records in ipywidgets 8.
        uploads = img_uploader.value
        if uploads:
            if isinstance(uploads, dict):
                first = next(iter(uploads.values()))
            else:
                first = uploads[0]
            # 'content' may be a memoryview; bytes() normalises it.
            img_obj = Image.open(io.BytesIO(bytes(first['content'])))
            display(img_obj)
        else:
            # Do not reuse an image left over from an earlier click.
            img_obj = None

        # Ask LLM
        ans = ask_llm(img_obj, question)
        print(ans)

ask_button.on_click(on_ask)
display(ask_button, output_area)

Button(description='Ask Question', style=ButtonStyle())

Output()

## Parameter Extraction & Probability Lookup

For questions that reference a specific time and curve name, we can:

1. Extract `time_months` and `curve_name` from the question via an LLM call.

2. Look up the matching `survival_prob` in the uploaded `surv_df`.

In [None]:
def extract_params(question: str) -> dict:
    """Ask the LLM to pull the time point and curve name out of a question.

    The model is forced to call ``extract_survival_params``; its arguments
    arrive as a JSON string and are decoded here so callers get a real dict.

    Parameters
    ----------
    question : str
        The user's free-text question.

    Returns
    -------
    dict
        Normally ``{'time_months': <number>, 'curve_name': <str>}``.
        An empty dict when the arguments cannot be decoded into a JSON
        object, so callers can test for the keys.
    """
    # Imported locally because the notebook's dependency cell imports neither.
    import json
    import openai

    # Use LLM function-calling or regex fallback
    func_call = openai.ChatCompletion.create(
        model='gpt-4o-mini',
        messages=[
            {"role":"system","content":"Extract time (months) and curve name."},
            {"role":"user","content":question}
        ],
        functions=[
            {
                "name": "extract_survival_params",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "time_months": {"type": "number"},
                        "curve_name": {"type": "string"}
                    },
                    "required": ["time_months","curve_name"]
                }
            }
        ],
        function_call={"name": "extract_survival_params"}
    )
    raw_args = func_call.choices[0].message.function_call.arguments

    if isinstance(raw_args, dict):
        return raw_args
    try:
        parsed = json.loads(raw_args)
    except (TypeError, ValueError):
        # Malformed or missing arguments: report "nothing extracted".
        return {}
    return parsed if isinstance(parsed, dict) else {}

# Lookup helper
def lookup_survival(time_months: float, curve_name: str, df: pd.DataFrame) -> "float | None":
    """Return the survival probability stored for one curve at one time point.

    Parameters
    ----------
    time_months : float
        Time point to look up, compared against the ``time`` column.
    curve_name : str
        Curve to look up; matched case-insensitively and ignoring
        surrounding whitespace against the ``curve`` column.
    df : pandas.DataFrame
        Assumes DataFrame columns: ['time', 'curve', 'survival_prob'].

    Returns
    -------
    float or None
        ``survival_prob`` of the first matching row, or ``None`` if no row matches.
    """
    # Compare times with a small tolerance rather than exact float equality.
    time_match = (pd.to_numeric(df['time'], errors='coerce') - float(time_months)).abs() < 1e-9
    curve_match = df['curve'].str.strip().str.lower() == str(curve_name).strip().lower()
    row = df[time_match & curve_match]
    return float(row['survival_prob'].iloc[0]) if not row.empty else None


def report_survival(question: str, df) -> "float | None":
    """Extract the lookup parameters from ``question`` and print the result.

    Wrapping the steps in a function lets the early exits use ``return``,
    which is not allowed at the top level of a cell.

    Parameters
    ----------
    question : str
        The user's question, passed to ``extract_params``.
    df : pandas.DataFrame or None
        The uploaded survival table (``None`` if nothing was uploaded).

    Returns
    -------
    float or None
        The survival probability found, or ``None`` after printing an error.
    """
    import json

    # After loading data and before lookup, add error handling
    if df is None:
        print("Error: No survival data uploaded. Please upload a CSV or Excel file.")
        return None

    # Extract parameters
    params = extract_params(question)
    # extract_params may hand back the raw JSON argument string; decode it.
    if isinstance(params, str):
        try:
            params = json.loads(params)
        except ValueError:
            params = {}

    # Validate extracted parameters
    if not isinstance(params, dict) or 'time_months' not in params or 'curve_name' not in params:
        print("Error: Could not extract time and curve name from your question. Please rephrase.")
        return None

    # Perform lookup
    prob = lookup_survival(params['time_months'], params['curve_name'], df)

    # Handle missing lookup results
    if prob is None:
        print(f"Error: No survival probability found at {params['time_months']} months for '{params['curve_name']}'. Check your data or try a different time/curve.")
    else:
        print(f"Survival probability at {params['time_months']} months for {params['curve_name']}: {prob}%")
    return prob


prob = report_survival(question_input.value, surv_df)

In [None]:
# Example integration within on_ask
import json

if surv_df is None:
    # Nothing to look up in until a survival table has been uploaded.
    print("Error: No survival data uploaded. Please upload a CSV or Excel file.")
else:
    params = extract_params(question_input.value)
    # extract_params may hand back the raw JSON argument string; decode it.
    if isinstance(params, str):
        params = json.loads(params)
    prob = lookup_survival(params['time_months'], params['curve_name'], surv_df)
    if prob is None:
        # Avoid printing "None%" when no row matches.
        print(f"Error: No survival probability found at {params['time_months']} months for '{params['curve_name']}'. Check your data or try a different time/curve.")
    else:
        print(f"Survival probability at {params['time_months']} months for {params['curve_name']}: {prob}%")