In [None]:
import os
import anthropic
import json

# Initialize the client (reads ANTHROPIC_API_KEY from environment)
# Smoke test: ask the model for one synthetic event and confirm the reply is a
# JSON object carrying the three expected keys. Every failure mode prints a
# message instead of raising, so the cell always runs to completion.
try:
    client = anthropic.Anthropic()

    prompt = """
    Generate a single synthetic WIMP event as a JSON object.
    The event should have keys: 's1_photons', 's2_charge', and 'recoil_energy_keV'.
    Ensure the values are physically plausible for a low-energy nuclear recoil in a liquid xenon detector.
    For example, s1 should be small (typically 5-50 photons for low-energy nuclear recoils), 
    and s2 should be relatively low for that s1 (roughly 100-2000 electrons).
    Recoil energy should typically be in the range of 1-50 keV for WIMP searches.
    
    CRITICAL: Respond with ONLY a valid JSON object. No markdown, no code blocks, no extra text.
    Format: {"s1_photons": <number>, "s2_charge": <number>, "recoil_energy_keV": <number>}
    """

    message = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=256,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    # Extract response text. An empty content list is treated as an empty reply
    # so it is reported as a JSON parse problem below rather than surfacing as
    # an IndexError under the generic "check your API key" handler.
    response_text = message.content[0].text.strip() if message.content else ""
    print("Raw Response from Claude:")
    print(response_text)

    # Clean up potential markdown formatting (just in case)
    if response_text.startswith("```"):
        # Remove markdown code blocks if present
        response_text = response_text.replace("```json", "").replace("```", "").strip()

    # Attempt to parse the JSON
    try:
        event_data = json.loads(response_text)
    except json.JSONDecodeError as je:
        print(f"\n⚠️ JSON Parse Error: {je}")
        print("Response may contain extra text or formatting.")
    else:
        print("\nSuccessfully parsed JSON:")
        print(json.dumps(event_data, indent=2))

        # Validate the expected keys are present
        required_keys = ['s1_photons', 's2_charge', 'recoil_energy_keV']
        if not isinstance(event_data, dict):
            # Valid JSON that is not an object (e.g. a list or a bare number)
            # has no keys to inspect; report it as a format problem.
            print(f"\n⚠️ Warning: Expected a JSON object. Got: {type(event_data).__name__}")
        elif all(key in event_data for key in required_keys):
            print("\n✅ API connection and JSON output confirmed.")
            print(f"   S1: {event_data['s1_photons']} photons")
            print(f"   S2: {event_data['s2_charge']} electrons")
            print(f"   Energy: {event_data['recoil_energy_keV']} keV")
        else:
            print(f"\n⚠️ Warning: Missing expected keys. Got: {list(event_data.keys())}")

except anthropic.APIError as api_err:
    print(f"❌ API Error: {api_err}")
except Exception as e:
    print(f"❌ An error occurred: {e}")
    print("Please check your API key and environment setup.")

In [None]:
# src/simulate.py

import pandas as pd
import numpy as np
import os

def generate_correlated_events(n_events, mean_s1, mean_s2, s1_std, s2_std, correlation, rng=None):
    """
    Generates correlated S1 and S2 events using a multivariate normal distribution.

    Args:
        n_events: Number of (S1, S2) pairs to draw.
        mean_s1: Mean of the S1 component.
        mean_s2: Mean of the S2 component.
        s1_std: Standard deviation of the S1 component.
        s2_std: Standard deviation of the S2 component.
        correlation: Correlation coefficient between S1 and S2; it is used to
            build the off-diagonal covariance term.
        rng: Optional random source exposing ``multivariate_normal(mean, cov, size)``
            (for example ``np.random.RandomState(seed)`` or
            ``np.random.default_rng(seed)``). When omitted, the global
            ``np.random`` state is used, exactly as before.

    Returns:
        Array of shape ``(n_events, 2)`` holding S1 in column 0 and S2 in
        column 1, with negative draws clipped to zero.
    """
    mean = [mean_s1, mean_s2]
    cov_s1_s2 = correlation * s1_std * s2_std
    cov_matrix = [[s1_std**2, cov_s1_s2], [cov_s1_s2, s2_std**2]]

    # Fall back to the module-level sampler so existing callers that rely on
    # np.random.seed(...) keep getting the same stream.
    sampler = np.random if rng is None else rng
    events = sampler.multivariate_normal(mean, cov_matrix, n_events)
    events[events < 0] = 0  # Enforce physical constraint (no negative signals)
    return events

def _simulate_recoils(label, energies, s1_per_kev, s2_per_kev, noise_scale, correlation):
    """Draws one correlated (S1, S2) pair per recoil energy and returns the event records."""
    records = []
    for energy in energies:
        mean_s1 = energy * s1_per_kev
        mean_s2 = energy * s2_per_kev
        s1_std = np.sqrt(mean_s1) * noise_scale
        s2_std = np.sqrt(mean_s2) * noise_scale

        s1, s2 = generate_correlated_events(1, mean_s1, mean_s2, s1_std, s2_std, correlation)[0]

        records.append({
            'true_label': label,
            'recoil_energy_keV': energy,
            's1_photons': s1,
            's2_charge': s2
        })
    return records


def generate_dataset(num_events=500, signal_fraction=0.2, seed=None):
    """
    Generates a full dataset of mixed Nuclear Recoil (NR) and Electronic Recoil (ER) events.

    Args:
        num_events: Total number of events to simulate (must be >= 0).
        signal_fraction: Fraction of events generated as Nuclear Recoil, in
            [0, 1]; the remainder are Electronic Recoil.
        seed: Optional seed. When given, the global ``np.random`` state is
            seeded for the duration of the call and restored afterwards, so
            the result is reproducible without disturbing the caller's random
            stream. When omitted, the current global state is used as before.

    Returns:
        A shuffled DataFrame with columns ``true_label``, ``recoil_energy_keV``,
        ``s1_photons``, ``s2_charge``, ``event_id`` and ``log10_s2_s1``.

    Raises:
        ValueError: If ``num_events`` is negative or ``signal_fraction`` is
            outside [0, 1].
    """
    if num_events < 0:
        raise ValueError(f"num_events must be non-negative, got {num_events}")
    if not 0.0 <= signal_fraction <= 1.0:
        raise ValueError(f"signal_fraction must be within [0, 1], got {signal_fraction}")

    n_signal = int(num_events * signal_fraction)
    n_background = num_events - n_signal

    saved_state = None
    if seed is not None:
        saved_state = np.random.get_state()
        np.random.seed(seed)

    try:
        # --- Generate Nuclear Recoil (NR) events ---
        nr_energies = np.random.exponential(scale=5.0, size=n_signal) + 2.0
        events_list = _simulate_recoils('Nuclear Recoil', nr_energies, 2.0, 50.0, 0.5, -0.6)

        # --- Generate Electronic Recoil (ER) events ---
        er_energies = np.random.uniform(low=2.0, high=50.0, size=n_background)
        events_list += _simulate_recoils('Electronic Recoil', er_energies, 1.5, 200.0, 0.7, -0.4)

        # --- Finalize DataFrame ---
        # Explicit columns keep the schema intact even when no events were
        # generated; the float cast gives an empty frame numeric columns.
        df = pd.DataFrame(
            events_list,
            columns=['true_label', 'recoil_energy_keV', 's1_photons', 's2_charge']
        )
        df = df.astype({'recoil_energy_keV': float, 's1_photons': float, 's2_charge': float})
        if not df.empty:
            # With no random_state, sample() draws from the global np.random state.
            df = df.sample(frac=1)
        df = df.reset_index(drop=True)
    finally:
        if saved_state is not None:
            np.random.set_state(saved_state)

    df['event_id'] = df.index

    # epsilon keeps the ratio finite when a signal was clipped to zero.
    epsilon = 1e-6
    df['log10_s2_s1'] = np.log10((df['s2_charge'] + epsilon) / (df['s1_photons'] + epsilon))

    return df

if __name__ == '__main__':
    # Entry point: simulate 500 events (20% signal) and write them to
    # data/dataset.csv, creating the output directory if needed.
    print("Generating simulated dark matter detector dataset...")

    output_dir = 'data'
    output_path = os.path.join(output_dir, 'dataset.csv')
    os.makedirs(output_dir, exist_ok=True)

    dataset = generate_dataset(num_events=500, signal_fraction=0.2)
    dataset.to_csv(output_path, index=False)

    # Report what was written, followed by a preview of the first rows.
    print(f"Dataset with {len(dataset)} events saved to {output_path}")
    print("\nDataset Head:")
    print(dataset.head())


In [None]:
import os
import pandas as pd
import anthropic
import time
from tqdm import tqdm

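# NOTE: classify_event(row, client) is not defined in this cell and must be
# defined before main1() is called. main1() collects each truthy return value
# and later merges the results on 'event_id', so a minimal version looks like:
# def classify_event(row, client):
#     # Dummy implementation for context
#     return {'event_id': row['event_id'], 'classification': 'some_category'}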

def main1():
    """
    Main function to load data, classify all events, and save the results.

    Reads 'data/dataset.csv', calls ``classify_event(row, client)`` for every
    row, merges the returned records back onto the dataset by ``event_id`` and
    writes 'data/classified_dataset.csv'.

    Returns:
        None. Problems (missing input, missing classifier, client
        initialisation failure, no usable results) are reported with a printed
        message and an early return.
    """
    # Load the simulated dataset
    input_path = 'data/dataset.csv'
    if not os.path.exists(input_path):
        print(f"Error: Input file not found at {input_path}. Please run src/simulate.py first.")
        return

    # Fail fast when the classifier is missing. Otherwise the per-row handler
    # below swallows the same NameError for every event, sleeping between each.
    classifier = globals().get('classify_event')
    if not callable(classifier):
        print("Error: classify_event is not defined. Define classify_event(row, client) before running main1().")
        return

    df = pd.read_csv(input_path)

    # Initialize the Anthropic client
    try:
        client = anthropic.Anthropic()
    except Exception as e:
        print(f"Error initializing Anthropic client: {e}")
        print("Please ensure your ANTHROPIC_API_KEY environment variable is set correctly.")
        return

    print(f"Starting classification for {len(df)} events...")

    results = []

    # Using tqdm for progress bar
    for index, row in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            result = classifier(row, client)
            if result:
                results.append(result)
        except Exception as e:
            # Best effort: one failed event must not abort the whole run.
            print(f"Error classifying event {index}: {e}")
        time.sleep(0.5)  # small delay to respect API rate limits

    if not results:
        print("No events were successfully classified. Exiting.")
        return

    # Convert the list of results to DataFrame
    results_df = pd.DataFrame(results)
    if 'event_id' not in results_df.columns:
        # Without the join key the results cannot be matched back to events.
        print("Error: classification results do not contain an 'event_id' column. Exiting.")
        return

    # Ensure event_id types match for merging
    df['event_id'] = df['event_id'].astype(int)
    results_df['event_id'] = results_df['event_id'].astype(int)

    # Merge classification results into the main dataset; the left join keeps
    # events whose classification failed (their result columns stay empty).
    classified_df = pd.merge(df, results_df, on='event_id', how='left')

    # Save the enriched dataset
    output_path = 'data/classified_dataset.csv'
    classified_df.to_csv(output_path, index=False)

    print(f"\n✅ Classification complete. Enriched dataset saved to {output_path}")
    print("\nClassified Dataset Head:")
    print(classified_df.head())

# Entry point: run the classification pipeline when this code is executed
# directly (as a script, or in a notebook cell where __name__ is "__main__").
if __name__ == "__main__":
    main1()

Error classifying event 113: name 'classify_event' is not defined


 23%|██▎       | 114/500 [00:57<03:13,  1.99it/s]

Error classifying event 114: name 'classify_event' is not defined


 23%|██▎       | 115/500 [00:57<03:13,  1.99it/s]

Error classifying event 115: name 'classify_event' is not defined


 23%|██▎       | 116/500 [00:58<03:12,  1.99it/s]

Error classifying event 116: name 'classify_event' is not defined


 23%|██▎       | 117/500 [00:58<03:12,  1.99it/s]

Error classifying event 117: name 'classify_event' is not defined


 24%|██▎       | 118/500 [00:59<03:11,  1.99it/s]

Error classifying event 118: name 'classify_event' is not defined


 24%|██▍       | 119/500 [00:59<03:11,  1.99it/s]

Error classifying event 119: name 'classify_event' is not defined


 24%|██▍       | 120/500 [01:00<03:10,  1.99it/s]

Error classifying event 120: name 'classify_event' is not defined


 24%|██▍       | 121/500 [01:00<03:10,  1.99it/s]

Error classifying event 121: name 'classify_event' is not defined


 24%|██▍       | 122/500 [01:01<03:09,  1.99it/s]

Error classifying event 122: name 'classify_event' is not defined


 25%|██▍       | 123/500 [01:01<03:09,  1.99it/s]

Error classifying event 123: name 'classify_event' is not defined


 25%|██▍       | 124/500 [01:02<03:08,  1.99it/s]

Error classifying event 124: name 'classify_event' is not defined


 25%|██▌       | 125/500 [01:02<03:08,  1.99it/s]

Error classifying event 125: name 'classify_event' is not defined


 25%|██▌       | 126/500 [01:03<03:07,  1.99it/s]

Error classifying event 126: name 'classify_event' is not defined


 25%|██▌       | 127/500 [01:03<03:07,  1.99it/s]

Error classifying event 127: name 'classify_event' is not defined


 26%|██▌       | 128/500 [01:04<03:06,  1.99it/s]

Error classifying event 128: name 'classify_event' is not defined


 26%|██▌       | 129/500 [01:04<03:06,  1.99it/s]

Error classifying event 129: name 'classify_event' is not defined


 26%|██▌       | 130/500 [01:05<03:05,  1.99it/s]

Error classifying event 130: name 'classify_event' is not defined


 26%|██▌       | 131/500 [01:05<03:05,  1.99it/s]

Error classifying event 131: name 'classify_event' is not defined


 26%|██▋       | 132/500 [01:06<03:04,  1.99it/s]

Error classifying event 132: name 'classify_event' is not defined


 27%|██▋       | 133/500 [01:06<03:04,  1.99it/s]

Error classifying event 133: name 'classify_event' is not defined


 27%|██▋       | 134/500 [01:07<03:03,  1.99it/s]

Error classifying event 134: name 'classify_event' is not defined


 27%|██▋       | 135/500 [01:07<03:03,  1.99it/s]

Error classifying event 135: name 'classify_event' is not defined


 27%|██▋       | 136/500 [01:08<03:02,  1.99it/s]

Error classifying event 136: name 'classify_event' is not defined


 27%|██▋       | 137/500 [01:08<03:02,  1.99it/s]

Error classifying event 137: name 'classify_event' is not defined


 28%|██▊       | 138/500 [01:09<03:01,  1.99it/s]

Error classifying event 138: name 'classify_event' is not defined


 28%|██▊       | 139/500 [01:09<03:00,  1.99it/s]

Error classifying event 139: name 'classify_event' is not defined


 28%|██▊       | 140/500 [01:10<03:00,  1.99it/s]

Error classifying event 140: name 'classify_event' is not defined


 28%|██▊       | 141/500 [01:10<02:59,  1.99it/s]

Error classifying event 141: name 'classify_event' is not defined


 28%|██▊       | 142/500 [01:11<02:59,  1.99it/s]

Error classifying event 142: name 'classify_event' is not defined


 29%|██▊       | 143/500 [01:11<02:59,  1.99it/s]

Error classifying event 143: name 'classify_event' is not defined


 29%|██▉       | 144/500 [01:12<02:58,  1.99it/s]

Error classifying event 144: name 'classify_event' is not defined


 29%|██▉       | 145/500 [01:12<02:58,  1.99it/s]

Error classifying event 145: name 'classify_event' is not defined


 29%|██▉       | 146/500 [01:13<02:57,  1.99it/s]

Error classifying event 146: name 'classify_event' is not defined


 29%|██▉       | 147/500 [01:13<02:57,  1.99it/s]

Error classifying event 147: name 'classify_event' is not defined


 30%|██▉       | 148/500 [01:14<02:56,  1.99it/s]

Error classifying event 148: name 'classify_event' is not defined


 30%|██▉       | 149/500 [01:14<02:56,  1.99it/s]

Error classifying event 149: name 'classify_event' is not defined


 30%|███       | 150/500 [01:15<02:55,  1.99it/s]

Error classifying event 150: name 'classify_event' is not defined


 30%|███       | 151/500 [01:15<02:55,  1.99it/s]

Error classifying event 151: name 'classify_event' is not defined


 30%|███       | 152/500 [01:16<02:54,  1.99it/s]

Error classifying event 152: name 'classify_event' is not defined


 31%|███       | 153/500 [01:16<02:54,  1.99it/s]

Error classifying event 153: name 'classify_event' is not defined


 31%|███       | 154/500 [01:17<02:53,  1.99it/s]

Error classifying event 154: name 'classify_event' is not defined


 31%|███       | 155/500 [01:17<02:53,  1.99it/s]

Error classifying event 155: name 'classify_event' is not defined


 31%|███       | 156/500 [01:18<02:52,  1.99it/s]

Error classifying event 156: name 'classify_event' is not defined


 31%|███▏      | 157/500 [01:18<02:52,  1.99it/s]

Error classifying event 157: name 'classify_event' is not defined


 32%|███▏      | 158/500 [01:19<02:51,  1.99it/s]

Error classifying event 158: name 'classify_event' is not defined


 32%|███▏      | 159/500 [01:19<02:51,  1.99it/s]

Error classifying event 159: name 'classify_event' is not defined


 32%|███▏      | 160/500 [01:20<02:50,  1.99it/s]

Error classifying event 160: name 'classify_event' is not defined


 32%|███▏      | 161/500 [01:20<02:50,  1.99it/s]

Error classifying event 161: name 'classify_event' is not defined


 32%|███▏      | 162/500 [01:21<02:49,  1.99it/s]

Error classifying event 162: name 'classify_event' is not defined


 33%|███▎      | 163/500 [01:21<02:49,  1.99it/s]

Error classifying event 163: name 'classify_event' is not defined


 33%|███▎      | 164/500 [01:22<02:48,  1.99it/s]

Error classifying event 164: name 'classify_event' is not defined


 33%|███▎      | 165/500 [01:22<02:48,  1.99it/s]

Error classifying event 165: name 'classify_event' is not defined


 33%|███▎      | 166/500 [01:23<02:47,  1.99it/s]

Error classifying event 166: name 'classify_event' is not defined


 33%|███▎      | 167/500 [01:23<02:47,  1.99it/s]

Error classifying event 167: name 'classify_event' is not defined


 34%|███▎      | 168/500 [01:24<02:46,  1.99it/s]

Error classifying event 168: name 'classify_event' is not defined


 34%|███▍      | 169/500 [01:24<02:46,  1.99it/s]

Error classifying event 169: name 'classify_event' is not defined


 34%|███▍      | 170/500 [01:25<02:45,  1.99it/s]

Error classifying event 170: name 'classify_event' is not defined


 34%|███▍      | 171/500 [01:25<02:44,  1.99it/s]

Error classifying event 171: name 'classify_event' is not defined


 34%|███▍      | 172/500 [01:26<02:44,  1.99it/s]

Error classifying event 172: name 'classify_event' is not defined


 35%|███▍      | 173/500 [01:26<02:44,  1.99it/s]

Error classifying event 173: name 'classify_event' is not defined


 35%|███▍      | 174/500 [01:27<02:43,  1.99it/s]

Error classifying event 174: name 'classify_event' is not defined


 35%|███▌      | 175/500 [01:27<02:42,  1.99it/s]

Error classifying event 175: name 'classify_event' is not defined


 35%|███▌      | 176/500 [01:28<02:42,  1.99it/s]

Error classifying event 176: name 'classify_event' is not defined


 35%|███▌      | 177/500 [01:28<02:42,  1.99it/s]

Error classifying event 177: name 'classify_event' is not defined


 36%|███▌      | 178/500 [01:29<02:41,  1.99it/s]

Error classifying event 178: name 'classify_event' is not defined


 36%|███▌      | 179/500 [01:29<02:41,  1.99it/s]

Error classifying event 179: name 'classify_event' is not defined


 36%|███▌      | 180/500 [01:30<02:40,  1.99it/s]

Error classifying event 180: name 'classify_event' is not defined


 36%|███▌      | 181/500 [01:30<02:40,  1.99it/s]

Error classifying event 181: name 'classify_event' is not defined


 36%|███▋      | 182/500 [01:31<02:39,  1.99it/s]

Error classifying event 182: name 'classify_event' is not defined


 37%|███▋      | 183/500 [01:31<02:39,  1.99it/s]

Error classifying event 183: name 'classify_event' is not defined


 37%|███▋      | 184/500 [01:32<02:38,  1.99it/s]

Error classifying event 184: name 'classify_event' is not defined


 37%|███▋      | 185/500 [01:32<02:38,  1.99it/s]

Error classifying event 185: name 'classify_event' is not defined


 37%|███▋      | 186/500 [01:33<02:37,  1.99it/s]

Error classifying event 186: name 'classify_event' is not defined


 37%|███▋      | 187/500 [01:33<02:37,  1.99it/s]

Error classifying event 187: name 'classify_event' is not defined


 38%|███▊      | 188/500 [01:34<02:36,  1.99it/s]

Error classifying event 188: name 'classify_event' is not defined


 38%|███▊      | 189/500 [01:34<02:36,  1.99it/s]

Error classifying event 189: name 'classify_event' is not defined


 38%|███▊      | 190/500 [01:35<02:35,  1.99it/s]

Error classifying event 190: name 'classify_event' is not defined


 38%|███▊      | 191/500 [01:35<02:34,  1.99it/s]

Error classifying event 191: name 'classify_event' is not defined


 38%|███▊      | 192/500 [01:36<02:34,  1.99it/s]

Error classifying event 192: name 'classify_event' is not defined


 39%|███▊      | 193/500 [01:36<02:34,  1.99it/s]

Error classifying event 193: name 'classify_event' is not defined


 39%|███▉      | 194/500 [01:37<02:33,  1.99it/s]

Error classifying event 194: name 'classify_event' is not defined


 39%|███▉      | 195/500 [01:37<02:33,  1.99it/s]

Error classifying event 195: name 'classify_event' is not defined


 39%|███▉      | 196/500 [01:38<02:32,  1.99it/s]

Error classifying event 196: name 'classify_event' is not defined


 39%|███▉      | 197/500 [01:38<02:32,  1.99it/s]

Error classifying event 197: name 'classify_event' is not defined


 40%|███▉      | 198/500 [01:39<02:31,  1.99it/s]

Error classifying event 198: name 'classify_event' is not defined


 40%|███▉      | 199/500 [01:39<02:31,  1.99it/s]

Error classifying event 199: name 'classify_event' is not defined


 40%|████      | 200/500 [01:40<02:30,  1.99it/s]

Error classifying event 200: name 'classify_event' is not defined


 40%|████      | 201/500 [01:40<02:30,  1.99it/s]

Error classifying event 201: name 'classify_event' is not defined


 40%|████      | 202/500 [01:41<02:29,  1.99it/s]

Error classifying event 202: name 'classify_event' is not defined


 41%|████      | 203/500 [01:41<02:29,  1.99it/s]

Error classifying event 203: name 'classify_event' is not defined


 41%|████      | 204/500 [01:42<02:28,  1.99it/s]

Error classifying event 204: name 'classify_event' is not defined


 41%|████      | 205/500 [01:42<02:28,  1.99it/s]

Error classifying event 205: name 'classify_event' is not defined


 41%|████      | 206/500 [01:43<02:27,  1.99it/s]

Error classifying event 206: name 'classify_event' is not defined


 41%|████▏     | 207/500 [01:43<02:27,  1.99it/s]

Error classifying event 207: name 'classify_event' is not defined


 42%|████▏     | 208/500 [01:44<02:26,  1.99it/s]

Error classifying event 208: name 'classify_event' is not defined


 42%|████▏     | 209/500 [01:44<02:26,  1.99it/s]

Error classifying event 209: name 'classify_event' is not defined


 42%|████▏     | 210/500 [01:45<02:25,  1.99it/s]

Error classifying event 210: name 'classify_event' is not defined


 42%|████▏     | 211/500 [01:45<02:25,  1.99it/s]

Error classifying event 211: name 'classify_event' is not defined


 42%|████▏     | 212/500 [01:46<02:24,  1.99it/s]

Error classifying event 212: name 'classify_event' is not defined


 43%|████▎     | 213/500 [01:46<02:23,  1.99it/s]

Error classifying event 213: name 'classify_event' is not defined


 43%|████▎     | 214/500 [01:47<02:23,  1.99it/s]

Error classifying event 214: name 'classify_event' is not defined


 43%|████▎     | 215/500 [01:47<02:23,  1.99it/s]

Error classifying event 215: name 'classify_event' is not defined


 43%|████▎     | 216/500 [01:48<02:22,  1.99it/s]

Error classifying event 216: name 'classify_event' is not defined


 43%|████▎     | 217/500 [01:48<02:22,  1.99it/s]

Error classifying event 217: name 'classify_event' is not defined


 44%|████▎     | 218/500 [01:49<02:21,  1.99it/s]

Error classifying event 218: name 'classify_event' is not defined


 44%|████▍     | 219/500 [01:49<02:20,  1.99it/s]

Error classifying event 219: name 'classify_event' is not defined


 44%|████▍     | 220/500 [01:50<02:20,  1.99it/s]

Error classifying event 220: name 'classify_event' is not defined


 44%|████▍     | 221/500 [01:50<02:19,  1.99it/s]

Error classifying event 221: name 'classify_event' is not defined


 44%|████▍     | 222/500 [01:51<02:19,  1.99it/s]

Error classifying event 222: name 'classify_event' is not defined


 45%|████▍     | 223/500 [01:51<02:18,  1.99it/s]

Error classifying event 223: name 'classify_event' is not defined


 45%|████▍     | 224/500 [01:52<02:18,  1.99it/s]

Error classifying event 224: name 'classify_event' is not defined


 45%|████▌     | 225/500 [01:52<02:17,  1.99it/s]

Error classifying event 225: name 'classify_event' is not defined


 45%|████▌     | 226/500 [01:53<02:17,  1.99it/s]

Error classifying event 226: name 'classify_event' is not defined


 45%|████▌     | 227/500 [01:53<02:17,  1.99it/s]

Error classifying event 227: name 'classify_event' is not defined


 46%|████▌     | 228/500 [01:54<02:16,  1.99it/s]

Error classifying event 228: name 'classify_event' is not defined


 46%|████▌     | 229/500 [01:54<02:15,  1.99it/s]

Error classifying event 229: name 'classify_event' is not defined


 46%|████▌     | 230/500 [01:55<02:15,  1.99it/s]

Error classifying event 230: name 'classify_event' is not defined


 46%|████▌     | 231/500 [01:55<02:14,  1.99it/s]

Error classifying event 231: name 'classify_event' is not defined


 46%|████▋     | 232/500 [01:56<02:14,  1.99it/s]

Error classifying event 232: name 'classify_event' is not defined


 47%|████▋     | 233/500 [01:56<02:13,  1.99it/s]

Error classifying event 233: name 'classify_event' is not defined


 47%|████▋     | 234/500 [01:57<02:13,  1.99it/s]

Error classifying event 234: name 'classify_event' is not defined


 47%|████▋     | 235/500 [01:57<02:12,  1.99it/s]

Error classifying event 235: name 'classify_event' is not defined


 47%|████▋     | 236/500 [01:58<02:12,  1.99it/s]

Error classifying event 236: name 'classify_event' is not defined


 47%|████▋     | 237/500 [01:58<02:12,  1.99it/s]

Error classifying event 237: name 'classify_event' is not defined


 48%|████▊     | 238/500 [01:59<02:11,  1.99it/s]

Error classifying event 238: name 'classify_event' is not defined


 48%|████▊     | 239/500 [01:59<02:11,  1.99it/s]

Error classifying event 239: name 'classify_event' is not defined


 48%|████▊     | 240/500 [02:00<02:10,  1.99it/s]

Error classifying event 240: name 'classify_event' is not defined


 48%|████▊     | 241/500 [02:00<02:10,  1.99it/s]

Error classifying event 241: name 'classify_event' is not defined


 48%|████▊     | 242/500 [02:01<02:09,  1.99it/s]

Error classifying event 242: name 'classify_event' is not defined


 49%|████▊     | 243/500 [02:01<02:09,  1.99it/s]

Error classifying event 243: name 'classify_event' is not defined


 49%|████▉     | 244/500 [02:02<02:08,  1.99it/s]

Error classifying event 244: name 'classify_event' is not defined


 49%|████▉     | 245/500 [02:02<02:07,  1.99it/s]

Error classifying event 245: name 'classify_event' is not defined


In [None]:
# Code snippet for generating the key scientific plot

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def create_discrimination_plot(df):
    """
    Draws the discrimination scatter plot of log10(S2/S1) against S1.

    Points are coloured by the 'classification' column and given a marker
    style per 'true_label', on a logarithmic x axis.

    Args:
        df (pd.DataFrame): The classified dataset; the columns 's1_photons',
            'log10_s2_s1', 'classification' and 'true_label' are read.

    Returns:
        matplotlib.figure.Figure: The figure object for the plot.
    """
    plt.style.use('default')
    fig, ax = plt.subplots(figsize=(10, 8))

    # Gather the scatter options in one place before handing them to seaborn.
    scatter_options = dict(
        data=df,
        x='s1_photons',
        y='log10_s2_s1',
        hue='classification',  # colour encodes the predicted class
        style='true_label',    # marker shape encodes the ground truth
        ax=ax,
        alpha=0.8,
        s=50,                  # marker size
    )
    sns.scatterplot(**scatter_options)

    # Axis scale and grid
    ax.set_xscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    # Title, axis labels and legend
    ax.set_title('AI Classification of Simulated Detector Events', fontsize=16)
    for set_label, text in (
        (ax.set_xlabel, 'S1 Signal (photons) [log scale]'),
        (ax.set_ylabel, 'log10(S2 / S1)'),
    ):
        set_label(text, fontsize=12)
    ax.legend(title='Legend')

    return fig

In [None]:
# dashboards/app.py

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# --- Plotting Function (can be in a separate utils file) ---
def create_discrimination_plot(df):
    """
    Builds the dashboard's log10(S2/S1) vs. S1 scatter plot and returns its figure.

    Points are coloured by 'classification' and styled by 'true_label'; the
    legend is anchored outside the axes on the right.
    """
    plt.style.use('default')
    fig, ax = plt.subplots(figsize=(10, 7))

    scatter_options = dict(
        data=df,
        x='s1_photons',
        y='log10_s2_s1',
        hue='classification',
        style='true_label',
        ax=ax,
        alpha=0.8,
        s=60,
    )
    sns.scatterplot(**scatter_options)

    # Axis scale and grid
    ax.set_xscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    # Title, axis labels and legend
    ax.set_title('AI Classification of Simulated Detector Events', fontsize=16, pad=20)
    for set_label, text in (
        (ax.set_xlabel, 'S1 Signal (photons) [log scale]'),
        (ax.set_ylabel, 'log10(S2 / S1)'),
    ):
        set_label(text, fontsize=12)
    ax.legend(title='Legend', bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()

    return fig

# --- Streamlit App Main Logic ---
# Page-level configuration: request Streamlit's "wide" layout for the page.
st.set_page_config(layout="wide")

# Static header: the app title followed by a one-line description.
st.title("Dark Matter Scribe")
st.markdown("An AI system for classifying and reasoning about dark matter detector events.")

@st.cache_data
def _read_dataset(filepath, mtime):
    """Cached CSV read; `mtime` is only part of the cache key so a rewritten file is reloaded."""
    return pd.read_csv(filepath)


def load_data(filepath):
    """
    Loads the dataset at `filepath`, caching the parsed DataFrame.

    The existence check runs on every call and is deliberately kept outside
    the cached function, so a "file missing" result is never cached. The
    file's modification time is part of the cache key, so regenerating the CSV
    invalidates the cached copy.

    Args:
        filepath: Path to the CSV file.

    Returns:
        The DataFrame read from `filepath`, or None when the file does not exist.
    """
    if not os.path.exists(filepath):
        return None
    return _read_dataset(filepath, os.path.getmtime(filepath))

# Load the classified data
DATA_FILE = 'data/classified_dataset.csv'
df = load_data(DATA_FILE)

if df is not None:
    # --- Main Layout ---
    # st.columns needs a spec; give the plot column twice the inspector's width.
    col1, col2 = st.columns([2, 1])

    with col1:
        st.header("Event Classification Map")
        st.pyplot(create_discrimination_plot(df))

    with col2:
        st.header("Event Inspector")

        # Dropdown to select an event
        event_ids = df['event_id'].tolist()
        selected_id = st.selectbox("Select an Event ID to inspect:", event_ids)

        if selected_id is not None:
            # Get the data for the selected event: take the first matching row
            # as a Series so the fields below can be looked up by column name.
            event_details = df[df['event_id'] == selected_id].iloc[0]

            st.subheader(f"Details for Event ID: {selected_id}")

            # Display key features
            st.metric("True Label", event_details['true_label'])
            st.metric("AI Classification", event_details['classification'])
            st.metric("AI Confidence", f"{event_details['confidence']:.2f}")

            st.markdown("---")

            # Display the AI's reasoning
            st.subheader("AI Physicist's Reasoning")
            st.info(event_details['reasoning'])

else:
    st.error(f"Could not find the dataset at '{DATA_FILE}'. Please run the simulation and classification scripts first.")