In [None]:
import os
import anthropic
import json

# Smoke test: ask Claude for one synthetic WIMP event and confirm that the
# reply parses as a JSON object with the expected keys.
# The client reads ANTHROPIC_API_KEY from the environment.
try:
    client = anthropic.Anthropic()

    prompt = """
    Generate a single synthetic WIMP event as a JSON object.
    The event should have keys: 's1_photons', 's2_charge', and 'recoil_energy_keV'.
    Ensure the values are physically plausible for a low-energy nuclear recoil in a liquid xenon detector.
    For example, s1 should be small (typically 5-50 photons for low-energy nuclear recoils), 
    and s2 should be relatively low for that s1 (roughly 100-2000 electrons).
    Recoil energy should typically be in the range of 1-50 keV for WIMP searches.
    
    CRITICAL: Respond with ONLY a valid JSON object. No markdown, no code blocks, no extra text.
    Format: {"s1_photons": <number>, "s2_charge": <number>, "recoil_energy_keV": <number>}
    """

    message = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=256,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    # Extract the first text block. An empty or non-text reply becomes "" so it
    # is reported below as a JSON parse problem instead of falling through to
    # the generic "check your API key" handler.
    text_blocks = [block.text for block in message.content if getattr(block, "text", None)]
    response_text = text_blocks[0].strip() if text_blocks else ""
    print("Raw Response from Claude:")
    print(response_text)

    # Clean up potential markdown formatting (just in case)
    if response_text.startswith("```"):
        # Remove markdown code fences if present
        response_text = response_text.replace("```json", "").replace("```", "").strip()

    # Attempt to parse the JSON
    try:
        event_data = json.loads(response_text)
        print("\nSuccessfully parsed JSON:")
        print(json.dumps(event_data, indent=2))

        # Validate that the payload is an object carrying the expected keys.
        # Valid JSON may also be a list or a scalar, which has no .keys().
        required_keys = ['s1_photons', 's2_charge', 'recoil_energy_keV']
        if not isinstance(event_data, dict):
            print(f"\n⚠️ Warning: Expected a JSON object but got {type(event_data).__name__}.")
        elif all(key in event_data for key in required_keys):
            print("\n✅ API connection and JSON output confirmed.")
            print(f"   S1: {event_data['s1_photons']} photons")
            print(f"   S2: {event_data['s2_charge']} electrons")
            print(f"   Energy: {event_data['recoil_energy_keV']} keV")
        else:
            print(f"\n⚠️ Warning: Missing expected keys. Got: {list(event_data.keys())}")

    except json.JSONDecodeError as je:
        print(f"\n⚠️ JSON Parse Error: {je}")
        print("Response may contain extra text or formatting.")

except anthropic.APIError as api_err:
    print(f"❌ API Error: {api_err}")
except Exception as e:
    # Anything else (e.g. client construction failing without credentials).
    print(f"❌ An error occurred: {e}")
    print("Please check your API key and environment setup.")

In [None]:
# src/simulate.py

import pandas as pd
import numpy as np
import os

def generate_correlated_events(n_events, mean_s1, mean_s2, s1_std, s2_std, correlation, rng=None):
    """
    Generates correlated S1 and S2 events using a multivariate normal distribution.

    Args:
        n_events (int): Number of (S1, S2) pairs to draw.
        mean_s1 (float): Mean of the S1 component.
        mean_s2 (float): Mean of the S2 component.
        s1_std (float): Standard deviation of the S1 component.
        s2_std (float): Standard deviation of the S2 component.
        correlation (float): Correlation coefficient between S1 and S2, in [-1, 1].
        rng (numpy.random.Generator, optional): Random generator to draw from.
            When omitted, the global ``np.random`` state is used, exactly as
            before, so ``np.random.seed`` still controls the output.

    Returns:
        numpy.ndarray: Array of shape (n_events, 2) with columns (S1, S2);
        negative draws are replaced by 0.

    Raises:
        ValueError: If ``correlation`` lies outside [-1, 1] (including NaN).
    """
    # An out-of-range coefficient gives a covariance matrix that is not
    # positive semi-definite; numpy only warns and returns unusable samples.
    if not -1.0 <= correlation <= 1.0:
        raise ValueError(f"correlation must be within [-1, 1], got {correlation}")

    mean = [mean_s1, mean_s2]
    cov_s1_s2 = correlation * s1_std * s2_std
    cov_matrix = [[s1_std**2, cov_s1_s2], [cov_s1_s2, s2_std**2]]

    sampler = np.random if rng is None else rng
    events = sampler.multivariate_normal(mean, cov_matrix, n_events)
    events[events < 0] = 0  # Enforce physical constraint (no negative signals)
    return events

def _simulate_recoil_events(label, energies, s1_yield, s2_yield, resolution, correlation):
    """Draws one (S1, S2) pair per recoil energy and returns the events as row dicts."""
    rows = []
    for energy in energies:
        mean_s1 = energy * s1_yield
        mean_s2 = energy * s2_yield
        # Spread grows with the square root of the mean signal, scaled by `resolution`.
        s1_std = np.sqrt(mean_s1) * resolution
        s2_std = np.sqrt(mean_s2) * resolution

        s1, s2 = generate_correlated_events(1, mean_s1, mean_s2, s1_std, s2_std, correlation)[0]

        rows.append({
            'true_label': label,
            'recoil_energy_keV': energy,
            's1_photons': s1,
            's2_charge': s2
        })
    return rows


def generate_dataset(num_events=500, signal_fraction=0.2, seed=None):
    """
    Generates a full dataset of mixed Nuclear Recoil (NR) and Electronic Recoil (ER) events.

    Args:
        num_events (int): Total number of events to simulate.
        signal_fraction (float): Fraction of events generated as Nuclear Recoil;
            the remainder are Electronic Recoil. Must lie in [0, 1].
        seed (int, optional): When given, seeds the global ``np.random`` state
            before sampling so the dataset (including the shuffle) is
            reproducible. Note that this resets numpy's global random state.

    Returns:
        pd.DataFrame: Shuffled events with columns ``true_label``,
        ``recoil_energy_keV``, ``s1_photons``, ``s2_charge``, ``event_id`` and
        ``log10_s2_s1``.

    Raises:
        ValueError: If ``num_events`` is negative or ``signal_fraction`` is
            outside [0, 1].
    """
    if num_events < 0:
        raise ValueError(f"num_events must be non-negative, got {num_events}")
    if not 0.0 <= signal_fraction <= 1.0:
        raise ValueError(f"signal_fraction must be within [0, 1], got {signal_fraction}")
    if seed is not None:
        np.random.seed(seed)

    n_signal = int(num_events * signal_fraction)
    n_background = num_events - n_signal

    events_list = []

    # --- Generate Nuclear Recoil (NR) events ---
    # Exponentially falling spectrum shifted by 2 keV.
    nr_energies = np.random.exponential(scale=5.0, size=n_signal) + 2.0
    events_list.extend(
        _simulate_recoil_events('Nuclear Recoil', nr_energies,
                                s1_yield=2.0, s2_yield=50.0,
                                resolution=0.5, correlation=-0.6)
    )

    # --- Generate Electronic Recoil (ER) events ---
    # Flat spectrum between 2 and 50 keV.
    er_energies = np.random.uniform(low=2.0, high=50.0, size=n_background)
    events_list.extend(
        _simulate_recoil_events('Electronic Recoil', er_energies,
                                s1_yield=1.5, s2_yield=200.0,
                                resolution=0.7, correlation=-0.4)
    )

    # --- Finalize DataFrame ---
    # Naming the columns keeps the schema intact when there are no events;
    # otherwise the column lookups below raise KeyError for num_events=0.
    numeric_columns = ['recoil_energy_keV', 's1_photons', 's2_charge']
    df = pd.DataFrame(events_list, columns=['true_label'] + numeric_columns)
    df[numeric_columns] = df[numeric_columns].astype(float)

    df = df.sample(frac=1).reset_index(drop=True)
    df['event_id'] = df.index

    # epsilon guards the ratio against S1 or S2 having been clipped to 0.
    epsilon = 1e-6
    df['log10_s2_s1'] = np.log10((df['s2_charge'] + epsilon) / (df['s1_photons'] + epsilon))

    return df

if __name__ == '__main__':
    # Entry point: simulate 500 events (20% signal) and write them to data/dataset.csv.
    print("Generating simulated dark matter detector dataset...")

    target_dir = 'data'
    csv_path = os.path.join(target_dir, 'dataset.csv')
    os.makedirs(target_dir, exist_ok=True)  # create the folder on first run

    simulated = generate_dataset(num_events=500, signal_fraction=0.2)
    simulated.to_csv(csv_path, index=False)

    print(f"Dataset with {len(simulated)} events saved to {csv_path}")
    print("\nDataset Head:")
    print(simulated.head())


In [None]:
import os
import pandas as pd
import anthropic
import time
from tqdm import tqdm


# Dummy classify_event (replace this later with your real Claude logic)
def classify_event(row, client):
    """
    Placeholder classifier: labels every event with a fixed category.

    Args:
        row: Mapping-like event record (dict or pandas Series) that provides
            an 'event_id' entry.
        client: Anthropic client; not used until the real API call is enabled.

    Returns:
        dict | None: {'event_id': ..., 'classification': ...} on success, or
        None when building the result fails (the error is printed).
    """
    # Example call (commented out until you have Claude access)
    # response = client.messages.create(
    #     model="claude-3-sonnet-20240229",
    #     max_tokens=50,
    #     messages=[{"role": "user", "content": f"Classify this event: {row}"}]
    # )
    # classification = response.content[0].text.strip()
    placeholder_label = "Sample_Category"  # stand-in until the real prompt logic exists

    try:
        outcome = {'event_id': row['event_id'], 'classification': placeholder_label}
    except Exception as err:
        print(f"API error for event {row.get('event_id', 'unknown')}: {err}")
        outcome = None

    return outcome


def main(input_path=None, delay_seconds=0.5):
    """
    Load the simulated dataset, classify every event, and save the results.

    The classified table is written as ``classified_dataset.csv`` next to the
    input file.

    Args:
        input_path (str, optional): Path to the dataset CSV. When omitted, the
            ``DATASET_PATH`` environment variable is used if set, otherwise the
            original machine-specific location.
        delay_seconds (float): Pause after each classification call, used to
            stay under API rate limits. Pass 0 to disable.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    # Step 1: Resolve the dataset location
    if input_path is None:
        # NOTE: the fallback is an absolute path on the original author's
        # machine; pass input_path or set DATASET_PATH on any other system.
        input_path = os.environ.get(
            "DATASET_PATH",
            r"C:\Users\wrich\Downloads\CluadeMain\CODEFATHER_ClaudeSolvathon-1\data\dataset.csv",
        )

    if not os.path.exists(input_path):
        print(f"❌ Error: Input file not found at {input_path}")
        print("Please run src/simulate.py first to generate the dataset.")
        return

    # Step 2: Load dataset
    try:
        df = pd.read_csv(input_path)
        print(f"✅ Loaded dataset successfully: {len(df)} rows")
    except Exception as e:
        print(f"❌ Error reading dataset: {e}")
        return

    # Step 3: Initialize Anthropic client
    try:
        client = anthropic.Anthropic()
    except Exception as e:
        print(f"❌ Error initializing Anthropic client: {e}")
        print("Please ensure ANTHROPIC_API_KEY is set correctly.")
        return

    # Step 4: Classify events
    results = []
    print("🚀 Starting event classification...")
    for index, row in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            result = classify_event(row, client)
            if result:
                results.append(result)
        except Exception as e:
            print(f"⚠️ Error classifying event {index}: {e}")
        if delay_seconds > 0:
            time.sleep(delay_seconds)  # avoid API rate limits

    if not results:
        print("❌ No events were successfully classified. Exiting.")
        return

    # Step 5: Merge and save
    results_df = pd.DataFrame(results)
    # Align key dtypes so the merge matches rows instead of silently missing them.
    df['event_id'] = df['event_id'].astype(int)
    results_df['event_id'] = results_df['event_id'].astype(int)

    output_path = os.path.join(os.path.dirname(input_path), "classified_dataset.csv")
    # Left merge keeps events whose classification failed (classification is NaN).
    classified_df = pd.merge(df, results_df, on='event_id', how='left')
    classified_df.to_csv(output_path, index=False)

    unclassified = int(classified_df['classification'].isna().sum())
    if unclassified:
        print(f"⚠️ {unclassified} events have no classification in the output.")

    print(f"\n✅ Classification complete. Saved to {output_path}")
    print("\n📊 Preview of classified data:")
    print(classified_df.head())


if __name__ == "__main__":
    main()


In [None]:
# File: visualize_classification.py

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import json

def load_classified_data(file_path):
    """
    Load classified dataset from CSV, JSON, or JSON Lines.

    Args:
        file_path (str or Path): Path to the dataset. The format is chosen from
            the file extension, case-insensitively.

    Returns:
        pd.DataFrame: Loaded dataset. A ``log10_s2_s1`` column is added when the
        file does not already provide one.

    Raises:
        ValueError: If the extension is not .csv, .json, or .jsonl.
    """
    file_path = Path(file_path)
    suffix = file_path.suffix.lower()
    if suffix == ".csv":
        df = pd.read_csv(file_path)
    elif suffix == ".jsonl":
        # JSON Lines holds one record per line and needs lines=True to parse.
        df = pd.read_json(file_path, lines=True)
    elif suffix == ".json":
        df = pd.read_json(file_path)
    else:
        raise ValueError("Unsupported file type. Please use CSV or JSON.")

    # Compute log10(S2/S1) if not already present. The same epsilon as the
    # simulator keeps the ratio finite when S1 (or S2) is exactly 0.
    if 'log10_s2_s1' not in df.columns:
        epsilon = 1e-6
        df['log10_s2_s1'] = np.log10((df['s2_charge'] + epsilon) / (df['s1_photons'] + epsilon))

    return df

def create_discrimination_plot(df, save_path=None):
    """
    Draw the log10(S2/S1) versus S1 scatter plot ("money plot").

    Args:
        df (pd.DataFrame): Classified events. The plot reads the columns
            ``s1_photons`` (x), ``log10_s2_s1`` (y), ``classification``
            (marker colour) and ``true_label`` (marker shape).
        save_path (str, optional): When truthy, the figure is also written to
            this path at 300 dpi.

    Returns:
        matplotlib.figure.Figure: The figure holding the scatter plot.
    """
    # Style name as spelled by modern Matplotlib releases.
    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(10, 8))

    # Column-to-aesthetic mapping and marker appearance.
    scatter_options = {
        'x': 's1_photons',
        'y': 'log10_s2_s1',
        'hue': 'classification',
        'style': 'true_label',
        'palette': 'Set2',
        'alpha': 0.7,
        's': 60,
    }
    sns.scatterplot(data=df, ax=ax, **scatter_options)

    # Axes cosmetics: logarithmic S1 axis with grid lines on major and minor ticks.
    ax.set_xscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)
    ax.set_title('AI Classification of Simulated Detector Events', fontsize=16)
    ax.set_xlabel('S1 Signal (photons) [log scale]', fontsize=12)
    ax.set_ylabel('log10(S2 / S1)', fontsize=12)
    ax.legend(title='Classification / Ground Truth', fontsize=10)

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')

    return fig

def synthesize_summary(df):
    """
    Generates a text summary of classification results.

    Args:
        df (pd.DataFrame): Classified events with ``classification`` and
            ``true_label`` columns.

    Returns:
        str: Human-readable summary with event counts per true class and the
        fraction of events whose classification equals the true label.
    """
    total_events = len(df)
    correct = int((df['classification'] == df['true_label']).sum())
    # An empty dataset reports 0% rather than dividing by zero.
    accuracy = correct / total_events * 100 if total_events else 0.0

    # The simulator writes the long label names; the short forms are accepted
    # too so datasets labelled 'NR'/'ER' keep working.
    nr_count = int(df['true_label'].isin(['NR', 'Nuclear Recoil']).sum())
    er_count = int(df['true_label'].isin(['ER', 'Electronic Recoil']).sum())

    summary = (
        f"Dataset contains {total_events} events:\n"
        f"  - NR events: {nr_count}\n"
        f"  - ER events: {er_count}\n"
        f"AI classifier correctly labeled {correct} events ({accuracy:.2f}% accuracy).\n"
        f"The 'money plot' shows the separation between NR and ER events in log10(S2/S1) vs S1 space.\n"
        f"Well-separated clusters indicate good discrimination performance."
    )

    return summary

def main(data_file=None, plot_path="money_plot.png"):
    """
    Load the classified dataset, draw the discrimination plot, and print a summary.

    Args:
        data_file (str or Path, optional): Classified dataset to load. When
            omitted, the original machine-specific location is used.
        plot_path (str): Where the figure is saved.

    Note:
        In a notebook this definition replaces any earlier ``main`` defined in
        a previous cell.
    """
    # === 1. Load data ===
    if data_file is None:
        # NOTE: absolute path on the original author's machine; pass data_file
        # explicitly on any other system.
        data_file = "C:/Users/wrich/Downloads/CluadeMain/CODEFATHER_ClaudeSolvathon-1/data/classified_dataset.csv"
    df = load_classified_data(data_file)

    # === 2. Generate plot ===
    create_discrimination_plot(df, save_path=plot_path)
    plt.show()

    # === 3. Generate summary ===
    summary_text = synthesize_summary(df)
    print("\n=== Classification Summary ===\n")
    print(summary_text)

if __name__ == "__main__":
    main()

In [None]:
# dashboards/app.py

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# --- Plotting Function (can be in a separate utils file) ---
def create_discrimination_plot(df):
    """Build the log10(S2/S1) vs. S1 scatter figure shown in the dashboard.

    Points are coloured by the ``classification`` column and shaped by the
    ``true_label`` column; the legend is anchored outside the axes on the right.
    """
    plt.style.use('default')
    fig, ax = plt.subplots(figsize=(10, 7))

    # Column-to-aesthetic mapping and marker appearance.
    scatter_options = {
        'x': 's1_photons',
        'y': 'log10_s2_s1',
        'hue': 'classification',
        'style': 'true_label',
        'alpha': 0.8,
        's': 60,
    }
    sns.scatterplot(data=df, ax=ax, **scatter_options)

    # Logarithmic S1 axis with grid lines on major and minor ticks.
    ax.set_xscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    ax.set_title('AI Classification of Simulated Detector Events', fontsize=16, pad=20)
    ax.set_xlabel('S1 Signal (photons) [log scale]', fontsize=12)
    ax.set_ylabel('log10(S2 / S1)', fontsize=12)

    ax.legend(title='Legend', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()

    return fig

# --- Streamlit App Main Logic ---
# Page-level configuration: request the wide layout for the app.
# NOTE(review): Streamlit's documentation asks for set_page_config to run
# before other st.* calls on the page — confirm before reordering these lines.
st.set_page_config(layout="wide")

# Static page header: app title followed by a one-line description.
st.title("Dark Matter Scribe")
st.markdown("An AI system for classifying and reasoning about dark matter detector events.")

@st.cache_data
def load_data(filepath):
    """Read the CSV at ``filepath`` into a DataFrame; return None if no such file exists.

    The call is wrapped in ``st.cache_data``.
    """
    return pd.read_csv(filepath) if os.path.exists(filepath) else None

# Load the classified data
# Raw string: in a normal literal the "\U" of "C:\Users" starts a unicode
# escape and the file fails to parse with a SyntaxError.
# NOTE: machine-specific absolute path; adjust for other systems.
DATA_FILE = r'C:\Users\wrich\Downloads\CluadeMain\CODEFATHER_ClaudeSolvathon-1\data\classified_dataset.csv'
df = load_data(DATA_FILE)

if df is not None:
    # --- Main Layout ---
    # st.columns requires a spec; give the plot twice the width of the inspector.
    col1, col2 = st.columns([2, 1])

    with col1:
        st.header("Event Classification Map")
        st.pyplot(create_discrimination_plot(df))

    with col2:
        st.header("Event Inspector")

        # Dropdown to select an event
        event_ids = df['event_id'].tolist()
        selected_id = st.selectbox("Select an Event ID to inspect:", event_ids)

        if selected_id is not None:
            # Get the data for the selected event. .iloc[0] picks the first
            # matching row as a Series; bare .iloc is only an indexer object.
            event_details = df[df['event_id'] == selected_id].iloc[0]

            st.subheader(f"Details for Event ID: {selected_id}")

            # Display key features
            st.metric("True Label", event_details['true_label'])
            st.metric("AI Classification", event_details['classification'])

            # 'confidence' and 'reasoning' are optional columns: show a
            # placeholder instead of raising KeyError when they are absent.
            confidence = event_details.get('confidence')
            if pd.notna(confidence):
                st.metric("AI Confidence", f"{float(confidence):.2f}")
            else:
                st.metric("AI Confidence", "N/A")

            st.markdown("---")

            # Display the AI's reasoning
            st.subheader("AI Physicist's Reasoning")
            reasoning = event_details.get('reasoning')
            if pd.notna(reasoning):
                st.info(reasoning)
            else:
                st.info("No reasoning is available for this event.")

else:
    st.error(f"Could not find the dataset at '{DATA_FILE}'. Please run the simulation and classification scripts first.")

2025-10-11 01:17:00.853 No runtime found, using MemoryCacheStorageManager


TypeError: LayoutsMixin.columns() missing 1 required positional argument: 'spec'