# Fix "Log probability evaluates to log(0)" Error

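This error means the model assigned zero probability to at least one observation, i.e. some data point is impossible under the parameter values being evaluated (for example, an RT shorter than the non-decision time). Here's how to fix it.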

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Step 1: Load and Inspect Your Data

In [None]:
# Read the trial-level CSV into a DataFrame
csv_path = 'your_data.csv'  # Replace with your file
data = pd.read_csv(csv_path)

# Quick overview: dimensions, column names, then a preview of the rows
column_names = list(data.columns)
print("Data shape:", data.shape)
print("\nColumns:", column_names)
print("\nFirst few rows:")
data.head()

## Step 2: Check RT Distribution

**The most common cause**: RTs that are too short!

In [None]:
# Shortest RT (in seconds) this check treats as safe. Anything faster leaves
# no room for non-decision time, as the diagnostic message below explains.
MIN_SAFE_RT = 0.15

# RTs of responded trials only (response > 0); no-response trials are excluded
rts = data.loc[data['response'] > 0, 'rt']

# Guard: with no usable RTs every statistic below is NaN, and
# `NaN < MIN_SAFE_RT` is False, so the check would wrongly print
# "RT range looks OK". Stop here with an explicit message instead.
if rts.count() == 0:
    raise ValueError(
        "No trials with a response (response > 0) and a non-missing 'rt' "
        "were found - check the 'response' and 'rt' columns before continuing."
    )

fastest_rt = rts.min()

print("RT Statistics:")
print(f"  Minimum: {fastest_rt:.3f} seconds")
print(f"  Maximum: {rts.max():.3f} seconds")
print(f"  Mean: {rts.mean():.3f} seconds")
print(f"  Median: {rts.median():.3f} seconds")

# CRITICAL CHECK: are any responded trials faster than the safe minimum?
if fastest_rt < MIN_SAFE_RT:
    # Count once and reuse for both the absolute number and the percentage
    n_too_fast = (rts < MIN_SAFE_RT).sum()
    print("\n⚠️  PROBLEM FOUND!")
    print(f"   Minimum RT ({fastest_rt:.3f}s) is too short!")
    print(f"   The model needs RT > {MIN_SAFE_RT}s to allow for non-decision time.")
    print(f"\n   Number of trials < {MIN_SAFE_RT}s:", n_too_fast)
    print(f"   Percentage: {100 * n_too_fast / len(rts):.1f}%")
else:
    print("\n✓ RT range looks OK")

# Plot RT distribution: histogram (left) and cumulative proportion (right),
# each with the safe-minimum threshold marked as a dashed red line
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.hist(rts, bins=50, edgecolor='black', alpha=0.7)
plt.axvline(MIN_SAFE_RT, color='red', linestyle='--',
            label=f'Minimum safe RT ({MIN_SAFE_RT}s)')
plt.xlabel('RT (seconds)')
plt.ylabel('Frequency')
plt.title('RT Distribution')
plt.legend()

plt.subplot(1, 2, 2)
plt.hist(rts, bins=50, edgecolor='black', alpha=0.7, cumulative=True, density=True)
plt.axvline(MIN_SAFE_RT, color='red', linestyle='--', label='Minimum safe RT')
plt.xlabel('RT (seconds)')
plt.ylabel('Cumulative Proportion')
plt.title('Cumulative RT Distribution')
plt.legend()

plt.tight_layout()
plt.show()

## Step 3: Filter Short RTs

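**THE FIX**: Remove responded trials with RTs < 0.2s (recommended), or < 0.15s for a less conservative cutoff. No-response trials (`response == 0`) are kept.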

In [None]:
# Minimum RT (in seconds) a responded trial must reach to be kept.
# 0.2 is the conservative, recommended cutoff; set this to 0.15 for the
# less conservative alternative.
RT_CUTOFF = 0.2

# Before filtering
n_before = len(data)
print(f"Before filtering: {n_before} trials")

# Keep a trial if its RT reaches the cutoff OR it is a no-response trial
# (response == 0). Responded trials with a missing RT fail the `>=`
# comparison and are therefore dropped as well.
keep_trial = (data['rt'] >= RT_CUTOFF) | (data['response'] == 0)
data_filtered = data[keep_trial].copy()

# After filtering
n_after = len(data_filtered)
n_removed = n_before - n_after
# An empty input table has no meaningful percentage; report 0.0 rather than
# failing with ZeroDivisionError.
pct_removed = 100 * n_removed / n_before if n_before else 0.0
print(f"After filtering: {n_after} trials")
print(f"Removed: {n_removed} trials ({pct_removed:.1f}%)")

# Check new minimum among the remaining responded trials
rts_filtered = data_filtered.loc[data_filtered['response'] > 0, 'rt']
print(f"\nNew minimum RT: {rts_filtered.min():.3f}s")
print(f"New mean RT: {rts_filtered.mean():.3f}s")

# Use this filtered data from here on
data = data_filtered

## Step 4: Check Other Potential Issues

In [None]:
# Count missing values in the columns the checks below rely on
required_columns = ['subject', 'stimulus', 'response', 'rt']
print("Missing values:")
print(data[required_columns].isna().sum())

# How often each response code occurs
print("\nResponse distribution:")
print(data['response'].value_counts())

# Verify that only the expected response codes appear
valid_codes = [0, 1, 2]
unique_responses = data['response'].unique()
unexpected_codes = [code for code in unique_responses if code not in valid_codes]
if unexpected_codes:
    print("\n⚠️  WARNING: Invalid response codes!")
    print(f"   Found: {unique_responses}")
    print("   Expected: 0 (no response), 1 (left), 2 (right)")
else:
    print("\n✓ Response codes look correct")

# Score each trial: stimulus 0 pairs with response 1, stimulus 1 with response 2
stim0_resp1 = (data['stimulus'] == 0) & (data['response'] == 1)
stim1_resp2 = (data['stimulus'] == 1) & (data['response'] == 2)
data['correct'] = (stim0_resp1 | stim1_resp2).astype(int)

# Accuracy is computed over responded trials only (response > 0)
responded = data['response'] > 0
accuracy = data.loc[responded, 'correct'].mean()
print(f"\nOverall accuracy: {accuracy:.1%}")
if accuracy < 0.5:
    print("⚠️  WARNING: Accuracy < 50% - check stimulus/response coding!")
elif accuracy > 0.99:
    print("⚠️  WARNING: Accuracy > 99% - may cause convergence issues")

## Step 5: Try Fitting the Model

In [None]:
from pydmc import WaldStopSignalModel

# Begin with the non-hierarchical model
print("Testing with individual-level model...\n")

model = WaldStopSignalModel(use_hierarchical=False)

# Short single-chain run; console output is on to see what's happening
quick_fit_options = {
    'chains': 1,
    'iter': 200,
    'warmup': 100,
    'show_console': True,
}

# Follow-up suggestions printed when the fit fails
troubleshooting_hints = (
    "1. Increase minimum RT threshold to 0.25s",
    "2. Check for outliers (very long RTs)",
    "3. Ensure you have both correct and incorrect responses",
)

try:
    fit = model.fit(data, **quick_fit_options)
    print("\n✓ SUCCESS! Model fitted.")
    model.summary()
except Exception as e:
    # Report the failure and list next steps instead of stopping the notebook
    print(f"\n✗ Error: {e}")
    print("\nIf you still get log(0) error, try:")
    for hint in troubleshooting_hints:
        print(hint)

## Step 6: Move to Hierarchical Model

In [None]:
# Run this once the individual-level fit has succeeded
print("Testing hierarchical model...\n")

hierarchical_options = {
    'use_hierarchical': True,
    # Marked "more stable" by the original author.
    # NOTE(review): confirm against the pydmc documentation.
    'centered_parameterization': True,
}
model = WaldStopSignalModel(**hierarchical_options)

# Two chains, 500 iterations with 250 warmup; console output is off
# (set show_console to True if you want to see output)
fit = model.fit(data, chains=2, iter=500, warmup=250, show_console=False)

print("\n✓ Hierarchical model fitted!")
model.summary()

## Summary: Quick Fix

```python
# The solution in 3 lines: import pandas, load the data, filter short RTs
import pandas as pd
data = pd.read_csv('your_data.csv')
data = data[(data['rt'] >= 0.2) | (data['response'] == 0)].copy()  # Keep trials with rt >= 0.2s or no response (response == 0)

# Then fit normally: hierarchical model, 4 chains, 2000 iterations with 1000 warmup
from pydmc import WaldStopSignalModel
model = WaldStopSignalModel(use_hierarchical=True, centered_parameterization=True)
fit = model.fit(data, chains=4, iter=2000, warmup=1000)
```