# Clinical Trials Prediction Market Generator

This notebook fetches behavioral intervention studies from ClinicalTrials.gov and generates prediction market questions.

In [1]:
# Import required libraries
import urllib.request
import urllib.parse
import json
import csv
from typing import Dict, List, Optional, Generator
from collections import Counter

## API Functions

In [2]:
def fetch_studies(page_size: int = 100, max_pages: int = 5, sleep_sec: float = 0.1) -> Generator[Dict, None, None]:
    """
    Fetch behavioral intervention studies from ClinicalTrials.gov API
    """
    BASE = "https://clinicaltrials.gov/api/v2/studies"
    
    fields = [
        "protocolSection.identificationModule.nctId",
        "protocolSection.identificationModule.briefTitle",
        "protocolSection.outcomesModule.primaryOutcomes",
        "protocolSection.conditionsModule.conditions",
        "protocolSection.armsInterventionsModule.interventions",
        "protocolSection.statusModule.overallStatus",
        "protocolSection.statusModule.whyStopped",
        "hasResults",
        "resultsSection.outcomeMeasuresModule.outcomeMeasures"
    ]
    
    query = "AREA[InterventionType]BEHAVIORAL"
    next_page_token = None
    
    for page in range(max_pages):
        params = {
            "pageSize": page_size,
            "query.term": query,
            "fields": ",".join(fields)
        }
        
        if next_page_token:
            params["pageToken"] = next_page_token
        
        full_url = f"{BASE}?{urllib.parse.urlencode(params)}"
        
        req = urllib.request.Request(full_url)
        req.add_header('User-Agent', 'Python-urllib/3.11')
        req.add_header('Accept', 'application/json')
        
        try:
            with urllib.request.urlopen(req, timeout=30) as response:
                if response.status != 200:
                    break
                
                data = json.loads(response.read().decode())
                studies = data.get('studies', [])
                
                if not studies:
                    break
                
                for study in studies:
                    yield study
                
                next_page_token = data.get('nextPageToken')
                if not next_page_token or len(studies) < page_size:
                    break
                    
        except Exception as e:
            print(f"Error fetching page {page}: {e}")
            break
        
        if sleep_sec > 0:
            import time
            time.sleep(sleep_sec)

print("API functions loaded ✅")

API functions loaded ✅


## Success Assessment

In [3]:
def assess_study_success(study_data: Dict) -> Dict:
    """
    Comprehensive assessment of intervention success based on actual results data.
    Combines statistical analysis with fallback to study status assessment.
    """
    protocol_section = study_data.get('protocolSection', {})
    status_module = protocol_section.get('statusModule', {})
    
    overall_status = status_module.get('overallStatus', '')
    why_stopped = status_module.get('whyStopped', '')
    has_results = study_data.get('hasResults', False)
    
    # First, try to analyze actual intervention results if available
    if has_results and "resultsSection" in study_data:
        results_analysis = analyze_intervention_results(study_data)
        if results_analysis['success'] is not None:
            return {
                'status': overall_status,
                'success': results_analysis['success'],
                'confidence': results_analysis['confidence'],
                'has_results': has_results,
                'termination_reason': why_stopped if why_stopped else None,
                'assessment_method': 'statistical_analysis',
                'details': results_analysis.get('summary', '')
            }
    
    # Fallback to status-based assessment
    assessment = {
        'status': overall_status,
        'success': 'INCONCLUSIVE',
        'confidence': 'low',
        'has_results': has_results,
        'termination_reason': why_stopped if why_stopped else None,
        'assessment_method': 'status_only',
        'details': ''
    }
    
    if overall_status == 'COMPLETED':
        if has_results:
            assessment['success'] = 'COMPLETED_WITH_RESULTS'
            assessment['confidence'] = 'medium'
            assessment['details'] = 'Study completed and results posted, but detailed analysis unavailable'
        else:
            assessment['success'] = 'COMPLETED_NO_RESULTS'
            assessment['details'] = 'Study completed but no results posted yet'
    
    elif overall_status == 'TERMINATED' and why_stopped:
        why_stopped_lower = why_stopped.lower()
        
        success_terms = ['efficacy demonstrated', 'objectives achieved', 'met endpoint']
        failure_terms = ['futility', 'lack of efficacy', 'safety concerns', 'ineffective']
        
        if any(term in why_stopped_lower for term in success_terms):
            assessment['success'] = 'SUCCESS'
            assessment['confidence'] = 'high'
            assessment['details'] = f'Terminated early for success: {why_stopped}'
        elif any(term in why_stopped_lower for term in failure_terms):
            assessment['success'] = 'FAILURE' 
            assessment['confidence'] = 'high'
            assessment['details'] = f'Terminated early for failure: {why_stopped}'
        else:
            assessment['success'] = 'TERMINATED_UNCLEAR'
            assessment['confidence'] = 'medium'
            assessment['details'] = f'Terminated: {why_stopped}'
    
    elif overall_status in ['ACTIVE_NOT_RECRUITING', 'RECRUITING', 'NOT_YET_RECRUITING']:
        assessment['success'] = 'ONGOING'
        assessment['confidence'] = 'high'
        assessment['details'] = 'Study still in progress'
    
    elif overall_status == 'WITHDRAWN':
        assessment['success'] = 'WITHDRAWN'
        assessment['confidence'] = 'high'
        assessment['details'] = 'Study withdrawn before completion'
    
    return assessment


def analyze_intervention_results(study_data: Dict) -> Dict:
    """
    Analyze actual intervention results from ClinicalTrials.gov results data.
    Returns statistical assessment of intervention success.
    """
    if not study_data.get("hasResults") or "resultsSection" not in study_data:
        return {"success": None, "confidence": "unknown", "summary": "No results available"}
    
    results = []
    outcome_measures = study_data["resultsSection"].get("outcomeMeasuresModule", {}).get("outcomeMeasures", [])
    
    # Focus on primary outcomes first
    primary_outcomes = [om for om in outcome_measures if om.get("type") == "PRIMARY"]
    if not primary_outcomes:
        primary_outcomes = outcome_measures[:3]  # Take first few if no primary outcomes
    
    for outcome in primary_outcomes:
        title = outcome.get("title", "Unknown outcome")
        unit = outcome.get("unitOfMeasure", "")
        
        # Method 1: Check statistical analyses (most reliable)
        analyses = outcome.get("analyses", [])
        has_statistical_analysis = False
        
        for analysis in analyses:
            has_statistical_analysis = True
            p_value = analysis.get("pValue")
            param_value = analysis.get("paramValue")
            ci_lower = analysis.get("ciLowerLimit")
            ci_upper = analysis.get("ciUpperLimit")
            
            is_significant = False
            confidence = "low"
            
            if p_value:
                try:
                    # Handle p-values like ".001" or "0.001"
                    p_val = float(p_value.replace('.', '0.') if p_value.startswith('.') else p_value)
                    is_significant = p_val < 0.05
                    confidence = "high" if p_val < 0.001 else "medium" if p_val < 0.01 else "low"
                except (ValueError, TypeError):
                    pass
            
            # Check if confidence interval excludes null effect
            ci_excludes_null = False
            if ci_lower and ci_upper:
                try:
                    lower = float(ci_lower)
                    upper = float(ci_upper)
                    # For most outcomes, null effect is 0 (no change)
                    if (lower > 0 and upper > 0) or (lower < 0 and upper < 0):
                        ci_excludes_null = True
                except (ValueError, TypeError):
                    pass
            
            results.append({
                "outcome": title,
                "method": "statistical_analysis",
                "p_value": p_value,
                "effect_size": param_value,
                "is_significant": is_significant,
                "ci_excludes_null": ci_excludes_null,
                "confidence": confidence,
                "unit": unit
            })
        
        # Method 2: If no statistical analysis, look at raw measurements  
        if not has_statistical_analysis and outcome.get("classes"):
            for result_class in outcome["classes"]:
                if result_class.get("categories"):
                    for category in result_class["categories"]:
                        measurements = category.get("measurements", [])
                        
                        if len(measurements) >= 2:  # Need at least 2 groups to compare
                            values = []
                            for m in measurements:
                                try:
                                    val = float(m.get("value", 0))
                                    values.append(val)
                                except (ValueError, TypeError):
                                    continue
                            
                            if len(values) >= 2:
                                # Basic assessment - is there a notable difference?
                                diff = abs(values[0] - values[1])
                                avg = sum(values) / len(values)
                                percent_diff = (diff / abs(avg)) * 100 if avg != 0 else 0
                                
                                # Heuristic: >20% difference might be meaningful
                                is_meaningful = percent_diff > 20
                                
                                results.append({
                                    "outcome": title,
                                    "method": "measurement_comparison",
                                    "values": values,
                                    "percent_difference": percent_diff,
                                    "is_significant": is_meaningful,
                                    "confidence": "low",  # Low confidence without proper stats
                                    "unit": unit
                                })
                            break  # Only analyze first category
                    break  # Only analyze first class
    
    # Overall assessment
    significant_results = [r for r in results if r.get("is_significant")]
    
    if not results:
        return {"success": None, "confidence": "unknown", "summary": "No analyzable outcome measures"}
    
    # Success if any primary outcome shows significant positive effect
    overall_success = len(significant_results) > 0
    
    if significant_results:
        confidence_scores = {"high": 3, "medium": 2, "low": 1}
        overall_confidence = max([r["confidence"] for r in significant_results], key=lambda x: confidence_scores.get(x, 0))
    else:
        overall_confidence = "low"
    
    return {
        "success": 'SUCCESS' if overall_success else 'FAILURE',
        "confidence": overall_confidence,
        "summary": f"{len(significant_results)}/{len(results)} primary outcomes significant",
        "details": results
    }

print("Success assessment function loaded ✅")

Success assessment function loaded ✅


## Study Analysis

In [4]:
def analyze_study(study: Dict) -> Dict:
    """
    Analyze a study and return structured data including intervention success assessment
    """
    protocol_section = study.get('protocolSection', {})
    identification = protocol_section.get('identificationModule', {})
    
    nct_id = identification.get('nctId', 'Unknown')
    title = identification.get('briefTitle', 'No title available')
    
    # Get conditions and interventions
    conditions = protocol_section.get('conditionsModule', {}).get('conditions', [])
    interventions = protocol_section.get('armsInterventionsModule', {}).get('interventions', [])
    primary_outcomes = protocol_section.get('outcomesModule', {}).get('primaryOutcomes', [])
    
    # Find behavioral interventions
    behavioral_interventions = [i for i in interventions if i.get('type') == 'BEHAVIORAL']
    
    # Comprehensive success assessment
    assessment = assess_study_success(study)
    
    # Generate prediction question
    prediction_question = None
    if primary_outcomes and behavioral_interventions:
        intervention_name = behavioral_interventions[0].get('name', 'the intervention')
        measure = primary_outcomes[0].get('measure', 'outcomes')
        timeframe = primary_outcomes[0].get('timeFrame', 'study completion')
        condition_text = ', '.join(conditions) if conditions else 'participants'
        prediction_question = f"Will {intervention_name} improve {measure} at {timeframe} in {condition_text} for trial {nct_id}?"
    
    # Determine intervention success category for easier analysis
    success_category = 'unknown'
    if assessment['success'] in ['SUCCESS']:
        success_category = 'success'
    elif assessment['success'] in ['FAILURE']:
        success_category = 'failure'
    elif assessment['success'] in ['COMPLETED_WITH_RESULTS', 'RESULTS_AVAILABLE']:
        success_category = 'completed_with_results'
    elif assessment['success'] in ['ONGOING', 'ACTIVE', 'RECRUITING']:
        success_category = 'ongoing'
    elif assessment['success'] in ['WITHDRAWN']:
        success_category = 'withdrawn'
    else:
        success_category = 'inconclusive'
    
    return {
        'nct_id': nct_id,
        'title': title,
        'conditions': conditions,
        'interventions': len(interventions),
        'behavioral_interventions': len(behavioral_interventions),
        'primary_outcomes': len(primary_outcomes),
        'assessment': assessment,
        'success_category': success_category,
        'prediction_question': prediction_question,
        'suitable_for_prediction': bool(prediction_question),
        'evidence_url': f"https://clinicaltrials.gov/study/{nct_id}"
    }

print("Analysis function loaded ✅")

Analysis function loaded ✅


## Fetch and Analyze Studies

In [5]:
# Fetch 500 studies (5 pages of 100 each)
print("Fetching 500 behavioral intervention studies...")
studies = list(fetch_studies(page_size=100, max_pages=5))
print(f"✅ Fetched {len(studies)} studies")

# Analyze all studies
print("\nAnalyzing studies...")
outcomes = [analyze_study(study) for study in studies]
print(f"✅ Analyzed {len(outcomes)} studies")

Fetching 500 behavioral intervention studies...
✅ Fetched 500 studies

Analyzing studies...
✅ Analyzed 500 studies
✅ Fetched 500 studies

Analyzing studies...
✅ Analyzed 500 studies


## Results Summary

In [6]:
# Summary statistics
print("📊 STUDY ANALYSIS SUMMARY")
print("=" * 50)

# Basic counts
suitable_for_prediction = sum(1 for o in outcomes if o['suitable_for_prediction'])
with_results = sum(1 for o in outcomes if o['assessment']['has_results'])
completed = sum(1 for o in outcomes if o['assessment']['status'] == 'COMPLETED')
statistical_analysis = sum(1 for o in outcomes if o['assessment'].get('assessment_method') == 'statistical_analysis')

print(f"Total studies: {len(outcomes)}")
print(f"Suitable for prediction markets: {suitable_for_prediction} ({suitable_for_prediction/len(outcomes)*100:.1f}%)")
print(f"Studies with results: {with_results} ({with_results/len(outcomes)*100:.1f}%)")
print(f"Studies with statistical analysis: {statistical_analysis} ({statistical_analysis/len(outcomes)*100:.1f}%)")
print(f"Completed studies: {completed} ({completed/len(outcomes)*100:.1f}%)")

# Success assessment distribution
print("\n🎯 SUCCESS ASSESSMENT DISTRIBUTION:")
success_counts = Counter(o['assessment']['success'] for o in outcomes)
for success_type, count in success_counts.most_common():
    print(f"  • {success_type}: {count} ({count/len(outcomes)*100:.1f}%)")

# Success categories (simplified view)
print("\n📈 INTERVENTION SUCCESS CATEGORIES:")
category_counts = Counter(o['success_category'] for o in outcomes)
for category, count in category_counts.most_common():
    print(f"  • {category}: {count} ({count/len(outcomes)*100:.1f}%)")

# Assessment method distribution
print("\n🔍 ASSESSMENT METHOD DISTRIBUTION:")
method_counts = Counter(o['assessment'].get('assessment_method', 'unknown') for o in outcomes)
for method, count in method_counts.most_common():
    print(f"  • {method}: {count} ({count/len(outcomes)*100:.1f}%)")

# Show examples of successful interventions (if any)
successful_studies = [o for o in outcomes if o['success_category'] == 'success']
if successful_studies:
    print(f"\n✅ EXAMPLES OF SUCCESSFUL INTERVENTIONS ({len(successful_studies)} total):")
    for i, study in enumerate(successful_studies[:3]):
        print(f"{i+1}. {study['nct_id']}: {study['title'][:80]}...")
        print(f"   Assessment: {study['assessment']['success']} (confidence: {study['assessment']['confidence']})")
        if study['assessment'].get('details'):
            print(f"   Details: {study['assessment']['details'][:100]}...")
        print()

📊 STUDY ANALYSIS SUMMARY
Total studies: 500
Suitable for prediction markets: 497 (99.4%)
Studies with results: 47 (9.4%)
Studies with statistical analysis: 42 (8.4%)
Completed studies: 290 (58.0%)

🎯 SUCCESS ASSESSMENT DISTRIBUTION:
  • COMPLETED_NO_RESULTS: 247 (49.4%)
  • ONGOING: 121 (24.2%)
  • INCONCLUSIVE: 66 (13.2%)
  • SUCCESS: 23 (4.6%)
  • FAILURE: 20 (4.0%)
  • TERMINATED_UNCLEAR: 11 (2.2%)
  • WITHDRAWN: 7 (1.4%)
  • COMPLETED_WITH_RESULTS: 5 (1.0%)

📈 INTERVENTION SUCCESS CATEGORIES:
  • inconclusive: 324 (64.8%)
  • ongoing: 121 (24.2%)
  • success: 23 (4.6%)
  • failure: 20 (4.0%)
  • withdrawn: 7 (1.4%)
  • completed_with_results: 5 (1.0%)

🔍 ASSESSMENT METHOD DISTRIBUTION:
  • status_only: 458 (91.6%)
  • statistical_analysis: 42 (8.4%)

✅ EXAMPLES OF SUCCESSFUL INTERVENTIONS (23 total):
1. NCT03099369: Daily Step-based Exercise Using Fitness Monitors for Peripheral Artery Disease...
   Assessment: SUCCESS (confidence: low)
   Details: 1/1 primary outcomes significant.

## Generate Prediction Market Questions

In [None]:
# Generate prediction market questions from suitable studies
# Create one question per primary outcome measure
questions = []
for outcome in outcomes:
    if outcome['suitable_for_prediction']:
        # Get the study data to access all primary outcomes
        study_nct = outcome['nct_id']
        study_data = next((s for s in studies if s.get('protocolSection', {}).get('identificationModule', {}).get('nctId') == study_nct), None)
        
        if study_data:
            protocol_section = study_data.get('protocolSection', {})
            primary_outcomes = protocol_section.get('outcomesModule', {}).get('primaryOutcomes', [])
            conditions = protocol_section.get('conditionsModule', {}).get('conditions', [])
            interventions = protocol_section.get('armsInterventionsModule', {}).get('interventions', [])
            behavioral_interventions = [i for i in interventions if i.get('type') == 'BEHAVIORAL']
            
            # Base data for all questions from this study
            base_data = {
                'nct_id': outcome['nct_id'],
                'title': outcome['title'],
                'conditions': ', '.join(outcome['conditions']),
                'study_status': outcome['assessment']['status'],
                'success_assessment': outcome['assessment']['success'],
                'success_category': outcome['success_category'],
                'confidence': outcome['assessment']['confidence'],
                'assessment_method': outcome['assessment'].get('assessment_method', 'unknown'),
                'has_results': outcome['assessment']['has_results'],
                'termination_reason': outcome['assessment'].get('termination_reason', ''),
                'assessment_details': outcome['assessment'].get('details', ''),
                'evidence_url': outcome['evidence_url'],
                'question_type': 'efficacy'
            }
            
            # Generate one question per primary outcome
            if primary_outcomes and behavioral_interventions:
                intervention_name = behavioral_interventions[0].get('name', 'the intervention')
                condition_text = ', '.join(conditions) if conditions else 'participants'
                
                for i, primary_outcome in enumerate(primary_outcomes):
                    measure = primary_outcome.get('measure', 'outcomes')
                    timeframe = primary_outcome.get('timeFrame', 'study completion')
                    
                    # Create the question for this specific outcome
                    question_text = f"Will {intervention_name} improve {measure} at {timeframe} in {condition_text} for trial {outcome['nct_id']}?"
                    
                    # Create a copy of base data for this question
                    question_data = base_data.copy()
                    question_data.update({
                        'question': question_text,
                        'primary_outcome_measure': measure,
                        'primary_outcome_timeframe': timeframe,
                        'outcome_number': i + 1,
                        'total_primary_outcomes': len(primary_outcomes)
                    })
                    
                    questions.append(question_data)
            
        # Fallback: if we can't access study data, use the single question we already generated
        elif outcome['prediction_question']:
            fallback_data = {
                'nct_id': outcome['nct_id'],
                'title': outcome['title'],
                'question': outcome['prediction_question'],
                'conditions': ', '.join(outcome['conditions']),
                'study_status': outcome['assessment']['status'],
                'success_assessment': outcome['assessment']['success'],
                'success_category': outcome['success_category'],
                'confidence': outcome['assessment']['confidence'],
                'assessment_method': outcome['assessment'].get('assessment_method', 'unknown'),
                'has_results': outcome['assessment']['has_results'],
                'termination_reason': outcome['assessment'].get('termination_reason', ''),
                'assessment_details': outcome['assessment'].get('details', ''),
                'evidence_url': outcome['evidence_url'],
                'question_type': 'efficacy',
                'primary_outcome_measure': 'Unknown',
                'primary_outcome_timeframe': 'Unknown',
                'outcome_number': 1,
                'total_primary_outcomes': 1
            }
            questions.append(fallback_data)

print(f"🎲 Generated {len(questions)} prediction market questions from {len([o for o in outcomes if o['suitable_for_prediction']])} suitable studies")
print(f"📊 Ratio: {len(questions)/len([o for o in outcomes if o['suitable_for_prediction']]):.1f} questions per study on average")

# Show breakdown by number of primary outcomes
outcome_counts = Counter(q['total_primary_outcomes'] for q in questions)
print(f"\n📋 PRIMARY OUTCOMES PER STUDY:")
for outcome_count, freq in sorted(outcome_counts.items()):
    studies_with_count = freq // outcome_count if outcome_count > 0 else freq
    print(f"  • {outcome_count} primary outcome(s): {studies_with_count} studies ({freq} questions)")

# Show sample questions grouped by study
print("\n📝 SAMPLE QUESTIONS BY STUDY:")
questions_by_study = {}
for q in questions:
    nct_id = q['nct_id']
    if nct_id not in questions_by_study:
        questions_by_study[nct_id] = []
    questions_by_study[nct_id].append(q)

# Show first few studies with multiple outcomes
multi_outcome_studies = [(nct, qs) for nct, qs in questions_by_study.items() if len(qs) > 1]
single_outcome_studies = [(nct, qs) for nct, qs in questions_by_study.items() if len(qs) == 1]

if multi_outcome_studies:
    print(f"\n🎯 STUDIES WITH MULTIPLE PRIMARY OUTCOMES ({len(multi_outcome_studies)} studies):")
    for i, (nct_id, study_questions) in enumerate(multi_outcome_studies[:2]):
        print(f"\n{i+1}. Study {nct_id} ({len(study_questions)} primary outcomes):")
        print(f"   Title: {study_questions[0]['title'][:60]}...")
        for j, q in enumerate(study_questions):
            print(f"   Outcome {j+1}: {q['primary_outcome_measure'][:50]}...")
            print(f"     Question: {q['question'][:80]}...")
            print()

if single_outcome_studies:
    print(f"📍 STUDIES WITH SINGLE PRIMARY OUTCOME ({len(single_outcome_studies)} studies):")
    for i, (nct_id, study_questions) in enumerate(single_outcome_studies[:2]):
        q = study_questions[0]
        print(f"{i+1}. {nct_id}: {q['question'][:80]}...")
        print(f"   Assessment: {q['success_assessment']} ({q['assessment_method']})")
        print()

🎲 Generated 796 prediction market questions from 497 suitable studies
📊 Ratio: 1.6 questions per study on average

📋 QUESTION TYPE BREAKDOWN:
  • efficacy: 497 questions (62.4%)
  • timeline: 256 questions (32.2%)
  • replication: 43 questions (5.4%)

📝 SAMPLE QUESTIONS BY TYPE AND CATEGORY:

EFFICACY QUESTIONS (497 total):
  [inconclusive] Will Fibit + Coaching improve Sleep Duration (Assess changes in sleep quality) at time points at visit 1 (week 1), visit 2 (week 6) and visit 3 (week 12) in Sleep for trial NCT04246424?
    Status: COMPLETED | Assessment: COMPLETED_NO_RESULTS | Method: status_only
    Details: Study completed but no results posted yet...

  [inconclusive] Will ACT Intervention Group improve Change is being assessed using the Quality of Life BREF (WHOQOLBREF; Skevington et al., 2004) questionnaire at Baseline and 6 weeks in Anxiety, Depression for trial NCT02449759?
    Status: COMPLETED | Assessment: COMPLETED_NO_RESULTS | Method: status_only
    Details: Study comp

## Export to CSV

In [None]:
# Export prediction market questions to CSV
if questions:
    filename = "prediction_market_questions.csv"
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = questions[0].keys()
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        
        writer.writeheader()
        for question in questions:
            writer.writerow(question)
    
    print(f"✅ Exported {len(questions)} questions to {filename}")
else:
    print("❌ No questions to export")

print(f"\n🎯 FINAL SUMMARY:")
print(f"  • Fetched {len(studies)} behavioral intervention studies")
print(f"  • Found {len([o for o in outcomes if o['suitable_for_prediction']])} studies suitable for prediction markets")
print(f"  • Generated {len(questions)} total prediction market questions")
print(f"  • Average: {len(questions)/len([o for o in outcomes if o['suitable_for_prediction']]):.1f} questions per suitable study")

# Count studies by number of primary outcomes
studies_with_multiple = len([nct for nct, qs in questions_by_study.items() if len(qs) > 1])
studies_with_single = len([nct for nct, qs in questions_by_study.items() if len(qs) == 1])
print(f"  • Studies with multiple primary outcomes: {studies_with_multiple}")
print(f"  • Studies with single primary outcome: {studies_with_single}")

✅ Exported 796 questions to prediction_market_questions.csv

🎯 FINAL SUMMARY:
  • Fetched 500 behavioral intervention studies
  • Found 497 studies suitable for prediction markets
  • Generated 796 total prediction market questions
  • Average: 1.6 questions per suitable study
  • Question types: efficacy (497), timeline (256), replication (43)
