In [None]:
# Run this cell to create the Sotah dataset from Sefaria.

import json
from typing import Dict, List
from urllib.parse import quote

import requests

# Alternative passage table mapping a short topic label to a line-level
# Sefaria reference (tractate.page.first_line-last_line).
# NOTE(review): nothing in the visible code reads this table; the pipeline
# below iterates PASSAGES instead. Presumably kept for comparison -- confirm
# before removing.
PASSAGES_from_CLAUDE = {
    "warning": "Sotah.2a.1-15",
    "jerusalem": "Sotah.3a.1-3b.15",
    "exposure": "Sotah.3b.1-20",
    "hair": "Sotah.17a.1-10",
    "waters": "Sotah.15b.1-20",
    "curse": "Sotah.16a.1-15",
    "drinking": "Sotah.20a.1-15",
    "effects": "Sotah.20a.15-30",
    "outcome": "Sotah.20b.1-15",
    "family": "Sotah.21a.1-10",
    "merit": "Sotah.4b.1-15",
    "refusal": "Sotah.6a.1-15",
    "abolition": "Sotah.47a.1-10",
    "impact": "Sotah.9a.1-15",
}

# Passage catalogue driving the dataset build. Each key is a short passage
# name and each value holds:
#   ref              -- Sefaria reference passed to the texts API
#   themes           -- free-form theme labels
#   primary_subjects -- who the passage is mainly about
#   sentiment        -- "positive", "neutral" or "negative"
#
# Keys must be unique: a repeated key in a dict literal silently replaces the
# earlier entry. The Sotah.17a entry used to share the key "punishment" with
# the Sotah.10a-b entry and was therefore dropped; it is now
# "punishment_curse", while "punishment" still maps to Sotah.10a-b.
#
# NOTE(review): some theme labels are misspelled ("degredation",
# "embarassment"). They are left unchanged because they end up in the
# generated JSON and may already be matched on downstream.
PASSAGES = {
    "redemption_women": {
        "ref": "Sotah.11b",
        "themes": ["redemption", "faith", "merit"],
        "primary_subjects": ["women"],
        "sentiment": "positive"
    },
    "witnesses": {
        "ref": "Sotah.2a-2b",
        "themes": ["legal_procedure", "warning", "witnesses", "seclusion"],
        "primary_subjects": ["women"],
        "sentiment": "neutral"
    },
    "warning": {
        "ref": "Sotah.3a",
        "themes": ["anger", "seclusion", "impurity", "warning"],
        "primary_subjects": ["women"],
        "sentiment": "neutral"
    },
    "slavery": {
        "ref": "Sotah.3b",
        "themes": ["slavery", "child-slavery", "intercourse"],
        "primary_subjects": ["men & women"],
        "sentiment": "negative"
    },
    "seclusion": {
        "ref": "Sotah.3b-4a",
        "themes": ["anger", "seclusion"],
        "primary_subjects": ["women"],
        "sentiment": "negative"
    },
    "ritual": {
        "ref": "Sotah.7a-b",
        "themes": ["intercourse", "menstruating", "threats", "nudity", "breasts", "public humiliation"],
        "primary_subjects": ["women"],
        "sentiment": "negative"
    },
    "publicity": {
        "ref": "Sotah.8b",
        "themes": ["public humiliation", "degredation", "nudity"],
        "primary_subjects": ["women"],
        "sentiment": "negative"
    },
    "drinking": {
        "ref": "Sotah.9a-9b",
        "themes": ["punishment", "humiliation", "curse"],
        "primary_subjects": ["men & women"],
        "sentiment": "negative"
    },
    "castration": {
        "ref": "Sotah.13b",
        "themes": ["castration", "slavery"],
        "primary_subjects": ["men"],
        "sentiment": "negative"
    },
    "meal-offering": {
        "ref": "Sotah.14a-14b",
        "themes": ["embarassment", "insults"],
        "primary_subjects": ["men"],
        "sentiment": "negative"
    },
    "leper": {
        "ref": "Sotah.16a",
        "themes": ["shaving", "pubic-hair"],
        "primary_subjects": ["men"],
        "sentiment": "negative"
    },
    "harlot": {
        "ref": "Sotah.4b",
        "themes": ["intercourse", "prostitute", "poverty", "punishment", "adultery"],
        "primary_subjects": ["men & women"],
        "sentiment": "negative"
    },
    "forced": {
        "ref": "Sotah.19a-21a",
        "themes": ["force", "against her will", "fear"],
        "primary_subjects": ["women"],
        "sentiment": "negative"
    },
    # Renamed from the duplicated key "punishment" so this entry is kept.
    "punishment_curse": {
        "ref": "Sotah.17a",
        "themes": ["curse", "bowels", "death"],
        "primary_subjects": ["women"],
        "sentiment": "negative"
    },
    "remarriage": {
        "ref": "Sotah.18b",
        "themes": ["defiled", "seclusion", "intercourse", "adultery"],
        "primary_subjects": ["women"],
        "sentiment": "negative"
    },
    "learning": {
        "ref": "Sotah.21b-22a",
        "themes": ["foolish piety", "conniving"],
        "primary_subjects": ["men & women"],
        "sentiment": "negative"
    },
    "miriam_leadership": {
        "ref": "Sotah.12b-13a",
        "themes": ["prophecy", "leadership", "wisdom"],
        "primary_subjects": ["women"],
        "sentiment": "positive"
    },
    "punishment": {
        "ref": "Sotah.10a-b",
        "themes": ["adultery", "disgrace"],
        "primary_subjects": ["men"],
        "sentiment": "negative"
    },
    "Strength": {
        "ref": "Sotah.10b",
        "themes": ["admit", "save"],
        "primary_subjects": ["men"],
        "sentiment": "positive"
    },
    "song_at_sea": {
        "ref": "Sotah.30b-31a",
        "themes": ["praise", "unity", "miracle"],
        "primary_subjects": ["nation of israel"],
        "sentiment": "positive"
    },
    "deterioration": {
        "ref": "Sotah.49a",
        "themes": ["the wise have died"],
        "primary_subjects": ["men"],
        "sentiment": "negative"
    }
}

def fetch_sefaria_text(ref, timeout=30):
    """Fetch text from the Sefaria texts API for a given reference.

    Args:
        ref: Sefaria reference string such as "Sotah.2a-2b". Spaces are
            replaced with underscores and the result is percent-encoded
            before being placed in the URL path.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The decoded JSON response, or None when the request fails, the body
        is not valid JSON, or the response is a JSON object with an "error"
        key. Failures are reported with print() rather than raised.
    """
    base_url = "https://www.sefaria.org/api/texts/"
    # Replace spaces with underscores, then percent-encode anything that
    # would otherwise change the URL structure (e.g. "?" or "#").
    formatted_ref = quote(ref.replace(" ", "_"), safe="/:,")
    url = f"{base_url}{formatted_ref}?context=0"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"Error fetching {ref}: {e}")
        return None

    # NOTE(review): Sefaria appears to report unresolvable references as a
    # JSON object with an "error" key -- confirm against the API docs.
    # Treating that as a failure keeps callers from storing empty text.
    if isinstance(data, dict) and "error" in data:
        print(f"Error fetching {ref}: {data['error']}")
        return None

    return data

def process_passages():
    """Build the dataset by attaching Sefaria text to every PASSAGES entry.

    For each passage the text is requested via fetch_sefaria_text(). When a
    truthy response comes back, the result entry is the original metadata
    plus 'hebrew' (response key 'he'), 'english' (response key 'text') and
    'sefaria_url'. Otherwise the original metadata object is stored as-is.

    Returns:
        dict mapping passage name to its (possibly enriched) metadata, in
        PASSAGES order.
    """
    results = {}

    for name, info in PASSAGES.items():
        print(f"Fetching text for {name}...")
        payload = fetch_sefaria_text(info['ref'])

        if not payload:
            # Nothing usable came back: keep the bare metadata.
            print(f"Failed to fetch text for {name}")
            results[name] = info
            continue

        # Copy first so the PASSAGES entry itself is never modified.
        entry = dict(info)
        entry['hebrew'] = payload.get('he', '')
        entry['english'] = payload.get('text', '')
        entry['sefaria_url'] = f"https://www.sefaria.org/{info['ref'].replace(' ', '_')}"
        results[name] = entry

    return results

def save_to_file(data, filename="sotah_passages_with_text.json"):
    """Write *data* to *filename* as UTF-8 JSON with two-space indentation.

    Non-ASCII characters are written literally rather than as \\uXXXX
    escapes. An existing file is overwritten.
    """
    with open(filename, mode='w', encoding='utf-8') as out:
        json.dump(data, out, indent=2, ensure_ascii=False)

def main():
    """Fetch every passage, write the JSON file and print a short summary."""
    passages = process_passages()
    save_to_file(passages)

    summary = (
        "\nProcessing complete!",
        f"Processed {len(passages)} passages",
        "Data saved to sotah_passages_with_text.json",
    )
    for line in summary:
        print(line)


# Run the pipeline only when executed as a script or notebook cell, not on import.
if __name__ == "__main__":
    main()

Fetching text for redemption_women...
Fetching text for witnesses...
Fetching text for slavery...
Fetching text for seclusion...
Fetching text for ritual...
Fetching text for publicity...
Fetching text for drinking...
Fetching text for castration...
Fetching text for meal-offering...
Fetching text for leper...
Fetching text for harlot...
Fetching text for forced...
Fetching text for punishment...
Fetching text for remarriage...
Fetching text for learning...
Fetching text for miriam_leadership...
Fetching text for Strength...
Fetching text for song_at_sea...
Fetching text for deterioration...

Processing complete!
Processed 20 passages
Data saved to sotah_passages_with_text.json
