In [None]:
import json
import os
from typing import Dict, List
from urllib.parse import quote

import requests


# Tractate name -> metadata. Each "ref" is the Sefaria reference covering the
# page range to download for that tractate; the tractate name is also used to
# derive the output filename.
# NOTE(review): the page ranges were not verified against Sefaria's index;
# some (e.g. Shekalim, Tamid, Middot, Kinnim) may not match the pagination
# Sefaria uses -- confirm before relying on complete coverage.
PASSAGES = {
    "Berakhot": {
        "ref": "Berakhot.2a-64a"
    },
    "Shabbat": {
        "ref": "Shabbat.2a-157b"
    },
    "Eruvin": {
        "ref": "Eruvin.2a-105a"
    },
    "Pesachim": {
        "ref": "Pesachim.2a-121b"
    },
    "Shekalim": {
        "ref": "Shekalim.2a-22b"
    },
    "Yoma": {
        "ref": "Yoma.2a-88a"
    },
    "Sukkah": {
        "ref": "Sukkah.2a-56b"
    },
    "Beitzah": {
        "ref": "Beitzah.2a-40b"
    },
    "Rosh Hashanah": {
        "ref": "Rosh Hashanah.2a-35a"
    },
    "Taanit": {
        "ref": "Taanit.2a-31a"
    },
    "Megillah": {
        "ref": "Megillah.2a-32a"
    },
    "Moed Katan": {
        "ref": "Moed Katan.2a-29a"
    },
    "Chagigah": {
        "ref": "Chagigah.2a-27a"
    },
    "Yevamot": {
        "ref": "Yevamot.2a-122b"
    },
    "Ketubot": {
        "ref": "Ketubot.2a-112b"
    },
    "Nedarim": {
        "ref": "Nedarim.2a-91b"
    },
    "Nazir": {
        "ref": "Nazir.2a-66b"
    },
    "Gittin": {
        "ref": "Gittin.2a-90b"
    },
    "Kiddushin": {
        "ref": "Kiddushin.2a-82b"
    },
    "Bava Kamma": {
        "ref": "Bava Kamma.2a-119b"
    },
    "Bava Metzia": {
        "ref": "Bava Metzia.2a-119a"
    },
    "Bava Batra": {
        "ref": "Bava Batra.2a-176b"
    },
    "Sanhedrin": {
        "ref": "Sanhedrin.2a-113b"
    },
    "Makkot": {
        "ref": "Makkot.2a-24b"
    },
    "Shevuot": {
        "ref": "Shevuot.2a-49b"
    },
    "Avodah Zarah": {
        "ref": "Avodah Zarah.2a-76b"
    },
    "Horayot": {
        "ref": "Horayot.2a-14a"
    },
    "Zevachim": {
        "ref": "Zevachim.2a-120b"
    },
    "Menachot": {
        "ref": "Menachot.2a-110a"
    },
    "Chullin": {
        "ref": "Chullin.2a-142a"
    },
    "Bekhorot": {
        "ref": "Bekhorot.2a-61a"
    },
    "Arakhin": {
        "ref": "Arakhin.2a-34a"
    },
    "Temurah": {
        "ref": "Temurah.2a-34a"
    },
    "Keritot": {
        "ref": "Keritot.2a-28b"
    },
    "Meilah": {
        "ref": "Meilah.2a-22a"
    },
    "Tamid": {
        "ref": "Tamid.2a-33b"
    },
    "Middot": {
        "ref": "Middot.2a-4b"
    },
    "Kinnim": {
        "ref": "Kinnim.2a-4a"
    },
    "Niddah": {
        "ref": "Niddah.2a-73a"
    },
    # Previously pointed at "Berakhot.2a-49b", which stored Berakhot text
    # under the Sotah filename.
    "Sotah": {
        "ref": "Sotah.2a-49b"
    },
}


def fetch_sefaria_text(ref, timeout=30):
    """Fetch text from the Sefaria API for a given reference.

    Args:
        ref: Sefaria reference string, e.g. "Bava Kamma.2a-119b".
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        body is not valid JSON, or the response is an object carrying an
        "error" key.
    """
    base_url = "https://www.sefaria.org/api/texts/"
    # Replace spaces with underscores, then percent-encode anything that is
    # not URL-safe so unusual refs cannot corrupt the request path.
    formatted_ref = quote(ref.replace(" ", "_"), safe="/:")
    url = f"{base_url}{formatted_ref}?context=0"

    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException.
        print(f"Error fetching {ref}: {e}")
        return None

    # An error payload is a non-empty dict, so callers testing truthiness
    # would otherwise mistake it for a successful fetch.
    if isinstance(data, dict) and "error" in data:
        print(f"Error fetching {ref}: {data['error']}")
        return None

    return data

def save_tractate_to_file(tractate_name, data):
    """Write one tractate's data as pretty-printed JSON under talmud_output/.

    The file is named after the tractate with spaces turned into
    underscores. The output directory is created on demand, and a
    confirmation line is printed once the file has been written.
    """
    target_dir = "talmud_output"
    os.makedirs(target_dir, exist_ok=True)

    # Splitting on a literal space and re-joining with "_" is equivalent to
    # replacing every space with an underscore.
    stem = "_".join(tractate_name.split(" "))
    filename = target_dir + "/" + stem + ".json"

    # ensure_ascii=False keeps non-ASCII text readable in the file.
    with open(filename, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, ensure_ascii=False, indent=2)

    print(f"Saved {tractate_name} to {filename}")

def process_passages():
    """Download every tractate in PASSAGES and write one JSON file per tractate.

    A file is written for every tractate, whether or not the fetch worked:
    on success it holds the Hebrew and English text, on failure it holds
    empty text fields plus an 'error' message.

    Returns:
        A ``(successful_count, failed_count)`` tuple.
    """
    successes = 0
    failures = 0

    for tractate_name, metadata in PASSAGES.items():
        print(f"Fetching text for {tractate_name}...")

        payload = fetch_sefaria_text(metadata['ref'])

        # Both outcomes record the same reference and public page link.
        ref = metadata.get('ref', '')
        page_url = f"https://www.sefaria.org/{ref.replace(' ', '_')}"

        if not payload:
            print(f"Failed to fetch text for {tractate_name}")
            # Still write a placeholder file so the failure is visible on disk.
            placeholder = {
                'ref': ref,
                'hebrew': '',
                'english': '',
                'sefaria_url': page_url,
                'error': 'Failed to fetch data from Sefaria API',
            }
            save_tractate_to_file(tractate_name, placeholder)
            failures += 1
            continue

        record = {
            'ref': ref,
            'hebrew': payload.get('he', ''),      # Hebrew text
            'english': payload.get('text', ''),   # English text
            'sefaria_url': page_url,
        }
        save_tractate_to_file(tractate_name, record)
        successes += 1

    return successes, failures

def main():
    """Run the full download and print a summary of the results."""
    ok_count, fail_count = process_passages()

    summary_lines = (
        "\nProcessing complete!",
        f"Successfully processed: {ok_count} tractates",
        f"Failed to process: {fail_count} tractates",
        "All files saved to the 'talmud_output' directory",
    )
    for line in summary_lines:
        print(line)

# Run the download only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

Fetching text for Sotah...
Saved Sotah to talmud_output/Sotah.json

Processing complete!
Successfully processed: 1 tractates
Failed to process: 0 tractates
All files saved to the 'talmud_output' directory
