In [3]:
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, BartForConditionalGeneration, BartTokenizer
import torch

In [22]:
def scrape_purport(verse_ref):
    """Download a vedabase.io ``sb`` library page and return its text content.

    Args:
        verse_ref: Dotted reference such as ``"2.2.2"``; every dot becomes a
            path separator in the request URL.

    Returns:
        The text of the first candidate element that actually contains text
        (fragments stripped and joined with single spaces), or the text of
        the ``<main>`` / ``div.content`` fallback capped at 20000 characters.
        ``None`` when no text could be extracted, or when the request or the
        parsing raised (the error is printed, not re-raised).
    """
    url = f"https://vedabase.io/en/library/sb/{verse_ref.replace('.', '/')}/"

    # Browser-like headers sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9'
    }

    # Candidate containers, tried in order; the first one yielding text wins.
    # NOTE(review): the later entries ('div.verse-text', 'article') are broad
    # and may capture more than the purport itself - confirm against the
    # current page markup.
    selectors = (
        'div.r-purport',
        'div.purport-wrapper',
        'div.verse-text',
        'div#purport',
        'article',
    )

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        for selector in selectors:
            element = soup.select_one(selector)
            if element is None:
                continue
            text = element.get_text(' ', strip=True)
            # A matched-but-empty element is not a result; keep looking
            # instead of returning an empty string.
            if text:
                return text

        # Last resort: take the main content area, capped in length.
        main_content = soup.find('main') or soup.find('div', class_='content')
        if main_content:
            text = main_content.get_text(' ', strip=True)
            if text:
                return text[:20000]

        return None

    except requests.HTTPError as e:
        print(f"HTTP Error for SB {verse_ref}: {e}")
        return None
    except Exception as e:
        # Best-effort scraper: report connection, timeout or parsing problems
        # and signal "no result" rather than aborting the notebook.
        print(f"General error for SB {verse_ref}: {e}")
        return None

# Demo: fetch one verse page and preview the start of the extracted text.
verse_ref = "2.2.2"
purport = scrape_purport(verse_ref)

if purport:
    print(f"Purport found for SB {verse_ref}:")
    # Only append an ellipsis when the preview really cuts the text short.
    preview = purport[:200]
    print((preview + "...") if len(purport) > 200 else preview)
else:
    # The message announces reasons, so actually list them.
    print(f"No purport found for SB {verse_ref}. Possible reasons:")
    print("  - the verse reference does not exist on the site")
    print("  - the request failed (see the error printed above, if any)")
    print("  - the page layout changed and none of the selectors matched")

Purport found for SB 2.2.2:
Default View Show in Advanced View Dual Language View ŚB 2.2.2 Devanagari शाब्दस्य हि ब्रह्मण एष पन्था यन्नामभिर्ध्यायति धीरपार्थै: । परिभ्रमंस्तत्र न विन्दतेऽर्थान् मायामये वासनया शयान: ॥ २ ॥ Verse t...


In [23]:
# Tokenizer/model pairs already loaded in this kernel, keyed by checkpoint
# name, so repeated summarize_bart() calls do not reload the checkpoint.
# Re-running this cell resets the cache.
_BART_CACHE = {}


def _load_bart(model_name):
    """Return the ``(tokenizer, model)`` pair for ``model_name``, loading it on first use."""
    if model_name not in _BART_CACHE:
        _BART_CACHE[model_name] = (
            BartTokenizer.from_pretrained(model_name),
            BartForConditionalGeneration.from_pretrained(model_name),
        )
    return _BART_CACHE[model_name]


def summarize_bart(text, max_length=150):
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` checkpoint.

    Args:
        text: Text to summarize. The tokenizer truncates it to 1024 tokens,
            so anything beyond that is not seen by the model.
        max_length: ``max_length`` passed to ``model.generate``.

    Returns:
        The decoded summary string. ``text`` itself is returned unchanged when
        it is empty/``None`` or contains one of the sentinel phrases
        ``"Could not find"`` / ``"Error retrieving"``. If anything raises, the
        string ``"Summarization failed: <error>"`` is returned instead of
        propagating the exception.
    """
    try:
        # Pass through empty input and sentinel messages untouched.
        # NOTE(review): this is a substring test, so genuine text containing
        # one of these phrases is also returned unsummarized - confirm intent.
        if not text or "Could not find" in text or "Error retrieving" in text:
            return text

        tokenizer, model = _load_bart("facebook/bart-large-cnn")

        inputs = tokenizer([text], max_length=1024, return_tensors="pt", truncation=True)

        summary_ids = model.generate(
            inputs["input_ids"],
            # Hand over the mask produced by the tokenizer instead of leaving
            # generate() to infer one.
            attention_mask=inputs["attention_mask"],
            max_length=max_length,
            num_beams=4,
            early_stopping=True,
        )

        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    except Exception as e:
        return f"Summarization failed: {str(e)}"

# Usage: scrape one verse page and summarize whatever text was extracted.
purport = scrape_purport("2.2.2")
# scrape_purport() signals failure with None, so test for that before calling
# any string method; the "Error" prefix check is kept for compatibility with
# message-style results.
if purport and not purport.startswith("Error"):
    summary = summarize_bart(purport)
    print("Summary:", summary)
else:
    print(purport or "No purport text available to summarize.")

Summary: The real happiness is in the kingdom of God, where no one has to undergo the pangs of material existence. Conditioned souls are strictly under the laws of fruitive activities, and as such they sometimes go up to Brahmaloka and again come down to Pātālaloka, as if they were unintelligent children on a merry-go-round.
