### Wikipedia Article Scraper

In [1]:
pip install requests beautifulsoup4

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
from bs4 import BeautifulSoup

In [4]:
def get_wikipedia_page(topic):
    url = f"https://en.wikipedia.org/wiki/{topic.replace(' ', '_')}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}. Check the topic and try again")
        return None

In [5]:
def get_article_title(soup):
    return soup.find("h1").text

In [8]:
def get_article_summary(soup):
    paragraphs = soup.find_all("p")
    for para in paragraphs:
        if para.text.strip():
            return para.text.strip()
    return "No summary founs"

In [9]:
def get_article_headings(soup):
    headings = [heading.text.strip() for heading in soup.find_all(['h2', 'h3', 'h4'])]
    return headings

In [10]:
def get_related_links(soup):
    links = []
    for a_tag in soup.find_all('a', href=True):
        href = a_tag['href']
        if href.startswith('/wiki/') and ":" not in href:
            links.append(f"https://en.wikipedia.org{href}")
    return list(set(links))[:5]

In [11]:
def print_article(title, summary, headings, related_links):
    print(f"\nTitle: {title}")
    print(f"\nSummary: {summary}")
    
    print(f"\nHeadings:")
    for heading in headings:
        print(f"- {heading}")
    
    print(f"\nRelated Links: ")
    for link in related_links:
        print(f"- {link}")

In [12]:
def main():
    topic = input("Enter a topic to search in Wikipedia: ").strip()
    page_content = get_wikipedia_page(topic)
    
    if page_content:
        soup = BeautifulSoup(page_content, 'html.parser')
        title = get_article_title(soup)
        summary = get_article_summary(soup)
        headings = get_article_headings(soup)
        related_links = get_related_links(soup)
        print_article(title, summary, headings, related_links)

In [13]:
if __name__ == "__main__":
    main()


Title: Python (programming language)

Summary: Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.[34]

Headings:
- Contents
- History
- Design philosophy and features
- Syntax and semantics
- Indentation
- Statements and control flow
- Expressions
- Methods
- Typing
- Arithmetic operations
- Function syntax
- Code examples
- Libraries
- Development environments
- Implementations
- Reference implementation
- Other implementations
- Unsupported implementations
- Cross-compilers to other languages
- Performance
- Language Development
- API documentation generators
- Naming
- Popularity
- Types of Use
- Languages influenced by Python
- See also
- Notes
- References
- Sources
- Further reading
- External links

Related Links: 
- https://en.wikipedia.org/wiki/Copyleft
- https://en.wikipedia.org/wiki/JavaScript
- https://en.wikipedia.org/wiki/PyQt
- https://en.wikipedia.org/wiki/Release_cand