In [1]:
pip install transformers wikipedia-api torch

Collecting wikipedia-api
  Downloading Wikipedia_API-0.6.0-py3-none-any.whl.metadata (22 kB)
Downloading Wikipedia_API-0.6.0-py3-none-any.whl (14 kB)
Installing collected packages: wikipedia-api
Successfully installed wikipedia-api-0.6.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Dependencies
import wikipediaapi
import requests
import pandas as pd
from transformers import pipeline

In [None]:
# Build the wikipedia-api client for English Wikipedia, passing an explicit User-Agent string.
# NOTE(review): the contact address ends in "gmai.com" — possibly a typo for "gmail.com"; confirm with the owner.
wiki_wiki = wikipediaapi.Wikipedia(user_agent='AmericanHistory/1.0 (dazzirous@gmai.com)', language='en')

In [None]:
# API function to get articles from a Wikipedia category
def get_articles_from_category(category, limit=10, timeout=10):
    """Fetch the pages in a Wikipedia category together with their intro text.

    Args:
        category: Category name without the "Category:" prefix.
        limit: Maximum number of category members to request (sent as cmlimit).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A DataFrame with "Title", "Page ID" and "Description" columns, one row
        per page (empty when the category lists no pages), or None when the
        response does not contain a category member list.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.RequestException: On connection problems or timeouts.
    """
    # Define the API endpoint
    url = "https://en.wikipedia.org/w/api.php"
    no_description = 'No description available.'

    # Parameters for the API call that lists the articles in the category
    params_category = {
        "action": "query",
        "format": "json",
        "list": "categorymembers",
        "cmtitle": f"Category:{category}",
        "cmlimit": limit,
        "cmtype": "page"  # Fetch only articles (pages), not subcategories or files
    }

    # One session so the per-article requests below reuse the same connection
    with requests.Session() as session:
        response = session.get(url, params=params_category, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        members = data.get("query", {}).get("categorymembers")
        if members is None:
            print(f"No articles found in category: {category}")
            return None

        # Collect one record per article, fetching its intro extract
        articles_data = []
        for article in members:
            title = article['title']
            page_id = article['pageid']

            params_extract = {
                "action": "query",
                "format": "json",
                "prop": "extracts",
                "exintro": True,
                "explaintext": True,
                "pageids": page_id
            }

            response_extract = session.get(url, params=params_extract, timeout=timeout)
            response_extract.raise_for_status()

            # Tolerate a response that lacks the page entry instead of raising KeyError
            pages = response_extract.json().get('query', {}).get('pages', {})
            extract = pages.get(str(page_id), {}).get('extract', no_description)

            articles_data.append({
                "Title": title,
                "Page ID": page_id,
                "Description": extract
            })

    # Explicit columns keep the frame's schema stable even when no rows were collected
    return pd.DataFrame(articles_data, columns=["Title", "Page ID", "Description"])
        
# Question-answering pipelines already built in this kernel, keyed by model name
_qa_pipeline_cache = {}


def _get_qa_pipeline(model_name="distilbert-base-cased-distilled-squad"):
    """Build the question-answering pipeline on first use and reuse it afterwards."""
    if model_name not in _qa_pipeline_cache:
        _qa_pipeline_cache[model_name] = pipeline("question-answering", model=model_name)
    return _qa_pipeline_cache[model_name]


# Function to retrieve summary and answer questions about the American Revolution
def get_american_revolution_summary_and_answer(question):
    """Answer a question using the intro text of "American Revolution" category articles.

    Args:
        question: The question to answer, as plain text.

    Returns:
        The answer text extracted by the question-answering model, or a fixed
        fallback message when no article text is available to use as context.
    """
    fallback = "No relevant information found to answer the question."

    # Fetch articles related to the American Revolution
    df_articles = get_articles_from_category("American Revolution")
    if df_articles is None or df_articles.empty:
        return fallback

    # Combine all descriptions into a single context
    descriptions = df_articles['Description'].dropna().astype(str)
    context = ' '.join(descriptions.tolist())

    # Every extract can be empty; return the fallback rather than calling the model on a blank context
    if not context.strip():
        return fallback

    # Reuse the cached pipeline so the model is not reloaded for every question
    qa_pipeline = _get_qa_pipeline()
    answer = qa_pipeline(question=question, context=context)
    return answer['answer']
