In [None]:
pip install docker



In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.3/44.3 kB 1.8 MB/s eta 0:00:00
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.1/9.1 MB 55.5 MB/s eta 0:00:00
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.9/6.9 MB 75.4 MB/s eta 0:00:00
Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.1/79.1 kB 5.1 MB/s eta 0:00:00

In [None]:
import requests
from bs4 import BeautifulSoup
import spacy
from textblob import TextBlob
import sqlite3
import docker

# 1. Data Scraping
def scrape_article(url, timeout=10):
    """Retrieve the main text of a news article from the given URL.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The text of every ``<p>`` element joined by single spaces (an empty
        string when the page has no paragraphs), or ``None`` if the download
        or parsing failed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn HTTP 4xx/5xx responses into exceptions so they are reported below.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return ' '.join(para.get_text() for para in soup.find_all('p'))
    except Exception as e:
        # Best-effort: report the problem and signal failure with None so the
        # caller can skip the remaining pipeline steps.
        print(f"Error scraping the article: {e}")
        return None

# 2. Entity Extraction
# Load the spaCy 'en_core_web_sm' pipeline once at module level; extract_entities() reuses it on every call.
nlp = spacy.load('en_core_web_sm')

def extract_entities(text):
    """Collect the PERSON and ORG entity mentions found in ``text``.

    Returns a dict with the keys ``'PERSON'`` and ``'ORG'`` (in that order),
    each mapping to a list of entity strings in order of appearance.
    Repeated mentions are kept, and entities with other labels are ignored.
    """
    wanted_labels = ('PERSON', 'ORG')
    found = {label: [] for label in wanted_labels}
    for span in nlp(text).ents:
        bucket = found.get(span.label_)
        if bucket is not None:
            bucket.append(span.text)
    return found

# 3. Sentiment Analysis
def analyze_sentiment(text):
    """Label ``text`` as "Positive", "Negative" or "Neutral".

    The label is derived from the sign of the TextBlob polarity score:
    above zero is positive, below zero is negative, anything else is neutral.
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    return "Neutral"

# 4. Database Storage
def store_in_database(url, text, entities, sentiment):
    """Store the article details in the ``articles.db`` SQLite database.

    Creates the ``Articles`` table on first use and appends one row to it.

    Args:
        url: Source URL of the article.
        text: Scraped article text, stored in the ``content`` column.
        entities: Extracted entities; stored as their ``str()`` representation.
        sentiment: Sentiment label for the article.

    Errors are reported on stdout rather than raised.
    """
    conn = None
    try:
        conn = sqlite3.connect('articles.db')
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS Articles (
                            id INTEGER PRIMARY KEY AUTOINCREMENT,
                            url TEXT,
                            content TEXT,
                            entities TEXT,
                            sentiment TEXT
                          )''')
        # Values are bound as parameters, never interpolated into the SQL text.
        cursor.execute('''INSERT INTO Articles (url, content, entities, sentiment) VALUES (?, ?, ?, ?)''',
                       (url, text, str(entities), sentiment))
        conn.commit()
    except Exception as e:
        print(f"Error storing data: {e}")
    finally:
        # Always release the connection, even when a statement above failed;
        # closing without a commit discards the unfinished transaction.
        if conn is not None:
            conn.close()

# Fetching data
def fetch_all_articles():
    """Print every row stored in the ``Articles`` table of ``articles.db``.

    Each row is printed as a tuple. For a table created by
    ``store_in_database`` the column order is
    ``(id, url, content, entities, sentiment)``.

    Errors (for example, the table not existing yet) are reported on stdout
    rather than raised, matching the other database helpers in this file.
    """
    conn = None
    try:
        conn = sqlite3.connect('articles.db')
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM Articles")
        for row in cursor.fetchall():
            print(row)
    except Exception as e:
        print(f"Error fetching data: {e}")
    finally:
        # Release the connection even if the query failed.
        if conn is not None:
            conn.close()

# Clearing the Stored data
def clear_database():
    """Clear all data from the database.

    Deletes every row of the ``Articles`` table in ``articles.db`` and removes
    the table's entry from ``sqlite_sequence`` so that AUTOINCREMENT ids start
    again from 1. Errors are reported on stdout rather than raised.
    """
    conn = None
    try:
        conn = sqlite3.connect('articles.db')
        cursor = conn.cursor()
        cursor.execute('DELETE FROM Articles')  # Clears all data
        # Resets the auto-increment IDs. The table name is bound as a parameter:
        # a double-quoted "Articles" is an identifier in SQL and is only treated
        # as a string by SQLite's legacy fallback, which builds may disable.
        cursor.execute('DELETE FROM sqlite_sequence WHERE name = ?', ('Articles',))
        conn.commit()
        print("All data cleared from the database successfully.")
    except Exception as e:
        print(f"Error clearing database: {e}")
    finally:
        # Release the connection even if one of the statements failed.
        if conn is not None:
            conn.close()

# Main Functionality
def main():
    """Run the pipeline for one article: prompt, scrape, analyse, store.

    Prompts for a URL on stdin, scrapes the article text, extracts PERSON/ORG
    entities, classifies the sentiment, and stores everything in the database.
    If no text could be scraped, a message is printed and nothing is stored.

    To inspect or reset the stored data afterwards, call
    ``fetch_all_articles()`` (prints rows as id, url, content, entities,
    sentiment) or ``clear_database()`` (removes all stored rows).
    """
    # Pasted URLs can carry stray surrounding whitespace or a trailing newline.
    url = input("Enter the news article URL: ").strip()
    article_text = scrape_article(url)

    if not article_text:
        # Covers both a failed download (None) and a page without <p> text ("").
        print("No article text could be extracted from the URL.")
        return

    print("\nArticle scraped successfully.")
    entities = extract_entities(article_text)
    print("Extracted Entities:", entities)

    sentiment = analyze_sentiment(article_text)
    print("Sentiment:", sentiment)

    store_in_database(url, article_text, entities, sentiment)
    print("Data stored in database successfully.")

# Entry point: run the pipeline when this code is executed directly (not when imported).
if __name__ == "__main__":
    main()


Enter the news article URL: https://www.themintmagazine.com/profit-and-profiteroles/

Article scraped successfully.
Extracted Entities: {'PERSON': ['Lebohang Liepollo Pheko', 'Covid', 'Gauteng', 'Zuma', 'Covid', 'Enoch Gondogwana', 'Lebohang'], 'ORG': ['The Mint Magazine', 'the African National Congress', 'ANC', 'ANC', 'ANC', 'ZAR 30bn', 'National Disaster Benefit Fund', 'Solidarity Fund', 'ZAR 500bn', 'the National Treasury Strategy', 'Kwa-Zulu', 'ANC', 'Social Relief of Distress', 'State', 'treasury', 'international trade & global financial governance', 'interviews & columns', 'PEP']}
Sentiment: Positive
Data stored in database successfully.
