# In [1]:  (Jupyter notebook cell prompt; the code below is the cell's source)
import requests
import pandas as pd
import re
import matplotlib.pyplot as plt
from datetime import datetime
import json
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Function to get news from NewsAPI (requires free API key)
def get_news(api_key, query, page_size=100, timeout=10):
    """
    Fetch news articles from the NewsAPI "everything" endpoint.

    You can get a free API key at https://newsapi.org/

    Args:
        api_key: NewsAPI key sent as the ``apiKey`` query parameter.
        query: Search expression sent as the ``q`` query parameter.
        page_size: Number of results requested (``pageSize``).
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON response when the server answers with HTTP 200,
        otherwise ``None`` (after printing the reason).
    """
    base_url = "https://newsapi.org/v2/everything"

    # English articles only, newest first.
    params = {
        'q': query,
        'pageSize': page_size,
        'language': 'en',
        'sortBy': 'publishedAt',
        'apiKey': api_key
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "print and return
        # None" contract as a non-200 response, so callers need one check.
        print(f"Error: request to NewsAPI failed: {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Error: {response.status_code}")
    print(response.text)
    return None

# Function to extract country mentions from headline/title
# Country names recognised by extract_country, in their canonical spelling.
_COUNTRY_NAMES = (
    "USA", "United States", "America", "China", "Japan", "South Korea",
    "Taiwan", "Germany", "UK", "United Kingdom", "France", "India",
    "Singapore", "Malaysia", "Vietnam", "Thailand", "Philippines",
    "Russia", "Brazil", "Canada", "Mexico", "Italy", "Spain",
)

# Maps a lower-cased match back to its canonical spelling.
_COUNTRY_CANONICAL = {name.lower(): name for name in _COUNTRY_NAMES}

# Compiled once; names are escaped so they are always matched literally.
_COUNTRY_PATTERN = re.compile(
    r'\b(' + '|'.join(re.escape(name) for name in _COUNTRY_NAMES) + r')\b',
    re.IGNORECASE,
)


def extract_country(text):
    """Return the countries mentioned in ``text`` as a comma-separated string.

    Matching is case-insensitive and restricted to whole words. Each country
    is reported once, in its canonical spelling, in order of first
    appearance. Different names for the same place (e.g. "USA" and
    "United States") are still reported separately.

    Args:
        text: Text to scan. ``None`` or an empty string is allowed.

    Returns:
        A string such as ``"China, Japan"``, or ``"None detected"`` when no
        known country name is found.
    """
    if not text:
        return "None detected"

    # A dict keeps insertion order, giving an ordered de-duplication.
    found = {}
    for match in _COUNTRY_PATTERN.findall(text):
        canonical = _COUNTRY_CANONICAL.get(match.lower(), match)
        found.setdefault(canonical, None)

    return ', '.join(found) if found else "None detected"

# Function to perform sentiment analysis using VADER
# Lazily created, shared VADER analyzer (see _get_sentiment_analyzer).
_sentiment_analyzer = None


def _get_sentiment_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _sentiment_analyzer
    if _sentiment_analyzer is None:
        _sentiment_analyzer = SentimentIntensityAnalyzer()
    return _sentiment_analyzer


def analyze_sentiment(text):
    """Classify ``text`` as "Positive", "Negative" or "Neutral" using VADER.

    The classification is based on VADER's ``compound`` score:
    ``>= 0.05`` is positive, ``<= -0.05`` is negative, anything in between
    is neutral.

    Args:
        text: Text to score. ``None``, empty or whitespace-only input is
            treated as carrying no sentiment.

    Returns:
        One of the strings ``"Positive"``, ``"Negative"`` or ``"Neutral"``.
    """
    # Nothing to score; avoids handing None to the analyzer.
    if not text or not text.strip():
        return "Neutral"

    # Reuse one analyzer instead of constructing a new one per headline.
    compound_score = _get_sentiment_analyzer().polarity_scores(text)['compound']

    if compound_score >= 0.05:
        return "Positive"
    if compound_score <= -0.05:
        return "Negative"
    return "Neutral"

# Function to process news data
def process_news_data(news_data, keywords, max_articles=20):
    """
    Filter articles by keyword and extract a summary record for each.

    An article is kept when any keyword occurs (case-insensitively, as a
    substring) in its title or description. Missing or ``None`` titles and
    descriptions are treated as empty strings.

    Args:
        news_data: Mapping with an ``'articles'`` list of article dicts
            (keys read: ``title``, ``description``, ``url``, ``publishedAt``).
        keywords: Iterable of keyword strings to look for.
        max_articles: Maximum number of matching articles to process.

    Returns:
        A list of dicts with the keys ``'Title'``, ``'URL'``,
        ``'Timestamp'``, ``'Country'`` and ``'Sentiment'``.
    """
    articles = news_data.get('articles') or []

    # Lower-case the keywords once instead of once per article.
    lowered_keywords = [keyword.lower() for keyword in keywords]

    # Filter for relevant articles containing keywords
    filtered_articles = []
    for article in articles:
        # `or ''` also covers keys that are present but set to None.
        title = (article.get('title') or '').lower()
        description = (article.get('description') or '').lower()

        if any(kw in title or kw in description for kw in lowered_keywords):
            filtered_articles.append(article)

    # Extract relevant details
    processed_data = []
    for article in filtered_articles[:max_articles]:
        title = article.get('title') or ''
        description = article.get('description') or ''
        url = article.get('url', '')
        published_at = article.get('publishedAt', '')

        # Format timestamp; values in any other format are passed through
        # unchanged.
        if published_at:
            try:
                dt = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ")
                timestamp = dt.strftime("%Y-%m-%d %H:%M")
            except (ValueError, TypeError):
                timestamp = published_at
        else:
            timestamp = "Unknown"

        # Countries are searched in title and description; sentiment is
        # scored on the title only.
        country = extract_country(title + " " + description)
        sentiment = analyze_sentiment(title)

        processed_data.append({
            'Title': title,
            'URL': url,
            'Timestamp': timestamp,
            'Country': country,
            'Sentiment': sentiment
        })

    return processed_data

# Function to visualize sentiment distribution
def visualize_sentiment(data):
    """Plot how many items fall into each sentiment class and save the chart.

    Args:
        data: Iterable of dicts that each have a ``'Sentiment'`` key.

    Returns:
        Dict mapping ``'Positive'``, ``'Neutral'`` and ``'Negative'`` (in that
        order) to the number of items with that sentiment. The bar chart is
        written to ``sentiment_distribution.png``.
    """
    # Tally each class; the label order fixes the bar order in the chart.
    sentiment_counts = {
        label: sum(1 for item in data if item['Sentiment'] == label)
        for label in ('Positive', 'Neutral', 'Negative')
    }

    plt.figure(figsize=(10, 6))
    plt.bar(
        sentiment_counts.keys(),
        sentiment_counts.values(),
        color=['green', 'gray', 'red'],
    )

    plt.title('Sentiment Distribution of News Headlines', fontsize=16)
    plt.xlabel('Sentiment', fontsize=14)
    plt.ylabel('Number of Headlines', fontsize=14)

    # Write each count just above its bar.
    for position, count in enumerate(sentiment_counts.values()):
        plt.text(position, count + 0.5, str(count), ha='center', fontweight='bold')

    plt.tight_layout()
    plt.savefig('sentiment_distribution.png')
    plt.close()

    return sentiment_counts

# Alternative function to get news from RSS feeds if you don't want to use NewsAPI
def get_news_from_rss():
    """Collect entries from a fixed list of tech and business RSS feeds.

    Returns:
        A dict ``{'articles': [...]}`` whose items have the keys ``'title'``,
        ``'description'``, ``'url'`` and ``'publishedAt'``, mirroring the
        fields that process_news_data reads.
    """
    import feedparser

    # Feeds covering technology and electronics news.
    rss_feeds = [
        'https://www.theverge.com/rss/index.xml',
        'https://feeds.feedburner.com/IeeeSpectrum',
        'https://www.wired.com/feed/rss',
        'https://www.eettaiwan.com/rss/',
        'https://www.electronicsweekly.com/feed/'
    ]

    def as_article(entry):
        # Rename feed fields to the article keys used by the rest of the script.
        return {
            'title': entry.get('title', ''),
            'description': entry.get('summary', ''),
            'url': entry.get('link', ''),
            'publishedAt': entry.get('published', '')
        }

    collected = []
    for feed_url in rss_feeds:
        try:
            parsed = feedparser.parse(feed_url)
            collected.extend(as_article(entry) for entry in parsed.entries)
        except Exception as e:
            # One failing feed must not stop the remaining feeds.
            print(f"Error parsing {feed_url}: {e}")

    return {'articles': collected}

def main():
    """Fetch news, filter and score it, then save a CSV and a sentiment chart.

    Results are written to ``electronics_news_data.csv`` and (via
    visualize_sentiment) ``sentiment_distribution.png``; a per-headline
    summary is printed to stdout.
    """
    # Keywords related to electronics, semiconductors, or manufacturing
    keywords = [
        'electronics', 'semiconductor', 'manufacturing', 'chip', 'processor',
        'TSMC', 'Intel', 'AMD', 'Nvidia', 'Samsung', 'factory', 'production',
        'supply chain', 'silicon', 'microchip', 'circuit', 'foundry'
    ]

    # Choose your data source:

    # OPTION 1: NewsAPI (requires API key)
    # Replace 'YOUR_API_KEY' with your actual NewsAPI key
    # api_key = 'YOUR_API_KEY'
    # query = ' OR '.join(keywords)
    # news_data = get_news(api_key, query)

    # OPTION 2: RSS feeds (no API key required)
    news_data = get_news_from_rss()

    # Bail out early when the source returned nothing usable.
    if not news_data:
        print("Failed to retrieve news data")
        return

    processed_data = process_news_data(news_data, keywords)

    # Persist the processed records as CSV via a DataFrame.
    df = pd.DataFrame(processed_data)
    df.to_csv('electronics_news_data.csv', index=False)
    print(f"Saved {len(df)} news articles to electronics_news_data.csv")

    # Chart the sentiment split and report the raw counts.
    sentiment_counts = visualize_sentiment(processed_data)
    print("Sentiment distribution:", sentiment_counts)

    # Numbered listing of each headline with its derived fields.
    for i, item in enumerate(processed_data, start=1):
        print(f"\n{i}. {item['Title']}")
        print(f"   Sentiment: {item['Sentiment']}")
        print(f"   Country: {item['Country']}")
        print(f"   Published: {item['Timestamp']}")


if __name__ == "__main__":
    main()

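# Notebook output: ModuleNotFoundError: No module named 'requests'
# The required packages were not installed in the notebook's environment; install them with
# `pip install requests pandas matplotlib vaderSentiment feedparser`.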