<a href="https://colab.research.google.com/github/VNagaHarshitha/SentimentAnalysisProject/blob/main/sentimentanalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import requests
import pandas as pd
import getpass, os
import google.generativeai as genai

# Ask for every credential on each run (nothing is read from a saved config),
# and publish the answers through the process environment.
os.environ.update({
    "NEWSAPI_KEY": getpass.getpass("Enter your NewsAPI key: "),
    "GNEWS_KEY": getpass.getpass("Enter your GNews API key: "),
    "GEMINI_API_KEY": getpass.getpass("Enter your Gemini API key: "),
})

# Module-level copies used by the fetch helpers and the Gemini setup below.
NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY")
GNEWS_KEY = os.environ.get("GNEWS_KEY")
GEMINI_KEY = os.environ.get("GEMINI_API_KEY")

# Set up the Gemini client. `model` stays None when setup fails, which the
# sentiment step checks before making any call.
model = None
try:
    genai.configure(api_key=GEMINI_KEY)
    model = genai.GenerativeModel("gemini-1.5-flash")
except Exception as err:
    print("⚠️ Could not initialize Gemini:", err)
    model = None

# Fetch from NewsAPI (limited to 100 on free tier)
def fetch_newsapi(query="business", language="en", max_articles=100):
    """Collect up to ``max_articles`` articles from the NewsAPI "everything" endpoint.

    Pages are requested one after another until enough articles have been
    gathered, a page comes back empty, or a request fails. Failures are
    reported with a printed warning and whatever was collected so far is
    returned; the function does not raise for network or HTTP errors.

    Args:
        query: Search phrase sent as the ``q`` parameter.
        language: Language code sent as the ``language`` parameter.
        max_articles: Upper bound on the number of articles returned.

    Returns:
        A list of dicts with the keys ``source`` (always ``"NewsAPI"``),
        ``title`` and ``description``. Missing or null fields become ``""``.
    """
    BASE_URL = "https://newsapi.org/v2/everything"
    all_articles = []
    page = 1
    while len(all_articles) < max_articles:
        params = {
            "q": query,
            "language": language,
            "pageSize": 100,   # max per request
            "page": page,
            "apiKey": NEWSAPI_KEY
        }
        try:
            # Without a timeout a stalled connection would hang the run forever.
            r = requests.get(BASE_URL, params=params, timeout=15)
        except requests.RequestException as e:
            print("⚠️ NewsAPI request failed:", e)
            break
        if r.status_code != 200:
            # Covers invalid keys, rate limits and paging past the plan's quota.
            print(f"⚠️ NewsAPI returned HTTP {r.status_code}; stopping at page {page}")
            break
        try:
            data = r.json()
        except ValueError as e:
            print("⚠️ NewsAPI returned a non-JSON body:", e)
            break
        articles = data.get("articles") or []
        if not articles:
            break
        all_articles.extend(articles)
        page += 1
    return [
        {
            "source": "NewsAPI",
            # `or ""` also covers an explicit JSON null, which dict.get's
            # default does not (the default only applies to a missing key).
            "title": a.get("title") or "",
            "description": a.get("description") or "",
        }
        for a in all_articles[:max_articles]
    ]

# Fetch from GNews (account must be verified)
def fetch_gnews(query="business", language="en", max_articles=100):
    """Collect up to ``max_articles`` articles from the GNews search endpoint.

    Pages are requested one after another until enough articles have been
    gathered, a page comes back empty, or a request fails. Failures are
    reported with a printed warning and whatever was collected so far is
    returned; the function does not raise for network or HTTP errors.

    Args:
        query: Search phrase sent as the ``q`` parameter.
        language: Language code sent as the ``lang`` parameter.
        max_articles: Upper bound on the number of articles returned.

    Returns:
        A list of dicts with the keys ``source`` (always ``"GNews"``),
        ``title`` and ``description``. Missing or null fields become ``""``.
    """
    BASE_URL = "https://gnews.io/api/v4/search"
    all_articles = []
    page = 1
    while len(all_articles) < max_articles:
        params = {
            "q": query,
            "lang": language,
            "max": 100,
            "page": page,
            "token": GNEWS_KEY
        }
        try:
            # Without a timeout a stalled connection would hang the run forever.
            r = requests.get(BASE_URL, params=params, timeout=15)
        except requests.RequestException as e:
            print("⚠️ GNews request failed:", e)
            break
        if r.status_code != 200:
            # Covers invalid tokens, rate limits and paging past the plan's quota.
            print(f"⚠️ GNews returned HTTP {r.status_code}; stopping at page {page}")
            break
        try:
            data = r.json()
        except ValueError as e:
            print("⚠️ GNews returned a non-JSON body:", e)
            break
        articles = data.get("articles") or []
        if not articles:
            break
        all_articles.extend(articles)
        page += 1
    return [
        {
            "source": "GNews",
            # `or ""` also covers an explicit JSON null, which dict.get's
            # default does not (the default only applies to a missing key).
            "title": a.get("title") or "",
            "description": a.get("description") or "",
        }
        for a in all_articles[:max_articles]
    ]

# Gemini LLM Sentiment Analysis (batched)
_SENTIMENT_LABELS = {
    "positive": "Positive",
    "negative": "Negative",
    "neutral": "Neutral",
}


def _parse_sentiment_reply(raw, expected):
    """Turn one model reply into exactly ``expected`` labels, in headline order."""
    labels = ["Neutral"] * expected
    position = 0  # slot used for a line that carries no "[n]" prefix
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        index, text, numbered = position, line, False
        if line.startswith("["):
            number, closing, rest = line[1:].partition("]")
            if closing:
                try:
                    index, text, numbered = int(number) - 1, rest, True
                except ValueError:
                    pass  # "[...]" did not hold a number; treat as plain text
        label = _SENTIMENT_LABELS.get(text.strip(" \t*.:-").casefold())
        if label is None and not numbered:
            # Preamble or commentary: it must not consume a headline slot.
            continue
        if 0 <= index < expected:
            labels[index] = label or "Neutral"
            position = index + 1
    return labels


def analyze_sentiments_batch(texts, batch_size=50):
    """Classify headlines as Positive, Negative or Neutral using Gemini.

    Headlines are sent in groups of ``batch_size``. Each reply is matched back
    to its headlines by the ``[n]`` number on every line, so a missing, extra
    or reordered reply line cannot shift the labels of other headlines.

    Args:
        texts: Headlines to classify.
        batch_size: Number of headlines sent per request.

    Returns:
        A list with one label per entry of ``texts``, in the same order.
        Headlines the model did not label, batches whose request failed, and
        every headline when the module-level ``model`` is unset are reported
        as ``"Neutral"``.
    """
    if not texts:
        return []
    if not model:
        return ["Neutral"] * len(texts)
    sentiments = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i+batch_size]
        # Collapse internal whitespace so a headline containing a newline
        # cannot break the one-line-per-headline numbering.
        flattened = [" ".join(str(t).split()) for t in batch]
        joined = "\n".join(f"[{j}] {t}" for j, t in enumerate(flattened, start=1))
        prompt = f"""
        Classify each news headline as Positive, Negative, or Neutral.
        Respond only in this format:
        [1] Positive
        [2] Neutral
        [3] Negative

        Headlines:
        {joined}
        """
        try:
            response = model.generate_content(prompt, request_options={"timeout": 20})
            batch_sentiments = _parse_sentiment_reply(response.text, len(batch))
        except Exception as e:
            # Best effort: one failed batch must not abort the whole run, but
            # say so, otherwise an all-Neutral column looks like a real result.
            print("⚠️ Gemini batch failed; defaulting to Neutral:", e)
            batch_sentiments = ["Neutral"] * len(batch)
        sentiments.extend(batch_sentiments)
    return sentiments

# Main pipeline: fetch from both providers, label each headline, export a CSV.
# Statements stay at top level so the results (df, all_articles, ...) remain
# available as globals after the cell has run.
if __name__ == "__main__":
    newsapi_articles = fetch_newsapi(query="stock market", max_articles=100)
    gnews_articles = fetch_gnews(query="stock market", max_articles=100)

    all_articles = newsapi_articles + gnews_articles

    if all_articles:
        # Only articles with a title are sent for classification. Keep the
        # article objects alongside their titles so each label is written back
        # to the article it was computed for; indexing into the unfiltered
        # list would shift every label after the first untitled article.
        titled_articles = [a for a in all_articles if a["title"]]
        texts = [a["title"] for a in titled_articles]
        sentiments = analyze_sentiments_batch(texts, batch_size=50)

        # Default first, so untitled articles and any headline without a
        # returned label still get a value in the "sentiment" column.
        for article in all_articles:
            article["sentiment"] = "Neutral"
        for article, sentiment in zip(titled_articles, sentiments):
            article["sentiment"] = sentiment

        df = pd.DataFrame(all_articles)
        print(df)

        df.to_csv("combined_news_sentiment.csv", index=False)
        print("\n✅ Data exported to combined_news_sentiment.csv")
        print(f"✅ Total Articles Analyzed: {len(df)}")
    else:
        print("No articles found.")



Enter your NewsAPI key: ··········
Enter your GNews API key: ··········
Enter your Gemini API key: ··········


ERROR:tornado.access:503 POST /v1beta/models/gemini-1.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 4068.88ms


     source                                              title  \
0   NewsAPI           Sam Altman says ‘yes,’ AI is in a bubble   
1   NewsAPI  ‘It’s Not Going to Slow Down’: The Tech Stock ...   
2   NewsAPI  OpenAI gives some employees a ‘special’ multim...   
3   NewsAPI  Why Is Tech Worried When Stocks Like Chevron D...   
4   NewsAPI  Elon Musk Hopes to Wriggle His Way Out of Alle...   
..      ...                                                ...   
95  NewsAPI  QBTS or RGTI: Which Stock Will Win As IBM, MSF...   
96  NewsAPI  How Is PepsiCo’s Stock Performance Compared to...   
97  NewsAPI  New report reveals stunning trend in used Tesl...   
98  NewsAPI  Quantum Stocks Q2 2025: Are D-Wave, IonQ & Rig...   
99  NewsAPI                     The week that Google ate Adobe   

                                          description sentiment  
0   As economists speculate whether the stock mark...   Neutral  
1   It has signaled it is willing to play ball wit...   Neutral  
2   The d