In [None]:
import requests
import pandas as pd
from datetime import datetime

import os

# The Finnhub token is read from the environment so the secret is not stored
# in the notebook. Set FINNHUB_API_KEY before running; if it is unset the
# token is an empty string. The key that used to be hardcoded here should be
# treated as exposed and rotated.
API_KEY = os.environ.get("FINNHUB_API_KEY", "")
BASE_URL = "https://finnhub.io/api/v1"

def get_company_news(symbol, from_date, to_date):
    """Fetch company news from Finnhub and return it as a DataFrame.

    Args:
        symbol: Value sent as the ``symbol`` query parameter.
        from_date: Value sent as the ``from`` query parameter (the call sites
            in this notebook pass ``YYYY-MM-DD`` strings).
        to_date: Value sent as the ``to`` query parameter.

    Returns:
        A DataFrame built from the JSON list in the response, with the
        ``datetime`` column converted from epoch seconds to timestamps.
        An empty DataFrame is returned when the request fails, the body is
        not valid JSON, or the payload is not a non-empty list.
    """
    url = f"{BASE_URL}/company-news"
    params = {
        "symbol": symbol,
        "from": from_date,
        "to": to_date,
        "token": API_KEY,
    }

    try:
        # Without a timeout a stalled connection blocks the kernel indefinitely.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        news = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Only the exception class is printed: the full message can include
        # the request URL, and the URL carries the API token.
        print(f"No news found or error. ({type(exc).__name__})")
        return pd.DataFrame()

    if not (isinstance(news, list) and news):
        print("No news found or error.")
        return pd.DataFrame()

    df = pd.DataFrame(news)
    df["datetime"] = pd.to_datetime(df["datetime"], unit="s")
    print(df[["datetime", "headline", "url"]])
    return df


# Pull news for the tracked tickers over the 2023-01-01 .. 2025-04-10 window.
df = get_company_news(
    ["AAPL", "MSFT", "AMZN", "GOOGL", "NVDA", "META", "TSLA", "NFLX"],
    "2023-01-01", "2025-04-10",
)


def strip_minutes_seconds_keep_hour(df, column):
    """Truncate a datetime column to the hour and store it as text.

    The column is parsed with ``pd.to_datetime``, floored to the start of its
    hour, and overwritten with ``'YYYY-MM-DD HH:00'`` strings.

    Args:
        df: DataFrame holding the column; it is modified in place.
        column: Name of the column to truncate.

    Returns:
        The same ``df`` object, for call chaining.
    """
    # Lowercase "h" is the hourly alias; the uppercase "H" spelling is
    # deprecated in recent pandas and triggers a FutureWarning.
    hourly = pd.to_datetime(df[column]).dt.floor("h")
    df[column] = hourly.dt.strftime("%Y-%m-%d %H:00")
    return df

               datetime                                           headline  \
0   2025-04-10 20:56:51  Jim Cramer on Apple Inc. (AAPL): Can Tariffs D...   
1   2025-04-10 20:49:00  These Stocks Moved the Most Today: Nvidia, App...   
2   2025-04-10 20:27:58                   Why Apple Stock Is Sinking Today   
3   2025-04-10 19:54:45   Sector Update: Tech Stocks Plunge Late Afternoon   
4   2025-04-10 19:01:21             Apple's Efforts to Prepare for Tariffs   
..                  ...                                                ...   
230 2025-04-07 06:27:17  Apple appealing against UK 'back door' order, ...   
231 2025-04-07 06:22:52  Apple appealing against UK government's 'back ...   
232 2025-04-07 06:15:00  ClearBridge Large Cap Growth Strategy Q1 2025 ...   
233 2025-04-07 05:40:00  Tesla shares tumble after bullish analyst Dan ...   
234 2025-04-06 23:08:14    I Buy These 4 Stocks To Weather Trump's Tariffs   

                                                   url  
0    h

In [None]:
# Fetch the news again, then reduce every timestamp to its hour before printing.
df = get_company_news(
    ["AAPL", "MSFT", "AMZN", "GOOGL", "NVDA", "META", "TSLA", "NFLX"],
    "2023-01-01", "2025-04-10",
)
if not df.empty:
    # Only post-process when the fetch returned rows.
    df = strip_minutes_seconds_keep_hour(df, "datetime")
    print(df.loc[:, ["datetime", "headline", "url"]])

               datetime                                           headline  \
0   2025-04-10 20:56:51  Jim Cramer on Apple Inc. (AAPL): Can Tariffs D...   
1   2025-04-10 20:49:00  These Stocks Moved the Most Today: Nvidia, App...   
2   2025-04-10 20:27:58                   Why Apple Stock Is Sinking Today   
3   2025-04-10 19:54:45   Sector Update: Tech Stocks Plunge Late Afternoon   
4   2025-04-10 19:01:21             Apple's Efforts to Prepare for Tariffs   
..                  ...                                                ...   
230 2025-04-07 06:27:17  Apple appealing against UK 'back door' order, ...   
231 2025-04-07 06:22:52  Apple appealing against UK government's 'back ...   
232 2025-04-07 06:15:00  ClearBridge Large Cap Growth Strategy Q1 2025 ...   
233 2025-04-07 05:40:00  Tesla shares tumble after bullish analyst Dan ...   
234 2025-04-06 23:08:14    I Buy These 4 Stocks To Weather Trump's Tariffs   

                                                   url  
0    h

  df[column] = df[column].dt.floor('H')


In [None]:
import requests
import pandas as pd
from datetime import datetime

import os

# The Finnhub token is read from the environment so the secret is not stored
# in the notebook. Set FINNHUB_API_KEY before running; if it is unset the
# token is an empty string. The key that used to be hardcoded here should be
# treated as exposed and rotated.
API_KEY = os.environ.get("FINNHUB_API_KEY", "")
BASE_URL = "https://finnhub.io/api/v1"

def get_company_news(symbols, from_date, to_date):
    """
    Retrieve news from Finnhub for one or multiple ticker symbols.

    Args:
        symbols: A single ticker string or an iterable of ticker strings.
            One request is issued per symbol.
        from_date: Value sent as the ``from`` query parameter.
        to_date: Value sent as the ``to`` query parameter.

    Returns:
        A single DataFrame with the rows of every successful response and an
        added 'symbol' column naming the ticker each row was fetched for.
        'datetime' is converted from epoch seconds when the column exists.
        An empty DataFrame is returned when no symbol produced any rows.
        A failed or empty response for one symbol is reported and skipped;
        it does not abort the remaining symbols.
    """
    if isinstance(symbols, str):
        symbols = [symbols]

    url = f"{BASE_URL}/company-news"
    # Per-symbol frames are collected and concatenated once at the end;
    # concatenating inside the loop re-copies the accumulated rows each pass.
    frames = []

    for symbol in symbols:
        params = {
            "symbol": symbol,
            "from": from_date,
            "to": to_date,
            "token": API_KEY,
        }

        try:
            # Without a timeout a stalled connection blocks the kernel indefinitely.
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Only the exception class is printed: the full message can
            # include the request URL, and the URL carries the API token.
            print(f"No news found (or error) for symbol: {symbol} [{type(exc).__name__}]")
            continue

        if not (isinstance(data, list) and data):
            print(f"No news found (or error) for symbol: {symbol}")
            continue

        df_symbol = pd.DataFrame(data)
        if "datetime" in df_symbol.columns:
            df_symbol["datetime"] = pd.to_datetime(df_symbol["datetime"], unit="s")
        df_symbol["symbol"] = symbol
        frames.append(df_symbol)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

def strip_minutes_seconds_keep_hour(df, column):
    """
    Floor the datetime column down to the start of its hour (dropping minutes
    and seconds) and store it as 'YYYY-MM-DD HH:00' text.

    Args:
        df: DataFrame to update; it is modified in place.
        column: Name of the datetime column. If the column is absent the
            frame is returned untouched.

    Returns:
        The same ``df`` object.
    """
    if column in df.columns:
        # Lowercase "h" is the hourly alias; the uppercase "H" spelling is
        # deprecated in recent pandas and triggers a FutureWarning.
        hourly = pd.to_datetime(df[column]).dt.floor("h")
        df[column] = hourly.dt.strftime("%Y-%m-%d %H:00")
    return df


if __name__ == "__main__":
    # Query window and the tickers to pull news for.
    from_date, to_date = "2023-01-01", "2025-04-10"
    symbols_list = [
        "AAPL", "MSFT", "AMZN", "GOOGL",
        "NVDA", "META", "TSLA", "NFLX",
    ]

    df = get_company_news(symbols_list, from_date, to_date)

    if df.empty:
        print("No news found for the specified symbols and date range.")
    else:
        # Collapse timestamps to the hour, preview the first rows, then export.
        df = strip_minutes_seconds_keep_hour(df, "datetime")
        print(df[["symbol", "datetime", "headline", "url"]].head(15))

        output_file = "all_symbol_news.csv"
        df.to_csv(output_file, index=False)
        print(f"Data saved to '{output_file}'")


   symbol          datetime  \
0    AAPL  2025-04-10 20:00   
1    AAPL  2025-04-10 20:00   
2    AAPL  2025-04-10 20:00   
3    AAPL  2025-04-10 19:00   
4    AAPL  2025-04-10 19:00   
5    AAPL  2025-04-10 18:00   
6    AAPL  2025-04-10 18:00   
7    AAPL  2025-04-10 18:00   
8    AAPL  2025-04-10 17:00   
9    AAPL  2025-04-10 17:00   
10   AAPL  2025-04-10 12:00   
11   AAPL  2025-04-10 16:00   
12   AAPL  2025-04-10 16:00   
13   AAPL  2025-04-10 16:00   
14   AAPL  2025-04-10 16:00   

                                             headline  \
0   Jim Cramer on Apple Inc. (AAPL): Can Tariffs D...   
1   These Stocks Moved the Most Today: Nvidia, App...   
2                    Why Apple Stock Is Sinking Today   
3    Sector Update: Tech Stocks Plunge Late Afternoon   
4              Apple's Efforts to Prepare for Tariffs   
5   Apple airlifted 600 tons of iPhones to the U.S...   
6   Oil falls, China tensions weigh on Apple, Disn...   
7   Jim Cramer on Apple (AAPL): “I’m Not Gonna 

  df[column] = pd.to_datetime(df[column]).dt.floor("H")


In [None]:
# Show the combined news frame as this cell's output.
df

Unnamed: 0,category,datetime,headline,id,image,related,source,summary,url,symbol
0,company,2025-04-10 20:00,Jim Cramer on Apple Inc. (AAPL): Can Tariffs D...,133873219,https://s.yimg.com/rz/stage/p/yahoo_finance_en...,AAPL,Yahoo,We recently published a list of Jim Cramer Say...,https://finnhub.io/api/news?id=03ae936f5c40e9c...,AAPL
1,company,2025-04-10 20:00,"These Stocks Moved the Most Today: Nvidia, App...",133873220,https://s.yimg.com/rz/stage/p/yahoo_finance_en...,AAPL,Yahoo,"Shares of Nvidia, Apple, and Tesla fall sharpl...",https://finnhub.io/api/news?id=ad0e76daeb145dc...,AAPL
2,company,2025-04-10 20:00,Why Apple Stock Is Sinking Today,133873221,https://s.yimg.com/rz/stage/p/yahoo_finance_en...,AAPL,Yahoo,Shares of Apple (NASDAQ: AAPL) are heading low...,https://finnhub.io/api/news?id=cac32eb61d6f66b...,AAPL
3,company,2025-04-10 19:00,Sector Update: Tech Stocks Plunge Late Afternoon,133873222,https://s.yimg.com/rz/stage/p/yahoo_finance_en...,AAPL,Yahoo,Tech stocks tumbled late Thursday afternoon wi...,https://finnhub.io/api/news?id=7e0c3404a3ccda0...,AAPL
4,company,2025-04-10 19:00,Apple's Efforts to Prepare for Tariffs,133873223,https://s.yimg.com/rz/stage/p/yahoo_finance_en...,AAPL,Yahoo,"Carolina Milanesi, Creative Strategies preside...",https://finnhub.io/api/news?id=a42a7e63b9fc06a...,AAPL
...,...,...,...,...,...,...,...,...,...,...
1902,company,2025-02-26 12:00,Wall Street Lunch: McDonald's Will Digest High...,132879352,https://static.seekingalpha.com/cdn/s3/uploads...,NFLX,SeekingAlpha,Restaurants are scrambling to deal with high e...,https://finnhub.io/api/news?id=f04ee1ec19ce78e...,NFLX
1903,company,2025-02-26 12:00,Netflix CFO to Present at the Morgan Stanley T...,132879055,,NFLX,Finnhub,"LOS GATOS, Calif., Feb. 26, 2025 /PRNewswire/ ...",https://finnhub.io/api/news?id=5937d5dc9d1ff2e...,NFLX
1904,company,2025-02-26 04:00,"Media, telecom firms ask CRTC to ease up on re...",132876434,,NFLX,Finnhub,OTTAWA - As the Canadian broadcast system is u...,https://finnhub.io/api/news?id=0366b18af146d5d...,NFLX
1905,company,2024-10-30 05:00,These Are The Best Robinhood Stocks To Buy Or ...,131044702,,NFLX,DowJones,These Are The Best Robinhood Stocks To Buy Or ...,https://finnhub.io/api/news?id=0d6328f6d2b87b9...,NFLX


In [None]:
import requests
import pandas as pd
import json
import re
from tqdm import tqdm

# Every row starts as non-controversial (0); the loop below overwrites the
# value with the model's answer when one is obtained.
df["Is_controversial"] = 0

# The prompts describe the input as a news summary, which is what the loop
# actually sends (the "summary" column).
system_prompt = (
    "You are a helpful assistant that classifies a single news summary into 'controversial' (1) "
    "or 'non-controversial' (0). Return ONLY the digit 1 or 0."
)

definition = (
    "Definition of 'controversial': Controversial content refers to any statement, idea, or expression that is likely to "
    "provoke strong disagreement, debate, or emotional reactions. It often involves sensitive, polarizing, or divisive topics, "
    "and typically reflects opinionated, critical, or challenging viewpoints.\n\n"
)


def _classify_summary(news, timeout=120):
    """Ask the local chat endpoint to label one news summary.

    Args:
        news: Text interpolated into the user prompt.
        timeout: Seconds passed to ``requests.post`` as its timeout.

    Returns:
        1 or 0 when the concatenated reply contains a standalone ``1``/``0``
        token, otherwise ``None``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
    """
    user_prompt = (
        definition
        + f"News: \"{news}\"\n\n"
        "Please classify this news summary as controversial (1) or non-controversial (0). "
        "Return ONLY the digit 1 or 0."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # The context manager releases the streamed connection even if reading
    # the body fails partway through.
    with requests.post(
        "http://localhost:11434/api/chat",
        json={
            "model": "llama3",
            "messages": messages,
            "options": {"temperature": 0},
        },
        stream=True,
        timeout=timeout,
    ) as response:
        # Surface HTTP errors instead of treating them as an empty reply.
        response.raise_for_status()

        pieces = []
        for line in response.iter_lines():
            if not line:
                continue
            try:
                part = json.loads(line.decode("utf-8"))
            except json.JSONDecodeError:
                # Skip lines that are not valid JSON.
                continue
            pieces.append(part.get("message", {}).get("content", ""))

    match = re.search(r"\b(1|0)\b", "".join(pieces).strip())
    return int(match.group(1)) if match else None


for idx, row in tqdm(df.iterrows(), total=len(df), desc="Classifying"):
    # Fallback label when the request fails or the reply has no 0/1 token.
    val = 0

    try:
        label = _classify_summary(row["summary"])
        if label is None:
            print(f"[Warning] no 0/1 label in reply at index {idx}; defaulting to 0")
        else:
            val = label
    except Exception as e:
        # Best effort: one failed row must not abort the whole run.
        print(f"[Warning] failed at index {idx}: {e}")

    df.at[idx, "Is_controversial"] = val

print(df.head())

Classifying: 100%|██████████████████████████| 1907/1907 [09:36<00:00,  3.31it/s]

  category          datetime  \
0  company  2025-04-10 20:00   
1  company  2025-04-10 20:00   
2  company  2025-04-10 20:00   
3  company  2025-04-10 19:00   
4  company  2025-04-10 19:00   

                                            headline         id  \
0  Jim Cramer on Apple Inc. (AAPL): Can Tariffs D...  133873219   
1  These Stocks Moved the Most Today: Nvidia, App...  133873220   
2                   Why Apple Stock Is Sinking Today  133873221   
3   Sector Update: Tech Stocks Plunge Late Afternoon  133873222   
4             Apple's Efforts to Prepare for Tariffs  133873223   

                                               image related source  \
0  https://s.yimg.com/rz/stage/p/yahoo_finance_en...    AAPL  Yahoo   
1  https://s.yimg.com/rz/stage/p/yahoo_finance_en...    AAPL  Yahoo   
2  https://s.yimg.com/rz/stage/p/yahoo_finance_en...    AAPL  Yahoo   
3  https://s.yimg.com/rz/stage/p/yahoo_finance_en...    AAPL  Yahoo   
4  https://s.yimg.com/rz/stage/p/yahoo_finance_en




In [None]:
# Write the labeled frame to CSV, omitting the index column.
df.to_csv("classified_news.csv", index=False)