In [None]:

import requests
from bs4 import BeautifulSoup

def get_full_article(url, timeout=10):
    """Scrape the paragraph text of an article page.

    Args:
        url: Address of the article to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every ``<p>`` element joined by single spaces. This
        function never raises; on failure it returns a message instead:
        ``"Failed to retrieve article."`` for a non-200 response, or a string
        starting with ``"Error: "`` for a missing URL or any exception raised
        while fetching or parsing.
    """
    if not url:
        # Callers may pass an empty string when an article has no URL; there
        # is nothing to download, so skip the request entirely.
        return "Error: no URL provided."

    try:
        headers = {'User-Agent': 'Mozilla/5.0'}  # Prevent blocking
        # Without a timeout a single unresponsive host would stall the
        # whole scraping run indefinitely.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return "Failed to retrieve article."

        print('start scrapping', url)
        soup = BeautifulSoup(response.text, "html.parser")
        # Most articles are wrapped in <p> tags
        return " ".join(p.get_text() for p in soup.find_all("p"))
    except Exception as e:
        # Best-effort scraper: report the problem as the content value so
        # one bad page does not abort the caller's loop.
        return f"Error: {str(e)}"

In [15]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from bs4 import BeautifulSoup




# Function to fetch financial news from NewsAPI for a specific day
def fetch_financial_news(api_key, query, date, language="en", page_size=100, page=1, timeout=30):
    """Fetch one page of NewsAPI ``/v2/everything`` results for a single day.

    Args:
        api_key: NewsAPI key, sent as the ``apiKey`` query parameter.
        query: Search expression, sent as ``q``.
        date: Day to search; used for both ``from`` and ``to``.
        language: Language filter, sent as ``language``.
        page_size: Number of results requested per page.
        page: 1-based page number to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(articles, total_results)`` tuple. ``([], 0)`` is returned, after
        printing a diagnostic, when the request fails, the status is not 200,
        or the body is not a JSON object.
    """
    url = "https://newsapi.org/v2/everything"
    params = {
        "q": query,
        "from": date,
        "to": date,
        "language": language,
        "sortBy": "relevancy",
        "pageSize": page_size,
        "page": page,
        "apiKey": api_key
    }

    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like any other failed call so a network
        # blip on one day does not discard everything collected so far.
        print(f"Error: request failed, {exc}")
        return [], 0

    # Parse the body once; error responses are not guaranteed to be JSON.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code == 200 and isinstance(payload, dict):
        return payload.get("articles") or [], payload.get("totalResults") or 0

    details = payload if payload is not None else response.text
    print(f"Error: {response.status_code}, {details}")
    return [], 0

# Function to extract news for a specific date range and save it to a single DataFrame
def extract_news_for_month(api_key, query, start_date, end_date, page_size=100):
    """Collect NewsAPI articles day by day and scrape each article's text.

    Every day from ``start_date`` to ``end_date`` (inclusive) is queried with
    ``fetch_financial_news``, page by page, and each returned article's URL
    is passed to ``get_full_article``.

    Args:
        api_key: NewsAPI key forwarded to ``fetch_financial_news``.
        query: Search expression forwarded to ``fetch_financial_news``.
        start_date: First day to fetch (``datetime``).
        end_date: Last day to fetch (``datetime``).
        page_size: Results requested per page; also the threshold below
            which a page is considered the last one.

    Returns:
        A ``pandas.DataFrame`` with columns ``Date``, ``Title``, ``Content``,
        ``Source`` and ``URL``; it has zero rows when nothing was retrieved.
    """
    columns = ["Date", "Title", "Content", "Source", "URL"]
    records = []  # One dict per article
    current_date = start_date

    while current_date <= end_date:
        day = current_date.strftime("%Y-%m-%d")
        print(f"Fetching news for date: {day}")
        page = 1
        while True:
            articles, total_results = fetch_financial_news(
                api_key, query, day, page_size=page_size, page=page
            )
            if not articles:
                break

            for article in articles:
                # A key that is present with a null value bypasses the
                # default of `.get(key, default)`, so normalise with `or`.
                url = article.get("url") or ""
                published = article.get("publishedAt") or ""
                source = article.get("source") or {}
                records.append({
                    "Date": published.split("T")[0],
                    "Title": article.get("title") or "",
                    "Content": get_full_article(url),
                    "Source": source.get("name") or "",
                    "URL": url,
                })

            print(f"Page {page}: Retrieved {len(articles)} articles (Total: {total_results})")

            # A short page is the last page for this day.
            if len(articles) < page_size:
                break
            # A full page that already covers the reported total is also the
            # last one; stopping here avoids a request that returns nothing.
            if total_results and page * page_size >= total_results:
                break
            page += 1

        # Move to the next day
        current_date += timedelta(days=1)

    # Explicit columns keep the schema stable even when no article was found.
    return pd.DataFrame(records, columns=columns)

# Main execution
if __name__ == "__main__":
    import os

    # Read the NewsAPI key from the environment so the secret is never
    # stored in (or shared with) the notebook itself.
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY")
    if not NEWS_API_KEY:
        raise RuntimeError(
            "Set the NEWS_API_KEY environment variable before running this cell."
        )

    # Define parameters
    company_name = "Tesla"
    query = f"{company_name} earnings"
    start_date = datetime(2025, 1, 1)  # Start date (YYYY, MM, DD)
    end_date = datetime(2025, 1, 31)  # End date (YYYY, MM, DD)
    output_file = f"{company_name.lower()}_financial_news_january.xlsx"

    # Extract news for the month (one NewsAPI query per day, plus one page
    # download per article, so this cell is slow)
    news_df = extract_news_for_month(NEWS_API_KEY, query, start_date, end_date)

    # Save the DataFrame to Excel
    news_df.to_excel(output_file, index=False, engine="openpyxl")
    print(f"All news data saved to {output_file}")


Fetching news for date: 2025-01-01
Page 1: Retrieved 10 articles (Total: 10)
Fetching news for date: 2025-01-02
Page 1: Retrieved 39 articles (Total: 41)
Fetching news for date: 2025-01-03
Page 1: Retrieved 46 articles (Total: 48)
Fetching news for date: 2025-01-04
Page 1: Retrieved 11 articles (Total: 11)
Fetching news for date: 2025-01-05
Page 1: Retrieved 10 articles (Total: 10)
Fetching news for date: 2025-01-06
Page 1: Retrieved 21 articles (Total: 21)
Fetching news for date: 2025-01-07
Page 1: Retrieved 18 articles (Total: 19)
Fetching news for date: 2025-01-08
Page 1: Retrieved 10 articles (Total: 11)
Fetching news for date: 2025-01-09
Page 1: Retrieved 11 articles (Total: 11)
Fetching news for date: 2025-01-10
Page 1: Retrieved 25 articles (Total: 26)
Fetching news for date: 2025-01-11
Page 1: Retrieved 8 articles (Total: 8)
Fetching news for date: 2025-01-12
Page 1: Retrieved 8 articles (Total: 8)
Fetching news for date: 2025-01-13
Page 1: Retrieved 20 articles (Total: 20)
Fet

In [16]:
news_df  # Display the frame returned by extract_news_for_month; news_df['Content'].value_counts() tallies the scraped content values

Unnamed: 0,Date,Title,Content,Source,URL
0,2025-01-01,5 Nasdaq Stocks Hitting New 52-Week Highs At Y...,Error: HTTPSConnectionPool(host='www.forbes.co...,Forbes,https://www.forbes.com/sites/johnnavin/2024/12...
1,2025-01-01,The Rich Stop Paying Social Security Tax Aroun...,Error: HTTPSConnectionPool(host='www.forbes.co...,Forbes,https://www.forbes.com/sites/teresaghilarducci...
2,2025-01-01,Musk says 'whole Tesla senior team' probing Cy...,"Error: HTTPSConnectionPool(host='thefly.com', ...",Thefly.com,https://thefly.com/permalinks/entry.php/id4044...
3,2025-01-01,Bard Financial Services Inc. Reduces Stock Pos...,Error: HTTPSConnectionPool(host='www.etfdailyn...,ETF Daily News,https://www.etfdailynews.com/2025/01/01/bard-f...
4,2025-01-01,ASML STOCK UPDATE: ASML Holding Investors are ...,Error: HTTPSConnectionPool(host='www.globenews...,GlobeNewswire,https://www.globenewswire.com/news-release/202...
...,...,...,...,...,...
613,2025-01-29,Tesla’s Fourth-Quarter Earnings Report Sharp D...,Error: HTTPSConnectionPool(host='slashdot.org'...,Slashdot.org,https://slashdot.org/firehose.pl?op=view&amp;i...
614,2025-01-29,S&P 500 futures slide ahead of Fed rate decisi...,Error: HTTPSConnectionPool(host='slashdot.org'...,Slashdot.org,https://slashdot.org/firehose.pl?op=view&amp;i...
615,2025-01-29,Elon Musk: Tesla will launch unsupervised full...,Access Denied (403) - Possible Paywall or Bot ...,Electrek,http://electrek.co/2025/01/29/elon-musk-tesla-...
616,2025-01-29,Tesla guides return to growth thanks to cheape...,Access Denied (403) - Possible Paywall or Bot ...,Electrek,http://electrek.co/2025/01/29/tesla-guides-ret...


# Finnhub

In [43]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# 1️⃣ Set up API key and parameters
import os

# Read the Finnhub key from the environment so the secret is never stored in
# (or shared with) the notebook itself.
FINNHUB_API_KEY = os.environ.get("FINNHUB_API_KEY", "")
if not FINNHUB_API_KEY:
    print("⚠️ FINNHUB_API_KEY is not set; Finnhub requests will not be authenticated.")
COMPANY_SYMBOL = "TSLA"  # Tesla's stock ticker symbol

# Define the date range (last 30 days, ending at the moment this cell runs)
end_date = datetime.today()
start_date = end_date - timedelta(days=30)

# The request parameters are built from YYYY-MM-DD strings
start_date_str = start_date.strftime("%Y-%m-%d")
end_date_str = end_date.strftime("%Y-%m-%d")

# 2️⃣ Fetch financial news from Finnhub
def fetch_finnhub_news(api_key, symbol, start_date, end_date, timeout=30):
    """Request company news from Finnhub's ``/api/v1/company-news`` endpoint.

    Args:
        api_key: Finnhub key, sent as the ``token`` query parameter.
        symbol: Ticker symbol, sent as ``symbol``.
        start_date: Start of the range, sent as ``from``.
        end_date: End of the range, sent as ``to``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body on HTTP 200. An empty list is returned, after
        printing a diagnostic, when the request fails or the status is not 200.
    """
    url = "https://finnhub.io/api/v1/company-news"
    params = {
        "symbol": symbol,
        "from": start_date,
        "to": end_date,
        "token": api_key
    }

    try:
        # Without a timeout an unresponsive server would hang the cell.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP errors below.
        print(f"Error: request failed - {exc}")
        return []

    if response.status_code == 200:
        return response.json()  # Returns a list of news articles

    print(f"Error: {response.status_code} - {response.text}")
    return []

# 3️⃣ Process and save news data to Excel
def save_news_to_excel(news_data, output_file="finnhub_tesla_news.xlsx"):
    """Tabulate news articles and write them to Excel and CSV files.

    Args:
        news_data: Iterable of article dicts. Each must provide the keys
            ``datetime`` (converted here as a Unix timestamp), ``headline``,
            ``summary``, ``source`` and ``url``.
        output_file: Path of the Excel file. The CSV copy is written next to
            it, using the same name with a ``.csv`` suffix.

    Returns:
        The ``pandas.DataFrame`` that was written, or ``None`` when
        ``news_data`` is empty (nothing is written in that case).

    Raises:
        KeyError: If an article lacks one of the required keys.
    """
    from datetime import timezone
    from pathlib import Path

    if not news_data:
        print("No news articles found.")
        return None

    rows = [
        {
            # Timezone-aware conversion; `datetime.utcfromtimestamp` is
            # deprecated and yields the same UTC calendar date.
            "Date": datetime.fromtimestamp(article["datetime"], tz=timezone.utc).strftime("%Y-%m-%d"),
            "Headline": article["headline"],
            "Summary": article["summary"],
            "Source": article["source"],
            "URL": article["url"],
        }
        for article in news_data
    ]
    df = pd.DataFrame(rows)

    # Save to an Excel file
    df.to_excel(output_file, index=False, engine="openpyxl")
    # Derive the CSV name from output_file so a custom path does not
    # silently overwrite an unrelated, fixed-name CSV.
    df.to_csv(Path(output_file).with_suffix(".csv"), index=False)
    print(f"✅ Data saved to {output_file}")
    return df

# 4️⃣ Run the script: download the articles, then persist and keep the table
if __name__ == "__main__":
    news_articles = fetch_finnhub_news(
        api_key=FINNHUB_API_KEY,
        symbol=COMPANY_SYMBOL,
        start_date=start_date_str,
        end_date=end_date_str,
    )
    # `df` stays a notebook-level name so it can be inspected afterwards.
    df = save_news_to_excel(news_data=news_articles)


✅ Data saved to finnhub_tesla_news.xlsx


In [45]:
df  # Display the value returned by save_news_to_excel (None when no articles were fetched)

Unnamed: 0,Date,Headline,Summary,Source,URL
0,2025-01-30,"Tesla: After Weak Q4 Earnings, I'm Reiterating...","Despite potential in robotaxis and AI, Tesla's...",SeekingAlpha,https://finnhub.io/api/news?id=f6d6bba0128f6e3...
1,2025-01-29,Giverny Capital Asset Management Q4 2024 Letter,Giverny Capital Asset Management enjoyed marke...,SeekingAlpha,https://finnhub.io/api/news?id=d8a232d1e8cd700...
2,2025-01-29,Tesla: Q4 Earnings Snapshot,"AUSTIN, Texas — AUSTIN, Texas — Tesla Inc. on ...",Finnhub,https://finnhub.io/api/news?id=e98c841563b0c1a...
3,2025-01-29,"Tesla, Inc. (TSLA) Q4 2024 Earnings Call Trans...","Tesla, Inc. (NASDAQ:TSLA) Q4 2024 Earnings Con...",SeekingAlpha,https://finnhub.io/api/news?id=c435930f0452177...
4,2025-01-29,"Tesla commits to cheaper cars, says robotaxis ...",STORY: Tesla says it's working on delivering m...,Finnhub,https://finnhub.io/api/news?id=04c7556b8b6b238...
...,...,...,...,...,...
244,2025-01-09,Chinese Auto Sales Rose in December on Subsidi...,By Jiahui Huang China's vehicle sales rose sha...,Finnhub,https://finnhub.io/api/news?id=86ae56683e87683...
245,2025-01-09,BE GONE ELON,Jess Phillips has said she has more important ...,Finnhub,https://finnhub.io/api/news?id=fe2b9e691d3ebae...
246,2025-01-08,The Best Car Stocks for 2025. Tesla Isn’t the ...,"Automotive investors are a gloomy bunch, and t...",Yahoo,https://finnhub.io/api/news?id=331d2cb11f2b1a7...
247,2025-01-08,"NIO Stock May Have Reached a Turning Point, bu...",NIO (NIO) stock has lost almost half of its va...,Yahoo,https://finnhub.io/api/news?id=5ea38ac538ce4eb...
