In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel,
# so the imports in the following cells resolve against the same interpreter.
%pip install yfinance requests

Gather Stock Data

In [22]:
import yfinance as yf
import pandas as pd
import numpy as np

# Function to fetch stock data
def fetch_stock_data(ticker, start_date, end_date):
    """Download price history for one ticker and return its closing prices.

    Parameters
    ----------
    ticker : str
        Symbol forwarded to ``yf.download`` (e.g. ``'AAPL'``).
    start_date, end_date : str
        Date bounds forwarded to ``yf.download`` as ``start`` / ``end``.

    Returns
    -------
    pandas.DataFrame
        The former index (``Date``) as a regular column followed by ``Close``.
        For a single ticker the column labels are flat strings.
    """
    stock_data = yf.download(ticker, start=start_date, end=end_date)

    # The download can come back with two-level (Price, Ticker) column labels
    # even for a single symbol. Left as-is, the extra level leaks into every
    # derived frame and into the CSV header. Flatten only when the top level is
    # unique, so a multi-ticker result (duplicate 'Close' labels) is untouched.
    columns = stock_data.columns
    if isinstance(columns, pd.MultiIndex) and columns.get_level_values(0).is_unique:
        stock_data.columns = columns.get_level_values(0).rename(None)

    # Keep only the "Close" prices and expose the date index as a column
    return stock_data[['Close']].reset_index()

# Example: Apple (AAPL) prices from January 1, 2010 to March 1, 2025
ticker = 'AAPL'
start_date = '2010-01-01'
end_date = '2025-03-01'
stock_data = fetch_stock_data(ticker, start_date, end_date)

# Placeholder emotional-tone scores: one uniform draw per price row, each tone
# with its own (low, high) range. These are simulated values standing in for a
# real analysis. The dict order fixes the order of the random draws.
np.random.seed(42)  # For reproducibility
emotion_ranges = {
    'optimism': (5, 7),
    'anxiety': (2, 5),
    'sadness': (1, 4),
    'surprise': (3, 6),
    'neutral': (4, 6),
    'anger_disgust': (2, 5),
}
n_rows = len(stock_data)
emotion_data = {
    tone: np.random.uniform(low, high, n_rows)
    for tone, (low, high) in emotion_ranges.items()
}

# Attach the year, every emotion column, and a stock_price copy of Close
stock_data['year'] = stock_data['Date'].dt.year
for tone, scores in emotion_data.items():
    stock_data[tone] = scores
stock_data['stock_price'] = stock_data['Close']

# Bucket rows by calendar month ('MS' = month start) and keep the first entry
# of each bucket, then turn the month label back into a column named 'date'.
stock_data_monthly = (
    stock_data
    .resample('MS', on='Date')
    .first()
    .reset_index()
    .rename(columns={'Date': 'date'})
)

# Keep only the columns of the required output format, in the required order
output_columns = ['date', 'stock_price', 'year', *emotion_data]
final_data = stock_data_monthly[output_columns]

# Write the monthly table to CSV (an existing file of the same name is overwritten)
final_data.to_csv('stock_data_with_emotions_monthly.csv', index=False)

# Show the first few rows to verify the output
print(final_data.head())


[*********************100%***********************]  1 of 1 completed

Price        date stock_price  year  optimism   anxiety   sadness  surprise  \
Ticker                                                                        
0      2010-01-01    6.440331  2010  5.749080  3.841911  3.669062  5.357401   
1      2010-02-01    5.860126  2010  5.582458  4.470871  3.067430  5.715264   
2      2010-03-01    6.289261  2010  6.368466  2.349075  2.072540  5.178318   
3      2010-04-01    7.101188  2010  5.542698  2.125746  1.935110  3.925065   
4      2010-05-01    8.015430  2010  5.661796  4.473415  2.612138  3.053635   

Price    neutral anger_disgust  
Ticker                          
0       4.393373      2.564841  
1       5.216639      4.771540  
2       4.697012      3.025596  
3       4.024832      4.787747  
4       5.050352      4.670469  



  stock_data_monthly = stock_data.resample('MS', on='Date').first()  # 'MS' stands for Month Start


Gathering Articles

In [None]:
import json
import os
import time
from datetime import datetime

import requests

# Function to fetch articles from GNews API
def fetch_articles_for_year(api_key, company_name, year):
    """Search the GNews API for articles about a company within one calendar year.

    Parameters
    ----------
    api_key : str
        GNews token, sent as the ``token`` query parameter.
    company_name : str
        Search phrase, sent as the ``q`` query parameter.
    year : int
        Calendar year; the search window runs from January 1 to December 31.

    Returns
    -------
    list
        The ``articles`` entry of the JSON response, or an empty list when the
        request fails, the status code is not 200, or the body is not JSON.
    """
    # Let requests build the query string so that spaces, '&' and other special
    # characters in the search phrase are URL-encoded instead of corrupting it.
    params = {
        'q': company_name,
        # GNews documents 'from'/'to' as ISO 8601 timestamps, not bare dates.
        'from': f'{year}-01-01T00:00:00Z',
        'to': f'{year}-12-31T23:59:59Z',
        'lang': 'en',
        'max': 10,
        'token': api_key,
    }

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get('https://gnews.io/api/v4/search', params=params, timeout=10)
    except requests.RequestException as exc:
        # Report only the exception type: the full message can embed the
        # request URL, which contains the API token.
        print(f"Error fetching articles for {year}: {type(exc).__name__}")
        return []

    if response.status_code != 200:
        print(f"Error fetching articles for {year}: {response.status_code}")
        return []

    try:
        payload = response.json()
    except ValueError:
        print(f"Error fetching articles for {year}: response was not valid JSON")
        return []

    return payload.get('articles', [])

# Function to fetch articles for each year from 2010 to now
def fetch_articles(api_key, company_name, start_year=2010, delay_seconds=2):
    """Collect articles about a company for every year from ``start_year`` to now.

    Parameters
    ----------
    api_key : str
        Token forwarded to ``fetch_articles_for_year``.
    company_name : str
        Search phrase forwarded to ``fetch_articles_for_year``.
    start_year : int, optional
        First year to query (inclusive). Defaults to 2010.
    delay_seconds : float, optional
        Pause between consecutive requests, to avoid hitting the rate limit.
        Defaults to 2; a value of 0 or less disables the pause.

    Returns
    -------
    dict
        Maps each year that returned at least one article to its article list.
        Years with no articles are omitted.
    """
    current_year = datetime.now().year
    all_articles = {}

    for year in range(start_year, current_year + 1):
        print(f"Fetching articles for {year}...")
        articles = fetch_articles_for_year(api_key, company_name, year)

        if articles:
            all_articles[year] = articles
        else:
            print(f"No articles found for {year}.")

        # Pause only between requests; no request follows the final year.
        if year < current_year and delay_seconds > 0:
            time.sleep(delay_seconds)

    return all_articles

# Example usage
# The key is read from the environment so it is never stored in the notebook.
api_key = os.getenv("NEWS_API_KEY")
if not api_key:
    # Fail fast instead of sending token=None for every year.
    raise RuntimeError("NEWS_API_KEY environment variable is not set")
company_name = 'Tesla'  # Replace with the company you are interested in

# Fetch articles for each year
articles_data = fetch_articles(api_key, company_name)

# Print up to five articles for every year that returned results
for year, articles in articles_data.items():
    print(f"\nArticles from {year}:")
    for i, article in enumerate(articles[:5]):
        # .get() tolerates an article record that lacks one of the fields
        title = article.get('title')
        published_at = article.get('publishedAt')
        content = article.get('content')

        # Print the title, published date, and content
        print(f"{i+1}. {title} - {published_at}")
        print(f"Content: {content}\n")
