# Data Sourcing
* Sourced from [EODHD](https://eodhd.com/)

In [52]:
import re
import requests
import pandas as pd
import config as cfg
from eodhd import APIClient

import datetime
from IPython.display import display, clear_output

In [53]:
%run ./definitions.ipynb

ticker                  TSLA
--------------------------------------------------------------------------------
start_date  2024-01-01
end_date    2025-03-12
--------------------------------------------------------------------------------
eod_price_file_csv      ./data/tsla_eod_prices.csv
finacial_news_file_json ./data/tsla_financial_news.json
finacial_news_file_csv  ./data/tsla_financial_news.csv
combined_data_file_csv  ./data/tsla_combined_data.csv
--------------------------------------------------------------------------------


In [4]:
!mkdir data

mkdir: data: File exists


# API Call Functions

In [36]:
import os
from dotenv import load_dotenv

load_dotenv()
EODHD_API_KEY = os.getenv("EODHD_API_KEY")

## fetch_historical_news()

In [6]:
def fetch_historical_news(ticker, months):
    """Fetches historical news for the given ticker over the past specified months."""
    
    BASE_URL = "https://eodhd.com/api/news"
    
    end_date = datetime.datetime.today()
    start_date = end_date - datetime.timedelta(days=30 * months)
    
    all_news = []
    offset = 0

    i = 0
    while True:
        params = {
            "s": ticker,  
            "limit": LIMIT,
            "offset": offset,
            "api_token": EODHD_API_KEY,
            "from": start_date.strftime("%Y-%m-%d"),
            "to": end_date.strftime("%Y-%m-%d"),
        }
        
        response = requests.get(BASE_URL, params=params)
        
        if response.status_code != 200:
            print("Error fetching data:", response.text)
            break

        data = response.json()

        if not data:  # Stop when no more news is returned
            break
        
        all_news.extend(data)
        offset += LIMIT  # Increase offset for pagination
        print(i, len(data))
        i += 1

    return all_news

## fetch_historical_prices()

In [48]:
import pandas as pd
import requests
from datetime import datetime, timedelta

def fetch_stock_data(symbol, start_date, end_date, api_key):
    url = f"https://eodhistoricaldata.com/api/eod/{symbol}"
    params = {
        "from": start_date,
        "to": end_date,
        "fmt": "json",
        "api_token": api_key
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        df = pd.DataFrame(data)
        df['date'] = pd.to_datetime(df['date']).dt.date
        return df
    else:
        print("Failed to fetch stock data:", response.status_code)
        return None

api_key = EODHD_API_KEY 
symbol = ticker
start_date = "2024-01-01"
end_date = "2025-03-12"

# Fetch stock data
stock_df = fetch_stock_data(symbol, start_date, end_date, api_key)
stock_df['date'] = pd.to_datetime(stock_df['date'])


In [51]:
stock_df.head(10)

Unnamed: 0,date,open,high,low,close,adjusted_close,volume
0,2024-01-02,250.08,251.25,244.41,248.42,248.42,104654203
1,2024-01-03,244.98,245.68,236.32,238.45,238.45,121082602
2,2024-01-04,239.25,242.7,237.73,237.93,237.93,102629297
3,2024-01-05,236.86,240.1196,234.9001,237.49,237.49,92488945
4,2024-01-08,236.14,241.25,235.3,240.45,240.45,85166578
5,2024-01-09,238.11,238.9646,232.04,234.96,234.96,96705664
6,2024-01-10,235.1,235.5,231.29,233.94,233.94,91628492
7,2024-01-11,230.57,230.93,225.37,227.22,227.22,105873602
8,2024-01-12,220.08,225.34,217.1501,218.89,218.89,123043797
9,2024-01-16,215.1,223.49,212.18,219.91,219.91,115355000


In [45]:
import requests
import pandas as pd

def get_eod_historical_prices(ticker, api_token, start_date=None, end_date=None):
    """
    Retrieves EOD historical prices for a given ticker.

    Parameters:
    - ticker: The stock ticker symbol (e.g., AAPL.US).
    - api_token: Your EODHD API token.
    - start_date: Optional start date for historical data (YYYY-MM-DD).
    - end_date: Optional end date for historical data (YYYY-MM-DD).

    Returns:
    - A pandas DataFrame containing the historical prices.
    """
    base_url = "https://eodhd.com/api/eod/"
    params = {
        "api_token": api_token,
        "fmt": "json"
    }

    if start_date and end_date:
        params["from"] = start_date
        params["to"] = end_date

    url = f"{base_url}{ticker}"
    response = requests.get(url, params=params)

    if response.status_code == 200:
        data = response.json()
        df = pd.DataFrame(data)
        return df
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

# Example usage
ticker = "TSLA"
api_token = EODHD_API_KEY  
start_date = "2024-03-01"
end_date = "2025-03-13"

df = get_eod_historical_prices(ticker, api_token, start_date, end_date)


         date         open         high          low        close  \
0  2024-03-01 200.52000000 204.52000000 198.50000000 202.64000000   
1  2024-03-04 198.73000000 199.75000000 186.72000000 188.14000000   
2  2024-03-05 183.05000000 184.59000000 177.57000000 180.74000000   
3  2024-03-06 179.99000000 181.57600000 173.70000000 176.54000000   
4  2024-03-07 174.35000000 180.04000000 173.70000000 178.65000000   

   adjusted_close     volume  
0    202.64000000   82243117  
1    188.14000000  134334891  
2    180.74000000  119660797  
3    176.54000000  107920898  
4    178.65000000  102129000  


date               object
open              float64
high              float64
low               float64
close             float64
adjusted_close    float64
volume              int64
dtype: object

In [47]:

# print(df.head())
print(df.tail(10))

           date         open         high          low        close  \
249  2025-02-28 279.50000000 293.88000000 273.60000000 292.98000000   
250  2025-03-03 300.34000000 303.94000000 277.30000000 284.65000000   
251  2025-03-04 270.93000000 284.35000000 261.84010000 272.04000000   
252  2025-03-05 272.92000000 279.55000000 267.71000000 279.10000000   
253  2025-03-06 272.06000000 272.65000000 260.02000000 263.45000000   
254  2025-03-07 259.32000000 266.24990000 250.73000000 262.67000000   
255  2025-03-10 252.53500000 253.37000000 220.00000000 222.15000000   
256  2025-03-11 225.30500000 237.06490000 217.02000000 230.58000000   
257  2025-03-12 247.22000000 251.84000000 241.10000000 248.09000000   
258  2025-03-13 248.12500000 248.29000000 232.60000000 240.68000000   

     adjusted_close     volume  
249    292.98000000  115697000  
250    284.65000000  115551398  
251    272.04000000  126706602  
252    279.10000000   94042914  
253    263.45000000   98451570  
254    262.67000000 

In [7]:
def fetch_historical_prices(ticker, months):
    """Fetches historical EOD prices for the given ticker over the past specified months."""
    
    BASE_URL = f"https://eodhd.com/api/eod/{TICKER}"  # EOD API endpoint
    
    end_date = datetime.datetime.today()
    days = 30 * months
    start_date = end_date - datetime.timedelta(days=30 * months)
    
    all_prices = []
    offset = 0

    i = 0
    for i in range(days):
        params = {
            "limit": LIMIT,
            "offset": offset,
            "api_token": EODHD_API_KEY,
            "from": start_date.strftime("%Y-%m-%d"),
            "to": end_date.strftime("%Y-%m-%d"),
            "order": "d",  # Order by descending (newest first)
        }
        
        response = requests.get(BASE_URL, params=params)

        data = response.text
        clear_output(wait=True)
        display(f'{i} {len(data)}')
        
        all_prices += data
        offset += LIMIT  # Increase offset for pagination

    return all_prices


# Sourcing EOD Prices

In [9]:
# Fetch past n months of stock prices for ticker
price_data = fetch_historical_prices(ticker=TICKER, 
                                     months=months)

# Print total number of records fetched
print(f"Total records fetched: {len(price_data)}")

'359 13577'

Total records fetched: 4887720


In [35]:
import pandas as pd

all_prices = ''.join(price_data)
from io import StringIO

s = StringIO(all_prices)
with open(eod_price_file_csv, 'w') as f:
    for line in s:
        f.write(line)

df = pd.read_csv(eod_price_file_csv)
df.columns = df.columns.str.lower()
# Removing duplicate titles 
df = df[df['date'].str.contains('Date') == False]
df.drop_duplicates(subset=['date'])
df.head(10)

df.to_csv(eod_price_file_csv)
# df['date_datetime'] = pd.to_datetime(df['date'])
# df.head(10)
# df_sorted = df.sort_values(by='date_datetime')
# df_sorted.head()
# df.sort_values('date').drop_duplicates(subset=['item'], keep='last').drop_duplicates(subset=['date_datetime'], keep='last')
# df.drop('date_datetime', axis=1, inplace=True)
# df.head(10)

# fetch_historical_news()
* Ticker: "TSLA"
* Lookback period in months: 12

In [8]:
TICKER = ticker
LIMIT = 1000  # Maximum per request
months = 12

In [6]:
# Fetch the past n months of news for ticker
news_data = fetch_historical_news(ticker=TICKER, 
                                  months=months)

# Print number of articles fetched
print(f"Total articles fetched: {len(news_data)}")
print('-'*80)
# Print the first few articles
for article in news_data[:5]:  
    print(article["title"], "-", article["date"])


0 1000
1 1000
2 1000
3 1000
4 763
Total articles fetched: 4763
Amazon stock is trading cheaply on 1 valuation metric after broader stock market rout - 2025-03-11T12:32:50+00:00
Tesla Stock Rises After Selloff. Trump Says He’s Buying One as He Defends Musk. - 2025-03-11T11:16:00+00:00
Energy Transition Update - Green HVAC Market Surges with Eco-Friendly Building Demand - 2025-03-11T11:07:21+00:00
Trump Says He’ll Buy a Tesla to Help Musk After Share Plunge - 2025-03-11T11:06:20+00:00
Trending tickers: Tesla, Nvidia, Novo Nordisk, Volkswagen and Persimmon - 2025-03-11T10:57:47+00:00


In [16]:
import json

with open(finacial_news_file_json, 'w') as f:
    json.dump(news_data, f, indent=4)

In [17]:
df = pd.read_json(finacial_news_file_json)
df.shape

(4763, 7)

In [18]:
df.head()

Unnamed: 0,date,title,content,link,symbols,tags,sentiment
0,2025-03-11 12:32:50+00:00,Amazon stock is trading cheaply on 1 valuation...,The last time Amazon's stock (AMZN) looked thi...,https://finance.yahoo.com/news/amazon-stock-is...,"[1TSLA.MI, TL0.F, TL0.XETRA, TL01.F, TSLA.US, ...",[],"{'polarity': 0.988, 'neg': 0.039, 'neu': 0.858..."
1,2025-03-11 11:16:00+00:00,Tesla Stock Rises After Selloff. Trump Says He...,Investors are looking for some technical suppo...,https://finance.yahoo.com/m/2123972d-6f61-3413...,"[1TSLA.MI, TL0.BE, TL0.F, TL0.XETRA, TL01.F, T...",[],"{'polarity': -0.052000000000000005, 'neg': 0.1..."
2,2025-03-11 11:07:21+00:00,Energy Transition Update - Green HVAC Market S...,The global Green HVAC market is undergoing sig...,https://finance.yahoo.com/news/energy-transiti...,"[1TSLA.MI, AMAT.MX, AMAT.US, AP2.F, AP2.XETRA,...",[],"{'polarity': 0.993, 'neg': 0.024, 'neu': 0.848..."
3,2025-03-11 11:06:20+00:00,Trump Says He’ll Buy a Tesla to Help Musk Afte...,(Bloomberg) -- US President Donald Trump said ...,https://finance.yahoo.com/news/trump-says-ll-b...,"[1TSLA.MI, TL0.BE, TL0.F, TL0.XETRA, TL01.F, T...",[],"{'polarity': 0.989, 'neg': 0.065, 'neu': 0.802..."
4,2025-03-11 10:57:47+00:00,"Trending tickers: Tesla, Nvidia, Novo Nordisk,...",Tesla (TSLA)\n\nStocks sold off globally on Mo...,https://uk.finance.yahoo.com/news/tesla-nvidia...,"[0QYP.IL, 0QZI.LSE, 0R1O.IL, 0R2V.IL, 0RIH.IL,...",[],"{'polarity': 0.998, 'neg': 0.057, 'neu': 0.815..."


'359 13407'

Total records fetched: 4826520


('Date,Open,High,Low,Close,Adjusted_close,Volume\n'
 '2025-03-10,252.535,253.37,220,222.15,222.15,189076948')

('19,172.36,172.82,167.42,171.32,171.32,77271430\n'
 '2024-03-18,170.02,174.72,165.9,173.8,173.8,108214398\n')


In [24]:
!ls -ltr ./data

total 47128
-rw-r--r--@ 1 ilirkondo  staff  18605665 Mar 11 09:45 tsla_financial_news.json
-rw-r--r--@ 1 ilirkondo  staff   4826520 Mar 11 10:05 tsla_eod_prices.csv
