# Data Sourcing
* Sourced from [EODHD](https://eodhd.com/)

In [54]:
import re
import requests
import pandas as pd
import config as cfg
from eodhd import APIClient

import datetime
from IPython.display import display, clear_output

In [76]:
%run ./definitions.ipynb

ticker      TSLA
--------------------------------------------------------------------------------
start_date  2024-01-01
end_date    2025-03-12
--------------------------------------------------------------------------------
eod_price_file_csv      ./data/tsla_eod_prices.csv
finacial_news_file_csv  ./data/tsla_financial_news.csv
combined_data_file_csv  ./data/tsla_combined_data.csv
--------------------------------------------------------------------------------


In [57]:
!mkdir data

mkdir: data: File exists


# API Call Functions

In [58]:
import os
from dotenv import load_dotenv

load_dotenv()
EODHD_API_KEY = os.getenv("EODHD_API_KEY")

* **Source EOD prices**

In [60]:
import pandas as pd
import requests
from datetime import datetime, timedelta

def fetch_eod_prices(symbol, start_date, end_date, api_key):
    url = f"https://eodhistoricaldata.com/api/eod/{symbol}"
    params = {
        "from": start_date,
        "to": end_date,
        "fmt": "json",
        "api_token": api_key
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        df = pd.DataFrame(data)
        df['date'] = pd.to_datetime(df['date']).dt.date
        return df
    else:
        print("Failed to fetch stock data:", response.status_code)
        return None


df = fetch_eod_prices(symbol=ticker, 
                      start_date=start_date, 
                      end_date=end_date, 
                      api_key=EODHD_API_KEY)

df['date'] = pd.to_datetime(df['date'])


In [65]:
df


Unnamed: 0,date,open,high,low,close,adjusted_close,volume
0,2024-01-02,250.08000000,251.25000000,244.41000000,248.42000000,248.42000000,104654203
1,2024-01-03,244.98000000,245.68000000,236.32000000,238.45000000,238.45000000,121082602
2,2024-01-04,239.25000000,242.70000000,237.73000000,237.93000000,237.93000000,102629297
3,2024-01-05,236.86000000,240.11960000,234.90010000,237.49000000,237.49000000,92488945
4,2024-01-08,236.14000000,241.25000000,235.30000000,240.45000000,240.45000000,85166578
...,...,...,...,...,...,...,...
294,2025-03-06,272.06000000,272.65000000,260.02000000,263.45000000,263.45000000,98451570
295,2025-03-07,259.32000000,266.24990000,250.73000000,262.67000000,262.67000000,102369602
296,2025-03-10,252.53500000,253.37000000,220.00000000,222.15000000,222.15000000,189076891
297,2025-03-11,225.30500000,237.06490000,217.02000000,230.58000000,230.58000000,174896406


In [69]:
df = df.sort_values(by='date')
df.to_csv(eod_price_file_csv, index=False) # index=False prevents writing the index to the file

In [73]:
df.shape

(299, 7)

* **Source historical news**
    * Focus on [sentiment polarity](https://help.sesamm.com/article/32-sentiment-polarity#:~:text=Polarity%20measures%20the%20overall%20tone,updated%20on%20September%2010%2C%202024)

In [71]:
def fetch_news(symbol, start_date, end_date, api_key):
    all_news = []
    current_end_date = end_date

    while True:
        url = "https://eodhistoricaldata.com/api/news"
        params = {
            "s": symbol,
            "from": start_date,
            "to": current_end_date,
            "api_token": api_key,
            "limit": 1000, # max limit
        }
        response = requests.get(url, params=params)
        if response.status_code == 200:
            data = response.json()
            if not data:
                break
            all_news.extend(data[::-1])  # Reverse the data to maintain chronological order
            # Assuming the news data is sorted by date, get the date of the first news item in the batch
            first_news_date = data[-1]['date']
            first_news_date_obj = datetime.strptime(first_news_date.split("T")[0], "%Y-%m-%d").date()
            # If the first news date is the start date, break the loop
            if first_news_date_obj <= datetime.strptime(start_date, "%Y-%m-%d").date():
                break
            # Set the next end date to the day before the first news date in the current batch
            next_end_date_obj = first_news_date_obj - timedelta(days=1)
            current_end_date = next_end_date_obj.strftime("%Y-%m-%d")
        else:
            print("Failed to fetch news data:", response.status_code)
            break  # Exit the loop in case of failure

    df = pd.DataFrame(all_news)
    df['date'] = pd.to_datetime(df['date']).dt.date  # Convert to date to match stock data
    return df

news_df = fetch_news(symbol=ticker, 
                     start_date=start_date, 
                     end_date=end_date,
                     api_key=EODHD_API_KEY)

news_df['date'] = pd.to_datetime(news_df['date'])
news_df = news_df.sort_values(by='date')


In [72]:
news_df


Unnamed: 0,date,title,content,link,symbols,tags,sentiment
6021,2024-01-01,The 3 Hottest EV Stocks to Watch in 2024,The world is rapidly transitioning towards ele...,https://finance.yahoo.com/news/3-hottest-ev-st...,"[GM.US, LI.US, TSLA.US]",[],"{'polarity': 0.996, 'neg': 0.054, 'neu': 0.85,..."
6025,2024-01-01,UPDATE 1-More EVs lose US tax credits includin...,"(Adds Nissan statement in paragraph 10, Tesla ...",https://finance.yahoo.com/news/1-more-evs-lose...,"[7201.TSE, F.MX, F.US, FDMO34.SA, FMC1.F, FMC1...",[],"{'polarity': 0.996, 'neg': 0.027, 'neu': 0.83,..."
6024,2024-01-01,Chinese Carmakers Launch Sodium-Ion Battery-Po...,Two Chinese state-owned carmakers have launche...,https://finance.yahoo.com/news/chinese-carmake...,"[TL0.F, TL0.XETRA, TSLA.MX, TSLA.US, TSLA34.SA]",[],"{'polarity': 0.906, 'neg': 0.011, 'neu': 0.927..."
6023,2024-01-01,1 Top Artificial Intelligence (AI) Stock to Bu...,"The larger an object, the more force is needed...",https://finance.yahoo.com/news/1-top-artificia...,[TSLA.US],[],"{'polarity': 0.937, 'neg': 0.038, 'neu': 0.891..."
6022,2024-01-01,Dow Jones Futures: Market Rally Strong Heading...,"After a big 2023, the market rally looks stron...",https://finance.yahoo.com/m/8c771d7d-767a-3514...,"[CELH.US, DJI.INDX, GSPC.INDX, IXIC.INDX, LILY...",[],"{'polarity': 0.511, 'neg': 0, 'neu': 0.858, 'p..."
...,...,...,...,...,...,...,...
980,2025-03-12,Nasdaq Correction: I'd Consider Buying the Dip...,Nasdaq Correction: I'd Consider Buying the Dip...,https://www.fool.com/investing/2025/03/12/nasd...,[TSLA.US],[],"{'polarity': 0.599, 'neg': 0, 'neu': 0.794, 'p..."
989,2025-03-12,Musk's Political Role Sparks Investor Concerns...,Tesla (NASDAQ:TSLA) investors are growing unea...,https://finance.yahoo.com/news/musks-political...,"[1TSLA.MI, MS.US, TL0.BE, TL0.F, TL0.XETRA, TL...",[],"{'polarity': -0.937, 'neg': 0.14, 'neu': 0.785..."
990,2025-03-12,"Trump Buys Tesla Model S Plaid, Gary Black Cal...",President Donald Trump‘s recent purchase of a ...,https://finance.yahoo.com/news/trump-buys-tesl...,"[1TSLA.MI, TL0.BE, TL0.F, TL0.XETRA, TL01.F, T...",[],"{'polarity': 0.985, 'neg': 0.028, 'neu': 0.863..."
987,2025-03-12,Tesla Plans to Double U.S. Production Amid Mar...,CEO Elon Musk said on his social networking pl...,https://finance.yahoo.com/news/tesla-plans-dou...,"[1TSLA.MI, TL0.BE, TL0.F, TL0.XETRA, TL01.F, T...",[],"{'polarity': 0.929, 'neg': 0.048, 'neu': 0.852..."


In [74]:
news_df.shape


(6626, 7)

In [75]:
news_df['polarity'] = news_df['sentiment'].apply(lambda x: x['polarity'] if x is not None else 0)
news_df


Unnamed: 0,date,title,content,link,symbols,tags,sentiment,polarity
6021,2024-01-01,The 3 Hottest EV Stocks to Watch in 2024,The world is rapidly transitioning towards ele...,https://finance.yahoo.com/news/3-hottest-ev-st...,"[GM.US, LI.US, TSLA.US]",[],"{'polarity': 0.996, 'neg': 0.054, 'neu': 0.85,...",0.99600000
6025,2024-01-01,UPDATE 1-More EVs lose US tax credits includin...,"(Adds Nissan statement in paragraph 10, Tesla ...",https://finance.yahoo.com/news/1-more-evs-lose...,"[7201.TSE, F.MX, F.US, FDMO34.SA, FMC1.F, FMC1...",[],"{'polarity': 0.996, 'neg': 0.027, 'neu': 0.83,...",0.99600000
6024,2024-01-01,Chinese Carmakers Launch Sodium-Ion Battery-Po...,Two Chinese state-owned carmakers have launche...,https://finance.yahoo.com/news/chinese-carmake...,"[TL0.F, TL0.XETRA, TSLA.MX, TSLA.US, TSLA34.SA]",[],"{'polarity': 0.906, 'neg': 0.011, 'neu': 0.927...",0.90600000
6023,2024-01-01,1 Top Artificial Intelligence (AI) Stock to Bu...,"The larger an object, the more force is needed...",https://finance.yahoo.com/news/1-top-artificia...,[TSLA.US],[],"{'polarity': 0.937, 'neg': 0.038, 'neu': 0.891...",0.93700000
6022,2024-01-01,Dow Jones Futures: Market Rally Strong Heading...,"After a big 2023, the market rally looks stron...",https://finance.yahoo.com/m/8c771d7d-767a-3514...,"[CELH.US, DJI.INDX, GSPC.INDX, IXIC.INDX, LILY...",[],"{'polarity': 0.511, 'neg': 0, 'neu': 0.858, 'p...",0.51100000
...,...,...,...,...,...,...,...,...
980,2025-03-12,Nasdaq Correction: I'd Consider Buying the Dip...,Nasdaq Correction: I'd Consider Buying the Dip...,https://www.fool.com/investing/2025/03/12/nasd...,[TSLA.US],[],"{'polarity': 0.599, 'neg': 0, 'neu': 0.794, 'p...",0.59900000
989,2025-03-12,Musk's Political Role Sparks Investor Concerns...,Tesla (NASDAQ:TSLA) investors are growing unea...,https://finance.yahoo.com/news/musks-political...,"[1TSLA.MI, MS.US, TL0.BE, TL0.F, TL0.XETRA, TL...",[],"{'polarity': -0.937, 'neg': 0.14, 'neu': 0.785...",-0.93700000
990,2025-03-12,"Trump Buys Tesla Model S Plaid, Gary Black Cal...",President Donald Trump‘s recent purchase of a ...,https://finance.yahoo.com/news/trump-buys-tesl...,"[1TSLA.MI, TL0.BE, TL0.F, TL0.XETRA, TL01.F, T...",[],"{'polarity': 0.985, 'neg': 0.028, 'neu': 0.863...",0.98500000
987,2025-03-12,Tesla Plans to Double U.S. Production Amid Mar...,CEO Elon Musk said on his social networking pl...,https://finance.yahoo.com/news/tesla-plans-dou...,"[1TSLA.MI, TL0.BE, TL0.F, TL0.XETRA, TL01.F, T...",[],"{'polarity': 0.929, 'neg': 0.048, 'neu': 0.852...",0.92900000


In [77]:
news_df = news_df.sort_values(by='date')
news_df.to_csv(finacial_news_file_csv, sep='|', index=False) # index=False prevents writing the index to the file

In [78]:
!ls -ltr ./data

total 45192
-rw-r--r--@ 1 ilirkondo  staff     16511 Mar 13 20:33 tsla_eod_prices.csv
-rw-r--r--@ 1 ilirkondo  staff  23116243 Mar 13 20:55 tsla_financial_news.csv


* **Joining EOD prices with news sentiment polarity**
  * Note that there are no EOD prices on weekends or holidays

In [82]:
polarity_df = news_df.groupby('date')['polarity'].mean().reset_index()
polarity_df['date'] = pd.to_datetime(polarity_df['date'])
polarity_df


Unnamed: 0,date,polarity
0,2024-01-01,0.66480769
1,2024-01-02,0.44954545
2,2024-01-03,0.70277500
3,2024-01-04,0.55225000
4,2024-01-05,0.31250000
...,...,...
429,2025-03-08,0.99085714
430,2025-03-09,0.77263636
431,2025-03-10,0.31066667
432,2025-03-11,0.22805882


In [83]:
polarity_df.shape


(434, 2)

In [99]:
combined_df = pd.merge(df, polarity_df, on='date', how='left')
combined_df.fillna({'polarity': 0}, inplace=True)
combined_df


Unnamed: 0,date,open,high,low,close,adjusted_close,volume,polarity
0,2024-01-02,250.08000000,251.25000000,244.41000000,248.42000000,248.42000000,104654203,0.44954545
1,2024-01-03,244.98000000,245.68000000,236.32000000,238.45000000,238.45000000,121082602,0.70277500
2,2024-01-04,239.25000000,242.70000000,237.73000000,237.93000000,237.93000000,102629297,0.55225000
3,2024-01-05,236.86000000,240.11960000,234.90010000,237.49000000,237.49000000,92488945,0.31250000
4,2024-01-08,236.14000000,241.25000000,235.30000000,240.45000000,240.45000000,85166578,0.49025000
...,...,...,...,...,...,...,...,...
294,2025-03-06,272.06000000,272.65000000,260.02000000,263.45000000,263.45000000,98451570,0.23317647
295,2025-03-07,259.32000000,266.24990000,250.73000000,262.67000000,262.67000000,102369602,0.58880000
296,2025-03-10,252.53500000,253.37000000,220.00000000,222.15000000,222.15000000,189076891,0.31066667
297,2025-03-11,225.30500000,237.06490000,217.02000000,230.58000000,230.58000000,174896406,0.22805882


In [100]:
combined_df.shape

(299, 8)

In [101]:
combined_df = combined_df.sort_values(by='date')
combined_df.to_csv(combined_data_file_csv, index=False) # index=False prevents writing the index to the file

In [102]:
! ls -ltr ./data

total 45240
-rw-r--r--@ 1 ilirkondo  staff     16511 Mar 13 20:33 tsla_eod_prices.csv
-rw-r--r--@ 1 ilirkondo  staff  23116243 Mar 13 20:55 tsla_financial_news.csv
-rw-r--r--@ 1 ilirkondo  staff     21248 Mar 13 21:14 tsla_combined_data.csv
