#### load packages

In [2]:
from dotenv import load_dotenv
import os

import requests
from sqlalchemy import create_engine, text
import pandas as pd

from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

#### load hidden environment variables

In [15]:
# load_dotenv() has to run before the lookups so that any values it loads are visible to them
load_dotenv()

# each lookup yields None when the variable is not set
fmp_api_key = os.environ.get('FMP_API_KEY')
db_name = os.environ.get('DB_NAME')
db_password = os.environ.get('DB_PASSWORD')

#### connect to mysql db 

This is the db that we spun up in a docker container. If it's the first time we have set it up, check that it is empty.

In [16]:
# Build the connection URL from its components instead of formatting a string:
# a password or database name containing characters such as "@", "/" or ":"
# would otherwise be mis-read as URL syntax.
from sqlalchemy.engine import URL

mysql_url = URL.create(
    "mysql+mysqlconnector",
    username="root",
    password=db_password,
    host="127.0.0.1",
    port=3306,
    database=db_name,
)
mysql_engine = create_engine(mysql_url)

# module-level connection reused by the query cells further down
conn = mysql_engine.connect()

# quick sanity check: an empty list means no tables exist yet
list(conn.execute(text("show tables;")))

[]

In [5]:
from datetime import datetime

def format_date_time_object(
    datetime_object: datetime,
    with_time: bool = False
) -> str:
    """
    DEPS:
        datetime (standard library)
    DEF:
        render a datetime object as text, either date-only or date plus time.

    ARGS:
        datetime_object: the datetime object to render
        with_time: when True the output is "YYYY-MM-DD HH:MM:SS",
            otherwise it is "YYYY-MM-DD"

    RETURNS:
        the formatted string
    """
    # pick the strftime pattern once, then format
    pattern = "%Y-%m-%d %H:%M:%S" if with_time else "%Y-%m-%d"
    return datetime_object.strftime(pattern)

In [6]:
def api_call_get(
    url: str,
    params: dict,
    headers: dict = None,
    return_other_than_json: bool = False,
    timeout: float = 30.0
) -> object:
    """
    DEPS:
        requests
    DEF:
        send a GET request to url with the given query parameters (and optional headers)
        and return either the decoded json body or the response object itself
    ARGS:
        url: the url endpoint you want to call
        params: {key: value} pairs that serve as parameters for api
        headers: pass headers as {key: value} pairs for more advanced config and authorization
        return_other_than_json: when True, return the response object instead of the decoded
            json, which allows for more inspection (only applies to a 200 response)
        timeout: seconds handed to requests.get as its timeout, so a stalled server
            cannot block the call forever
    RETURNS:
        on a 200 response: the decoded json body, or the response object when
        return_other_than_json is True. When no headers were passed and the body is an
        empty list, the string "empty response" is returned instead.
        on any other status that raise_for_status() does not raise for: None
    RAISES:
        requests.HTTPError: from raise_for_status() on a 4xx/5xx status
        requests.Timeout: when the server does not answer within timeout seconds
    """
    # headers=None is the same as not passing headers, so one call covers both cases
    response = requests.get(url, params=params, headers=headers, timeout=timeout)

    if response.status_code != 200:
        # raises for error statuses; otherwise evaluates to None, which is returned as-is
        return response.raise_for_status()

    if return_other_than_json:
        return response

    payload = response.json()
    # kept for backward compatibility: only calls made without headers replace an
    # empty list with the marker string
    if headers is None and payload == []:
        return "empty response"
    return payload


In [21]:
def call_eod(symbol):
    """
    DEPS:
        datetime, dateutil, api_call_get, format_date_time_object, fmp_api_key
    DEF:
        request end-of-day price history for one symbol from the
        financialmodelingprep historical-price-eod/full endpoint
    ARGS:
        symbol: the ticker to request, sent as the "symbol" query parameter
    RETURNS:
        whatever api_call_get returns for the request
    """
    end_date = datetime.now()
    # window opens one day after the date five years back -- the max amount of data
    # the api will give
    start_date = end_date - relativedelta(years=5) + timedelta(days=1)

    return api_call_get(
        "https://financialmodelingprep.com/stable/historical-price-eod/full",
        params={
            "symbol": symbol,
            "apikey": fmp_api_key,
            "from": format_date_time_object(start_date),
            "to": format_date_time_object(end_date),
        },
    )

In [22]:
def transform_and_commit_data(
    response: object
) -> None:
    """
    DEPS:
        pandas, plus the module-level mysql_engine and conn objects
    DEF:
        turn the api response into a dataframe, add a "<symbol>_<date>" id as the
        first column, and append the rows to the raw_ingestion_test table
    ARGS:
        response: data accepted by pd.DataFrame that yields 'symbol' and 'date' columns
    RETURNS:
        nothing; rows are appended to the table (which already existing is fine), or an
        error is raised
    """
    frame = pd.DataFrame(response)

    # id built from the symbol and date strings, e.g. "AAPL_2025-04-04"
    frame['date_time_id'] = frame['symbol'] + '_' + frame['date']

    # move the id to the front, leaving the other columns in their existing order
    ordered = ['date_time_id', *(c for c in frame.columns if c != 'date_time_id')]
    frame = frame[ordered]

    frame.to_sql(
        name='raw_ingestion_test',
        con=mysql_engine,
        index=False,
        if_exists='append',
        chunksize=1000
    )
    # NOTE(review): to_sql is handed mysql_engine, not conn, so this commit acts on
    # the separate module-level connection -- confirm whether it is still needed
    conn.commit()

In [23]:
# tickers to pull end-of-day history for, listed alphabetically
stocks_to_gather = [
    "AAPL", "AMZN", "AXP", "BLK", "BX",
    "COST", "CRM", "GOOG", "GS", "JPM",
    "MNST", "MSFT", "NFLX", "PLTR", "QQQ",
    "SBUX", "SCHD", "SG", "SOFI", "YUM",
]

In [24]:
# Pull and load each ticker in turn. api_call_get can hand back something that is
# not row data (the "empty response" marker string, or None for a non-200 status
# that does not raise). Those tickers are reported and skipped, so one empty
# result neither aborts the remaining tickers nor prints a false "commited" line.
for s in stocks_to_gather:
    raw_json_reponse = call_eod(s)
    if not raw_json_reponse or isinstance(raw_json_reponse, str):
        print(f'skipped {s}: no rows returned ({raw_json_reponse!r})')
        continue
    # the function returns None, so there is nothing to keep from the call
    transform_and_commit_data(raw_json_reponse)
    print(f'commited {s} to raw_ingestion_test table!')

commited AAPL to raw_ingestion_test table!
commited AMZN to raw_ingestion_test table!
commited AXP to raw_ingestion_test table!
commited BLK to raw_ingestion_test table!
commited BX to raw_ingestion_test table!
commited COST to raw_ingestion_test table!
commited CRM to raw_ingestion_test table!
commited GOOG to raw_ingestion_test table!
commited GS to raw_ingestion_test table!
commited JPM to raw_ingestion_test table!
commited MNST to raw_ingestion_test table!
commited MSFT to raw_ingestion_test table!
commited NFLX to raw_ingestion_test table!
commited PLTR to raw_ingestion_test table!
commited QQQ to raw_ingestion_test table!
commited SBUX to raw_ingestion_test table!
commited SCHD to raw_ingestion_test table!
commited SG to raw_ingestion_test table!
commited SOFI to raw_ingestion_test table!
commited YUM to raw_ingestion_test table!


In [25]:
# read back every row of the ingestion table over the shared connection
pd.read_sql(
    sql="""
select *
from raw_ingestion_test
""",
    con=conn,
)

Unnamed: 0,date_time_id,symbol,date,open,high,low,close,volume,change,changePercent,vwap
0,AAPL_2025-04-04,AAPL,2025-04-04,193.89,199.88,187.34,188.38,124921508,-5.51,-2.84,192.3725
1,AAPL_2025-04-03,AAPL,2025-04-03,205.54,207.49,201.25,203.19,103419006,-2.35,-1.14,204.3675
2,AAPL_2025-04-02,AAPL,2025-04-02,221.32,225.19,221.02,223.89,35905904,2.57,1.16,222.8550
3,AAPL_2025-04-01,AAPL,2025-04-01,219.81,223.68,218.90,223.19,36412740,3.38,1.54,221.3950
4,AAPL_2025-03-31,AAPL,2025-03-31,217.01,225.62,216.23,222.13,65299321,5.13,2.36,220.2475
...,...,...,...,...,...,...,...,...,...,...,...
24420,YUM_2020-04-14,YUM,2020-04-14,78.30,80.41,77.39,79.89,2502880,1.59,2.03,78.9975
24421,YUM_2020-04-13,YUM,2020-04-13,79.78,80.13,75.17,77.06,2014500,-2.72,-3.41,78.0350
24422,YUM_2020-04-09,YUM,2020-04-09,77.59,82.10,77.00,80.37,4199894,2.78,3.58,79.2650
24423,YUM_2020-04-08,YUM,2020-04-08,72.22,75.66,70.86,75.37,3682000,3.15,4.36,73.5275


In [26]:
# Report the size of the ingestion table as (data_length + index_length) in MB.
# The schema and table names are sent as bound parameters instead of being
# formatted into the SQL text, so quoting of the values is left to the driver.
pd.read_sql(
    text("""
SELECT
    table_name AS `Table`,
    round(((data_length + index_length) / 1024 / 1024), 2) `Size (MB)`
FROM information_schema.TABLES
WHERE table_schema = :schema_name
AND table_name = :table_name
"""),
    conn,
    params={"schema_name": db_name, "table_name": "raw_ingestion_test"},
)

Unnamed: 0,Table,Size (MB)
0,raw_ingestion_test,3.52
