### EOD data extraction ETL process

In [1]:
#Importing required libraries
import pandas as pd
import numpy as np
import os
import requests
import gspread
from gspread_dataframe import set_with_dataframe
from utilities import init_session, sanitize_dates, format_date, url, RemoteDataError

##### Creating S3 bucket
This step assumes that you have configured the AWS CLI locally. If this is not the case, follow this link to configure your [AWS Command Line Interface](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html).

You can run the cell below, or execute the following command in your terminal: `aws s3api create-bucket --bucket eod-data-test-bucket --region us-east-2 --create-bucket-configuration LocationConstraint=us-east-2`. It will create a new bucket in your S3 storage named `eod-data-test-bucket`. You may use any other name you prefer, as long as it follows the [Bucket naming rules](https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html).

In [None]:
#Creates the S3 bucket `eod-data-test-bucket` in region us-east-2 through the AWS CLI (shell escape)
!aws s3api create-bucket --bucket eod-data-test-bucket --region us-east-2 --create-bucket-configuration LocationConstraint=us-east-2

In [None]:
#Setting variables
#This API key is used for demo purposes
EOD_HISTORICAL_DATA_API_KEY_DEFAULT = "OeAFFmMliFG5orCUuwAKQ8l4WWFQ67YX"
EOD_HISTORICAL_DATA_API_URL = "https://eodhistoricaldata.com/api"
#Token read from the environment; falls back to the demo key when EOD_API_TOKEN is
#unset or empty, so the helper functions never default to api_key=None
token = os.getenv('EOD_API_TOKEN') or EOD_HISTORICAL_DATA_API_KEY_DEFAULT
#AWS credentials read from the environment (None when not set)
aws_key = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret = os.environ.get('AWS_SECRET_ACCESS_KEY')
#This is the bucket name that was created in your AWS account
#NOTE(review): the create-bucket command earlier in this notebook uses
#`eod-data-test-bucket`; make sure this value matches the bucket you actually created
bucket = 'eod-datalake'

##### Helper functions

In [None]:
def get_eod_data(symbol, exchange, endpoint, start=None, end=None, api_key=token, session=None):
    """
    Returns EOD (end of day) data for a given symbol as a DataFrame.

    Parameters:
    ----------
    symbol(str): Ticker symbol; combined with `exchange` as "<symbol>.<exchange>"
    exchange(str): Exchange code appended to the symbol
    endpoint(str): API endpoint name placed before the symbol in the URL path
    start, end: Bounds of the requested period; normalised by `sanitize_dates`
        and rendered with `format_date` into the "from"/"to" query parameters
    api_key(str): Token sent as the "api_token" query parameter
    session: Optional session passed through `init_session`

    Returns:
    -------
    pd.DataFrame parsed from the CSV response body. The first column is parsed
    as dates and used as the index; the last line of the body is skipped.

    Raises:
    ------
    RemoteDataError: when the response status is not `requests.codes.ok`
    """
    # Imported locally because StringIO is not imported at the top of the file.
    from io import StringIO

    symbol_exchange = symbol + "." + exchange
    session = init_session(session)
    start, end = sanitize_dates(start, end)
    # Deliberately NOT named `url`: that would shadow the imported `url` helper
    # used below to build the error message.
    request_url = f"{EOD_HISTORICAL_DATA_API_URL}/{endpoint}/{symbol_exchange}"
    params = {
        "api_token": api_key,
        "from": format_date(start),
        "to": format_date(end),
    }

    r = session.get(request_url, params=params)

    if r.status_code != requests.codes.ok:
        # Mask the real token before it ends up in the exception text.
        params["api_token"] = "YOUR_HIDDEN_API"
        raise RemoteDataError(r.status_code, r.reason, url(request_url, params))

    # skipfooter is only supported by the python parser; selecting it explicitly
    # avoids the fallback ParserWarning raised with the default C engine.
    return pd.read_csv(
        StringIO(r.text),
        skipfooter=1,
        parse_dates=[0],
        index_col=0,
        engine="python",
    )
        
def get_fundamental_data(symbol, exchange, endpoint, api_key=token,session=None):
    """
    Returns the raw response text of `endpoint` for a given symbol.

    Parameters:
    ----------
    symbol(str): Ticker symbol; combined with `exchange` as "<symbol>.<exchange>"
    exchange(str): Exchange code appended to the symbol
    endpoint(str): API endpoint name placed before the symbol in the URL path
    api_key(str): Token sent as the "api_token" query parameter
    session: Optional session passed through `init_session`

    Returns:
    -------
    str: The response body (`r.text`). The body is returned for every status
    code; no exception is raised on a non-OK response, so callers that need
    to detect failures must inspect the returned text themselves.
    On an OK response the body is also printed.
    """
    symbol_exchange = symbol + "." + exchange
    session = init_session(session)
    request_url = f"{EOD_HISTORICAL_DATA_API_URL}/{endpoint}/{symbol_exchange}"
    params = {
        "api_token": api_key
    }

    # The query parameters must be sent with the request; previously they were
    # built but never passed, so the API token was not transmitted.
    r = session.get(request_url, params=params)

    if r.status_code == requests.codes.ok:
        print(r.text)

    return r.text

def write_data_to_bucket(file_name:str, mode:str, df:"pd.DataFrame | None"=None):
    """
    Writes a dataframe as a CSV dataset to `s3://<bucket>/raw-data/<file_name>`.

    Parameters:
    ----------
    file_name(str): Last path component under `raw-data/` in the bucket
    mode(str): Available write modes are 'append', 'overwrite' and 'overwrite_partitions'
    df(pd.DataFrame): Dataframe to write. When omitted, the module-level
        variable named `df` is used, which is what this function always
        relied on before the parameter existed.

    Raises:
    ------
    NameError: when `df` is not passed and no module-level `df` exists
    """
    if df is None:
        # Backward compatibility: earlier callers set a global `df` before
        # calling this function instead of passing the dataframe in.
        try:
            df = globals()["df"]
        except KeyError:
            raise NameError("name 'df' is not defined") from None

    path = f"s3://{bucket}/raw-data/{file_name}"
    #Sending dataframe of corresponding ticker to bucket
    # NOTE(review): `wr` is not imported in the visible import cell; presumably
    # awswrangler is imported elsewhere in the notebook - confirm.
    wr.s3.to_csv(
        df=df,
        path=path,
        index=True,
        dataset=True,
        mode=mode
    )

def read_csv_from_bucket(folder_name:str) -> pd.DataFrame:
    """
    Reads the `.csv` objects stored under `raw-data/<folder_name>/` in the bucket.

    Parameters:
    ----------
    folder_name(str): Folder below `raw-data/` whose CSV files are read

    Returns:
    -------
    The result of `wr.s3.read_csv` for that prefix, restricted to the ".csv" suffix
    """
    source_prefix = f"s3://{bucket}/raw-data/{folder_name}/"
    return wr.s3.read_csv(path=source_prefix, path_suffix=".csv")