## Update Stock Prices and run PnF calcs for each new trading day

In [2]:
# Import credentials

import json
# Read every credential from the local JSON file. The context manager closes
# the file handle as soon as parsing finishes instead of leaving it open for
# the lifetime of the process.
with open("/. .<your file path here> . . /credentials.json", encoding="utf-8") as f:
    credentials = json.load(f)

# Local working directory for intermediate files.
file_path = credentials['file_path']

# API / cloud credentials.
intrinio_key = credentials['intrinio_key']
aws_key = credentials['aws_access_key']
aws_secret_key = credentials['aws_secret_key']

# MySQL (AWS RDS) connection settings.
rds_host = credentials['rds_host']
rds_user = credentials['rds_user']
rds_password = credentials['rds_password']
rds_database = credentials['rds_database']
rds_charset = credentials['rds_charset']


In [14]:
# Import Intrinio libraries

import time
import intrinio_sdk as intrinio
from intrinio_sdk.rest import ApiException

# Store the Intrinio key (read from the credentials file) under the 'api_key'
# entry of the SDK client configuration.
# NOTE(review): this is set on a freshly constructed ApiClient; presumably the
# SDK shares one configuration object between clients -- confirm.
intrinio.ApiClient().configuration.api_key['api_key'] = intrinio_key

# Import the usual Python libraries

from tqdm.notebook import tqdm, trange  # to be used to track progress in loop iterations
import pandas as pd
import numpy as np
import datetime as datetime
from datetime import datetime, date, time, timedelta

# Import Prefect library

from prefect.triggers import all_successful, all_failed
from prefect import task, Flow
import pendulum
from prefect.schedules import IntervalSchedule
from prefect.schedules.clocks import IntervalClock

# Import the AWS libraries

import boto3
from boto3.s3.transfer import TransferConfig
from boto3.s3.transfer import S3Transfer
import io
import pyarrow as pa
import pyarrow.parquet as pq

# Import SQL libraries

import mysql.connector 
from mysql.connector import errorcode
from sqlalchemy import create_engine

# Declare the local File Path:

# Module-level alias for the local working directory read from the credentials
# file; run_all_calcs uses it as the folder for its CSV snapshot.
my_path = file_path


In [4]:
# Fetch the last (max) date from the Point & Figure history table

@task
def get_max_pnf_date():
    """Find the most recent date stored in the Point & Figure history table.

    Runs ``SELECT MAX(date) FROM base_pnf_data_historical`` against the RDS
    database and compares the result with today's date.

    Side effects:
        Publishes ``lastPnFUpdate``, ``td_days`` and ``todayDate`` as
        module-level globals, which other functions in this file read.

    Returns:
        tuple: ``(lastPnFUpdate, td_days, todayDate)`` -- the last date in the
        history table, the number of days between that date and today, and
        today's date.

    Raises:
        ValueError: if the history table holds no rows (``MAX(date)`` is NULL).
    """
    from datetime import date, datetime

    global lastPnFUpdate
    global td_days
    global todayDate

    mydb = mysql.connector.connect(
        host=rds_host,
        user=rds_user,
        password=rds_password,
        database=rds_database
    )

    # Always release the cursor and the connection, even if the query fails.
    try:
        mycursor = mydb.cursor()
        try:
            mycursor.execute("SELECT MAX(date) FROM base_pnf_data_historical")
            max_date = mycursor.fetchall()[0][0]
        finally:
            mycursor.close()
    finally:
        mydb.close()

    if max_date is None:
        raise ValueError("base_pnf_data_historical is empty; there is no last P&F update date.")

    # A DATETIME column comes back as a datetime and needs truncating to a
    # date; a DATE column already comes back as a plain date.
    myResultDate = max_date.date() if isinstance(max_date, datetime) else max_date

    todayDate = date.today()              # Save today's date
    lastPnFUpdate = myResultDate          # Save the last trading date from the historical data table
    td_days = (todayDate - lastPnFUpdate).days  # Number of days since the last trading date

    print("The last day that prices were updated was", lastPnFUpdate.strftime('%m/%d/%Y'))
    print("That date was", td_days, "days ago.")

    return lastPnFUpdate, td_days, todayDate


In [5]:
# Get new data from the Price History table for each ticker to append to the P&F history table.

@task
def get_price_data(todayDate, lastPnFUpdate):
    """Load the price history rows that still need P&F calculations.

    Selects every row of ``price_data_historical`` whose ``date`` lies in the
    inclusive range ``lastPnFUpdate`` .. ``todayDate``, adds the (still empty)
    Point & Figure columns and reorders the columns to the layout used for the
    P&F history table elsewhere in this file.

    Args:
        todayDate: upper bound of the date range; must support ``strftime``.
        lastPnFUpdate: lower bound of the date range; must support ``strftime``.

    Side effects:
        Publishes ``df_price_data``, ``df_price_update_total`` (an empty
        DataFrame) and ``myResultData`` (the raw fetched rows) as module-level
        globals.

    Returns:
        pandas.DataFrame: the new price rows with empty P&F columns.
    """
    global df_price_data
    global df_price_update_total
    global myResultData

    df_price_update_total = pd.DataFrame()

    mydb = mysql.connector.connect(
        host=rds_host,
        user=rds_user,
        password=rds_password,
        database=rds_database
    )

    # Bind the dates as query parameters instead of concatenating them into
    # the SQL text, and always release the cursor and the connection.
    query = "SELECT * FROM price_data_historical WHERE date BETWEEN %s AND %s"
    params = (lastPnFUpdate.strftime('%Y-%m-%d'), todayDate.strftime('%Y-%m-%d'))

    try:
        mycursor = mydb.cursor()
        try:
            mycursor.execute(query, params)
            myResultData = mycursor.fetchall()
        finally:
            mycursor.close()
    finally:
        mydb.close()

    # Column names applied to the SELECT * result, in result order.
    columns = ['key_id', 'ticker', 'figi', 'date', 'open', 'high', 'low', 'close', 'volume', 'adj_open', 'adj_high',
               'adj_low', 'adj_close', 'adj_volume', 'adj_factor', 'split_ratio', 'change', 'percent_change',
               'fifty_two_week_high', 'fifty_two_week_low', 'market_cap', 'weighted_avg_shares_out', 'intraperiod',
               'last_updated_date', 'last_corp_action_date']

    # Save the records from the price history table that we will apply P&F calculations to.
    df_price_data = pd.DataFrame(myResultData, columns=columns)

    # Add the P&F columns: everything starts as NaN except the reversal flag, which starts at 0.
    nan_columns = ['plot_symbol', 'signal_name', 'high_point', 'last_high_point', 'prev_high_point',
                   'low_point', 'last_low_point', 'prev_low_point', 'entry_x', 'entry_o',
                   'next_entry', 'stop_loss', 'target_price']
    for column_name in nan_columns:
        df_price_data[column_name] = np.nan
    df_price_data['reversal'] = 0

    # Reorder the columns (this also drops the price columns that are not part of the P&F layout).
    df_price_data = df_price_data[['key_id', 'date', 'figi', 'ticker', 'open', 'high', 'low', 'close', 'change', 'percent_change', 'volume', 'plot_symbol', 'reversal',
                 'signal_name', 'high_point', 'last_high_point', 'prev_high_point', 'low_point', 'last_low_point', 'prev_low_point', 'entry_x', 'entry_o',
                 'next_entry', 'stop_loss', 'target_price', 'last_updated_date', 'last_corp_action_date']]

    print("The shape of the new price data DF is", df_price_data.shape)

    return df_price_data

    

In [6]:
# Get the last record for each stock from the historical database so it can be joined to the update DF.

@task
def get_last_records(lastPnFUpdate):
    """Load the P&F history rows stored for the last updated trading date.

    Selects every row of ``base_pnf_data_historical`` whose ``date`` equals
    ``lastPnFUpdate``.

    Args:
        lastPnFUpdate: the date to fetch; must support ``strftime``.

    Side effects:
        Publishes the result as the module-level global ``df_last_records``.

    Returns:
        pandas.DataFrame: the P&F records of that date, one column per entry
        of ``myColumns`` below.
    """
    global df_last_records

    mydb = mysql.connector.connect(
        host=rds_host,
        user=rds_user,
        password=rds_password,
        database=rds_database
    )

    # Bind the date as a query parameter instead of concatenating it into the
    # SQL text, and always release the cursor and the connection.
    query = "SELECT * FROM base_pnf_data_historical WHERE date = %s"

    try:
        mycursor = mydb.cursor()
        try:
            mycursor.execute(query, (lastPnFUpdate.strftime('%Y-%m-%d'),))
            myresult = mycursor.fetchall()
        finally:
            mycursor.close()
    finally:
        mydb.close()

    # Column names applied to the SELECT * result, in result order.
    myColumns = ['key_id', 'date', 'figi', 'ticker', 'open', 'high', 'low', 'close', 'change', 'percent_change', 'volume', 'plot_symbol', 'reversal',
                 'signal_name', 'high_point', 'last_high_point', 'prev_high_point', 'low_point', 'last_low_point', 'prev_low_point', 'entry_x', 'entry_o',
                 'next_entry', 'stop_loss', 'target_price', 'last_updated_date', 'last_corp_action_date']

    df_last_records = pd.DataFrame(myresult, columns=myColumns)  # Save the P&F records from the last trading date

    print("The shape of the last active records DF is", df_last_records.shape)

    return df_last_records


In [7]:
# Join the new price data with the last active records from the history database to get the starting P&F values
# for the new data.

@task
def join_records(df_price_data, df_last_records):
    """Stack the new price rows on top of the last stored P&F rows.

    The combined frame is sorted by date, ticker and plot symbol, reduced to
    one row per ``key_id`` (the first row in that order is kept) and finally
    ordered by ticker and date.

    Side effects:
        Publishes the result as the module-level global ``df_pnf_update``.

    Returns:
        pandas.DataFrame: the de-duplicated, ticker/date-ordered frame.
    """
    global df_pnf_update

    stacked = pd.concat([df_price_data, df_last_records])

    # Order the rows so that drop_duplicates sees them in date / ticker / plot_symbol order.
    ordered_for_dedup = stacked.sort_values(by=['date', 'ticker', 'plot_symbol'])
    one_row_per_key = ordered_for_dedup.drop_duplicates(subset=['key_id'], keep='first')

    # Final ordering: each ticker's rows together, in date order.
    df_pnf_update = one_row_per_key.sort_values(by=['ticker', 'date'])

    print("The shape of the new combined DF is", df_pnf_update.shape)

    return df_pnf_update

In [8]:
def generate_pnf_calcs(myFigi):
    """Run the Point & Figure calculations for one security.

    Reads the rows for ``myFigi`` from the module-level ``df_pnf_update``
    frame (it is not passed in as an argument). The first row seeds the running
    state (high/low points, entry prices, target price); every later row is
    then filled in, in row order, from its close price and the previous row.
    NOTE(review): this assumes the rows of each FIGI are already in date order
    and that the first row is the last stored P&F record -- confirm against
    the code that builds ``df_pnf_update``.

    Args:
        myFigi: the FIGI code whose rows should be processed.

    Returns:
        list: one list of column values per row (``data.values.tolist()``),
        including the unchanged first row.
    """
    
    boxSize = .02                                  # Box size as a fraction of price (2%)
    reversalBoxes = 3                              # Number of boxes needed for a reversal
    reversalAmount = boxSize * reversalBoxes       # Fractional move that triggers a reversal

    new_data_list = []
    
    # Work on a private copy with a fresh 0..n-1 index so that data.loc[i, ...] and .iloc[i] address the same row.
    data = df_pnf_update.loc[df_pnf_update['figi'] == myFigi].copy()
    data.reset_index(drop = True, inplace = True)
    
    # Set all starting High Points and Low Points equal to the last record for each ticker from the historical data table.

    high_point = data['high_point'].iloc[0]
    low_point = data['low_point'].iloc[0]
    last_high_point = data['last_high_point'].iloc[0]
    last_low_point = data['last_low_point'].iloc[0]
    entry_x = data['entry_x'].iloc[0]
    entry_o = data['entry_o'].iloc[0]
    prev_high_point = data['prev_high_point'].iloc[0]
    prev_low_point = data['prev_low_point'].iloc[0]
    target_price = data['target_price'].iloc[0]

    # Start the loop on the second day, loop through each day's close price after that.
    for i in range(1, len(data)):

        if data['plot_symbol'].iloc[i - 1] == 'X':   #If previous Plot Symbol = "X", then:

            if data['close'].iloc[i] >= data['close'].iloc[i - 1]:     #If current price >= previous price, then:
                data.loc[i, 'plot_symbol'] = 'X'        # Today's Plot Symbol = "X".
                data.loc[i, 'signal_name'] = data['signal_name'].iloc[i - 1]    #and copy yesterday's signal to today.

                if data['close'].iloc[i] > high_point:    #And if today's price is higher than the most recent high price, 
                    high_point = data['close'].iloc[i]       #then make today's price the high price,
                    data.loc[i, 'signal_name'] = data['signal_name'].iloc[i - 1]   #and copy yesterday's signal to today.

                if data['close'].iloc[i] > last_high_point:  #And if today's price is higher than the high point from the last X column,
                    data.loc[i, 'signal_name'] = "BUY"           #then today's signal = "BUY".

            elif data['close'].iloc[i] < high_point * (1 - reversalAmount):     #Else if today's price is less than the current high times (1 - reversal),
                data.loc[i, 'plot_symbol'] = 'O'                                     #the Plot Symbol reverses to "O",
                low_point = data['close'].iloc[i]                                   #and the low point is today's price,
                data.loc[i, 'reversal'] = 1                                         #and reversal = 1,
                prev_high_point = last_high_point                                        #and prev_high_point = last_high_point, saving this value to use in the Target Price calc below
                last_high_point = high_point                                               #and last_high_point = most recent high point
                entry_o = data['close'].iloc[i - 1]                                 #and entry_o = previous day's closing price, used in next_entry and stop_loss calcs

                if data['close'].iloc[i] < last_low_point:   #And if today's price is lower than the low point from the last O column,
                    data.loc[i, 'signal_name'] = "SELL"          #then today's signal = "SELL".
                else:
                    data.loc[i, 'signal_name'] = data['signal_name'].iloc[i - 1]   #Else copy yesterday's signal to today.

            else:
                data.loc[i, 'plot_symbol'] = 'X'  #Else, Plot Symbol = "X" (price is down but not enough to trigger a reversal)
                data.loc[i, 'signal_name'] = data['signal_name'].iloc[i - 1]   #and copy yesterday's signal to today.


        if data['plot_symbol'].iloc[i - 1] == 'O':   #If previous Plot Symbol = "O", then:

            if data['close'].iloc[i] < data['close'].iloc[i - 1]:            #If current price < previous price (strictly lower), then:
                data.loc[i, 'plot_symbol'] = 'O'         # Today's Plot Symbol = "O".
                data.loc[i, 'signal_name'] = data['signal_name'].iloc[i - 1]

                if data['close'].iloc[i] < low_point:       #And if today's price is lower than the most recent low price, 
                    low_point = data['close'].iloc[i]         #then make today's price the low price.
                    data.loc[i, 'signal_name'] = data['signal_name'].iloc[i - 1]   #and copy yesterday's signal to today.

                if data['close'].iloc[i] < last_low_point:   #And if today's price is lower than the low point from the last O column,
                    data.loc[i, 'signal_name'] = "SELL"         #then today's signal = "SELL".


            elif data['close'].iloc[i] > low_point * (1 + reversalAmount):       #Else if today's price is greater than the current low times (1 + reversal),
                data.loc[i, 'plot_symbol'] = 'X'                                       #the Plot Symbol reverses to "X",
                high_point = data['close'].iloc[i]                                    #and the high point is today's price,
                data.loc[i, 'reversal'] = 1                                           #and reversal = 1,
                prev_low_point = last_low_point                                            #and prev_low_point = last_low_point, saving this value to use in the Target Price calc below
                last_low_point = low_point                                                   #and last_low_point = most recent low point
                entry_x = data['close'].iloc[i - 1]                                   #and entry_x = previous day's closing price, used in next_entry and stop_loss calcs

                if data['close'].iloc[i] > last_high_point:  #And if today's price is higher than the high point from the last X column,
                    data.loc[i, 'signal_name'] = "BUY"          #then today's signal = "BUY".

                else:
                    data.loc[i, 'signal_name'] = data['signal_name'].iloc[i - 1]     #Else copy yesterday's signal to today.

            else:
                data.loc[i, 'plot_symbol'] = 'O'  #Else, Plot Symbol = "O" (price is not lower, but not up enough to trigger a reversal)
                data.loc[i, 'signal_name'] = data['signal_name'].iloc[i - 1]   #and copy yesterday's signal to today.

        # Write the running state for today into the row.
        data.loc[i, 'high_point'] = high_point            #high_point = current "high_point"
        data.loc[i, 'low_point'] = low_point             #low_point = current "low_point"
        data.loc[i, 'last_high_point'] = last_high_point  #last_high_point = current "last_high_point"
        data.loc[i, 'last_low_point'] = last_low_point    #last_low_point = current "last_low_point"
        data.loc[i, 'prev_high_point'] = prev_high_point  #prev_high_point = current "prev_high_point"
        data.loc[i, 'prev_low_point'] = prev_low_point    #prev_low_point = current "prev_low_point"

        if data['signal_name'].iloc[i] == "BUY":

            next_entry = entry_o * (1 + boxSize)         #Set next_entry at one box up from the price at the last reversal from X to O, which should be near the top of the previous X column
            data.loc[i, 'next_entry'] = next_entry
            stop_loss = entry_x * (1 - boxSize)          #Set the stop_loss at one box down from the price at the last reversal from O to X, which should be near the bottom of the previous O column
            data.loc[i, 'stop_loss'] = stop_loss

            if data['signal_name'].iloc[i - 1] == "SELL":
                target_price = ((last_high_point - prev_low_point) * reversalBoxes) + prev_low_point   #Upon reversal from SELL to BUY, set the target_price equal to (last_high_point - prev_low_point)
                                                                                                # times the number of reversal boxes, added to prev_low_point. Once calculated, it does not
                                                                                                # change for the balance of the current BUY signal.
            data.loc[i, 'target_price'] = target_price

        else:
            # Any signal other than "BUY" (including a missing signal) is handled as the SELL side.
            next_entry = entry_x * (1 - boxSize)         #Set next_entry at one box down from the price at the last reversal from O to X, which should be near the bottom of the previous O column
            data.loc[i, 'next_entry'] = next_entry
            stop_loss = entry_o * (1 + boxSize)          #Set the stop_loss at one box up from the price at the last reversal from X to O, which should be near the top of the previous X column
            data.loc[i, 'stop_loss'] = stop_loss

            if data['signal_name'].iloc[i - 1] == "BUY":
                target_price = prev_high_point - ((prev_high_point - last_low_point) * reversalBoxes)  #Upon reversal from BUY to SELL, set the target_price equal to (prev_high_point - last_low_point)
                                                                                                # times the number of reversal boxes, subtracted from prev_high_point. Once calculated, it does not
                                                                                                # change for the balance of the current SELL signal.
            data.loc[i, 'target_price'] = target_price
            
        data.loc[i, 'entry_x'] = entry_x            #entry_x = current "entry_x"
        data.loc[i, 'entry_o'] = entry_o            #entry_o = current "entry_o"

    # Return the rows as plain lists so the caller can rebuild a DataFrame from them.
    data_list = data.values.tolist()
    new_data_list.extend(data_list)
    
    return new_data_list


In [9]:
# Run all the calculations and prepare final dataframe.

@task
def run_all_calcs(df_pnf_update):
    """Run the P&F calculations for every FIGI and build the frame to load.

    The calculations are fanned out over a multiprocessing pool, one call of
    ``generate_pnf_calcs`` per FIGI. The combined result is written to a CSV
    snapshot in ``my_path`` and then trimmed to the rows that are not dated
    ``lastPnFUpdate``, so the load does not overlap what the database holds.

    Args:
        df_pnf_update: frame whose ``figi`` column supplies the FIGI codes to
            process. Note that ``generate_pnf_calcs`` reads its rows from the
            module-level ``df_pnf_update``, not from this argument.

    Side effects:
        Reads the module-level globals ``todayDate``, ``lastPnFUpdate`` and
        ``my_path``; writes a CSV file; publishes the result as the
        module-level global ``df_pnf_update_load``.

    Returns:
        pandas.DataFrame: the rows to append to the history table. It is empty
        when there is no row dated other than ``lastPnFUpdate``.
    """
    global df_pnf_update_load

    # Local import: the module-level name ``time`` is rebound by
    # ``from datetime import ... time`` in the import cell.
    import time
    from itertools import chain
    from multiprocessing import Pool

    start_time = time.time()

    figi_list = df_pnf_update['figi'].unique().tolist()  # Get the list of FIGI codes to run the calculations against.

    # Use a multiprocessing pool to spread the work over all available cores;
    # shut the pool down even if one of the workers raises.
    p = Pool()
    try:
        result = p.map(generate_pnf_calcs, figi_list)
    finally:
        p.close()
        p.join()

    elapsed_time = time.time() - start_time

    print("Elapsed time was", round(elapsed_time/60, 2), "minutes.")

    # Flatten the per-FIGI row lists into a single list of rows.
    new_data_list = list(chain.from_iterable(result))

    myColumns = ['key_id', 'date', 'figi', 'ticker', 'open', 'high', 'low', 'close', 'change', 'percent_change', 'volume', 'plot_symbol', 'reversal',
                 'signal_name', 'high_point', 'last_high_point', 'prev_high_point', 'low_point', 'last_low_point', 'prev_low_point', 'entry_x', 'entry_o',
                 'next_entry', 'stop_loss', 'target_price', 'last_updated_date', 'last_corp_action_date']

    df_pnf_data = pd.DataFrame(new_data_list, columns=myColumns)  # Save the pool results list to a dataframe

    # Save the dataframe to a CSV file in case you need to refer to it later.
    df_pnf_data.to_csv(path_or_buf=my_path + "/df_pnf_data_update_" + todayDate.strftime('%Y-%m-%d') + ".csv", index=False)

    print("The intermediate dataframe shape is ", df_pnf_data.shape)

    # Make sure the date column is in datetime format and remove the records from the last trading day so there is no overlap with the database.
    df_pnf_update_load = df_pnf_data.copy()
    df_pnf_update_load['date'] = pd.to_datetime(df_pnf_update_load['date'])
    df_pnf_update_load['last_updated_date'] = pd.to_datetime(df_pnf_update_load['date'].max()).normalize()
    df_pnf_update_load = df_pnf_update_load[df_pnf_update_load['date'] != lastPnFUpdate.strftime('%Y-%m-%d')]

    print("The shape of the dataframe to load is ", df_pnf_update_load.shape)

    # With nothing new to load, min()/max() of the date column are NaT and
    # NaT.strftime raises; return the empty frame so the downstream tasks can
    # skip their upload through their own "len(...) > 0" checks.
    if len(df_pnf_update_load) == 0:
        print("There are no new trading days to load.")
        return df_pnf_update_load

    # Confirm that the date range for the new data is what you expect to see.
    startDate = df_pnf_update_load['date'].min().strftime('%Y-%m-%d')
    endDate = df_pnf_update_load['date'].max().strftime('%Y-%m-%d')
    print("The date range of the dataframe to load goes from ", startDate, " to ", endDate)

    return df_pnf_update_load


In [10]:
# Push the dataframe to CSV on S3 if you want to use AWS Lambda to take it from there and push it into 
# the RDS table.

@task
def push_data_to_S3(df_pnf_update_load):
    """Upload the frame to S3 as a CSV object.

    Nothing is uploaded when the frame has no rows. The object key contains
    the module-level ``todayDate``. The outcome is only printed; a non-200
    status does not raise.

    Args:
        df_pnf_update_load: the rows to upload.
    """
    import io

    # Nothing to upload.
    if len(df_pnf_update_load) == 0:
        return

    # Create the AWS client
    s3_client = boto3.client(
        's3',
        aws_access_key_id=aws_key,
        aws_secret_access_key=aws_secret_key,
        region_name='us-east-1'
    )

    bucket_name = 'bns-intrinio-data'
    object_key = "price-data-daily/df_pnf_update_load_" + todayDate.strftime('%Y-%m-%d') + ".csv"

    # Render the frame as CSV text in memory.
    with io.StringIO() as csv_buffer:
        df_pnf_update_load.to_csv(csv_buffer, index=False)
        csv_text = csv_buffer.getvalue()

    response = s3_client.put_object(Bucket=bucket_name, Key=object_key, Body=csv_text)

    status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")

    if status != 200:
        print(f"Unsuccessful S3 put_object response. Status - {status}")
    else:
        print(f"Successful S3 put_object response. Status - {status}")


In [11]:
# Use SQLAlchemy to push the final dataframe into SQL DB on AWS RDS:

@task
def push_data_to_RDS(df_pnf_update_load):
    """Append the frame to the ``base_pnf_data_historical`` table on RDS.

    Nothing is written when the frame has no rows.

    Args:
        df_pnf_update_load: the rows to append; its columns are written as-is
            (the index is not written).
    """
    from urllib.parse import quote

    # Nothing to load.
    if len(df_pnf_update_load) == 0:
        return

    # Set database credentials. User name and password are percent-encoded so
    # that characters such as '@', ':' or '/' cannot be mistaken for URL
    # delimiters when the connection string is parsed.
    creds = {'usr': quote(rds_user, safe=''),
             'pwd': quote(rds_password, safe=''),
             'hst': rds_host,
             'prt': 3306,
             'dbn': rds_database}

    # MySQL connection string.
    connstr = 'mysql+mysqlconnector://{usr}:{pwd}@{hst}:{prt}/{dbn}'

    # Create sqlalchemy engine for MySQL connection.
    engine = create_engine(connstr.format(**creds))

    try:
        # Write DataFrame to MySQL using the engine (connection) created above.
        df_pnf_update_load.to_sql(name='base_pnf_data_historical',
                                  con=engine,
                                  if_exists='append',
                                  index=False)
    finally:
        # Release the pooled connections held by the engine.
        engine.dispose()

    print("The new data has been appended to RDS. The number of new rows added is", df_pnf_update_load.shape[0])


In [None]:
# Set up the daily run schedule.

# One run every 24 hours, counted from 19 Dec 2021 at 21:00 New York time.
schedule = IntervalSchedule(
    interval=timedelta(hours=24),
    start_date=pendulum.datetime(2021, 12, 19, 21, 0, 0, tz="America/New_York"),
)

In [None]:
# Run the ETL update flow.

if __name__ == "__main__":

    # Build the flow by handing each task's *result* to the tasks that consume
    # it. The values the tasks publish as module globals (todayDate,
    # df_price_data, ...) do not exist while the flow is being built, and even
    # when left over from an earlier run they would be frozen into every
    # scheduled run, so they must not be used as task arguments here.
    # The task functions are not rebound, so they can still be called directly.
    with Flow("Stock-Data-Update-ETL", schedule) as flow:

        # get_max_pnf_date returns (lastPnFUpdate, td_days, todayDate);
        # indexing the task result picks out the individual values at run time.
        max_pnf_date_result = get_max_pnf_date()
        last_pnf_update = max_pnf_date_result[0]
        today_date = max_pnf_date_result[2]

        price_data = get_price_data(today_date, last_pnf_update)
        # Keep the original ordering: fetch the last records after the price data.
        last_records = get_last_records(last_pnf_update, upstream_tasks=[price_data])
        pnf_update = join_records(price_data, last_records)
        pnf_update_load = run_all_calcs(pnf_update)

        s3_upload = push_data_to_S3(pnf_update_load)
        rds_upload = push_data_to_RDS(pnf_update_load)

    # The flow's final state follows the RDS load.
    flow.set_reference_tasks([rds_upload])
    

In [None]:
# Run the flow built in the previous cell ("flow" only exists when that cell ran as __main__).
# NOTE(review): the flow was created with a daily schedule, so this call presumably keeps
# running and waits for each scheduled start time -- confirm against the Prefect docs.
flow.run()

In [None]:
# Test the update process

# Call each task's run() directly, outside of any flow, in pipeline order.
# The return values are discarded here: every step publishes its result as a
# module-level global, and the next call reads those globals as its arguments,
# so the statements must be executed in exactly this order.
get_max_pnf_date.run()                           # sets lastPnFUpdate, td_days, todayDate
get_price_data.run(todayDate, lastPnFUpdate)     # sets df_price_data
get_last_records.run(lastPnFUpdate)              # sets df_last_records
join_records.run(df_price_data, df_last_records) # sets df_pnf_update
run_all_calcs.run(df_pnf_update)                 # sets df_pnf_update_load

# Both loads skip the upload when df_pnf_update_load has no rows.
push_data_to_S3.run(df_pnf_update_load)
push_data_to_RDS.run(df_pnf_update_load)
