In [1]:
# Import libraries
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import mlflow
import mlflow.tensorflow
from mlflow.tracking import MlflowClient
import logging
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Functions
def get_data_tail(input_data, backward_steps, scaler):
    # Extract the relevant portion of the dataset for model inputs
    input_data_pred = input_data[len(input_data) - len(input_data.iloc[-backward_steps:,:]) - pred_span_days:].values
    # Reshape the model inputs to a 2D array with a single column
    input_data_pred = input_data_pred.reshape(-1, 1)
    # Apply the same scaling used for training data to the model inputs
    input_data_pred = scaler.transform(input_data_pred)
    return input_data_pred

# Data Lake params
silver = './data/silver/stock-prices/'
gold = './data/gold/portfolio-optimization/'
silver_table = 'stock_prices.csv'
gold_table = 'portfolio_optimization.csv'

if not os.path.exists(silver):
    os.mkdir(silver)
if not os.path.exists(gold):
    os.mkdir(gold)

In [3]:
# Read data from data lake
data = pd.read_csv(silver+silver_table)
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Ticker
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-05-07,75.805,76.292503,75.4925,75.934998,74.454674,115215200,AAPL
2020-05-08,76.410004,77.587502,76.072502,77.532501,76.22683,133838400,AAPL
2020-05-11,77.025002,79.262497,76.809998,78.752502,77.426292,145946400,AAPL
2020-05-12,79.457497,79.922501,77.727501,77.852501,76.541443,162301200,AAPL
2020-05-13,78.037498,78.987503,75.802498,76.912498,75.617271,200622400,AAPL


In [4]:
# Model params
ticker = ['AAPL','MSFT'] #,'AMZN','TSLA','GOOGL','GOOG','NVDA','BRK-B','META','UNH','^GSPC']
metric_to_predict = 'Adj Close'
days_to_predict = 3 # Short-term future days to predict
pred_span_days = 60 # Set the number of days used for prediction
backward_steps = 180 # Set the backward steps to go from the last observation available

# Experiment
experiment_name = 'Stock Price Prediction'
model_name = 'spp_model'
model_version = '1'

# Model pipe execution per ticker
gld_data = pd.DataFrame()
mlflow.set_tracking_uri('sqlite:///mlflow.db')
for i in ticker:
    # Model load
    model_uri = 'models:/{}/{}'.format(model_name+'_'+i.lower(), model_version)
    loaded_model = mlflow.pyfunc.load_model(model_uri)
    
    # Input data
    pred_data = pd.DataFrame(data[data['Ticker']==i][metric_to_predict])
    
    # Set scaler
    scaler = MinMaxScaler(feature_range=(0,1))
    scaler.fit_transform(pred_data.values.reshape(-1,1)) #pred_data[metric_to_predict]
    
    # Make predictions for future dates (short-term)
    for j in range(1, days_to_predict+1):
        lst=[]
        input_data_pred = get_data_tail(pred_data, backward_steps, scaler)
        last_date = pd.to_datetime(pred_data.reset_index().iloc[-1,0])+pd.DateOffset(days=1)
        lst.append(last_date)
        pred_prices = pd.DataFrame(lst, columns=['Date'])
        real_data = [input_data_pred[len(input_data_pred) - pred_span_days:len(input_data_pred), 0]]
        real_data = np.array(real_data)
        real_data = np.reshape(real_data, (real_data.shape[0], real_data.shape[1], 1))
        prediction = np.reshape(loaded_model.predict(real_data), (-1, 1))
        prediction = scaler.inverse_transform(prediction)
        pred_prices[metric_to_predict] = prediction
        pred_data = pd.concat((pred_data.reset_index(), pred_prices), axis=0).reset_index(drop=True).set_index('Date')
        
    ticker_pred_data = pd.concat([data[data['Ticker']==i][[metric_to_predict]], pred_data.iloc[-days_to_predict:,:]])
    ticker_pred_data['Ticker']=i
    gld_data = pd.concat([gld_data, ticker_pred_data])
gld_data

 - mlflow (current: 2.3.2, required: mlflow==2.3)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.




 - mlflow (current: 2.3.2, required: mlflow==2.3)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.




Unnamed: 0_level_0,Adj Close,Ticker
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-05-07,74.454674,AAPL
2020-05-08,76.226830,AAPL
2020-05-11,77.426292,AAPL
2020-05-12,76.541443,AAPL
2020-05-13,75.617271,AAPL
...,...,...
2023-05-04,305.410004,MSFT
2023-05-05,310.649994,MSFT
2023-05-06,279.441101,MSFT
2023-05-07,280.575409,MSFT


In [None]:
# Load data to data lake (gold layer)
gld_data.to_csv(gold+gold_table)
gld_data