# Project 2 - Trading Meme Stocks

## Imports, environment variables and keys 

In [69]:
import requests
import os
import pandas as pd
import numpy as np

import math

import time
import hvplot.pandas
from pathlib import Path
from datetime import date
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from collections import Counter
from pandas.tseries.offsets import DateOffset
from pandas.tseries.offsets import Hour
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import classification_report
from imblearn.over_sampling import RandomOverSampler
import joblib



# Basic functionalities
import json


# datetime manipulation
import datetime as dt
from time import sleep
from datetime import timedelta

# Deep learning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Deep learning model persistence
from tensorflow.keras.models import model_from_json

# Graphing

import matplotlib.pyplot as plt
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
%matplotlib inline  

In [None]:
# Load environment variables
load_dotenv()

#Set API Key from env variable
ss_key=os.getenv("SS_API_KEY")

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

## Analysis Helper Functions

In [None]:
def create_alpaca_connection():    
    api = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    base_url = 'https://paper-api.alpaca.markets',
    api_version = "v2"
    )
    return api

In [None]:
def get_sentiment_df():
    # Create headers variable containing API Key 
    headers = {
        'Authorization' : f'Token {ss_key}',
        'Accept': 'application/json',
    }
    # Get data for daily stock sentiment from API
    sentiment_response = requests.get('https://socialsentiment.io/api/v1/stocks/sentiment/daily/', headers=headers)
    
    # Create dict from sentiment_response['results'] and then a data frame from this dict 
    sentiment_dict = sentiment_response.json()['results']
    sentiment_df = pd.DataFrame.from_dict(sentiment_dict)
    
    # Determine how many lines and then pages are in the sentiment response
    line_count = sentiment_response.json()['count']
    page_count = int(line_count / 50) + (line_count % 50 > 0)
    page=2
    
    # Loop through each page and gather the sentiment data from the API
    while page <= page_count:
        # Loop through all pages available from API and construct dataframe for sentiment data
        sentiment_url = "https://socialsentiment.io/api/v1/stocks/sentiment/daily/?page=%s"%page
        sentiment_response = requests.get(sentiment_url, headers=headers)
        sentiment_dict = sentiment_response.json()['results']
        sentiment_df_loop = pd.json_normalize(sentiment_dict)
        sentiment_df = pd.concat([sentiment_df, sentiment_df_loop], axis=0)
        page += 1
        time.sleep(1)

    # Reset the index on the sentiment df and drop the old index
    sentiment_df.reset_index(inplace=True, drop=True)
    
    # Output the sentiment data for today to a csv file
    path = (f'../Resources/sentiment_{date.today()}.csv')
    sentiment_df.to_csv(path)
    
    return sentiment_df

In [None]:
def get_sentiment_trending_df():
    # Create headers variable containing API Key 
    headers = {
        'Authorization' : f'Token {ss_key}',
        'Accept': 'application/json',
    }
    # Get data for trending stock sentiment from API
    trending_response = requests.get('https://socialsentiment.io/api/v1/stocks/trending/daily/', headers=headers)
    
    # Create dict from trending_response and then a data frame from this dict 
    trending_dict = trending_response.json()
    trending_df = pd.DataFrame.from_dict(trending_dict)
    
    # Output the sentiment data for today to a csv file
    path = (f'../Resources/sentiment_trending_{date.today()}.csv')
    trending_df.to_csv(path)
    
    return trending_df

In [None]:
def create_technical_analysis_df():
    # Create the Alpaca API object, specifying use of the paper trading account:
    api = create_alpaca_connection()
    
    # Set the list of tickers to the top class stock
    tickers = top_class_stocks.index
    # Set get data from API to DataFrame
    today = datetime.now()
    yesterday = today - timedelta(days=1)
    previous_days = today - timedelta(days=365)
    beg_date = previous_days
    end_date = yesterday
    timeframe='1Day'
    start =  pd.Timestamp(f'{beg_date} 09:30:00-0400', tz='America/New_York').replace(hour=9, minute=30, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
    end =  pd.Timestamp(f'{end_date} 16:00:00-0400', tz='America/New_York').replace(hour=16, minute=0, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
    portfolio_df = api.get_bars(tickers, timeframe, start=start, end=end).df
   
    
    # Calculate the 5,15 period high and low rolling SMAs and add each to the portfolio dataframe
    portfolio_df['Lowest_5D'] = portfolio_df.groupby('symbol')['low'].transform(lambda x: x.rolling(window = 12).min())
    portfolio_df['High_5D'] = portfolio_df.groupby('symbol')['high'].transform(lambda x: x.rolling(window = 12).max())
    portfolio_df['Lowest_15D'] = portfolio_df.groupby('symbol')['low'].transform(lambda x: x.rolling(window = 26).min())
    portfolio_df['High_15D'] = portfolio_df.groupby('symbol')['high'].transform(lambda x: x.rolling(window = 26).max())
    
    # Calculate Stochastic Indicators and add each to the portfolio dataframe
    portfolio_df['Stochastic_5'] = ((portfolio_df['close'] - portfolio_df['Lowest_5D'])/(portfolio_df['High_5D'] - portfolio_df['Lowest_5D']))*100
    portfolio_df['Stochastic_15'] = ((portfolio_df['close'] - portfolio_df['Lowest_15D'])/(portfolio_df['High_15D'] - portfolio_df['Lowest_15D']))*100
    portfolio_df['Stochastic_%D_5'] = portfolio_df['Stochastic_5'].rolling(window = 5).mean()
    portfolio_df['Stochastic_%D_15'] = portfolio_df['Stochastic_5'].rolling(window = 15).mean()
    portfolio_df['Stochastic_Ratio'] = portfolio_df['Stochastic_%D_5']/portfolio_df['Stochastic_%D_15']

    # Calculate the TP,sma, mad, cci, previous_close and TR then add each to the portfolio dataframe
    portfolio_df['TP'] = (portfolio_df['high'] + portfolio_df['low'] + portfolio_df['close']) / 3
    portfolio_df['sma'] = portfolio_df.groupby('symbol')['TP'].transform(lambda x: x.rolling(window=26).mean())
    portfolio_df['mad'] = portfolio_df['TP'].rolling(window=26).apply(lambda x: pd.Series(x).mad()) #Calculates Mean Absolute Deviation of 'TP' using a 21 period and returns a pandas series
    portfolio_df['CCI'] = (portfolio_df['TP'] - portfolio_df['sma']) / (0.015 * portfolio_df['mad'])
    portfolio_df['prev_close'] = portfolio_df.groupby('symbol')['close'].shift(1)
    portfolio_df['Actual Returns'] = portfolio_df.groupby('symbol')['close'].pct_change()
    portfolio_df['TR'] = np.maximum((portfolio_df['high'] - portfolio_df['low']),
                                np.maximum(abs(portfolio_df['high'] - portfolio_df['prev_close']), 
                                abs(portfolio_df['prev_close'] - portfolio_df['low'])))

    # Calculate the ATR12 and 26 and add each to the portfolio dataframe
    for i in portfolio_df['symbol'].unique():
        ATR_12 = []
        ATR_26 = []
        TR_data = portfolio_df[portfolio_df.symbol == i].copy()
        portfolio_df.loc[portfolio_df.symbol==i,'ATR_12'] = (TR_data['TR']).rolling(window=12).mean()
        portfolio_df.loc[portfolio_df.symbol==i,'ATR_26'] = (TR_data['TR']).rolling(window=26).mean()
    portfolio_df['ATR_Ratio'] = portfolio_df['ATR_12'] / portfolio_df['ATR_26']
        
    # Reset then set the index on the dataframe then output to csv file for later use
    portfolio_df.reset_index(inplace=True)
    portfolio_df.set_index(['symbol', 'timestamp'], inplace=True)
    path = (f'../Resources/portfolio_indicators_{date.today()}.csv')
    portfolio_df.to_csv(path)
    
    return portfolio_df

In [None]:
def create_returns_df():
    # Create the Alpaca API object, specifying use of the paper trading account:
    api = create_alpaca_connection()
    
    # Set the list of tickers to the top class stock
    tickers = top_class_stocks.index
    # Set get data from API to DataFrame
    today = datetime.now()
    yesterday = today - timedelta(days=1)
    previous_days = today - timedelta(days=30)
    beg_date = previous_days
    end_date = yesterday
    timeframe='1Day'
    start =  pd.Timestamp(f'{beg_date} 09:30:00-0400', tz='America/New_York').replace(hour=9, minute=30, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
    end =  pd.Timestamp(f'{end_date} 16:00:00-0400', tz='America/New_York').replace(hour=16, minute=0, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
    portfolio_df = api.get_bars(tickers, timeframe, start=start, end=end).df

    # Pull prices from the ALPACA API
    data = api.get_bars(tickers, timeframe, start=start, end=end).df
    
    close_df = pd.DataFrame(index=data.index)

    for ticker in tickers:
        vector = data.loc[data["symbol"] == ticker].close
        close_df[ticker] = vector

    close_df.dropna(axis=1, how='all', inplace=True)

    # Use Pandas' forward fill function to fill missing values (be sure to set inplace=True)
    close_df.ffill(inplace=True)
    
    # Define a variable to set prediction period
    forecast = 1

    # Compute the pct_change for 1 min 
    returns = close_df.pct_change(periods=forecast)

    # Shift the returns to convert them to forward returns
    returns = returns.shift(-(forecast))
    returns.dropna(inplace=True)
    path = (f'../Resources/returns_{date.today()}.csv')
    returns.to_csv(path)
    
    return returns

## Fundemental Analysis

In [None]:
sentiment_df = get_sentiment_df()
sentiment_df.head()

In [None]:
sentiment_trending_df = get_sentiment_trending_df()
sentiment_trending_df.head()

## Fundemental Analysis - Visualisations

In [None]:
sentiment_trending_plot_df = sentiment_trending_df.set_index("stock")
sentiment_trending_plot_df["score"].plot(
    kind='bar',
    x='stock',
    y='score', 
    title = "Trending Stock Sentiment Scores",
    figsize=(20,10)
)

#cumulative_returns.plot(figsize=(20,10), title = "Cumulative Returns")

In [None]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
stock_name = pd.DataFrame(sentiment_trending_df['stock'])
stock_name.head(3)

In [None]:
#Prepare Data
X = sentiment_trending_df.set_index("stock")
X.head(3)

In [None]:
# Standardize data
X_scaled = StandardScaler().fit_transform(X)
X_scaled[0:5]

In [None]:
# Use PCA to reduce dimensions to 3 principal components
pca = PCA(n_components=3)
sentiment_pca = pca.fit_transform(X_scaled)

In [None]:
# Create a DataFrame with the principal components data
pcs_df = pd.DataFrame(
    data=sentiment_pca, columns=["PC 1", "PC 2", "PC 3"], index=X.index
)
pcs_df.head(10)

In [None]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df)
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)

In [None]:
# Initialize the K-Means model
model = KMeans(n_clusters=5, random_state=0)

# Fit the model
model.fit(pcs_df)

# Predict clusters
predictions = model.predict(pcs_df)

# Create a new DataFrame including predicted clusters and cryptocurrencies features
clustered_df = pd.DataFrame({
    "score": X.score,
    "positive_score": X.positive_score,
    "negative_score": X.negative_score,
    "activity": X.activity,
    "activity_avg_7_days": X.activity_avg_7_days,
    "activity_avg_14_days": X.activity_avg_14_days,
    "activity_avg_30_days": X.activity_avg_30_days,
    "score_avg_7_days": X.score_avg_7_days,
    "score_avg_14_days": X.score_avg_14_days,
    "score_avg_30_days": X.score_avg_30_days,
    "PC 1": pcs_df['PC 1'],
    "PC 2": pcs_df['PC 2'],
    "PC 3": pcs_df['PC 3'],
    "Class": model.labels_,
    },
    index=X.index
)
clustered_df.head()


In [None]:
# Plotting the 3D-Scatter with x="PC 1", y="PC 2" and z="PC 3"
fig = px.scatter_3d(
    clustered_df,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    hover_name='score',
    hover_data= ['activity'],
    height=600,
    color="Class"
)
fig.show()

In [None]:
top_stocks = clustered_df.sort_values("score", ascending=False)

In [None]:
# Get all stocks in the top class
top_class = top_stocks["Class"].mode()
top_class_stocks = top_stocks.loc[top_stocks["Class"] == top_class[0]]
top_class_stocks

## Technical Analysis

In [None]:
portfolio_df = create_technical_analysis_df()
portfolio_df.head()

In [None]:
#Note this .drop function automatically moves the 'symbol' column to create a multi-level index once row 6 is dropped from original df
technicals = portfolio_df[["Stochastic_Ratio","CCI","ATR_Ratio","close"]]
technicals.tail()

---

# Machine Learning - LSTM Neural Network

### Preprocessing for LSTM

Dataframe (stock_df) specified below needs to be close prices with all indicators (fundamental + technical) to be passed as features. 

In [72]:


# Converting feature and targets into arrays readable by the LSTM network
def scale_array(features, target, train_proportion:float = 0.8, scaler: bool = True):
    
    '''
    Prepares four arrays for training within an LSTM neural network. 
    Returns the following objects
    X_train: training set for features
    X_test: testing set for features
    y_train: training set for target(s)
    y_test: testing set for target(s)
    scaler: sklearn MinMaxScaler with fit memory required for inverse transformation of model predictions
    
    Parameters:    
    :features: Pandas dataframe or Series object containing model features
    :target: Pandas dataframe or Series object containing moel target(s)
    :train: Proportion of data to be assigned to train set. The rest will be assigned to test set.
    :scaler: Boolean. Default = True.If False, data will not be scaled.
    '''
    # Convert features and target to arrays
    X = np.array(features)
    y = np.array(target).reshape(-1,1)
    
    # Manually splitting the data
    split = int(0.8 * len(X))
    X_train = X[: split]
    X_test = X[split:]
    y_train = y[: split]
    y_test = y[split:]

    if scaler:
        # Create a MinMaxScaler object
        scaler = MinMaxScaler()

        # Fit the MinMaxScaler object with the features data X
        scaler.fit(X_train)

        # Scale the features training and testing sets
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # Fit the MinMaxScaler object with the target data Y
        scaler.fit(y_train)

        # Scale the target training and testing sets
        y_train = scaler.transform(y_train)
        y_test = scaler.transform(y_test)
    else:
        pass
    
    # Reshape the features data to pass into LSTM
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    
    return X_train, X_test, y_train, y_test, scaler

# Creating the LSTM neural network layout

def create_LSTM_model(
    train_set: np.ndarray,
    dropout: float = 0.2,
    layer_one_dropout: float = 0.6,
    number_layers: int = 4,
    optimizer: str = 'adam',
    loss: str = 'mean_squared_error'):
    
    '''
    Initialises a multilayer LSTM neural network, with number of units in the first layer being equal to the number of features. Number of layers is default 4, but can be specified by user.
    Each layer is accompanied by a Dropout with a rate of 0.6 for the first layer and a default of 0.2 for subsequent layers.
    After the first layer, number of units in each LSTM are reduced to 2/3 the initial size.
    '''

    # Define the LSTM RNN model.
    model = Sequential()

    # Initial model setup
    number_units = X_train.shape[1]
    dropout_fraction = dropout


    # Layer 1
    model.add(LSTM(
        units=number_units,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1))
        )
    model.add(Dropout(layer_one_dropout))

    # Intiialize layer counter
    layer_counter = 1
    
    # 'While' loop to keep adding layers until number of layers meet user specifications. Condition is "< - 1" because of need for penultimate layer not to have "return_sequences = True".
    while layer_counter < (number_layers - 1):
        # Layer 2 to n
        model.add(LSTM(units=number_units, return_sequences = True))
        model.add(Dropout(dropout_fraction))
        layer_counter+=1

    # Penultimate layer
    model.add(LSTM(units=number_units))
    model.add(Dropout(dropout_fraction))

    # Output layer
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer=optimizer, loss=loss)
    
    return model

# Function to calculate returns according to strategy, and reappend to existing prices dataframe
def calculate_strategy_returns(prices_df, trading_threshold, shorting: bool = False):
    '''
    prices_df: pd.DataFrame containing an 'Actual' and 'Predicted' column representing actual and model-predicted prices respectively
    
    '''
     # Calculate actual daily returns
    prices_df['actual_returns'] = prices_df['Actual'].pct_change()
    # Create a 'last close' column
    prices_df['last_close'] = prices_df['Actual'].shift()
    # Calculate the predicted daily returns, by taking the predicted price as a proportion of the last close
    prices_df['predicted_returns'] = (prices_df['Predicted'] - prices_df['last_close'])/prices_df['last_close']

    # Actual signal = 1 if actual returns more than threshold,  -1 if less than threshold
    prices_df['actual_signal'] = 0
    prices_df.loc[prices_df['actual_returns'] > trading_threshold , 'actual_signal'] = 1
    if shorting == True:
        prices_df.loc[prices_df['actual_returns'] < -trading_threshold , 'actual_signal'] = -1

    # Strategy signal = 1 if predicted returns > threshold, -1 if less than threshold
    prices_df['strategy_signal'] = 0
    prices_df.loc[prices_df['predicted_returns'] > trading_threshold , 'strategy_signal'] = 1
    if shorting == True:
        prices_df.loc[prices_df['predicted_returns'] < -trading_threshold , 'strategy_signal'] = -1       

    # Compute strategy returns
    prices_df['strategy_returns'] = prices_df['actual_returns'] * prices_df['strategy_signal']
    
    return prices_df


# Function to calculate RMSE. 'math` library needed. 
def calculate_RMSE(y_actual, y_predicted):
    MSE = np.square(np.subtract(y_actual, y_predicted)).mean()
    RMSE = math.sqrt(MSE)
    return RMSE



### Predefine parameters for LSTM network training


In [74]:

# Set target cumulative returns as a threshold for model to achieve.
target_cumulative_return = 1.01

# Set returns threshold for strategy to fire trading signal
trading_threshold = 0.00

# Set maximum numberof iterations to run
max_iter = 3


### LSTM `for` loop to train models for each candidate ticker



In [78]:
tickers = technicals.index.get_level_values('symbol').unique().to_list()

# Initialise list to hold tickers that have successfully trained models that achieve the target cumulative returns:
modelled_tickers = []
trading_signals = []

for ticker in tickers:
    print("="*50)
    print(f"Initialising training for {ticker}")

    # Create signal dataframe as a copy
    signal = technicals.copy().loc[ticker].dropna()
    
    # Create blank row for current trading day and append to end of dataframe
    most_recent_timestamp = signal.index.get_level_values('timestamp').max() + timedelta(minutes = 1)
    signal.loc[most_recent_timestamp, ['target']] = np.nan

    # # Create target
    signal['target'] = signal['close'] 

    # Shift indicators to predict current trading day close
    signal.iloc[:, :-1]  = signal.iloc[:, :-1].shift()

    # Drop first row with NaNs resulting from data shift
    signal = signal.iloc[1:, :]

    # Ensure all data is 'float' type while also dropping null values due to value shifts and unavailable NaN indicator data.
    signal = signal.astype('float')

    # Set features and target
    X = signal.iloc[:, :-1]
    y = signal['target']
      
    # Use predefined scale_array function to transform data and perform train/test split
    X_train, X_test, y_train, y_test, scaler = scale_array(X, y, train_proportion = 0.8)

    # Record start time
    start_time = time.time()
    
    # (Re)set iter_counter and strategy_cumulative_return to 0 
    strategy_cumulative_return = 0
    iter_counter = 0

    # While loop that repeatedly trains LSTM models to adjust weights until it can hit the target cumulative return. Loop stops if max_iter is hit or if returns are achieved on backtesting
    while strategy_cumulative_return < target_cumulative_return and iter_counter != max_iter:
        
        strategy_cumulative_return = 0
        # Start iteration counter
        iter_counter+=1

        # Create model if first iteration. Reset model if subsequent iterations
        model = create_LSTM_model(X_train,
                                  dropout=0.4,
                                  layer_one_dropout=0.6,
                                  number_layers=6
                                 )

        # Set early stopping such that each iteration stops running epochs if validation loss is not improving (i.e. minimising further)
        callback = EarlyStopping(
            monitor='val_loss',
            patience=20, mode='auto',
            restore_best_weights=True
        )

        # Print message to allow visual confirmation of iteration training is currently at.
        print("="*50)
        print(f"Training {ticker} model iteration {iter_counter} ...please wait.\n")

        # Train the model
        history = model.fit(
            X_train, y_train,
            epochs=1000, batch_size=32,
            shuffle=False,
            validation_split = 0.1,  
            verbose = 0,
            callbacks = callback
        )
        # Print confirmation that current iteration has ended.
        print(f"Iteration {iter_counter} ended.")

        # Evaluate loss when predicting test data. Sliced out entry -1 as y_test[-1] target is NaN 
        model_loss = model.evaluate(X_test[:-1], y_test[:-1], verbose=0)
    
        # Make predictions
        predicted = model.predict(X_test)

        # Recover the original prices instead of the scaled version
        predicted_prices = scaler.inverse_transform(predicted)
        real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

        # Create a DataFrame of Real and Predicted values
        prices = pd.DataFrame({
            "Actual": real_prices.ravel(),
            "Predicted": predicted_prices.ravel()
        }, index = signal.index[-len(real_prices): ]) 

        # Use predefined calculate_strategy_returns function to calculate and append strategy returns column to 'prices' dataframe
        prices = calculate_strategy_returns(prices, trading_threshold, shorting = False)
        
        
        # Compute strategy cumulative returns
        strategy_cumulative_return = (1+prices['strategy_returns']).cumprod()[-1]
        
        rmse = calculate_RMSE(prices['Actual'], prices['Predicted'])
        
        # Print performance metrics of the model given the feature weights produced by current iteration
        print(f"LSTM Method iteration {iter_counter} for {ticker} - Performance")
        print("-"*50)
        print(f"Model loss on testing dataset: \n{model_loss:.4f}")
        print(f"RMSE: \n{rmse:.4f}")
        print(f"Cumulative return on testing dataset: \n{strategy_cumulative_return:.4f}")
    
    # Append ticker to modelled_tickers:
    modelled_tickers.append(ticker)
    
    if strategy_cumulative_return >= target_cumulative_return:
        print(f"Target cumulative returns achieved\n")
        # Calculate cumulative returns at their best and worst time points over time.
        min_return = (1+prices['strategy_returns']).cumprod().min()
        max_return = (1+prices['strategy_returns']).cumprod().max()

        
        # Print cumulative return performance
        print(f"From {prices.index.min()} to {prices.index.max()}, the cumulative return of the current model is {strategy_cumulative_return:.2f}.")
        print(f"At its lowest, the model recorded a cumulative return of {min_return:.2f}.")
        print(f"At its highest, the model recorded a cumulative return of {max_return:.2f}.")  
        
        # Convert model to json
        model_json = model.to_json()

        # Save model layout as json
        file_path = Path(f"../LSTM_model_weights/{ticker}.json")
        with open(file_path, "w") as json_file:
            json_file.write(model_json)

        # Save weights
        model.save_weights(f"../LSTM_model_weights/{ticker}.h5")
        
        # Append the trading signal predicted by model
        trading_signals.append(prices.loc[prices.index.max(), 'strategy_signal'])

    else:
        print(f"The LSTM model was not able to achieve the target cumulative returns on the testing dataset within {max_iter} iterations.\n")
        trading_signals.append(0)


print("*"*50)
print(f"Training completed.")

    

Initialising training for ADBE
Training ADBE model iteration 1 ...please wait.

Iteration 1 ended.
LSTM Method iteration 1 for ADBE - Performance
--------------------------------------------------
Model loss on testing dataset: 
0.2099
RMSE: 
99.0676
Cumulative return on testing dataset: 
0.8533
Training ADBE model iteration 2 ...please wait.

Iteration 2 ended.
LSTM Method iteration 2 for ADBE - Performance
--------------------------------------------------
Model loss on testing dataset: 
0.2079
RMSE: 
98.6081
Cumulative return on testing dataset: 
0.8533
Training ADBE model iteration 3 ...please wait.

Iteration 3 ended.
LSTM Method iteration 3 for ADBE - Performance
--------------------------------------------------
Model loss on testing dataset: 
0.2139
RMSE: 
100.0215
Cumulative return on testing dataset: 
0.8533
The LSTM model was not able to achieve the target cumulative returns on the testing dataset within 3 iterations.

Initialising training for BCS
Training BCS model iterati

In [80]:
# Produce trading_signal df.

trading_dict = {'symbol':modelled_tickers, 'trading_signal':trading_signals}
trading_signals_df = pd.DataFrame.from_dict(trading_dict).set_index('symbol')

trading_signals_df

Unnamed: 0_level_0,trading_signal
symbol,Unnamed: 1_level_1
ADBE,0
BCS,0
CRON,0
CRWD,1
DOCU,0
FNKO,1
GME,0
NIO,0
NKLA,0
ORCL,0


## Graphs

Once we have a model that generates the desired cumulative returns, we can print the graph for further visual confirmation that this is a suitable algo. 

Three graphs here 
- The loss metric from the training history of the eligible model
- Predicted prices vs Actual prices
- Strategy cumprod vs Actual cumprod.

In [None]:
# Plot validation loss versus training loss

plt.plot(history.history['loss'], 'r', label='Training loss')
plt.plot(history.history['val_loss'], 'g', label='Validation loss')
plt.title('Training VS Validation loss')
plt.xlabel('No. of Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:

# Plot the real vs predicted prices as a line chart
price_fig = px.line(prices, y = ['Actual', 'Predicted'],  title = "Actual vs Predicted", width= 1500, height = 600)
price_fig.show()

In [None]:
# Plot strategy cumulative returns
strategy_cumulative_returns = (1+prices['strategy_returns']).cumprod()
actual_cumulative_returns = (1+prices['actual_returns']).cumprod()
cumulative_returns_df = pd.concat([strategy_cumulative_returns, actual_cumulative_returns], join = "inner", axis = "columns")

cumulative_returns_fig = px.line(
    cumulative_returns_df,
    y = ['strategy_returns', 'actual_returns'],
    x = cumulative_returns_df.index.values,
    title = f'Strategy  vs Actual Returns',
    width = 1500, height = 600
)

cumulative_returns_fig.show()


---

## Forward Prediction and Trading Signals given saved model weights

### Model Persistence (Load)

Note that I've set up the file name here to save as (ticker_name).json.

In [None]:
# Load json and create mdoel
file_path = Path(f"../LSTM_model_weights/{ticker}.json")
with open (file_path, "r") as json_file:
    model_json = json_file.read()
loaded_model = model_from_json(model_json)

# Load weights into new model
file_path = f"../LSTM_model_weights/{ticker}.h5"
loaded_model.load_weights(file_path)

# Visual confirmation of model setup
print(loaded_model.summary())

In [None]:
# Make predictions with model
predicted = loaded_model.predict(X_test)
predicted_prices = scaler.inverse_transform(predicted)

### Develop the Algorithm


#### Use the provided code to ping the Alpaca API and create the DataFrame needed to feed data into the model.
   * This code will also store the correct feature data in `X` for later use.

In [None]:
top_class_stocks_buy = pd.DataFrame(index=top_class_stocks.index)
top_class_stocks_buy["buy"] = [1,1,0,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1]
top_class_stocks_buy = top_class_stocks_buy.loc[top_class_stocks_buy["buy"] == 1]
top_class_stocks_buy

In [None]:
# Get tickers for top class stocks from FA
tickers = top_class_stocks_buy.index

# declare begin and end date strings
beg_date = '2022-03-08'
end_date = '2022-03-08'
# we convert begin and end date to formats that the ALPACA API requires
start =  pd.Timestamp(f'{beg_date} 09:30:00-0400', tz='America/New_York').replace(hour=9, minute=30, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
end   =  pd.Timestamp(f'{end_date} 16:00:00-0400', tz='America/New_York').replace(hour=16, minute=0, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
# We set the time frequency at which we want to pull prices
timeframe='1Min'

# Pull prices from the ALPACA API
api = create_alpaca_connection()
stocks_ohlcv = api.get_bars(tickers, timeframe, start=start, end=end).df

In [None]:
stocks_ohlcv

#### Using the `top_class_stocks_buy` filter, create a dictionary called `buy_dict` and assign 'n' to each Ticker (key value) as a placeholder.

In [None]:
# Create dictionary from y_pred and assign a 'n' to each of them for now as a placeholder.
buy_dict = dict.fromkeys(top_class_stocks_buy.index.get_level_values(0), 'n')
buy_dict

#### Obtain the total available equity in your account from the Alpaca API and store in a variable called `total_capital`. You will split the capital equally between all selected stocks per the CIO's request.

In [None]:
# Pull the total available equity in our account from the  Alpaca API
api = create_alpaca_connection()
account = api.get_account()
total_capital = float(account.equity)
print(f"Total available capital: {total_capital}")

In [None]:
# Compute capital per stock, divide equity in account by number of stocks
# Use Alpaca API to pull the equity in the account
if len(buy_dict) > 0:
    capital_per_stock = float(total_capital)/ len(buy_dict)
else:
    capital_per_stock = 0
print(f'Capital per stock: {capital_per_stock}')

#### Use a for-loop to iterate through `buy_dict` to determine the number stocks you need to buy for each ticker.

In [None]:
# Use for loop to iterate through dictionary of buys 
# Determine the number stocks we need to buy for each ticker
for ticker in buy_dict:
    try:
        buy_dict[ticker] = int(capital_per_stock / close_df[ticker].iloc[-1])
    except:
        pass

print(buy_dict)

#### Cancel all previous orders in the Alpaca API (so you don't buy more than intended) and sell all currently held stocks to close all positions.

In [None]:
# Cancel all previous orders in the Alpaca API
api.cancel_all_orders()

# Sell all currently held stocks to close all positions
api.close_all_positions()

#### Iterate through `buy_dict` and send a buy order for each ticker with their corresponding number of shares.

In [None]:
# Iterate through the buy_dict object and send a buy order for each ticker with a corresponding number of shares:
for stock, qty in buy_dict.items():    
    # Submit a market order to buy shares as described in buy_dict
    api.submit_order(
        symbol=stock,
        qty=qty,
        side='buy',
        type='market',
        time_in_force='gtc',
    )
    print(f'buying {stock} numShares {qty}')
    

### Automate the algorithm

#### Create `trade()` function that incorporates all of the steps above.

In [None]:
# Add all of the steps conducted above into the function trade
#def trade():
## TO DO
tickers = top_class_stocks.index
# Notice that we remove the start and end variables since we want the latest prices.
timeframe='1Min'
# Use iloc to get the last 10 mins every time we pull new data
prices = api.get_barset(ticker_list, "minute").df.iloc[-11:]
prices.ffill(inplace=True)   
prices

In [None]:
tickers

#### mport Python's schedule module.

In [None]:
# Import Python's schedule module 
# All imports have been completed at the start of the code

#### Use the "schedule" module to automate the algorithm:
* Clear the schedule with `.clear()`.
* Define a schedule to run the trade function every minute at 5 seconds past the minute mark (e.g. `10:31:05`).
* Use the Alpaca API to check whether the market is open.
* Use run_pending() function inside schedule to execute the schedule you defined while the market is open

In [None]:
# Clear the schedule
schedule.clear()

# Define a schedule to run the trade function every minute at 5 seconds past the minute mark (e.g. 10:31:05)
trade_schedule = schedule.every().minute.at(":05").do(trade)

# Use the Alpaca API to check whether the market is open
clock = api.get_clock()

# Use run_pending() function inside schedule to execute the schedule you defined as long as the market is open
while clock.is_open == True:
    print(f'The market trading widow for {clock.next_open} is open, executing trade function')
    schedule.run_pending()
    time.sleep(1)
else:
    print(f'The market is closed the next open market day will be {clock.next_open}')


In [None]:
# Get Scheduled Jobs
schedule.get_jobs()