In [49]:
#import relevant libraries 
import pandas as pd
import plotly.express as px
import panel as pn
import hvplot.pandas
import requests
import json
import numpy as np
import matplotlib.pyplot as plt
import json
from pathlib import Path
import alpaca_trade_api as tradeapi
from dotenv import load_dotenv
import os
%matplotlib inline
import nltk
nltk.download('vader_lexicon')
from datetime import datetime, timedelta,date
from pandas import DataFrame
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

#from streamz import Stream
#from streamz.dataframe import DataFrame as streamz_df
#import hvplot.streamz
#import hvplot.streamz
#from streamz.dataframe import DataFrame

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\melis\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [50]:
## Get Covid Case Count Data
# COVID-19 API endpoint for United States confirmed case counts
# (the `total/dayone/.../status/confirmed` route); passed to get_case_count below.
request_url = "https://api.covid19api.com/total/dayone/country/united-states/status/confirmed"

def get_case_count(request_url):
    """Download confirmed-case counts and derive daily trend columns.

    Parameters
    ----------
    request_url : str
        Endpoint returning a JSON array of daily records. Each record must
        carry ``Date`` and ``Cases`` plus the location fields dropped below
        (``CountryCode``, ``Province``, ``City``, ``CityCode``, ``Lat``, ``Lon``).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date`` (``datetime.date`` objects). Holds the remaining
        API columns plus ``Pct_Change`` (day-over-day change of ``Cases``) and
        ``7day_Rolling`` (7-row rolling mean of ``Cases``). Rows containing
        nulls -- in practice the first six, which lack a full rolling window --
        are dropped.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    # Fetch the payload exactly once and build the frame from it, instead of
    # downloading the same URL a second time through pd.read_json.
    response = requests.get(request_url, timeout=30)
    response.raise_for_status()
    case_count_df = pd.DataFrame(response.json())

    # Clean data: discard location metadata that is not used downstream.
    case_count_df = case_count_df.drop(
        columns=['CountryCode', 'Province', 'City', 'CityCode', 'Lat', 'Lon']
    )

    # Reduce the timestamp to a plain calendar date and use it as the index.
    case_count_df['Date'] = pd.to_datetime(case_count_df['Date']).dt.date
    case_count_df = case_count_df.set_index('Date')

    # Day-over-day percent change and 7-day rolling average of the case count.
    case_count_df['Pct_Change'] = case_count_df['Cases'].pct_change()
    case_count_df['7day_Rolling'] = case_count_df['Cases'].rolling(window=7).mean()

    # Drop the leading rows that have no percent change / full rolling window yet.
    return case_count_df.dropna()

In [51]:
# Fetch and clean the case-count table; later cells reuse `case_count_df`.
case_count_df=get_case_count(request_url)


In [52]:
## Get Covid 19 News Data
# Load variables from a local .env file, then read the News API key from the
# `newsapikey` environment variable (os.getenv returns None when it is unset).
load_dotenv()
newsapi_key = os.getenv("newsapikey")

import newsapi
from newsapi import NewsApiClient

# Create the News API client used to fetch Covid-19 articles.
# NOTE: this rebinds the name `newsapi` from the imported module to the client
# instance; covid_sentiment(newsapi) further down receives the client.
newsapi = NewsApiClient(api_key=newsapi_key)
    
    

In [53]:
## Get Market Data for S&P500 
# Load the .env file and read the Alpaca credentials from the environment
# (os.getenv returns None for a variable that is not set).
load_dotenv()
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")
# REST client for the Alpaca v2 API; get_SP500_data uses its `alpha_vantage` accessor.
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

def get_SP500_data(api):
    """Load SPY adjusted closes and compute daily returns.

    Parameters
    ----------
    api : object
        Client exposing ``alpha_vantage.historic_quotes(symbol, adjusted=...,
        output_format=...)`` that returns a DataFrame containing a
        ``'5. adjusted close'`` column indexed by date.

    Returns
    -------
    pandas.DataFrame
        Sorted oldest-to-newest with columns ``'S&P 500 close'`` and
        ``'S&P 500 Daily Returns'``. The first row (which has no return) is dropped.
    """
    # Load historical data for the S&P 500 proxy (SPY).
    quotes = api.alpha_vantage.historic_quotes('SPY', adjusted=True, output_format='pandas')

    # Keep only the adjusted close, sorted earliest to latest so that
    # pct_change() compares each day with the previous one. sort_index()
    # returns a new Series, so the source frame is never modified through a slice.
    close = quotes['5. adjusted close'].sort_index(ascending=True)

    # De-duplicate by date rather than by value: two distinct trading days may
    # legitimately share the same close and return and must both be kept.
    close = close[~close.index.duplicated(keep='first')]

    all_returns_df = pd.DataFrame({
        'S&P 500 close': close,
        'S&P 500 Daily Returns': close.pct_change(),
    })

    # The first row has no previous close and therefore no return.
    return all_returns_df.dropna()

In [54]:
# Download the S&P 500 close/return table once; later cells reuse `all_returns_df`.
all_returns_df=get_SP500_data(api)

In [55]:
def covid_case_count(case_count_df, short_window=7, long_window=30):
    """Derive a trading signal from the growth rate of the case count.

    Parameters
    ----------
    case_count_df : pandas.DataFrame
        Must contain a ``'Cases'`` column ordered oldest to newest.
    short_window, long_window : int, optional
        Number of rows in the short and long rolling averages of the daily
        percent change. The output column names stay ``'7day % Change'`` and
        ``'30day % Change'`` regardless, so downstream lookups keep working.

    Returns
    -------
    pandas.DataFrame
        Copy of ``'Cases'`` plus the two rolling averages,
        ``'Case Count Signal'`` (0.0 for the first ``short_window`` rows, then
        1.0 where the short average exceeds the long one and -1.0 otherwise,
        including rows where either average is still NaN) and
        ``'Case Entry/Exit'`` (row-to-row difference of the signal).
        The input frame is not modified.
    """
    case_count_signals_df = case_count_df.loc[:, ['Cases']].copy()

    # Compute the daily growth once and smooth it over both windows.
    daily_growth = case_count_signals_df['Cases'].pct_change()
    case_count_signals_df['7day % Change'] = daily_growth.rolling(short_window).mean()
    case_count_signals_df['30day % Change'] = daily_growth.rolling(long_window).mean()

    # Build the whole signal column as one array and assign it in a single step.
    # Chained assignment (df[col][n:] = ...) is unreliable and silently does
    # nothing under pandas Copy-on-Write.
    signal = np.where(
        case_count_signals_df['7day % Change'] > case_count_signals_df['30day % Change'],
        1.0,
        -1.0,
    )
    signal[:short_window] = 0.0  # warm-up rows carry no signal
    case_count_signals_df['Case Count Signal'] = signal

    case_count_signals_df['Case Entry/Exit'] = case_count_signals_df['Case Count Signal'].diff()
    return case_count_signals_df
    

In [56]:
def covid_sentiment(newsapi, days=30):
    """Score daily Covid-19 news sentiment and turn it into a trading signal.

    One News API request is issued per day, so the default of 30 days costs
    30 requests against the account's rate limit.

    Parameters
    ----------
    newsapi : object
        Client exposing ``get_everything(from_param=..., to=..., q=...,
        language=..., page_size=..., sort_by=...)`` that returns a mapping
        with an ``"articles"`` list of dicts carrying a ``"content"`` entry.
    days : int, optional
        Number of most recent calendar days (today included) to score.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``"YYYY-MM-DD"`` strings in ascending order, with columns
        ``'Sentiment Signal'`` (1.0 when the day's mean positive score exceeds
        its mean negative score, otherwise 0.0), ``'Sentiment'``
        (``'Positive'``, ``'Negative'`` or ``'Neutral'`` for ties and days
        without usable articles) and ``'Sentiment Entry/Exit'`` (row-to-row
        difference of the signal).
    """
    daily_signal_dict = {}
    daily_sentiment_dict = {}
    today = datetime.now()

    for days_back in range(days):
        # Restrict each query to a single calendar day so that the score really
        # describes that day's news rather than an ever-growing date range.
        day = (today - timedelta(days=days_back)).strftime("%Y-%m-%d")
        covid19_news = newsapi.get_everything(
            from_param=f"{day}T00:00:00",
            to=f"{day}T23:59:59",
            q="Covid 19",
            language="en",
            page_size=100,
            sort_by="relevancy",
        )

        # Score every article that actually has text; articles without content
        # are skipped explicitly instead of being hidden by a bare except.
        positive_scores = []
        negative_scores = []
        for article in covid19_news.get("articles") or []:
            text = article.get("content")
            if not text:
                continue
            sentiment = analyzer.polarity_scores(text)
            positive_scores.append(sentiment["pos"])
            negative_scores.append(sentiment["neg"])

        # Compare the day's average scores (not the scores of one arbitrary article).
        if positive_scores:
            mean_positive = sum(positive_scores) / len(positive_scores)
            mean_negative = sum(negative_scores) / len(negative_scores)
        else:
            mean_positive = mean_negative = 0.0

        if mean_negative > mean_positive:
            sentiment_signal, sentiment_label = 0.0, 'Negative'
        elif mean_positive > mean_negative:
            sentiment_signal, sentiment_label = 1.0, 'Positive'
        else:
            # Tie or no usable articles: stay out of the market for that day.
            sentiment_signal, sentiment_label = 0.0, 'Neutral'

        daily_signal_dict[day] = sentiment_signal
        daily_sentiment_dict[day] = sentiment_label

    # Assemble the per-day results, oldest day first.
    sentiment_signal_df = pd.DataFrame({
        'Sentiment Signal': pd.Series(daily_signal_dict, dtype="float64"),
        'Sentiment': pd.Series(daily_sentiment_dict, dtype="object"),
    }).sort_index(ascending=True)

    # Entry (+1) / exit (-1) points are the changes of the signal between days.
    sentiment_signal_df["Sentiment Entry/Exit"] = sentiment_signal_df["Sentiment Signal"].diff()

    return sentiment_signal_df


In [57]:
def get_EMA(all_returns_df, short_window=50, long_window=100):
    """Build exponential-moving-average crossover signals for the S&P 500.

    Parameters
    ----------
    all_returns_df : pandas.DataFrame
        Must contain an ``'S&P 500 close'`` column ordered oldest to newest.
    short_window, long_window : int, optional
        Spans of the short and long exponential moving averages. The output
        columns stay named ``'EWM50'`` and ``'EWM100'`` regardless, so
        downstream lookups keep working.

    Returns
    -------
    pandas.DataFrame
        ``'S&P 500 close'``, ``'EWM50'``, ``'EWM100'``, ``'Signal'`` (0.0 for
        the first ``short_window`` rows, then 1.0 where the short average is
        above the long one and 0.0 otherwise) and ``'EMA Entry/Exit'``
        (row-to-row difference of the signal: +1 entry, -1 exit).
        The input frame is not modified.
    """
    # Grab just the close from the dataset (the date is the index).
    sp500_signals = all_returns_df.loc[:, ["S&P 500 close"]].copy()

    # Generate the short and long exponential moving averages.
    close = sp500_signals['S&P 500 close']
    sp500_signals["EWM50"] = close.ewm(span=short_window).mean()
    sp500_signals["EWM100"] = close.ewm(span=long_window).mean()

    # Build the whole signal column as one array and assign it in a single
    # step; chained assignment (df[col][n:] = ...) silently does nothing under
    # pandas Copy-on-Write.
    signal = np.where(sp500_signals["EWM50"] > sp500_signals["EWM100"], 1.0, 0.0)
    signal[:short_window] = 0.0  # warm-up rows carry no signal
    sp500_signals["Signal"] = signal

    # Points in time at which a position should be taken (+1) or closed (-1).
    sp500_signals["EMA Entry/Exit"] = sp500_signals["Signal"].diff()

    return sp500_signals

In [58]:
# Build the case-count signal table from the cleaned case data and preview its first rows.
case_count_signals_df=covid_case_count(case_count_df)
case_count_signals_df.head()

Unnamed: 0_level_0,Cases,7day % Change,30day % Change,Case Count Signal,Case Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-28,5,,,0.0,
2020-01-29,5,,,0.0,0.0
2020-01-30,5,,,0.0,0.0
2020-01-31,7,,,0.0,0.0
2020-02-01,8,,,0.0,0.0


In [59]:
# Score recent Covid-19 news. covid_sentiment calls the News API repeatedly, so
# this cell can fail with a rate-limit error on a developer account.
sentiment_signal_df=covid_sentiment(newsapi)
# covid_sentiment indexes by "YYYY-MM-DD" strings; convert them to datetimes.
sentiment_signal_df.index = pd.to_datetime(sentiment_signal_df.index)
# Keep the plot object for the dashboard cell, then preview the table.
sentiment_signal_plot= sentiment_signal_df.hvplot(title = 'COVID 19 Sentiment Signals')
sentiment_signal_df.head()

NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 500 requests over a 24 hour period (250 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [None]:
# Build the EMA crossover signal table from the S&P 500 closes and preview its first rows.
EMA_signal_df=get_EMA(all_returns_df)
EMA_signal_df.head()

In [None]:
def _with_datetime_index(frame):
    """Return a copy of ``frame`` indexed by a timezone-naive, midnight-normalized DatetimeIndex."""
    aligned = frame.copy()
    index = pd.DatetimeIndex(pd.to_datetime(aligned.index))
    if index.tz is not None:
        index = index.tz_localize(None)
    aligned.index = index.normalize()
    return aligned


def get_trading_signals(case_count_signals_df,sentiment_signal_df,EMA_signal_df):
    """Join the case-count, sentiment and EMA signal tables on their dates.

    Parameters
    ----------
    case_count_signals_df : pandas.DataFrame
        Must contain ``'Case Count Signal'``.
    sentiment_signal_df : pandas.DataFrame
        Must contain ``'Sentiment Signal'``.
    EMA_signal_df : pandas.DataFrame
        EMA signal table; its ``'Signal'`` column is kept as ``'EMA Signal'``.

    Returns
    -------
    pandas.DataFrame
        Inner join of the three tables on the dates they share, plus
        ``'Signal'`` (case-count signal + sentiment signal) and
        ``'Overall Entry/Exit'`` (row-to-row difference of ``'Signal'``).
        The input frames are not modified.
    """
    # The three inputs may be indexed by date objects, strings or Timestamps.
    # Those never compare equal, so an inner join on the raw indexes comes out
    # empty; bring all of them onto a common DatetimeIndex first.
    case_signals = _with_datetime_index(case_count_signals_df)
    sentiment_signals = _with_datetime_index(sentiment_signal_df)
    # Rename the EMA signal so the combined 'Signal' below does not overwrite it.
    ema_signals = _with_datetime_index(EMA_signal_df).rename(columns={'Signal': 'EMA Signal'})

    trading_signals_df=pd.concat([case_signals, sentiment_signals, ema_signals],axis=1,join="inner")
    trading_signals_df['Signal']=trading_signals_df['Case Count Signal']+trading_signals_df['Sentiment Signal']
    trading_signals_df['Overall Entry/Exit']=trading_signals_df['Signal'].diff()
    return trading_signals_df

In [None]:
# Combine the three signal tables into one frame and preview its first rows.
trading_signals_df=get_trading_signals(case_count_signals_df,sentiment_signal_df,EMA_signal_df)
trading_signals_df.head()

In [None]:
# Initialize close values as features in the model
def window_data(EMA_signal_df, window, feature_col_number, target_col_number):
    """Slice a frame into sliding-window features and next-row targets.

    Parameters
    ----------
    EMA_signal_df : pandas.DataFrame
        Source table, ordered oldest to newest.
    window : int
        Number of consecutive rows that make up one feature sample.
    feature_col_number : int
        Positional index of the column the features are read from.
    target_col_number : int
        Positional index of the column the target is read from.

    Returns
    -------
    tuple of numpy.ndarray
        ``X`` with one row of ``window`` values per sample, and ``y`` with
        shape ``(n_samples, 1)`` holding the value that follows each window.
    """
    X = []
    y = []
    # A window starting at `start` needs a target at row `start + window`, so the
    # last usable start is len - window - 1, i.e. range(len - window). The former
    # extra "- 1" dropped the most recent sample.
    for start in range(len(EMA_signal_df) - window):
        stop = start + window
        X.append(EMA_signal_df.iloc[start:stop, feature_col_number].to_numpy())
        y.append(EMA_signal_df.iloc[stop, target_col_number])
    return np.array(X), np.array(y).reshape(-1, 1)

In [None]:
# Predict closing prices from a sliding window of previous closing prices.
# The window is currently 1 day; try a size anywhere from 1 to 10 and see how
# the model performance changes.
window_size = 1

# Column index 0 of all_returns_df is `S&P 500 close`; it serves as both
# the feature and the target.
feature_column = 0
target_column = 0
# Only the most recent 200 rows are windowed.
X, y = window_data(all_returns_df.tail(200), window_size, feature_column, target_column)

In [None]:
# Use 70% of the data for training and the remaining 30% for testing.
# Train takes rows [0, split) and test takes rows [split, end), so every
# sample lands in exactly one of the two sets (slicing to `split - 1`
# silently discarded one sample).
split = int(0.7 * len(X))
X_train = X[:split]
X_test = X[split:]
y_train = y[:split]
y_test = y[split:]

In [None]:
# Use MinMaxScaler to scale the data. Each scaler is fit on the TRAINING
# split only, so no information from the test period leaks into the model;
# test values may therefore fall slightly outside [0, 1].
from sklearn.preprocessing import MinMaxScaler

# Separate scaler for the features.
x_scaler = MinMaxScaler().fit(X_train)
X_train = x_scaler.transform(X_train)
X_test = x_scaler.transform(X_test)

# `scaler` is the target scaler; later cells call scaler.inverse_transform
# to map predictions back to prices.
scaler = MinMaxScaler().fit(y_train)
y_train = scaler.transform(y_train)
y_test = scaler.transform(y_test)

In [None]:
# Give the features the 3-D layout (samples, time steps, 1) that the LSTM
# input layer below is declared with.
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)
# Show the first sample of each split as a sanity check.
print (f"X_train sample values:\n{X_train[:1]} \n")
print (f"X_test sample values:\n{X_test[:1]}")

In [None]:
#initialize tensorflow and keras models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [None]:
# Build the LSTM model: three stacked LSTM layers, each followed by dropout,
# and a single-unit dense output.
number_units = 10
dropout_fraction = 0.2

model = Sequential()
for layer in (
    # Layer 1: declares the input shape (time steps, 1 feature)
    LSTM(units=number_units, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(dropout_fraction),
    # Layer 2: still returns full sequences for the next LSTM
    LSTM(units=number_units, return_sequences=True),
    Dropout(dropout_fraction),
    # Layer 3: returns only the last step
    LSTM(units=number_units),
    Dropout(dropout_fraction),
    # Output layer
    Dense(1),
):
    model.add(layer)

In [None]:
# Compile the model with the Adam optimizer and mean-squared-error loss.
model.compile(optimizer="adam", loss="mean_squared_error")

In [None]:
# Summarize the model (calls model.summary()).
model.summary()

In [None]:
# Train the model for 20 epochs, one sample per batch. shuffle=False keeps the
# samples in their original (chronological) order.
model.fit(X_train, y_train, epochs=20, shuffle=False, batch_size=1, verbose=1)

In [None]:
# Evaluate the model on the held-out test split (loss is the MSE configured at compile time).
model.evaluate(X_test, y_test)

In [None]:
# Predict the (scaled) target for every test sample.
predicted = model.predict(X_test)

In [None]:
# Recover the original prices instead of the scaled version.
# `scaler` was last fit on the target values in the scaling cell, so
# inverse_transform maps both arrays back to price units.
predicted_prices = scaler.inverse_transform(predicted)
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

In [None]:
# Create a DataFrame of real and predicted values. ravel() flattens the
# (n, 1) arrays into 1-D columns; the index is a plain 0..n-1 range.
stocks = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
})
stocks.tail()

In [None]:
# Plot the real vs predicted values as a line chart; `stock_plot` is reused
# by the dashboard cell.
stock_plot = stocks.hvplot(title ="Real vs Predicted S&P 500 Closing Price", value_label = 'Price (USD)')
#stock_plot.set_ylabel("Price (USD)")
#stock_plot.set_xlabel("Time (Days)")
stock_plot

In [None]:
# Initialize Streaming DataFrame for Market Price Data Showing Backtested Signals

#data_stream = Stream()
#data = pd.DataFrame(all_returns_df['S&P 500 close'])
#data_stream_df = streamz_df(data_stream, example = data)
## Initialize Streaming DataFrame for Signals
#signals_stream = Stream()
#signals_data=trading_signals_df
#signals_stream_df = streamz_df(signals_stream, example=signals_data)
#data_stream_df.hvplot.scatter() 
#signals_stream_df.hvplot.scatter()


In [None]:
#def emit(i):
#    df = pd.DataFrame({'data_stream': [i], 'data':[i]})
#    data_stream.emit(df)
#for i in range(20):
#    emit(i)

In [32]:
#Plot EMA Signals Alone Against SP500 Price
# Visualize exit position relative to close price
# Options shared by the entry and exit scatter markers.
_ema_marker_opts = dict(size=200, legend=False, ylabel='Price in $', width=1000, height=400)

# Exit points (EMA Entry/Exit == -1) drawn at the close price
exit = EMA_signal_df.loc[
    EMA_signal_df['EMA Entry/Exit'] == -1.0, 'S&P 500 close'
].hvplot.scatter(color='red', marker='v', **_ema_marker_opts)

# Entry points (EMA Entry/Exit == 1) drawn at the close price
entry = EMA_signal_df.loc[
    EMA_signal_df['EMA Entry/Exit'] == 1.0, 'S&P 500 close'
].hvplot.scatter(color='green', marker='^', **_ema_marker_opts)

# Close price of the investment as a light gray line
security_close = EMA_signal_df[['S&P 500 close']].hvplot(
    line_color='lightgray', ylabel='Price in $', width=1000, height=400
)

# Both exponential moving averages
moving_avgs = EMA_signal_df[['EWM50', 'EWM100']].hvplot(
    ylabel='Price in $', width=1000, height=400
)

# Overlay everything into one figure and display it
sp500_signals_plot = security_close * moving_avgs * entry * exit
sp500_signals_plot.opts(xaxis=None, title ="Entry/Exit S&P 500 Signals")

In [33]:
#Plot Sentiment Signals Alone Against SP500 Price
# Visualize exit position relative to close price
# Options shared by the entry and exit scatter markers.
_sentiment_marker_opts = dict(size=200, legend=False, ylabel='Price in $', width=1000, height=400)

# Exit points (Sentiment Entry/Exit == -1) drawn at the close price
exit = trading_signals_df.loc[
    trading_signals_df['Sentiment Entry/Exit'] == -1.0, 'S&P 500 close'
].hvplot.scatter(color='red', marker='v', **_sentiment_marker_opts)

# Entry points (Sentiment Entry/Exit == 1) drawn at the close price
entry = trading_signals_df.loc[
    trading_signals_df['Sentiment Entry/Exit'] == 1.0, 'S&P 500 close'
].hvplot.scatter(color='green', marker='^', **_sentiment_marker_opts)

# Close price of the investment as a black line
security_close = trading_signals_df[['S&P 500 close']].hvplot(
    line_color='black', ylabel='Price in $', width=1000, height=400
)

# Overlay everything into one figure and display it
sentiment_sp500_plot = security_close * entry * exit
sentiment_sp500_plot.opts(xaxis=None, title = 'Sentiment Signal Against S&P 500')

In [43]:
#Plot Case Count Signals Alone Against SP500 Price
# Visualize exit position relative to close price
# Options shared by the entry and exit scatter markers.
_case_marker_opts = dict(size=200, legend=False, ylabel='Price in $', width=1000, height=400)

# Exit points (Case Entry/Exit == -1) drawn at the close price
exit = trading_signals_df.loc[
    trading_signals_df['Case Entry/Exit'] == -1.0, 'S&P 500 close'
].hvplot.scatter(color='red', marker='v', **_case_marker_opts)

# Entry points (Case Entry/Exit == 1) drawn at the close price
entry = trading_signals_df.loc[
    trading_signals_df['Case Entry/Exit'] == 1.0, 'S&P 500 close'
].hvplot.scatter(color='green', marker='^', **_case_marker_opts)

# Close price of the investment as a black line
security_close = trading_signals_df[['S&P 500 close']].hvplot(
    line_color='black', ylabel='Price in $', width=1000, height=400
)

# Overlay everything into one figure and display it
case_count_sp500_plot = security_close * entry * exit
case_count_sp500_plot.opts(xaxis=None, title = 'Case Count Signals Against S&P 500')

In [35]:
# Plot the overall (combined) signal against the S&P 500 price.
# NOTE(review): only changes of exactly -1 / +1 are drawn; if the combined
# signal can jump by more than one step between rows, those rows get no
# marker -- confirm that this is intended.

# Visualize exit position relative to close price
exit = trading_signals_df[trading_signals_df['Overall Entry/Exit'] == -1.0]['S&P 500 close'].hvplot.scatter(
    color='red',
    marker='v',
    size=200,
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize entry position relative to close price
entry = trading_signals_df[trading_signals_df['Overall Entry/Exit'] == 1.0]['S&P 500 close'].hvplot.scatter(
    color='green',
    marker='^',
    size=200,
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize close price for the investment.
# The axis-label option is `ylabel`, as in every other plot of this notebook
# (`y_label` is not the option name used elsewhere).
security_close = trading_signals_df[['S&P 500 close']].hvplot(
    line_color='black',
    ylabel='Price in $',
    width=1000,
    height=400
)
# Overlay plots; the variable name is kept because the dashboard cell refers to it.
overal_signal_plot = security_close * entry * exit
overal_signal_plot.opts(xaxis=None, title = 'Overall Signal Against S&P 500')



In [47]:
 ##Dashboard
# One pn.Column per dashboard tab: a markdown heading followed by the plots
# built in the cells above.
predicted_column = pn.Column(
    "## Predicted Closing Price of the S&P 500 vs the Real Closing Price",
    stock_plot
    )

# Raw case counts plus the case-count entry/exit overlay.
covid_column = pn.Column(
    "## Covid-19 Cases in United States and Covid Sentiment",
    case_count_signals_df['Cases'].hvplot(title = "Covid Cases in USA"),
    case_count_sp500_plot.opts(xaxis=None, title = 'Case Count Signals Against S&P 500')
    )

# EMA crossover overlay.
sp500_column = pn.Column(
    "## S&P 500 for 20 Years of Data",
    sp500_signals_plot.opts(xaxis=None)
    )

# Combined-signal overlay; note that the title is set again here.
combined_column = pn.Column(
    '## Combined Signals Against the S&P 500', 
    overal_signal_plot.opts(xaxis=None, title = 'Overal Signal Against SP500 Price',)
)
# Sentiment signal plot and the raw case-count signal.
covid_nlp_column = pn.Column(
    '## COVID-19 Sentiment & Signals',
    sentiment_signal_plot,
    case_count_signals_df['Case Count Signal'].hvplot(title = "Covid Signals")

)
# Sentiment entry/exit overlay.
sentiment_sp500_column = pn.Column(
    '## NLP Sentiment Signal with S&P 500',
    sentiment_sp500_plot.opts(xaxis=None, title = 'Sentiment Signal Against S&P 500')
)


# Assemble the tabs; each tuple is (tab title, column).
trading_dashboard = pn.Tabs(
    ('Predicted Price Model', predicted_column),
    ('S&P 500 Signals', sp500_column),
    ('COVID-19 NLP Sentiment',covid_nlp_column),
    ('NLP Signal', sentiment_sp500_column),
    ('COVID Case Count & Signal', covid_column),
    ('Combined Signal', combined_column)
)

In [48]:
# Mark the dashboard as servable and display it as the cell result.
# (The library is imported as `pn`, so a bare `panel` name is not bound here.)
trading_dashboard.servable()

