#### Imports

In [52]:
#supressing Pandas future warnings when indexing on datetime : https://stackoverflow.com/questions/15777951/how-to-suppress-pandas-future-warning
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import requests
from configparser import ConfigParser
from io import BytesIO
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout, GRU
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.callbacks import EarlyStopping

### Historical Daily Function (From Viz_EDA_Main Notebook)

In [None]:
def alpha_historical_daily(ticker, start_date, end_date):
    # Alpha vantage API request for daily adjusted prices
    token = alpha_auth_key
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&outputsize=full&symbol={ticker}&interval=5min&apikey={token}&datatype=csv'
    r = requests.get(url)

    # Converting from bytes to pandas dataframe : https://stackoverflow.com/questions/47379476/how-to-convert-bytes-data-into-a-python-pandas-dataframe
    df_daily = pd.read_csv(BytesIO(r.content))
    
    # Creating Sorted Datetime index based on inputted date range
    df_daily.set_index('timestamp', inplace=True)
    df_daily = df_daily[df_daily.index >= start_date]
    df_daily = df_daily[df_daily.index <= end_date]
    df_daily.index = pd.to_datetime(df_daily.index)
    df_daily.sort_index(inplace=True)
    df_daily.drop(columns=['dividend_amount', 'split_coefficient'], inplace=True)
    
    # Engineering Cumulative Return and Total Position Column for $100,000 Investment
    daily_returns = df_daily['adjusted_close'] / df_daily['adjusted_close'].iloc[0]      
    df_daily['cum_return'] = daily_returns
    
    # Position column to track performane (Using $100,000 as initial outlay investment)
    allocation = 100_000
    df_daily['position'] = allocation * df_daily['cum_return']
    
    # Engineering Moving Average 10 & 20 days columns
    df_daily['MA10'] = df_daily['adjusted_close'].rolling(10).mean()
    df_daily['MA20'] = df_daily['adjusted_close'].rolling(20).mean()
    
    # Engineering RSI-14 Indicator and 70/30 Bands
    ticker_rsi = df_daily.ta.rsi(close='adjusted_close', length=14, signal_indicators=True, xa=70, xb=30)
    
    # Creating Final DataFrame 
    ticker_final = pd.concat([df_daily.reset_index(drop=True),ticker_rsi.reset_index(drop=True)], axis=1)
    
    # Finalizing with a new Cumulative Return % column
    ticker_final['cum_return_percent'] = ticker_final.cum_return -1
    
    # Setting Datetime index 
    ticker_final.index = df_daily.index
    return ticker_final

### Quiver Quant Alt Data Function (From Quant_Overview Notebook)

In [None]:
def alt_dataframe(ticker, start_date, end_date):
    wsb_data = quiver.wallstreetbets(ticker ='FB')
    wsb_data['Date'] = pd.to_datetime(wsb_data['Date'])
    wsb_data.set_index('Date', inplace=True)
    congress_data = quiver.congress_trading(ticker='FB', politician=False).sort_values(by='ReportDate', ascending=True)[['ReportDate', 'Transaction']]
    congress_data['Transaction'] = congress_data['Transaction'].map({'Purchase' : 1, 'Sale' : -1})
    congress_data['ReportDate'] = pd.to_datetime(congress_data['ReportDate'])
    congress_data.set_index('ReportDate', inplace=True)
    otc_data = quiver.offexchange(ticker='FB')[['Date', 'DPI']].sort_values(by='Date', ascending=True)
    otc_data['Date'] = pd.to_datetime(otc_data['Date'])
    otc_data.set_index('Date', inplace=True)
    df = otc_data.join(congress_data)
    df2 = df.join(wsb_data)
    df2 = df2[df2.index >= f'{start_date}']
    df2 = df2[df2.index <= f'{end_date}']
    df2['Transaction'].fillna(0, inplace=True)
    return df2

### Main DataFrame for Modeling 

In [238]:
ticker = 'FB'
start_date = '2020-06-01'
end_date = '2021-12-10'

def single_stock_modeling_df(ticker, start_date, end_date):
    #historical daily prices from alpha vantage
    model_df = alpha_historical_daily(ticker, start_date, end_date)
    #dropping close since we are predicting adjusted close, no need for returns or position columns anymore for modeling
    model_df.drop(columns=['close', 'cum_return', 'position', 'cum_return_percent'], inplace=True)
    #creating dataframe using quant quiver alt data for ticker
    model_df2 = alt_dataframe(ticker, start_date, end_date)
    #combining into one master dataframe for ticker
    main_model = model_df.join(model_df2)
    #starting at 21st row because of null values for MA20 before then
    main_model = main_model[21: ]
    #removing any nulls
    main_model.dropna(inplace=True)
    return main_model

In [None]:
ticker = 'FB'
start_date = '2020-06-01'
end_date = '2021-12-10'
final_model_df = single_stock_modeling_df(ticker, start_date, end_date)

https://api.quiverquant.com/beta/historical/wallstreetbets/FB


In [None]:
#confirming time oldest date on top
final_model_df.head()

Unnamed: 0,open,high,low,adjusted_close,volume,MA10,MA20,RSI_14,RSI_14_A_70,RSI_14_B_30,DPI,Transaction,Ticker,Mentions,Rank,Sentiment
2020-06-26,232.64,233.09,215.4,216.08,76343939,234.565,232.8655,32.653059,0,0,0.564179,0.0,FB,686.0,2.0,-0.02104
2020-06-29,209.75,220.75,207.11,220.64,58514281,233.379,232.302,38.481385,0,0,0.59648,0.0,FB,726.0,2.0,-0.010247
2020-06-30,220.59,227.5,218.47,227.07,33927068,232.521,232.0195,45.627008,0,0,0.598574,0.0,FB,278.0,5.0,0.024049
2020-07-01,228.5,239.0,227.56,237.55,43403869,232.723,232.389,54.835098,0,0,0.563958,0.0,FB,497.0,3.0,0.00646
2020-07-02,239.0,240.0,232.61,233.42,30635970,232.471,232.7455,51.158251,0,0,0.589858,1.0,FB,263.0,6.0,-0.016154


In [None]:
#confirming time most recent date on top
final_model_df.tail()

Unnamed: 0,open,high,low,adjusted_close,volume,MA10,MA20,RSI_14,RSI_14_A_70,RSI_14_B_30,DPI,Transaction,Ticker,Mentions,Rank,Sentiment
2021-12-03,313.73,313.75,299.5,306.84,27471010,328.806,333.4715,33.588095,0,0,0.358649,0.0,FB,69.0,15.0,-0.03187
2021-12-06,308.13,320.1,306.34,317.87,21758340,326.063,332.3085,42.315873,0,0,0.552784,0.0,FB,48.0,18.0,-0.106515
2021-12-08,325.0,332.75,323.07,330.56,19937722,323.574,331.2775,50.717426,0,0,0.526135,0.0,FB,51.0,16.0,0.02629
2021-12-09,329.54,336.13,328.0,329.82,16879168,322.45,331.3865,50.244715,0,0,0.543867,0.0,FB,37.0,17.0,0.044022
2021-12-10,332.555,335.03,326.37,329.75,14527969,322.113,331.487,50.197054,0,0,0.475808,-1.0,FB,25.0,22.0,-0.129272


In [None]:
final_model_df.shape

(368, 16)

In [None]:
#making sure no nulls for modeling
final_model_df.isna().sum()

open              0
high              0
low               0
adjusted_close    0
volume            0
MA10              0
MA20              0
RSI_14            0
RSI_14_A_70       0
RSI_14_B_30       0
DPI               0
Transaction       0
Ticker            0
Mentions          0
Rank              0
Sentiment         0
dtype: int64

In [None]:
#create csv for modeling in Google Colab
final_model_df.to_csv('FB_final_model.csv')

### Incorporating NASDAQ Retail Activity Tracker

In [None]:
import quandl
from configparser import ConfigParser
parser = ConfigParser()
_ = parser.read('notebook.cfg')
quandl.ApiConfig.api_key = parser.get('my_api', 'nasd_key')

In [None]:
def nasdaq_retail(ticker):
    retail_tracker = quandl.get_table('NDAQ/RTAT', ticker=f'{ticker}', paginate=True)
    # Sorted date in retail_tracker in ascending order
    retail_tracker = retail_tracker.sort_values(by='date', ascending=True, na_position='first')
    # Deleted column ticker from retail_tracker
    retail_tracker.drop('ticker', axis=1, inplace=True)
    # Renamed sentiment to nasdaq_sentiment in retail_tracker
    retail_tracker.rename(columns={"sentiment": "nasdaq_sentiment"}, inplace=True)
    # Renamed activity to nasdaq_activity in retail_tracker
    retail_tracker.rename(columns={"activity": "nasdaq_activity"}, inplace=True)
    # Setting date as index
    retail_tracker.set_index('date', inplace=True)
    return retail_tracker

In [239]:
ticker = 'FB'
start_date = '2020-06-01'
end_date = '2021-12-10'
final_model_df = single_stock_modeling_df(ticker, start_date, end_date)
retail_tracker_df = nasdaq_retail(ticker)

https://api.quiverquant.com/beta/historical/wallstreetbets/FB


In [240]:
final_model_with_nasdaq = final_model_df.join(retail_tracker_df)

In [244]:
final_model_with_nasdaq.to_csv('Meta_ModelingDF_PlusNDAQ.csv')

In [245]:
final_model_with_nasdaq.head(1)

Unnamed: 0,open,high,low,adjusted_close,volume,MA10,MA20,RSI_14,RSI_14_A_70,RSI_14_B_30,DPI,Transaction,Ticker,Mentions,Rank,Sentiment,nasdaq_activity,nasdaq_sentiment
2020-06-26,232.64,233.09,215.4,216.08,76343939,234.565,232.8655,32.653059,0,0,0.564179,0.0,FB,686.0,2.0,-0.02104,0.0404,0.0


## Logistic Regression Model - Using All Features

In [192]:
# read in saved FB file
df_main = pd.read_csv('FB_final_model.csv')

In [193]:
# set Unnammed: 0 as index datetime
df_main.set_index('Unnamed: 0', inplace=True)
df_main.index = pd.to_datetime(df_main.index)
# drop ticker column , not needed for model
df_main.drop(columns='Ticker', inplace=True)

In [194]:
#de-trending numerical data by using percent change
df_main[['open', 'high','low','adjusted_close','volume','MA10','MA20', 'RSI_14','DPI', 'Mentions','Rank','Sentiment']] = df_main[['open', 'high','low','adjusted_close','volume','MA10','MA20',
                   'RSI_14','DPI','Mentions','Rank','Sentiment']].pct_change()
#dropping the first row as all 0 values
df_main = df_main.iloc[1: , :]

In [195]:
# creating target column where the adjusted closing price is compared to the previous day, 1 for higher, 0 for lower
df_main['Target'] = (df_main['adjusted_close'] > 0).astype(int)
df_main.Target = df_main.Target.shift(-1)
df_main.dropna(inplace=True)

### Train/Test Split & MinMaxScaler

In [196]:
features = [col for col in df_main.columns if not col == 'Target']
X = df_main[features]
minmax = MinMaxScaler()
X = minmax.fit_transform(X)
y = df_main['Target']

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, train_size=0.8, random_state=0)

In [197]:
logreg = LogisticRegression()

In [198]:
logreg.fit(X_train, y_train)

LogisticRegression()

In [199]:
logreg.score(X_train, y_train)

0.5616438356164384

In [200]:
logreg.score(X_test, y_test)

0.6351351351351351

## Logistic Regression - Using only Price & Volume

In [179]:
# read in saved FB file
df_main2 = pd.read_csv('FB_final_model.csv')

In [180]:
# set Unnammed: 0 as index datetime
df_main2.set_index('Unnamed: 0', inplace=True)
df_main2.index = pd.to_datetime(df_main2.index)
# drop ticker column , not needed for model
df_main2.drop(columns='Ticker', inplace=True)

In [181]:
#de-trending numerical data by using percent change
df_main2 = df_main2[['open', 'adjusted_close','volume']].pct_change()
#dropping the first row as all 0 values
df_main2 = df_main2.iloc[1: , :]

In [182]:
# creating target column where the adjusted closing price is compared to the previous day, 1 for higher, 0 for lower
df_main2['Target'] = (df_main2['adjusted_close'] > 0).astype(int)
df_main2.Target = df_main2.Target.shift(-1)
df_main2.dropna(inplace=True)

In [183]:
features = [col for col in df_main2.columns if not col == 'Target']
X2 = df_main2[features]
minmax = MinMaxScaler()
X2 = minmax.fit_transform(X2)
y2 = df_main2['Target']

X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, shuffle=False, train_size=0.8, random_state=0)

In [184]:
logreg2 = LogisticRegression()

In [185]:
logreg2.fit(X_train2, y_train2)

LogisticRegression()

In [186]:
logreg2.score(X_train2, y_train2)

0.5342465753424658

In [187]:
logreg2.score(X_test2, y_test2)

0.581081081081081

In [188]:
logreg2.coef_

array([[-0.9983874 , -0.25194478, -0.05792579]])

## Logistic Regression - Using All Features + NASDAQ Retail Sentiment

In [281]:
# read in saved FB file
df_main3 = pd.read_csv('Meta_ModelingDF_PlusNDAQ.csv')

In [282]:
# set Unnammed: 0 as index datetime
df_main3.set_index('Unnamed: 0', inplace=True)
df_main3.index = pd.to_datetime(df_main3.index)
# drop ticker column , not needed for model
df_main3.drop(columns='Ticker', inplace=True)

In [283]:
#de-trending numerical data by using percent change
df_main3= df_main3[['open', 'high','low','adjusted_close','volume','MA10','MA20',
                   'RSI_14','DPI','Mentions','Rank','Sentiment', 'nasdaq_sentiment']].pct_change()
#dropping the first row as all 0 values
df_main3 = df_main3.iloc[1: , :]

In [284]:
# creating target column where the adjusted closing price is compared to the previous day, 1 for higher, 0 for lower
df_main3['Target'] = (df_main3['adjusted_close'] > 0).astype(int)
df_main3.Target = df_main3.Target.shift(-1)
df_main3.dropna(inplace=True)

### Train/Test Split & MinMaxScaler

In [285]:
#Remove -inf values causing train/test/split error
df_main3.replace([np.inf, -np.inf], 0, inplace=True)

In [286]:
features3 = [col for col in df_main3.columns if not col == 'Target']
X3 = df_main3[features3]
minmax = MinMaxScaler()
X3 = minmax.fit_transform(X3)
y3 = df_main3['Target']

X_train3, X_test3, y_train3, y_test3 = train_test_split(X3, y3, shuffle=False, train_size=0.8, random_state=0)

In [287]:
logreg = LogisticRegression()

In [288]:
logreg.fit(X_train3, y_train3)

LogisticRegression()

In [289]:
logreg.score(X_train3, y_train3)

0.5819672131147541

In [290]:
logreg.score(X_test3, y_test3)

0.6065573770491803

## Logistic Regression - All Features + NASDAQ Retail
## (1-Week Outlook)

In [315]:
# read in saved FB file
df_main4 = pd.read_csv('Meta_ModelingDF_PlusNDAQ.csv')

In [316]:
# set Unnammed: 0 as index datetime
df_main4.set_index('Unnamed: 0', inplace=True)
df_main4.index = pd.to_datetime(df_main4.index)
# drop ticker column , not needed for model
df_main4.drop(columns='Ticker', inplace=True)

In [317]:
#de-trending numerical data by using percent change
df_main4= df_main4[['open', 'high','low','adjusted_close','volume','MA10','MA20',
                   'RSI_14','DPI','Mentions','Rank','Sentiment', 'nasdaq_sentiment']].pct_change(3)
#dropping the first row as all 0 values
df_main4 = df_main4.iloc[3: , :]

In [318]:
# creating target column where the adjusted closing price is compared to the previous day, 1 for higher, 0 for lower
df_main4['Target'] = (df_main4['adjusted_close'] > 0).astype(int)
df_main4.Target = df_main4.Target.shift(-3)
df_main4.dropna(inplace=True)

### Train/Test Split & MinMaxScaler

In [319]:
#Remove -inf values causing train/test/split error
df_main4.replace([np.inf, -np.inf], 0, inplace=True)

In [320]:
features4 = [col for col in df_main4.columns if not col == 'Target']
X4 = df_main4[features4]
minmax = MinMaxScaler()
X4 = minmax.fit_transform(X4)
y4 = df_main4['Target']

X_train4, X_test4, y_train4, y_test4 = train_test_split(X4, y4, shuffle=False, train_size=0.8, random_state=0)

In [321]:
logreg4 = LogisticRegression()

In [322]:
logreg4.fit(X_train4, y_train4)

LogisticRegression()

In [323]:
logreg.score(X_train4, y_train4)

0.5731225296442688

In [324]:
logreg.score(X_test4, y_test4)

0.546875