In [None]:
!pip install yfinance
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras import *
from keras.layers import *

In [None]:
# Load the ticker list, then collect three aligned per-ticker arrays:
#   indicator_df -> selected balance-sheet rows (first column of the sheet)
#   labels_df    -> last row of the recommendations table, minus its first field
#   stock_df     -> 'Close' prices from a one-month history request
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
path = '/Users/mmaiti/Downloads/us_symbols.csv'
df = pd.read_csv(path)
stocks = [str(stock).strip() for stock in df.loc[:, 'ticker']]
alltickers = " ".join(stocks)
tickers = yf.Tickers(alltickers)
# Balance-sheet rows used as the indicator vector; a later cell indexes into
# this vector by position, so the order here matters.
cols = ['Ordinary Shares Number', 'Total Debt', 'Working Capital', 'Total Assets',
        'Total Liabilities Net Minority Interest', 'Inventory', 'Cash And Cash Equivalents']
print('done loading stocks')

indicator_df = []
labels_df = []
stock_df = []
# Ticker -> repr of the exception that made us skip it, kept for inspection.
failed_tickers = {}

ctr = 0
for stock in stocks:
    try:
        ticker = tickers.tickers[stock]
        t1 = ticker.balance_sheet.loc[cols,:].iloc[:, 0].to_numpy()
        t2 = ticker.recommendations.iloc[-1,:].iloc[1:].to_numpy()
        t3 = ticker.history(period='1mo').loc[:,'Close'].to_numpy()
    except Exception as exc:
        # Best-effort download: skip tickers with missing data, but remember why.
        # Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        failed_tickers[stock] = repr(exc)
        continue
    # Append only after all three lookups succeeded so the lists stay index-aligned.
    indicator_df.append(t1)
    labels_df.append(t2)
    stock_df.append(t3)
    ctr += 1

print(f'loaded {ctr} of {len(stocks)} tickers; skipped {len(failed_tickers)}')

In [None]:
# Keep only tickers whose price series has the common length so the series can
# be stacked into a rectangular array. The number of rows returned for a
# one-month window is not fixed, so the expected length is taken from the data
# (the most frequent length) instead of being hard-coded.
series_lengths = np.array([len(series) for series in stock_df], dtype=int)
if series_lengths.size:
    distinct_lengths, length_counts = np.unique(series_lengths, return_counts=True)
    expected_len = int(distinct_lengths[np.argmax(length_counts)])
else:
    # Nothing was downloaded; every list below stays empty.
    expected_len = 0
keep_mask = series_lengths == expected_len
bad_indices = np.flatnonzero(~keep_mask)
# One boolean mask drives all three filters, keeping the lists index-aligned.
stock_df = [item for item, keep in zip(stock_df, keep_mask) if keep]
indicator_df = [item for item, keep in zip(indicator_df, keep_mask) if keep]
labels_df = [item for item, keep in zip(labels_df, keep_mask) if keep]

In [None]:
# Stack the per-ticker indicator vectors into one array and report its shape.
X1 = np.asarray(indicator_df)
print(np.shape(X1))

In [None]:
# Stack the per-ticker closing-price series into one array and report its shape.
X2 = np.asarray(stock_df)
print(np.shape(X2))

In [None]:
# Stack the per-ticker label rows into one array and report its shape.
Y = np.asarray(labels_df)
print(np.shape(Y))

In [None]:
# Discard every sample whose label row sums to zero; the same row mask is
# applied to the labels and to both feature arrays so they stay aligned.
nonzero_label_rows = Y.sum(axis=1) != 0
Y_clean, X1_clean, X2_clean = (
    Y[nonzero_label_rows],
    X1[nonzero_label_rows],
    X2[nonzero_label_rows],
)

In [None]:
# --- Indicators: express every balance-sheet item as a fraction of total assets.
# Position 3 is 'Total Assets' in the `cols` list used when the data was loaded.
TOTAL_ASSETS_COL = 3
total_assets = X1_clean[:, [TOTAL_ASSETS_COL]]
# A zero denominator would yield +/-inf; use NaN instead so the later
# NaN-to-zero clean-up neutralises those rows rather than feeding inf to the model.
total_assets = np.where(total_assets == 0, np.nan, total_assets)
X1 = X1_clean / total_assets

# --- Prices: min-max scale each series to [0, 1] row by row.
price_min = np.min(X2_clean, axis=1, keepdims=True)
price_range = np.max(X2_clean, axis=1, keepdims=True) - price_min
# A perfectly flat series has zero range; divide by 1 so it maps to all zeros
# instead of 0/0 = NaN.
price_range = np.where(price_range == 0, 1, price_range)
X2 = (X2_clean - price_min) / price_range

# --- Labels: min-max scale each row, then renormalise so the row sums to 1.
label_min = np.min(Y_clean, axis=1, keepdims=True)
label_range = np.max(Y_clean, axis=1, keepdims=True) - label_min
flat_labels = label_range == 0
Y = (Y_clean - label_min) / np.where(flat_labels, 1, label_range)
# A row whose entries are all equal carries no preference; give it equal weights
# (which renormalise to a uniform distribution) instead of 0/0 = NaN.
Y = np.where(flat_labels, 1.0, Y)
Y = Y / np.sum(Y, axis=1, keepdims=True)

In [None]:
def build_model(n_timesteps=None, n_indicators=None, n_outputs=None):
  """Build the two-input network: an LSTM over the price series merged with the indicator vector.

  Args:
    n_timesteps: length of each price series. Defaults to ``X2.shape[1]``.
    n_indicators: number of indicator features. Defaults to ``X1.shape[1]``.
    n_outputs: width of the softmax output. Defaults to ``Y.shape[1]``.

  Returns:
    An uncompiled Keras ``Model`` taking ``[price_series, indicators]`` as inputs.
  """
  # Fall back to the notebook-level arrays so existing `build_model()` calls keep working.
  if n_timesteps is None:
    n_timesteps = X2.shape[1]
  if n_indicators is None:
    n_indicators = X1.shape[1]
  if n_outputs is None:
    n_outputs = Y.shape[1]

  # Price branch: one scalar per timestep, summarised by a 32-unit LSTM.
  price_input = Input(shape=(n_timesteps, 1))
  price_encoding = LSTM(32, activation="tanh")(price_input)

  # Indicator branch: fed in as-is and joined with the LSTM summary.
  indicator_input = Input(shape=(n_indicators,))
  merged = concatenate([price_encoding, indicator_input], axis=1)

  hidden = Dense(32, activation="relu")(merged)
  outputs = Dense(n_outputs, activation="softmax")(hidden)
  return Model(inputs=[price_input, indicator_input], outputs=outputs)

In [None]:
# Instantiate the network with the default (notebook-level) sizes and print its layer summary.
model = build_model()
model.summary()

In [None]:
# Positional 80/20 split: the first 80% of rows train, the remainder validates.
# The same cut index is used for all three arrays so samples stay aligned.
split = int(0.8 * len(X2))
X_t_prices, X_v_prices = X2[:split], X2[split:]
X_t_fin, X_v_fin = X1[:split], X1[split:]
y_t, y_v = Y[:split], Y[split:]

In [None]:
# Cast to float32 first, then zero out NaN and +/-inf.
# Order matters: nan_to_num leaves non-float (e.g. object) arrays untouched, and
# its default maps +/-inf to the largest float64, which overflows back to inf
# when cast to float32 afterwards.
X_t_fin = np.nan_to_num(X_t_fin.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)
X_v_fin = np.nan_to_num(X_v_fin.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)
# The price inputs get the same treatment so a missing or degenerate price
# cannot turn the loss into NaN.
X_t_prices = np.nan_to_num(X_t_prices.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)
X_v_prices = np.nan_to_num(X_v_prices.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)
# Targets are only cast; NaN targets are deliberately not rewritten here.
y_t = y_t.astype(np.float32)
y_v = y_v.astype(np.float32)

In [None]:
# Adam optimiser, mean-squared-error loss, mean-absolute-error reported as a metric.
# NOTE(review): the output layer is a softmax and the targets are normalised to sum to 1;
# a distribution loss (e.g. categorical cross-entropy) may be a better fit — confirm intent.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

In [None]:
# Train on the 80% split and evaluate on the held-out rows after every epoch.
# Input order is [price series, indicators], matching the model's input list.
history = model.fit(
    x=[X_t_prices, X_t_fin],
    y=y_t,
    validation_data=([X_v_prices, X_v_fin], y_v),
    batch_size=32,
    epochs=100,
)

In [None]:
# Draw the training and validation loss curves (in that order) on one figure.
for curve_name in ("loss", "val_loss"):
    plt.plot(history.history[curve_name])
plt.show()

In [None]:
# Persist the trained model to a .keras file in the current working directory.
model.save("stock_vetter_rnn.keras")