<h1 style="color:blue"> Chapter 12 </h1>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
%matplotlib inline

<h2 style="color:red"> Preparing Data </h2>

In [None]:
df = pd.read_csv("stock_predn.csv")

In [None]:
all_columns=['payout', 'roe', 'DE', 'dps', 'PE', 'PB', 'EPS', 'cashEPS', 
                 'CR', 'QR', 'depnNFA', 'invTO', 'invTA','margin', 'ACP', 
                 'CR_g', 'QR_g', 'invTO_g', 'invTA_g', 'margin_g','ACP_g',
                 'DE_g', 'payout_g', 'depnNFA_g', 'roe_g', 'dps_g', 'eps_g','returns_next_num','year']       

In [None]:
feature_columns = ['payout', 'roe', 'DE', 'dps', 'PE', 'PB', 'EPS', 'cashEPS', 
                 'CR', 'QR', 'depnNFA', 'invTO', 'invTA','margin', 'ACP', 
                 'CR_g', 'QR_g', 'invTO_g', 'invTA_g', 'margin_g','ACP_g',
                 'DE_g', 'payout_g', 'depnNFA_g', 'roe_g', 'dps_g', 'eps_g']

In [None]:
df1 = df[all_columns].dropna()

In [None]:
# Hold out the rows with year 2020 as the test set; all other rows form the training set.
X_train = df1.loc[df1['year'] != 2020]
X_test = df1.loc[df1['year'] == 2020]

In [None]:
X_train=X_train[feature_columns]
X_train.shape

In [None]:
X_test=X_test[feature_columns]
X_test.shape

In [None]:
y_train = df1.query('year!=2020').returns_next_num
y_train.shape

In [None]:
y_test = df1.query('year==2020').returns_next_num
y_test.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
scaler = MinMaxScaler()

In [None]:
X_train =scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

<h2 style="color:red"> Regression Models </h2>

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
lr = LinearRegression()
lr.fit(X_train,y_train)

In [None]:
y_pred = lr.predict(X_test)

In [None]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [None]:
mae = mean_absolute_error(y_test,y_pred)
rmse = np.sqrt(mean_squared_error(y_test,y_pred))

In [None]:
print(f"The mean absolute error is {mae:.3f}")
print(f"The root mean squared error is {rmse:.3f}")

In [None]:
from sklearn.linear_model import Lasso, Ridge

In [None]:
from sklearn.linear_model import Lasso
lasso = Lasso(alpha=1,random_state=13)
lasso.fit(X_train,y_train)

In [None]:
y_pred = lasso.predict(X_test)
mae = mean_absolute_error(y_test,y_pred)
rmse = np.sqrt(mean_squared_error(y_test,y_pred))
print(f"The mean absolute error is {mae:.3f}")
print(f"The root mean squared error is {rmse:.3f}")

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
dt = DecisionTreeRegressor(max_depth = 4, random_state=13)

In [None]:
dt.fit(X_train,y_train)
y_pred = dt.predict(X_test)

In [None]:
mae = mean_absolute_error(y_test,y_pred)
rmse = np.sqrt(mean_squared_error(y_test,y_pred))
print(f"The mean absolute error is {mae:.3f}")
print(f"The root mean squared error is {rmse:.3f}")

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
rf = RandomForestRegressor(n_estimators=500,max_depth=4,random_state=13)
rf.fit(X_train,y_train)

In [None]:
results = pd.Series(rf.feature_importances_,index=feature_columns).sort_values(ascending=False)
results.head(10)

<h2 style="color:red"> Predicting Stock Prices/Returns </h2>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# import yfinance as yf
# https://www.lfd.uci.edu/~gohlke/pythonlibs/#ta-lib
# pip install TA-Lib
import talib
%matplotlib inline

In [None]:
ril = pd.read_csv("RIL.csv")
ril.head(1)

In [None]:
ril.Date = pd.to_datetime(ril.Date,format="%d-%m-%Y")
ril.set_index('Date',inplace=True)
ril.head(2)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

In [None]:
# Autocorrelation plot of the adjusted close series.
# The trailing semicolon suppresses the Figure returned by plot_acf, which the
# notebook would otherwise render a second time as the cell's output value.
plot_acf(ril['Adj Close']);

In [None]:
from pmdarima.arima import ADFTest

In [None]:
ADFTest(alpha=0.05).should_diff(ril['Adj Close'].values)

In [None]:
# Two independent random walks (cumulative sums of standard-normal steps),
# used to illustrate spurious correlation between unrelated trending series.
# Seed NumPy's global generator so the walks, the plots and the correlation
# computed from them are identical on every Restart & Run All.
np.random.seed(13)
x = np.random.normal(size=10000).cumsum()/100
y = np.random.normal(size=10000).cumsum()/100

In [None]:
fig,ax = plt.subplots(1,2,figsize=(12,4))
ax[0].plot(x)
ax[1].plot(y);

In [None]:
np.corrcoef(x,y)

In [None]:
ril['ret']=ril['Adj Close'].pct_change()
ril=ril.dropna()
ril.head(2)

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# Divide into training and test dataset
ril_train = ril["2016-08-19":"2020-12-31"]
ril_test = ril["2021-01-01":]

In [None]:
# Sliding windows over the training prices: 3 consecutive prices are the
# features and the price that follows them is the target, one window per batch.
# NumPy arrays are passed instead of the date-indexed Series because the
# generator looks values up by integer position, which pandas deprecates for a
# Series whose index is not integer-based.
generator_train = TimeseriesGenerator(ril_train['Adj Close'].values,
                                      ril_train['Adj Close'].values, length=3,
                                      batch_size=1)
generator_train[0]

In [None]:
ril_train['Adj Close'].head(4)

In [None]:
# Same 3-price windows for the test period.
# NumPy arrays are passed instead of the date-indexed Series because the
# generator looks values up by integer position, which pandas deprecates for a
# Series whose index is not integer-based.
generator_test = TimeseriesGenerator(ril_test['Adj Close'].values,
                                     ril_test['Adj Close'].values, length=3,
                                     batch_size=1)

In [None]:
# Build the first model
tf.random.set_seed(13)
model_1 = tf.keras.Sequential([
    tf.keras.layers.Dense(64,activation='relu',input_dim=3),
    tf.keras.layers.Dense(1)
])

In [None]:
model_1.summary()

In [None]:
model_1.weights

In [None]:
# Minimise mean squared error and report mean absolute error alongside it.
# Keras documents `metrics` as a list; a bare string is rejected by newer Keras
# releases, and the RNN and CNN cells in this notebook already pass a list.
model_1.compile(optimizer='adam', loss='mse',metrics=['mae'])

In [None]:
history = model_1.fit(generator_train, epochs=10, validation_data=generator_test)

In [None]:
results = pd.DataFrame(history.history)
results.head(3)                                                               

In [None]:
results[['loss','val_loss']].plot();   

In [None]:
ril_pred = model_1.predict(generator_test)
ril_pred.shape

In [None]:
ril_pred = tf.squeeze(ril_pred)
ril_pred.numpy().shape

In [None]:
ril_test['Adj Close'].shape

In [None]:
plt.scatter(ril_pred,ril_test['Adj Close'][3:])
plt.xlabel("Predicted Value")
plt.ylabel("Actual Value")
plt.title("Prediction of RIL Prices using Deep Learning");

In [None]:
plt.plot(ril_pred,label='Predicted Price')
plt.plot(ril_test['Adj Close'][3:].values,label='Actual Price')
plt.legend();

In [None]:
model_1.evaluate(generator_test)

<h2 style="color:red"> Random Walk Model </h2>

In [None]:
# Random-walk benchmark: the forecast for each row is the previous row's value.
# shift(1) leaves the first row without a value, and dropna() removes that row.
ril_pred_rw = ril_test.shift(1).dropna()
ril_pred_rw.head(3)

In [None]:
# Overlay the random-walk forecast (the previous row's price) on the observed prices.
plt.plot(ril_pred_rw['Adj Close'],label='RW Predicted Price')
# ril_test holds the observed prices, so this series is the actual price,
# not a prediction; the legend label is corrected accordingly.
plt.plot(ril_test['Adj Close'],label='Actual Price')
plt.legend();

In [None]:
from tensorflow.keras.metrics import mean_absolute_error

In [None]:
# MAE of the random-walk forecast against the observed prices; the first test
# row is skipped because it has no forecast.
# This calls the Keras mean_absolute_error imported just above, which returns a
# tensor (hence .numpy()). That import replaces the scikit-learn function of
# the same name imported earlier in the notebook.
mean_absolute_error(ril_test['Adj Close'][1:],ril_pred_rw['Adj Close']).numpy()

<h2 style="color:red"> ANN Model </h2>

In [None]:
tf.random.set_seed(13)
model_2 = tf.keras.Sequential([
    tf.keras.layers.Dense(64,input_dim=3,activation='relu'),
    tf.keras.layers.Dense(64,activation='relu'),
    tf.keras.layers.Dense(1)
])

In [None]:
model_2.summary()

In [None]:
# Adam with a smaller learning rate (0.0005); MSE loss, MAE reported.
# Keras documents `metrics` as a list; a bare string is rejected by newer Keras
# releases, and the RNN and CNN cells in this notebook already pass a list.
model_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
                loss='mse',metrics=['mae'])

In [None]:
history_2 = model_2.fit(generator_train,epochs=50, 
                        validation_data=generator_test,
                       verbose=0)

In [None]:
model_2.evaluate(generator_test)

<h2 style="color:red"> Simple RNN Model </h2>

In [None]:
# Window length (number of past prices fed to the sequence models).
steps=3
# Reshape the prices to a column of shape (n, 1) so that every generated window
# has shape (steps, 1), matching input_shape=(steps,1) of the recurrent layers below.
ril_train_series = ril_train['Adj Close'].values.reshape(len(ril_train),1)
# Rebinds generator_train / generator_test: from here on they yield 3-D batches.
generator_train = TimeseriesGenerator(ril_train_series, 
                                      ril_train_series, length=steps, 
                                      batch_size=1)
ril_test_series = ril_test['Adj Close'].values.reshape(len(ril_test),1)
generator_test = TimeseriesGenerator(ril_test_series, 
                                      ril_test_series, length=steps, 
                                      batch_size=1)

In [None]:
model_rnn = tf.keras.models.Sequential([
  tf.keras.layers.SimpleRNN(64,activation='relu',input_shape=(steps,1)),
  tf.keras.layers.Dense(1)                                        
])

In [None]:
model_rnn.compile(loss='mse',optimizer='adam',metrics=['mae'])
model_rnn.fit(generator_train,epochs=10,validation_data=generator_test,verbose=0)

In [None]:
model_rnn.evaluate(generator_test)

<h2 style="color:red"> LSTM Model </h2>

In [None]:
model_lstm = tf.keras.Sequential([
    tf.keras.layers.LSTM(64,activation='relu',input_shape=(steps,1)),
    tf.keras.layers.Dense(1)
])

In [None]:
# MSE loss with MAE reported.
# Keras documents `metrics` as a list; a bare string is rejected by newer Keras
# releases, and the RNN and CNN cells in this notebook already pass a list.
model_lstm.compile(loss='mse',optimizer='adam',metrics=['mae'])

In [None]:
history_lstm = model_lstm.fit(generator_train,epochs=10,
            validation_data=generator_test)

In [None]:
model_lstm.evaluate(generator_test)

In [None]:
# Stacked LSTM: the first layer returns its full output sequence so that the
# second LSTM layer receives one vector per time step.
# The window length comes from `steps` (as in the single-layer RNN and LSTM
# models) instead of a hard-coded 3, so it stays in sync with the generators.
model_lstm_a = tf.keras.Sequential([
    tf.keras.layers.LSTM(64,activation='relu',return_sequences=True,input_shape=(steps,1)),
    tf.keras.layers.LSTM(64,activation='relu'),
    tf.keras.layers.Dense(1)
])
# `metrics` is passed as a list, the documented form; newer Keras releases
# reject a bare string.
model_lstm_a.compile(loss='mse',optimizer='adam',metrics=['mae'])
history_lstm_a = model_lstm_a.fit(generator_train,epochs=10,
            validation_data=generator_test)
model_lstm_a.evaluate(generator_test)

<h2 style="color:red"> CNN Model </h2>

In [None]:
# 1-D convolutional model: 64 filters of width 2 with causal padding (each
# output step only sees the current and earlier inputs), followed by max
# pooling, flattening and a single linear output.
# The window length comes from `steps` (as in the RNN and LSTM models) instead
# of a hard-coded 3, so it stays in sync with the generators.
model_cnn = tf.keras.Sequential([
    tf.keras.layers.Convolution1D(64,2,padding="causal",
                                  input_shape=(steps,1),activation='relu'),
    tf.keras.layers.MaxPool1D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1)
])

In [None]:
model_cnn.compile(loss='mse',optimizer='adam',metrics=['mae'])

In [None]:
history_cnn = model_cnn.fit(generator_train,epochs=10,
            validation_data=generator_test)

In [None]:
model_cnn.evaluate(generator_test)

<h2 style="color:red"> ARIMA Model </h2>

In [None]:
from pmdarima.arima import auto_arima

In [None]:
# Automatic ARIMA order search on the training prices, starting from p=0 and
# q=0 and allowing up to 10 for each.
# NOTE(review): d=0 fixes the differencing order, so max_d=2 has no effect
# here. If the search is meant to choose the differencing order, d should be
# left as None - confirm intent.
model = auto_arima(ril_train['Adj Close'],start_p=0,d=0,start_q=0,
                  max_p=10,max_d=2,max_q=10)

In [None]:
model.summary()

<h2 style="color:red"> Leaky-RELU Activation </h2>

In [None]:
tf.random.set_seed(13)
model_ret = tf.keras.Sequential([
    tf.keras.layers.Dense(64,activation='leaky_relu',input_dim=3),
    tf.keras.layers.Dense(64,activation='leaky_relu'),
    tf.keras.layers.Dense(1)
])

In [None]:
# MSE loss with MAE reported.
# Keras documents `metrics` as a list; a bare string is rejected by newer Keras
# releases, and the RNN and CNN cells in this notebook already pass a list.
model_ret.compile(optimizer='adam', loss='mse',metrics=['mae'])

In [None]:
# This section models returns, so the windows must be built from the 'ret'
# column. The generators left over from the RNN/LSTM/CNN cells yield *price*
# windows shaped (1, steps, 1), which neither match this model's input_dim=3
# nor correspond to the returns plotted against the predictions below.
# 1-D arrays produce (1, 3) feature batches, matching Dense(..., input_dim=3).
generator_train = TimeseriesGenerator(ril_train['ret'].values,
                                      ril_train['ret'].values, length=3,
                                      batch_size=1)
generator_test = TimeseriesGenerator(ril_test['ret'].values,
                                     ril_test['ret'].values, length=3,
                                     batch_size=1)
history_ret = model_ret.fit(generator_train, epochs=10, 
                        verbose=0,validation_data=generator_test)

In [None]:
ril_pred = tf.squeeze(model_ret.predict(generator_test))

In [None]:
plt.plot(ril_pred,label='Predicted Return')
plt.plot(ril_test['ret'][3:].values,label='Actual Returns')
plt.legend();

<h2 style="color:red"> Predicting Stock Prices Using Technical Indicators </h2>

In [None]:
# Lagged price features: P_i is the adjusted close from i rows earlier (i = 1..5).
for i in range(1,6):
    ril[f"P_{i}"]=ril['Adj Close'].shift(i)

In [None]:
ril[['Adj Close','P_1','P_2','P_3','P_4','P_5']].tail(2)

In [None]:
# Simple moving averages of the adjusted close over 5, 21 and 50 rows.
# shift(1) moves each value down one row, so a row's feature is the average
# ending at the previous row rather than one that includes the row's own price.
ril['MA5']=talib.SMA(ril['Adj Close'],timeperiod=5).shift(1)
ril['MA21']=talib.SMA(ril['Adj Close'],timeperiod=21).shift(1)
ril['SMA50']=talib.SMA(ril['Adj Close'],timeperiod=50).shift(1)

In [None]:
# Relative Strength Index features, lagged one row like the moving averages.
ril['RSI_14']=talib.RSI(ril['Adj Close'],timeperiod=14).shift(1)
# The feature list defined in the next cell also names 'RSI_20' and 'RSI50'.
# They were never created, so selecting the feature columns raised a KeyError.
# NOTE(review): assumes the numeric suffix is the RSI look-back period - confirm.
ril['RSI_20']=talib.RSI(ril['Adj Close'],timeperiod=20).shift(1)
ril['RSI50']=talib.RSI(ril['Adj Close'],timeperiod=50).shift(1)

In [None]:
feature_columns=[ 'P_1','P_2', 'P_3', 'P_4', 'P_5', 
                 'MA5', 'MA21', 'RSI50', 'SMA50', 'RSI_20','RSI_14']

In [None]:
ril_no_na = ril.dropna()
X = ril_no_na[feature_columns]
y = ril_no_na['Adj Close']

In [None]:
X_train, X_test = X["2016-08-19":"2020-12-31"], X["2021-01-01":]
y_train, y_test = y["2016-08-19":"2020-12-31"], y["2021-01-01":]

In [None]:
from xgboost import XGBRegressor

In [None]:
xgb = XGBRegressor(n_estimators=500,max_depth=5,random_state=13)
xgb.fit(X_train,y_train)

In [None]:
xgb.score(X_test,y_test)

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
y_pred = xgb.predict(X_test)
mean_absolute_error(y_test,y_pred)

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64,activation='relu',input_shape=(11,)),
    tf.keras.layers.Dense(1)
])

In [None]:
# MSE loss with MAE reported.
# Keras documents `metrics` as a list; a bare string is rejected by newer Keras
# releases, and the RNN and CNN cells in this notebook already pass a list.
model.compile(loss='mse',optimizer='adam',
             metrics=['mae'])

In [None]:
model.fit(X_train,y_train,epochs=10)

In [None]:
model.evaluate(X_test,y_test)

In [None]:
# Rebuild the same single-hidden-layer network from scratch (fresh weights) and
# train it for 100 epochs, tracking the test-period loss after every epoch.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64,activation='relu',input_shape=(11,)),
    tf.keras.layers.Dense(1)
])
# `metrics` is passed as a list, the documented form; newer Keras releases
# reject a bare string.
model.compile(loss='mse',optimizer='adam',
             metrics=['mae'])
history = model.fit(X_train,y_train,epochs=100,
                   validation_data=(X_test,y_test),verbose=0)

In [None]:
pd.DataFrame(history.history).tail()

<h2 style="color:red"> Predicting stock returns using valuation multipliers and value drivers </h2>

In [None]:
df = pd.read_csv("MultDrivers.csv")
df.head(2)

In [None]:
df.isna().sum()

In [None]:
df.describe(percentiles=[0.05,0.95]).T

In [None]:
from scipy.stats.mstats import winsorize

In [None]:
# Winsorize each variable: the lowest 5% and highest 5% of values are replaced
# by the nearest value that is kept, limiting the influence of outliers.
# NOTE(review): this runs before any dropna(). scipy's winsorize defaults to
# nan_policy='propagate', which may overwrite or propagate NaNs - confirm these
# columns contain no missing values at this point.
df.PE = winsorize(df.PE,limits=[0.05,0.05])
df.PB = winsorize(df.PB,limits=[0.05,0.05])
df.g = winsorize(df.g,limits=[0.05,0.05])
df.ROE = winsorize(df.ROE,limits=[0.05,0.05])

In [None]:
df.nic2.nunique()

In [None]:
df1 = df.drop(['Id', 'PE','nic2'],axis=1).dropna()

In [None]:
X = df1.drop(['PB'],axis=1)
y = df1.PB

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                   random_state=13)

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
lr = LinearRegression()

In [None]:
lr.fit(X_train.drop('Returns',axis=1),y_train)

In [None]:
# Predicted PB for the test rows ('Returns' is excluded from the features).
y_pred_ols = lr.predict(X_test.drop('Returns',axis=1))
# Average 'Returns' over the test rows whose predicted PB exceeds the actual PB:
# the dot product of the boolean mask with Returns sums the selected returns,
# and the denominator counts the selected rows.
strategy_returns_ols = np.dot((y_pred_ols>y_test),X_test.Returns)/sum(y_pred_ols>y_test)
strategy_returns_ols

In [None]:
X_test.Returns.mean()

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
mean_absolute_error(y_test,y_pred_ols)

In [None]:
from sklearn.neighbors import KNeighborsRegressor

In [None]:
knn = KNeighborsRegressor(n_neighbors=5)

In [None]:
knn.fit(X_train.drop('Returns',axis=1),y_train)
y_pred_knn = knn.predict(X_test.drop('Returns',axis=1))
strategy_returns_knn = np.dot((y_pred_knn>y_test),X_test.Returns)/sum(y_pred_knn>y_test)
strategy_returns_knn

In [None]:
mean_absolute_error(y_test,y_pred_knn)

In [None]:
import tensorflow as tf

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64,activation='relu',
                         input_shape=(3,)),
    tf.keras.layers.Dense(64,activation='relu'),
    tf.keras.layers.Dense(1)
])

In [None]:
model.compile(loss='mse',optimizer='adam')

In [None]:
model.fit(X_train.drop('Returns',axis=1),y_train,epochs=100,verbose=0)

In [None]:
y_pred_nn = tf.squeeze(model.predict(X_test.drop('Returns',axis=1)))
strategy_returns_nn = np.dot((y_pred_nn>y_test),X_test.Returns)/sum((y_pred_nn>y_test).numpy())
strategy_returns_nn

In [None]:
mean_absolute_error(y_test,y_pred_nn)

<h2 style="color:red"> Predicting returns based on factor exposures/stock characteristics </h2>

In [None]:
from scipy.stats.mstats import winsorize

In [None]:
# Clip the lowest and highest 5% of each variable to limit the influence of outliers.
# The result is assigned back to the column (as done for the valuation data
# earlier in this notebook) instead of relying on inplace=True: winsorize can
# only modify its input in place when it obtains a writable, non-copying view
# of the column's data, which pandas does not guarantee.
# NOTE(review): the df1 built earlier in this notebook drops 'PE', so a cell
# loading the factor data set appears to be missing before this point - confirm.
df1['PE'] = winsorize(df1['PE'],limits=(0.05,0.05))
df1['PB'] = winsorize(df1['PB'],limits=(0.05,0.05))
df1['returns_1'] = winsorize(df1['returns_1'],limits=(0.05,0.05))

In [None]:
train_data = df1[df1.fyear<2018]
test_data=df1[df1.fyear.isin([2018, 2019])]

In [None]:
train_data.shape, test_data.shape

In [None]:
# The 15 stock characteristics used as model inputs (the networks below are
# built with input_shape=(15,) to match).
# NOTE(review): 'Mon3' breaks the 'Mom1' / 'Mom6' / 'Mom12' naming pattern and
# may be a typo for 'Mom3' - verify against the data set's column names.
feature_columns = ['mktcap', 'PE', 'PB', 'tradeQ', 'tradeVal', 'nTran', 'Mom1', 'Mon3',
       'Mom6', 'Mom12', 'MomAv1', 'MomAv3', 'MomAv6', 'MomAv12', 'Beta']

In [None]:
X_train, X_test = train_data[feature_columns],test_data[feature_columns]
y_train, y_test = train_data['returns_1'],test_data['returns_1']

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
scaler = MinMaxScaler()

In [None]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
mean_absolute_error(y_test,np.full(shape=len(y_test),fill_value = y_test.mean()))

In [None]:
from sklearn.neighbors import KNeighborsRegressor

In [None]:
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)

In [None]:
y_pred_knn = knn.predict(X_test)
mean_absolute_error(y_test,y_pred_knn)

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
rf = RandomForestRegressor(n_estimators=500,max_depth=5,
                          random_state=13)

In [None]:
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
mean_absolute_error(y_test, y_pred_rf)

In [None]:
pd.Series(rf.feature_importances_, index = feature_columns).\
sort_values().plot(kind='barh');

In [None]:
from xgboost import XGBRegressor

In [None]:
xgb = XGBRegressor(n_estimators = 500, max_depth=5, random_state=13)

In [None]:
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
mean_absolute_error(y_test, y_pred_xgb)

In [None]:
pd.Series(xgb.feature_importances_, index = feature_columns).\
sort_values().plot(kind='barh');

In [None]:
import tensorflow as tf

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64,activation='relu',input_shape=(15,)),
    tf.keras.layers.Dense(64,activation='relu'),
    tf.keras.layers.Dense(1)
])

In [None]:
model.compile(loss='mse',optimizer='adam',metrics=['mae'])

In [None]:
# Train for at most 10 epochs, stopping early once the monitored validation
# loss has failed to improve for 2 consecutive epochs.
# `callbacks` is documented as a list of callback instances, so the single
# callback is wrapped in one.
# NOTE(review): the test set doubles as the validation data here, so early
# stopping is tuned on the same data that is used for the final evaluation.
history = model.fit(X_train, y_train, epochs=10,
          validation_data=(X_test,y_test),verbose=2,
                   callbacks=[tf.keras.callbacks.EarlyStopping(patience=2)])

In [None]:
model.evaluate(X_test,y_test,verbose=0)

<h2 style="color:red"> Convert Target to Categorical Variable </h2>

In [None]:
# Bucket next-period returns into three classes:
#   2 -> return above 5%, 0 -> return below -1%, 1 -> everything in between.
# Conditions are checked in order, so the first matching one determines the class.
df1['ret_cat'] = np.select(
    [df1.returns_1>0.05, df1.returns_1<-0.01],
    [2, 0],
    default=1,
)
df1.ret_cat.value_counts()

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64,activation='elu',input_shape=(15,)),
    tf.keras.layers.Dense(64,activation='elu'),
    tf.keras.layers.Dense(3,activation='softmax')
])

In [None]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
history = model.fit(X_train, y_train, epochs=10,
          validation_data=(X_test,y_test),verbose=2)

In [None]:
model.evaluate(X_test,y_test,verbose=0)