In [56]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

In [57]:
def build_model():
    model = keras.Sequential([
        keras.layers.InputLayer(input_shape=(13,), name='input'),
        keras.layers.Dense(50, activation='relu', name='h1'),
        keras.layers.Dense(50, activation='relu', name='h2'),
        keras.layers.Dense(1, name='output')
    ], name='Sequential')
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss='mse'
    )
    return model

In [58]:
data = pd.read_csv('../../data/cleansed.csv', index_col=0)
data.head()

Unnamed: 0,Net Income Before Taxes,Total Liabilities,Income Available to Common Excluding Extraordinary Items,avg Volume,Total Liabilities & Shareholders' Equity,Diluted Net Income,Net Income Before Extraordinary Items,Total Assets,Diluted Weighted Average Shares,Total Equity,Total Common Shares Outstanding,Net Income After Taxes,Net Income,High,Low
0,0.256721,0.009259,0.242315,0.015391,0.012484,0.244534,0.242315,0.012484,0.029191,0.063605,0.02999,0.247508,0.242153,208.95,163.38
1,0.253811,0.00997,0.238286,0.021397,0.012557,0.240516,0.238286,0.012557,0.029036,0.059451,0.030301,0.243599,0.238123,182.54,114.04
2,0.246107,0.010004,0.230782,0.017608,0.01184,0.233035,0.230782,0.01184,0.029181,0.054105,0.030165,0.236173,0.230618,219.75,150.58
3,0.25527,0.007716,0.238935,0.016673,0.00966,0.239665,0.238935,0.00966,0.030026,0.053596,0.030239,0.244286,0.237269,259.77,176.87
4,0.206828,2.4e-05,0.192936,0.001604,0.000356,0.195299,0.192936,0.000356,0.002219,0.037786,0.002317,0.198485,0.192764,379.3,261.27


In [59]:
data = shuffle(data)
data.head()

Unnamed: 0,Net Income Before Taxes,Total Liabilities,Income Available to Common Excluding Extraordinary Items,avg Volume,Total Liabilities & Shareholders' Equity,Diluted Net Income,Net Income Before Extraordinary Items,Total Assets,Diluted Weighted Average Shares,Total Equity,Total Common Shares Outstanding,Net Income After Taxes,Net Income,High,Low
1180,0.221436,0.004053,0.205813,0.012085,0.005674,0.208181,0.205813,0.005674,0.025476,0.04924,0.026414,0.211443,0.205686,89.59,74.3
1303,0.212721,0.002376,0.198378,0.004512,0.003629,0.200725,0.198378,0.003629,0.008273,0.045671,0.008603,0.20389,0.198207,108.28,78.89
793,0.207867,0.002889,0.19347,0.010939,0.004568,0.199839,0.194059,0.004568,0.016925,0.049003,0.017589,0.199601,0.197907,89.48,63.51
1970,0.209726,0.000807,0.196415,0.003509,0.001168,0.198768,0.196415,0.001168,0.002624,0.038426,0.002762,0.20194,0.196243,260.3,152.01
170,0.217248,0.002633,0.205046,0.018361,0.005369,0.206914,0.205096,0.005369,0.018672,0.056403,0.019385,0.210561,0.204465,103.59,76.62


In [60]:
y = data[['High']]
y.head()

Unnamed: 0,High
1180,89.59
1303,108.28
793,89.48
1970,260.3
170,103.59


In [61]:
alt_y = (y-y.mean()) / y.std()
alt_y = (alt_y-alt_y.min()) / (alt_y.max()-alt_y.min())
alt_y.head()

Unnamed: 0,High
1180,0.014508
1303,0.017633
793,0.014489
1970,0.043057
170,0.016849


In [62]:
x = data.drop(['High', 'Low'], axis=1)
print(len(list(x)))
x.head()

13


Unnamed: 0,Net Income Before Taxes,Total Liabilities,Income Available to Common Excluding Extraordinary Items,avg Volume,Total Liabilities & Shareholders' Equity,Diluted Net Income,Net Income Before Extraordinary Items,Total Assets,Diluted Weighted Average Shares,Total Equity,Total Common Shares Outstanding,Net Income After Taxes,Net Income
1180,0.221436,0.004053,0.205813,0.012085,0.005674,0.208181,0.205813,0.005674,0.025476,0.04924,0.026414,0.211443,0.205686
1303,0.212721,0.002376,0.198378,0.004512,0.003629,0.200725,0.198378,0.003629,0.008273,0.045671,0.008603,0.20389,0.198207
793,0.207867,0.002889,0.19347,0.010939,0.004568,0.199839,0.194059,0.004568,0.016925,0.049003,0.017589,0.199601,0.197907
1970,0.209726,0.000807,0.196415,0.003509,0.001168,0.198768,0.196415,0.001168,0.002624,0.038426,0.002762,0.20194,0.196243
170,0.217248,0.002633,0.205046,0.018361,0.005369,0.206914,0.205096,0.005369,0.018672,0.056403,0.019385,0.210561,0.204465


In [63]:
dataset = tf.data.Dataset.from_tensor_slices((x.values, alt_y.values))
for feat, targ in dataset.take(5):
    print ('Features: {}, Target: {}'.format(feat, targ))

Features: [0.22143574 0.00405274 0.20581331 0.01208477 0.00567373 0.20818119
 0.20581331 0.00567373 0.02547586 0.04923977 0.02641434 0.21144325
 0.20568648], Target: [0.01450776]
Features: [0.21272115 0.00237649 0.19837808 0.00451166 0.00362897 0.20072517
 0.19837808 0.00362897 0.00827344 0.04567076 0.00860269 0.20388959
 0.19820697], Target: [0.0176334]
Features: [0.20786731 0.00288876 0.19347049 0.01093904 0.004568   0.19983913
 0.1940595  0.004568   0.01692541 0.04900337 0.01758923 0.1996007
 0.19790727], Target: [0.01448936]
Features: [0.20972618 0.00080721 0.1964147  0.00350921 0.00116773 0.19876754
 0.1964147  0.00116773 0.00262414 0.03842597 0.00276153 0.20193971
 0.19624317], Target: [0.04305668]
Features: [0.21724783 0.00263311 0.20504563 0.0183605  0.00536928 0.20691358
 0.20509608 0.00536928 0.0186725  0.05640291 0.01938538 0.2105614
 0.20446533], Target: [0.01684907]


In [64]:
print(len(dataset))
print(int(len(dataset)*0.75))

1988
1491


In [65]:
test_split = 0.2
test_dataset = dataset.take(int(len(dataset)*test_split)).batch(1)

In [66]:
for feat, targ in test_dataset.take(5):
    print ('Features: {}, Target: {}'.format(feat, targ))

Features: [[0.22143574 0.00405274 0.20581331 0.01208477 0.00567373 0.20818119
  0.20581331 0.00567373 0.02547586 0.04923977 0.02641434 0.21144325
  0.20568648]], Target: [[0.01450776]]
Features: [[0.21272115 0.00237649 0.19837808 0.00451166 0.00362897 0.20072517
  0.19837808 0.00362897 0.00827344 0.04567076 0.00860269 0.20388959
  0.19820697]], Target: [[0.0176334]]
Features: [[0.20786731 0.00288876 0.19347049 0.01093904 0.004568   0.19983913
  0.1940595  0.004568   0.01692541 0.04900337 0.01758923 0.1996007
  0.19790727]], Target: [[0.01448936]]
Features: [[0.20972618 0.00080721 0.1964147  0.00350921 0.00116773 0.19876754
  0.1964147  0.00116773 0.00262414 0.03842597 0.00276153 0.20193971
  0.19624317]], Target: [[0.04305668]]
Features: [[0.21724783 0.00263311 0.20504563 0.0183605  0.00536928 0.20691358
  0.20509608 0.00536928 0.0186725  0.05640291 0.01938538 0.2105614
  0.20446533]], Target: [[0.01684907]]


In [67]:
train_dataset = dataset.skip(int(len(dataset)*test_split))
train_dataset = train_dataset.batch(1)
for feat, targ in train_dataset.take(5):
    print ('Features: {}, Target: {}'.format(feat, targ))

Features: [[0.20363878 0.00122885 0.19007555 0.0120917  0.00163019 0.1924044
  0.19007555 0.00163019 0.01339572 0.03895351 0.01399745 0.19564414
  0.18985997]], Target: [[0.00825312]]
Features: [[0.22278945 0.00386227 0.20741739 0.02221839 0.00775255 0.20973802
  0.20755517 0.00775255 0.01584635 0.06532801 0.01653183 0.21294483
  0.20738601]], Target: [[0.00999906]]
Features: [[0.24454183 0.10462641 0.22834094 0.01252552 0.10951915 0.23763927
  0.23054334 0.10951915 0.02232569 0.12893816 0.02268953 0.23624911
  0.23744023]], Target: [[0.02658222]]
Features: [[0.22931087 0.03982584 0.21758504 0.02952889 0.05509232 0.2198759
  0.21758504 0.05509232 0.06266635 0.16666444 0.06560626 0.2229645
  0.21741803]], Target: [[0.02464395]]
Features: [[0.20864486 0.019264   0.1932989  0.04117922 0.02319765 0.19801854
  0.19330744 0.02319765 0.04394185 0.0742667  0.04543586 0.19823494
  0.19550034]], Target: [[0.0103118]]


In [68]:
model = build_model()
hist = model.fit(train_dataset, epochs=200, batch_size=20)
plt.plot(hist.history['loss'], 'y', label='train loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200

In [None]:
result = model.evaluate(test_dataset, return_dict=True)
print(result)

{'loss': 0.036454953253269196}


In [None]:
def revert(value):
    value = value * (alt_y.max().values[0] - alt_y.min().values[0]) + alt_y.min().values[0]
    value = value * y.std().values[0] + y.mean().values[0]
    return value

In [None]:
for feat, targ in test_dataset:
    pred = model.predict(feat, verbose=False)
    y_ori = revert(targ.numpy()[0][0])
    y_hat = revert(pred[0][0])
    err = (y_ori - y_hat) / y_ori * 100
    print(f'Target={y_ori:0.2f}\tPredict={y_hat:0.2f}\tError={err:0.2f}%')
result = model.evaluate(test_dataset, return_dict=True)
print(result)

Target=188.92	Predict=188.43	Error=0.26%
Target=187.50	Predict=185.34	Error=1.15%
Target=189.50	Predict=184.32	Error=2.73%
Target=191.65	Predict=186.62	Error=2.63%
Target=198.07	Predict=201.41	Error=-1.68%
Target=198.51	Predict=198.67	Error=-0.08%
Target=193.46	Predict=188.79	Error=2.41%
Target=202.40	Predict=189.25	Error=6.49%
Target=188.47	Predict=184.28	Error=2.22%
Target=187.49	Predict=183.43	Error=2.16%
Target=186.85	Predict=183.43	Error=1.83%
Target=185.30	Predict=183.43	Error=1.01%
Target=181.05	Predict=186.01	Error=-2.74%
Target=180.72	Predict=183.43	Error=-1.50%
Target=180.84	Predict=183.43	Error=-1.43%
Target=181.19	Predict=183.43	Error=-1.24%
Target=186.53	Predict=183.43	Error=1.66%
Target=182.95	Predict=183.43	Error=-0.26%
Target=180.23	Predict=190.46	Error=-5.67%
Target=179.53	Predict=209.26	Error=-16.56%
Target=199.93	Predict=189.46	Error=5.24%
Target=197.56	Predict=189.14	Error=4.26%
Target=191.67	Predict=189.30	Error=1.23%
Target=187.92	Predict=189.03	Error=-0.59%
Targe