In [1]:
# -- general imports --
import yaml
import pandas as pd
# -- base configs of importing from deeptendies --
from deeptendies.stonks import *
from deeptendies.utils import generate_time_fields

# just an example, use generated key from https://finnhub.io/dashboard
# finnhub_token = "c1c318v48v6sp0s58ffg"

# load secrets from yaml example:
credentials='/home/stan/github/mltrade/secrets.yaml'
with open(credentials) as credentials:
    credentials = yaml.safe_load(credentials)
    finnhub_token=credentials['finnhub-apikey']

stock_sym='GME'
days_ago=250
start='2020-12-01'
metrics_interested=['next_3_high', 'next_3_low']

# get df from finnhub
df = pd.DataFrame.from_dict(get_stock_data(stock_sym, days_ago, 'D', finnhub_token))
generate_time_fields(df)
time.sleep(0.2)

# get df with added enriched data, right now only supports daily value
df= get_enriched_stock_data(df, "^DJI", days_ago, 'D', finnhub_token)
print(df.head())

# feature engineering, calendar and ma, vwap
df_proc = get_calendar_features(df)
df_proc = get_moving_average(df)
df_proc.fillna(method='backfill')
df_proc = add_vwap_col(df)

# feature engineering, get high and get low
days=[1,3,5,7]
df_new = get_high(df, days)
df_new = get_low(df, days)
print(df.head)
print(df.shape)

      c     h       l     o   s           t        v       wma         ts  \
0  4.19  4.29  4.1300  4.13  ok  1594771200  1474126  0.000000 2020-07-15   
1  4.17  4.20  4.0900  4.19  ok  1594857600  1330461  0.000000 2020-07-16   
2  3.96  4.23  3.9381  4.16  ok  1594944000  3066549  4.068333 2020-07-17   
3  3.85  4.06  3.7700  3.95  ok  1595203200  3401088  3.940000 2020-07-20   
4  4.01  4.09  3.8800  3.90  ok  1595289600  3341012  3.948333 2020-07-21   

         date       c_dji       h_dji       l_dji       o_dji s_dji  \
0  2020-07-15  26870.0996  27071.3301  26692.4805  27009.8105    ok   
1  2020-07-16  26734.7109  26879.1602  26590.0098  26746.5703    ok   
2  2020-07-17  26671.9492  26808.4297  26619.8809  26774.6191    ok   
3  2020-07-20  26680.8691  26765.0195  26504.1992  26660.2891    ok   
4  2020-07-21  26840.4004  27025.3809  26766.2207  26833.1406    ok   

        t_dji      v_dji       wma_dji              ts_dji    date_dji  
0  1594819800  384900000      0.00000

In [2]:
metric_interested = 'next_3_low'
df[df[metric_interested].eq(0)] = np.nan

# plt_visual_raw(stock_sym, metric_interested, df)
# Create a new dataframe with only the 'Close column
data = df.filter([metric_interested])

# Convert the dataframe to a numpy array
dataset = data.values

# Get the number of rows to train the model on
training_data_len = int(np.ceil( len(dataset) * .95 ))

# print("training_data_len: %s" %training_data_len )

In [3]:
#scaling
# Scale the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
# scaled_data
# Create the training data set
# Create the scaled training data set
train_data = scaled_data[0:int(training_data_len), :]
# Split the data into x_train and y_train data sets
x_train = []
y_train = []

for i in range(60, len(train_data)):
    x_train.append(train_data[i - 60:i, 0])
    y_train.append(train_data[i, 0])
    if i <= 61:
        print(x_train)
        print(y_train)
        print()

[array([0.        , 0.        , 0.        , 0.00054394, 0.00074173,
       0.00089008, 0.00074173, 0.00074173, 0.00074173, 0.00074173,
       0.00098897, 0.00108787, 0.00111259, 0.00178015, 0.00143401,
       0.00143401, 0.00143401, 0.00202739, 0.00281857, 0.00291747,
       0.00359294, 0.00328834, 0.00328834, 0.00328834, 0.0036592 ,
       0.0036592 , 0.00390644, 0.00390644, 0.0054888 , 0.0056861 ,
       0.00618108, 0.00714533, 0.00949414, 0.01483459, 0.01651585,
       0.01681254, 0.01681254, 0.01196657, 0.01038422, 0.01038422,
       0.01038422, 0.01315334, 0.01441428, 0.01612026, 0.02279583,
       0.02279583, 0.02279583, 0.02591109, 0.02591109, 0.02591109,
       0.02635613, 0.02858132, 0.0292736 , 0.0273451 , 0.02709885,
       0.02635613, 0.02635613, 0.02635613, 0.02669139, 0.02680117])]
[0.03772931810314988]

[array([0.        , 0.        , 0.        , 0.00054394, 0.00074173,
       0.00089008, 0.00074173, 0.00074173, 0.00074173, 0.00074173,
       0.00098897, 0.00108787, 0.00

In [4]:
# Convert the x_train and y_train to numpy arrays
x_train, y_train = np.array(x_train), np.array(y_train)

# Reshape the data
x_train = np.atleast_2d(x_train) # experimenting to solve the tuple index out of range issue

x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# print("x_train.shape:")
# print(x_train.shape)


#LSTM
from keras.models import Sequential
from keras.layers import Dense, LSTM, Masking

# Build the LSTM model
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(x_train.shape[1], 1))) # handle nans https://stackoverflow.com/questions/52570199/multivariate-lstm-with-missing-values
model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(x_train, y_train, batch_size=1, epochs=20)

# Test
# Create the testing data set
# Create a new array containing scaled values from index 1543 to 2002

# training and validating
test_data = scaled_data[training_data_len - 60:, :]

# Create the data sets x_test and y_test
x_test = []
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
    x_test.append(test_data[i - 60:i, 0])

# Convert the data to a numpy array
x_test = np.array(x_test)

# Reshape the data
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# Get the models predicted price values
predictions = model.predict(x_test)

print(predictions)

predictions = scaler.inverse_transform(predictions)

print(predictions)

# Get the root mean squared error (RMSE)
rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
print("rmse %s" %rmse)

## Plot the data Again
# Plot the data
train = data[:training_data_len]
valid = data[training_data_len:]
valid['Predictions'] = predictions

# plot_predicted()

print(valid)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[0.92542017]
 [1.133013  ]
 [0.9001262 ]
 [0.9272224 ]
 [0.92792666]
 [0.98765486]
 [       nan]
 [       nan]]
[[190.91772]
 [232.89923]
 [185.80252]
 [191.28218]
 [191.4246 ]
 [203.50343]
 [      nan]
 [      nan]]
rmse nan
     next_3_low  Predictions
164      206.00   190.917725
165      172.35   232.899231
166      172.35   185.802521
167      172.35   191.282181
168      182.66   191.424606
169         NaN   203.503433
170         NaN          NaN
171         NaN          NaN


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid['Predictions'] = predictions
