<a href="https://colab.research.google.com/github/j03m/lstm-price-predictor/blob/main/Coin_Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Current Todo:


Some ideas here: 

* Use pandas_ta to attach technical data to timeseries then use random forest to see if any features are super good.

* Use vectorbt to backtest your models

* Integrate the different time granularity models, 15m etc into the core configs

* Fix coinbase - we can't filter on volume amount without extra work

* Add TA fields, use random forest to check which fields are the best, verify with mean square error (this could take a while)

* Move to an "always-on" model where we monitor exits and entries more closely. We can constantly check predictions and refine entry/exit points. Can we pick a better entry point? At some point we discussed trying to predict all 4 values, which would help.

* Can we monitor the 15 min chart of an entry and alarm/email when it is
moving toward or away from our target?

* pandas_ta strategy/back testers look interesting?

Read me: https://www.kaggle.com/code/vuhuyduongnia/vn30-stock-prediction-by-lstm-model-accuracy-90


# IMPORT DATASETS AND LIBRARIES


In [54]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

import sys

# Put the Drive notebook folder first on the module search path.
sys.path.insert(0,'/content/drive/My Drive/ml-trde-notebooks')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Library

In [65]:
# Run the shared helper script from Drive. Later cells use names that are never
# defined in this notebook (train_models, tickers, data_path, pd, ...), so they
# are presumably supplied by this script - verify against predictions_common.py.
%run -i "/content/drive/My Drive/ml-trde-notebooks/predictions_common.py"

True

# Data Loader

In [None]:
if train_models:
  # Day-bar frames only. The 15m frames are not loaded here because the
  # training code loads them itself; that will probably need to become more
  # sophisticated soon (the per-ticker '-15.csv' files would be read here,
  # renaming their "Datetime" column to "Date").
  all_stock_dfs, all_coin_dfs = [], []
  for symbols, bucket in ((tickers, all_stock_dfs), (coins, all_coin_dfs)):
    for symbol in symbols:
      print("load:", symbol)
      csv_file = data_path +'/' + symbol + '.csv'
      bucket.append(sort_date(pd.read_csv(csv_file)))



# Model Config and Supporting Functions

In [57]:
# Registry of model objects keyed by model name; filled by the build/load cells below.
models = {}

In [60]:
# Every feature column requested when a dataset is prepared for comparing models,
# followed by the target column.
gbl_all_features = ["Close", "Volume", "RVI", "VWAP", "VWAPD"]
gbl_target_column = ["Target"]
gbl_all_columns = gbl_all_features + gbl_target_column

# All model names that have a column set defined below (not all are in play).
all_model_names = ["lstm_cv", 
        "lstm_coins_cv", 
        "lstm_att_cv", 
        "lstm_att_ohlcv", 
        "lstm_cv_rvi",
        "lstm_cv_vwap", 
        "lstm_ohlc",
        "svm_cv",
        "svm_cv_vwap"]

model_config = {
    # Models that are built, loaded, trained and saved by the cells below.
    "day_bar_models": [
        "lstm_cv", 
        "lstm_att_cv", 
        "lstm_cv_rvi",
        "svm_cv"
        ],
    # When non-empty, only these models are trained; empty means train all.
    "training_filter": [],
    "backtest_filter": [],
    "15m_bars": ["lstm_15m"],
    # Dataset family per model: "coin", "stocks" or "all". Models that are not
    # listed train on stocks.
    "training_types":{
        "lstm_coins_cv":"all"
    },
    # Feature columns each model consumes (the target column is added at training time).
    "column_sets": {
        "lstm_cv": ["Close", "Volume"],
        "lstm_15m": ["Close", "Volume"],
        "lstm_coins_cv": ["Close", "Volume"],
        "lstm_ohlc": ["Open", "High", "Low", "Close", "Volume"],
        "lstm_att_cv": ["Close", "Volume"],
        "lstm_att_ohlcv": ["Open", "High", "Low", "Close", "Volume"],
        "lstm_cv_rvi": ["Close", "Volume", "RVI"],
        "lstm_cv_vwap": ["Close", "Volume", "VWAP", "VWAPD"],
        "svm_cv": ["Close", "Volume"],
        "svm_cv_vwap": ["Close", "Volume", "VWAP", "VWAPD"]
    },
    # Constructor selector read by build_model_from_config: "att" builds the
    # attention model, "svm" builds an SVR, anything else the default model.
    # The svm entries were missing, which made the SVR branch unreachable and
    # built svm_* models with the default constructor even though they are
    # persisted through joblib.
    "build_type":{
        "lstm_att_cv":"att",
        "lstm_att_ohlcv":"att",
        "svm_cv":"svm",
        "svm_cv_vwap":"svm"
    },
    # Persistence selector: "joblib" models use joblib dump/load, all others
    # go through the Keras save/load calls.
    "load_type": {
      "svm_cv": "joblib",
      "svm_cv_vwap": "joblib"
    }
}

  
def build_model_from_config(models, name, config):
  """Create the model called `name` and store it in `models` if it is not there yet.

  Args:
    models: dict of model name -> model object; updated in place.
    name: model name, used as key into config["column_sets"] and config["build_type"].
    config: configuration dict providing "column_sets" and "build_type".

  Returns:
    None. An existing entry in `models` is left untouched.
  """
  if name in models:
    return

  print("newing: ", name)
  features = config["column_sets"][name]
  num_features = len(features)
  build_type = config["build_type"].get(name)

  # The three constructors are mutually exclusive. The original used two
  # independent `if` statements, so the trailing `else` also ran for "att"
  # models and replaced the attention model with the default one.
  if build_type == "att":
    models[name] = build_attention_model(num_features, 1)
  elif build_type == "svm":
    models[name] = SVR(kernel='rbf')
  else:
    models[name] = build_model(num_features, 1)

def train_config_model_against_df(name, config, df):
  """Fit the stashed model `name` on a single DataFrame.

  Args:
    name: model name; key into config["column_sets"] and the global `models` dict.
    config: configuration dict providing "column_sets".
    df: DataFrame handed to convert_to_training_dataset.

  Returns:
    [model, history]: the model object and whatever its fit() call returned.
  """
  features = config["column_sets"][name]
  print("Data prep for:", name)
  # The callee name was missing here (a syntax error). It is restored to the
  # helper that the other cells call with this same four-way unpacking.
  [scaled_features, X, y, normal_features] = convert_to_training_dataset(df, columns=features+gbl_target_column)
  print("training:", name)
  model = models[name]
  history = model.fit(X, y)
  return [model, history]

def build_and_stash_all_config_models():
    """Build every model listed under model_config["day_bar_models"] into the global `models` dict."""
    day_bar_names = model_config["day_bar_models"]
    for model_name in day_bar_names:
      build_model_from_config(models, model_name, model_config)

def get_training_datasets_for_model(name):
  """Return the list of training DataFrames configured for model `name`.

  The dataset family is read from model_config["training_types"]; a model
  with no entry there trains on stocks.

  Raises:
    Exception: if the configured training type is not "coin", "stocks" or "all".
  """
  training_type = model_config["training_types"].get(name, "stocks")

  # Each route pairs the progress message with a lazy lookup, so only the
  # dataset list that is actually requested gets touched.
  routes = {
      "coin": (" should train on coins", lambda: all_coin_dfs),
      "stocks": (" should train on stocks", lambda: all_stock_dfs),
      "all": (" should train on everything", lambda: all_stock_dfs + all_coin_dfs),
  }
  if training_type not in routes:
    raise Exception("Bad name or config error:" + name + " type:" + training_type)

  message, pick = routes[training_type]
  print(name, message)
  return pick()
  
  

# Load all models from disk

In [7]:
if not load_models:
  print ("Not loading")
else:
  # Day-bar models: joblib-backed ones come from a .joblib file, everything
  # else goes through the Keras loader.
  for name in model_config["day_bar_models"]:
    print("loading:", name)
    uses_joblib = model_config["load_type"].get(name) == "joblib"
    if uses_joblib:
      models[name] = joblib.load(model_path + "/" + name + ".joblib") 
    else:
      models[name] = keras.models.load_model(model_path + "/" + name + ".h15")
  # The 15m model is kept in its own variable rather than in `models`.
  lstm_15m = keras.models.load_model(model_path + "/lstm_15m.h15")
  print ("models loaded")  
  


loading: lstm_cv
loading: lstm_att_cv
loading: lstm_cv_rvi
loading: svm_cv
models loaded


# Train models


In [59]:
# Per-model list of the [model, history] pairs returned by each training run.
histories = {}
training_filter = model_config["training_filter"]
if train_models:
  build_and_stash_all_config_models()
  for name in model_config["day_bar_models"]:
    # A non-empty filter restricts training to the models it names.
    if training_filter and name not in training_filter:
      print("skipping: ", name)
      continue
    print("training: ", name)
    # todo - we need a training set type flag in config for 15m models to get the correct files/data
    for training_df in get_training_datasets_for_model(name):
      print("training: ", name)
      outcome = train_config_model_against_df(name, model_config, training_df)
      histories.setdefault(name, []).append(outcome)
  lstm_15m = build_15m_model()

newing:  lstm_cv
newing:  lstm_att_cv
newing:  lstm_cv_rvi
newing:  svm_cv
training:  lstm_cv
lstm_cv  should train on stocks
training:  lstm_cv
Data prep for: lstm_cv
training: lstm_cv


KeyboardInterrupt: ignored

## Train a single model against a single dataset

In [9]:
#build_and_stash_all_config_models()
#coin_dfs = get_raw_data_for_coin_list(180)

In [10]:
#[model, history] = train_config_model_against_df("lstm_cv", model_config, coin_dfs["FCON-USDT"])
#models["lstm_cv"] = model
#df_raw = get_coin_data_frames(180, "FCON-USDT")
#test_name = "lstm_ohlc"
#all_columns = model_config["column_sets"][test_name]+gbl_target_column
#[scaled_features, X, y, normal_features] = convert_to_training_dataset(df, columns=all_columns)  
#[p_scaled, p] = predict_trade(models[test_name], test_name, "FCON-USDT", X, all_columns)
#build_trade_model(p, normal_features, "FCON-USDT", test_name)

# Save all models to disk

In [11]:
if not save_models:
  print ("Not saving")
else:
  # Mirror of the load cell: joblib-backed models are dumped with joblib,
  # the rest use the model's own save().
  for name in model_config["day_bar_models"]:
    print("saving:", name)
    uses_joblib = model_config["load_type"].get(name) == "joblib"
    if uses_joblib:
      joblib.dump(models[name], model_path + "/" + name + ".joblib") 
    else:
      models[name].save(model_path + "/" + name + ".h15")
  lstm_15m.save(model_path + "/lstm_15m.h15")
  print ("models saved")  


Not saving


# Visualize and Compare all Models

In [29]:
# Data: 60 bars for one product, prepared with every global feature plus the target.
df_raw = get_coin_data_frames(60, "DOGE3L-USDT")
[scaled_features, X, y, normal_features] = convert_to_training_dataset(df_raw, columns=gbl_all_columns)  

df_chart = pd.DataFrame()
normal_features.reset_index(inplace=True)
df_chart["Date"] = normal_features["Date"]
# Column 0 of X is used as the scaled close ("Close" is listed first in gbl_all_features).
scaled_close = X[:,[0]]
df_chart["Close"] = scaled_close
graph_columns = ["Date","Close"]

# One [model name, mse, profit] row per evaluated model. The frame is built
# once after the loop because DataFrame.append was removed in pandas 2.0.
result_rows = []

# An empty filter evaluates every model; the alternatives are kept for quick switching.
#backtest_filter = model_config["backtest_filter"]
#backtest_filter = ["lstm_cv", "lstm_cv_rvi", "lstm_att_cv", "svm_cv"]
#backtest_filter = []
backtest_filter = ["lstm_att_cv"]

for key, model in models.items():
  if backtest_filter and key not in backtest_filter:
    print("skipping:", key)
    continue

  # Select each model's features by name. The previous positional slice
  # (first len(columns) columns of X) only matched models whose features are
  # a prefix of gbl_all_features and silently fed the wrong columns to others.
  # NOTE(review): assumes X's columns follow gbl_all_features order, as the
  # positional slice already did - confirm against convert_to_training_dataset.
  columns = model_config["column_sets"][key]
  unavailable = [c for c in columns if c not in gbl_all_features]
  if unavailable:
    print("skipping:", key, "missing features:", unavailable)
    continue

  print("testing:", key)
  graph_columns.append(key)

  fields = [gbl_all_features.index(c) for c in columns]

  # todo: compare against the original model
  x = X[:,fields]
  predicted = model.predict(x).flatten()

  mse = mean_squared_error(scaled_close, predicted)
  print("predicted mse for model: ", key, mse)
  
  [df_profit, profit] = build_profit_estimate(predicted, normal_features)
  print("profit for model: ", key, profit)

  result_rows.append([key, mse, profit])

  df_chart[key] = predicted

results = pd.DataFrame(result_rows)

interactive_plot(df_chart[graph_columns], "wow") 
results

skipping: lstm_cv
testing: lstm_att_cv
predicted mse for model:  lstm_att_cv 0.0019416673260038795
profit for model:  lstm_att_cv 4797.864928457733
skipping: lstm_cv_rvi
skipping: svm_cv


Unnamed: 0,0,1,2
0,lstm_att_cv,0.001942,4797.864928


In [None]:
# Show the profit frame left over from the last model evaluated in the comparison loop.
df_profit

# What has a buy indicator for tomorrow?

In [None]:
# Fetch the top 10 and see if they predict up
df_products = get_all_products()
df_products = df_products[df_products.id.str.endswith('USDT')]

# we have a desired budget of 10k in trading capital to deploy. 
# "volValue": is the 24h total, the trading volume in quote currency of last 24 hours
# Any markets we enter need to have a signifcantly higher trading value volume otherwise
# we can't really trade there without breaking things up. As we grow our strat here will need to change.
# for now we cut down our set to 1m in volVal
volumeCutoff = 500000
df_products["volValue"] = [float(x) for x in df_products['volValue']]
df_products = df_products[df_products["volValue"] > volumeCutoff]
df_products = df_products.sort_values(by = ['id'])

models_in_play = [
        "lstm_cv", 
        "lstm_att_cv", 
        "lstm_cv_rvi",
        "svm_cv"]
if coin_base:
  df_products = df_products[df_products.trading_disabled == False]
  df_products = df_products[df_products.cancel_only == False]

df_trades = pd.DataFrame();
df_estc = pd.DataFrame(); #expected short term closes
df_estc["Product"] = [];
df_estc["Est Close"] = [];
df_estc["Est Close Raw"] = [];
bars_long = 180
bars_short = 30
counter = 0;
print("iterating through:", len(df_products))
tries = 3

def downloadAndPredict(all_trades, product, length):
  """Download day bars for `product` and add every in-play model's prediction.

  Args:
    all_trades: DataFrame of predictions gathered so far; it is not modified.
    product: product id passed to get_coin_data_frames.
    length: bar-set size passed to get_coin_data_frames; also stored in the
      "Period" column of each prediction.

  Returns:
    A new DataFrame: `all_trades` followed by one prediction frame per model
    in models_in_play.
  """
  print("download day bars: ", product, " bar set:", length)
  df_raw = get_coin_data_frames(length, product)
  new_frames = []
  for name in models_in_play:
    print("predicting trade:", name, "for", product)
    df_trade = predict_config_model_for_product(df_raw, name, product)
    df_trade["Period"] = [length]
    new_frames.append(df_trade)
  # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
  # replacement and keeps the same row order and index.
  return pd.concat([all_trades, *new_frames])

def downloadAndPredict15(all_trades, product):
  """Predict the short-term close for `product` with the 15m model and add it as a row.

  Args:
    all_trades: DataFrame of short-term estimates gathered so far; it is not modified.
    product: product id passed to fetch_and_predict_short_term.

  Returns:
    A new DataFrame: `all_trades` followed by the row for `product`.
  """
  print("download 15 bars: ", product)
  [predicted_scaled, predicted] = fetch_and_predict_short_term(lstm_15m,product)
  # NOTE(review): predicted_scaled is passed unwrapped while predicted is
  # wrapped in a list - presumably it is already a length-1 sequence; confirm.
  df2 = pd.DataFrame({'Product': [product], 'Est Close': [predicted], 'Est Close Raw': predicted_scaled})
  # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
  return pd.concat([all_trades, df2])


# Run the long, short and 15m predictions for every product, retrying a
# product from scratch when any step fails.
for index, row in df_products.iterrows():
    # Snapshot taken before this product. The predict helpers return new
    # frames, so restoring the snapshot drops rows added by a failed attempt;
    # previously a retry appended the same predictions again.
    trades_before = df_trades
    for attempt in range(tries + 1):
      df_trades = trades_before
      try:
        print("start day long")
        df_trades = downloadAndPredict(df_trades, row.id, bars_long)
        time.sleep(1)

        print("start day short")
        df_trades = downloadAndPredict(df_trades, row.id, bars_short)
        time.sleep(1)

        print("start 15m")
        df_estc = downloadAndPredict15(df_estc, row.id)
        time.sleep(1)
      except Exception as inst:
        # Deliberately broad: this is a best-effort sweep, so one bad product
        # must not stop it. If the last attempt also fails, the rows it
        # managed to produce are kept.
        print("Error: ", inst)
        time.sleep(1)
      else:
        break

# (A bare df_trades.reset_index() used to sit here; its result was discarded,
# so it had no effect and has been dropped. df_trades keeps its index.)

# Keep predictions whose MSE is under the limit for the bar window they used.
df_trades_filtered = df_trades.loc[
    ((df_trades["Period"] == bars_long) & (df_trades["MSE"] < 0.0005))
    | ((df_trades["Period"] == bars_short) & (df_trades["MSE"] < 0.005))
]
df_buys = df_trades_filtered[df_trades_filtered['Move'] > 0] 
df_shorts = df_trades_filtered[df_trades_filtered['Move'] < 0] 

# Attach the per-product consensus values computed by the helper functions.
df_weighted = df_trades_filtered.groupby("Product").apply(consensus_percent)
df_weighted = df_weighted.rename("Consensus Percent")
df_trades_final = pd.merge(df_trades_filtered, df_weighted, left_on='Product', right_index=True)
df_weighted = df_trades_filtered.groupby("Product").apply(consensus_prediction)
df_weighted = df_weighted.rename("Consensus Prediction")
df_trades_final = pd.merge(df_trades_final, df_weighted, left_on='Product', right_index=True)

df_view = df_trades_final[["Product",  "Model Name",  "MSE", "Period", "Close", "Predicted", "Percent","Consensus Percent", "Consensus Prediction"]]
# Sort the trimmed view; sorting df_trades_final here threw the column
# selection away. Order matches the follow-up cell: highest consensus
# percent first, lowest MSE first within it.
df_view = df_view.sort_values(by=["Consensus Percent", "MSE"], ascending=[False, True])

In [53]:
# Keep predictions whose MSE is under the limit for their bar period.
long_ok = (df_trades["Period"] == 180) & (df_trades["MSE"] < 0.0005)
short_ok = (df_trades["Period"] == 30) & (df_trades["MSE"] < 0.002)
df_trades_filtered = df_trades.loc[long_ok | short_ok]

positive_move = df_trades_filtered['Move'] > 0
negative_move = df_trades_filtered['Move'] < 0
df_buys = df_trades_filtered[positive_move]
df_shorts = df_trades_filtered[negative_move]

# Merge one per-product consensus column per helper, in this order.
consensus_columns = (
    (consensus_percent, "Consensus Percent"),
    (consensus_prediction, "Consensus Prediction"),
    (consensus_overall, "Consensus Score"),
)
df_trades_final = df_trades_filtered
for aggregate, label in consensus_columns:
  df_weighted = df_trades_filtered.groupby("Product").apply(aggregate)
  df_weighted = df_weighted.rename(label)
  df_trades_final = pd.merge(df_trades_final, df_weighted, left_on='Product', right_index=True)

view_columns = ["Product",  "Model Name",  "MSE", "Period", "Close", "Predicted", "Percent","Consensus Percent", "Consensus Prediction", "Consensus Score"]
# Highest consensus percent first; lowest MSE first within it.
df_view = df_trades_final[view_columns].sort_values(by=["Consensus Percent", "MSE"], ascending=[False, True])

df_view

Unnamed: 0_level_0,Product,Model Name,MSE,Period,Close,Predicted,Percent,Consensus Percent,Consensus Prediction,Consensus Score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-13,GALAX3S-USDT,lstm_att_cv,0.000469,180,0.038400,0.889748,2217.052967,2217.052967,0.889748,
2023-01-13,MANA3S-USDT,svm_cv,0.001479,30,0.380500,0.912971,139.939754,149.305741,0.948608,94.448054
2023-01-13,MANA3S-USDT,lstm_att_cv,0.001769,30,0.380500,0.987583,159.548768,149.305741,0.948608,94.448054
2023-01-13,AVAX3L-USDT,lstm_att_cv,0.000248,180,0.000146,0.000332,126.997022,135.036912,0.000344,94.782413
2023-01-13,AVAX3L-USDT,svm_cv,0.000336,180,0.000146,0.000358,144.388269,135.036912,0.000344,94.782413
...,...,...,...,...,...,...,...,...,...,...
2023-01-13,AGIX-USDT,svm_cv,0.000250,180,0.160880,0.140818,-12.470124,-12.061753,0.141475,98.055126
2023-01-13,AGIX-USDT,lstm_att_cv,0.000251,180,0.160880,0.136514,-15.145231,-12.061753,0.141475,98.055126
2023-01-13,AGIX-USDT,lstm_cv,0.001505,30,0.160880,0.144271,-10.323822,-12.061753,0.141475,98.055126
2023-01-13,AGIX-USDT,lstm_cv_rvi,0.001527,30,0.160880,0.144800,-9.995174,-12.061753,0.141475,98.055126


In [20]:
# Show the expected short-term closes collected from the 15m model.
df_estc

Unnamed: 0,Product,Est Close,Est Close Raw
0,1EARTH-USDT,0.005297,0.364395
0,AAVE-USDT,76.767961,0.393722
0,ACE-USDT,0.007650,0.399487
0,ADA-USDT,0.346649,0.398091
0,ADA3L-USDT,0.574921,0.389640
...,...,...,...
0,XTZ-USDT,0.993724,0.405590
0,XYO-USDT,0.006841,0.366588
0,YFI-USDT,7031.025073,0.393449
0,ZEC-USDT,45.843459,0.413159


In [30]:
# Inspect every prediction row recorded for a single product.
df_trades[df_trades["Product"] == "XDB-USDT"]

Unnamed: 0_level_0,Close,Volume,Target,Predicted_Scaled,Predicted,Product,Model Name,Move,MSE,Percent,RawPercent,250Fees,5kFees,10kFees,250Profit,5kProfit,10k0Profit,Period,RVI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-01-13,0.002521,389989833.447,0.002364,0.027388,0.002945,XDB-USDT,lstm_cv,0.000424,0.000483,16.810579,0.168106,2.0,40.0,50.0,40.026448,800.528953,1631.057907,180,
2023-01-13,0.002521,389989833.447,0.002364,0.019626,0.002735,XDB-USDT,lstm_att_cv,0.000214,0.000102,8.495408,0.084954,2.0,40.0,50.0,19.238519,384.770377,799.540753,180,
2023-01-13,0.002521,389989833.447,0.002364,0.028107,0.002964,XDB-USDT,lstm_cv_rvi,0.000443,0.000445,17.580839,0.175808,2.0,40.0,50.0,41.952098,839.04195,1708.083901,180,52.232888
2023-01-13,0.002521,389989833.447,0.002364,0.029437,0.003,XDB-USDT,svm_cv,0.000479,0.000254,19.005966,0.19006,2.0,40.0,50.0,45.514914,910.298278,1850.596556,180,
2023-01-13,0.002521,389989833.447,0.002364,0.845276,0.002475,XDB-USDT,lstm_cv,-4.6e-05,0.00309,-1.821784,-0.018218,2.0,40.0,50.0,-6.55446,-131.089204,-232.178408,30,
2023-01-13,0.002521,389989833.447,0.002364,0.7946,0.002459,XDB-USDT,lstm_att_cv,-6.2e-05,0.003827,-2.463795,-0.024638,2.0,40.0,50.0,-8.159488,-163.189754,-296.379508,30,
2023-01-13,0.002521,389989833.447,0.002364,0.848851,0.002476,XDB-USDT,lstm_cv_rvi,-4.5e-05,0.002274,-1.776489,-0.017765,2.0,40.0,50.0,-6.441222,-128.824439,-227.648878,30,65.468982
2023-01-13,0.002521,389989833.447,0.002364,0.829376,0.00247,XDB-USDT,svm_cv,-5.1e-05,0.003245,-2.023213,-0.020232,2.0,40.0,50.0,-7.058032,-141.160636,-252.321273,30,


In [16]:
# Write today's result frames to CSV, tagged with the exchange token and the date.
now = datetime.now()
today = now.strftime("%Y-%m-%d")
token = "cb" if coin_base else "ku"

stamp = token + "-" + today + ".csv"
exports = (
    ("/buy-", df_buys),
    ("/15m-", df_estc),
    ("/shorts-", df_shorts),
    ("/final-", df_trades_final),
)
for prefix, frame in exports:
  frame.to_csv(data_path + prefix + stamp)

In [18]:
# Show the short-term close estimates again (same frame that was written to the 15m CSV).
df_estc

Unnamed: 0,Product,Est Close,Est Close Raw
0,1EARTH-USDT,0.005297,0.364395
0,AAVE-USDT,76.767961,0.393722
0,ACE-USDT,0.007650,0.399487
0,ADA-USDT,0.346649,0.398091
0,ADA3L-USDT,0.574921,0.389640
...,...,...,...
0,XTZ-USDT,0.993724,0.405590
0,XYO-USDT,0.006841,0.366588
0,YFI-USDT,7031.025073,0.393449
0,ZEC-USDT,45.843459,0.413159
