#IMPORT DATASETS AND LIBRARIES


In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import plotly.express as px
from copy import copy
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
import plotly.figure_factory as ff
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
import requests
from requests.exceptions import HTTPError
import json as js
from datetime import datetime, timedelta 
import time
from os.path import exists


# Things to try

* Training on more than the sp500 - can we train on all daily stocks?
* Can we get more granular data from yahoo and train on that?
* Can we train on more than just close - open, high, low, close - would that help?
* How do we measure training accuracy? if we try different things how do we know whats good?
* N Bar Volatility as a feature?
* Can we do anything with correlation? How correlated are the markets?

#Library

In [32]:
# Function to plot interactive plots using Plotly Express
sc = MinMaxScaler()
REST_API = 'https://api.pro.coinbase.com'
PRODUCTS = REST_API+'/products'

def interactive_plot(df, title):
  fig = px.line(title = title)
  for i in df.columns[1:]:
    fig.add_scatter(x = df['Date'], y = df[i], name = i)
  fig.show()

def get_single_stock(price_df, vol_df, name):
    return pd.DataFrame({'Date': price_df['Date'], 'Close': price_df[name], 'Volume': vol_df[name]})

def scale_data(data):
  # Scale the data
  scaled_data = sc.fit_transform(data)
  return scaled_data

def sort_date(pric_df):
  pric_df = pric_df.sort_values(by = ['Date'])
  return pric_df

def append_price_dif(df):
  df['Target'] = df['Close'].shift(-1)
  df['Diff'] = df['Target'] - df['Close']
  df = df[:-1]
  return df

def append_price_dif_(df):
  df['Target'] = df['Close'].shift(-1)
  df['Diff'] = df['Target'] - df['Close']
  return df

def append_15d_slope(df):
  df['15Close'] = df['Close'].shift(15)
  df['15Date'] = df['Date'].shift(15)
  df['Trend'] = (df['Close'] - df['15Close']) / 15
  df = df[15:]
  return df

def show_plot(data, title):
  plt.figure(figsize = (13, 5))
  plt.plot(data, linewidth = 3)
  plt.title(title)
  plt.grid()

def build_model(features, outcomes):
  # Create the model
  inputs = keras.layers.Input(shape=(features,outcomes))
  x = keras.layers.LSTM(150, return_sequences= True)(inputs)
  x = keras.layers.Dropout(0.3)(x)
  x = keras.layers.LSTM(150, return_sequences=True)(x)
  x = keras.layers.Dropout(0.3)(x)
  x = keras.layers.LSTM(150)(x)
  outputs = keras.layers.Dense(1, activation='linear')(x)

  model = keras.Model(inputs=inputs, outputs=outputs)
  model.compile(optimizer='adam', loss="mse")
  return model

def connect(url, params):   
  response = requests.get(url,params)
  response.raise_for_status()
  return response

def coinbase_json_to_df(delta, product, granularity='86400'):
  start_date = (datetime.today() - timedelta(seconds=delta*int(granularity))).isoformat()
  end_date = datetime.now().isoformat()
  # Please refer to the coinbase documentation on the expected parameters
  params = {'start':start_date, 'end':end_date, 'granularity':granularity}
  response = connect(PRODUCTS+'/' + product + '/candles', params)
  response_text = response.text
  df_history = pd.read_json(response_text)
  # Add column names in line with the Coinbase Pro documentation
  df_history.columns = ['time','low','high','open','close','volume']
  return df_history

def get_coin_data_frames(time, product, granularity='86400'):
  df_raw = coinbase_json_to_df(time, product, granularity)
  df_btc_history = df_raw
  if len(df_btc_history.index) == 0:
    print("No data for ", product)
  df_btc_history['time'] = [datetime.fromtimestamp(x) for x in df_btc_history['time']]
  df_btc_history = df_btc_history.rename(columns={"time":"Date", "close":"Close", "volume":"Volume"})
  df_btc_history = sort_date(df_btc_history)
  df_btc_history = df_btc_history.drop(columns={"high", "low", "open"})
  df_btc_history = append_price_dif_(df_btc_history)
  df_btc_history = append_15d_slope(df_btc_history)
  df_btc_features = df_btc_history[["Close", "Volume", "Trend"]]
  df_history_scaled = sc.fit_transform(df_btc_features)
  return [df_btc_history, df_btc_features, df_history_scaled, df_raw]

def build_profit_estimate(predicted, df_btc_history):
  df_predicted_chart = pd.DataFrame();
  df_predicted_chart["Date"] = df_btc_history["Date"]
  df_predicted_chart["Predicted"] = predicted
  df_predicted_chart["Predicted-Target"] = df_predicted_chart["Predicted"].shift(-1)
  df_predicted_chart["Predicted-Diff"] = df_predicted_chart["Predicted-Target"] - df_predicted_chart["Predicted"]
  df_predicted_chart["Should-Trade"] = np.where(df_predicted_chart["Predicted-Diff"] > 0, True, False)
  df_predicted_chart["RealDiff"] = df_btc_history["Diff"]
  df_predicted_chart["Percent"] = df_predicted_chart["RealDiff"] / df_btc_history["Close"]
  df_predicted_chart["Profit"] = np.where(df_predicted_chart["Should-Trade"] > 0, df_predicted_chart["Percent"] * budget, 0)
  profit = df_predicted_chart["Profit"].sum()
  return [df_predicted_chart, profit]

def debug_prediction_frame(predicted, df_history, df_history_scaled):
  df_predicted_chart = pd.DataFrame();
  df_predicted_chart["Date"] = df_history["Date"]
  df_predicted_chart["Predicted"] = predicted
  df_predicted_chart["Original"] = df_history_scaled[:,0]
  df_predicted_chart["Original-Target"] = df_history_scaled[:,2]
  df_predicted_chart["Target-Date"] = df_predicted_chart["Date"].shift(-1)
  df_predicted_chart["Predicted-Diff"] = df_predicted_chart["Predicted"] - df_predicted_chart["Original"]
  df_predicted_chart["Actual-Diff"] = df_predicted_chart["Original-Target"] - df_predicted_chart["Original"]
  df_predicted_chart["Should-Trade"] = np.where(df_predicted_chart["Predicted-Diff"] > 0, True, False)
  df_predicted_chart["Close"] = df_history["Close"]
  df_predicted_chart["Target"] = df_history["Target"]
  df_predicted_chart["RealDiff"] = df_history["Diff"]
  df_predicted_chart["Percent"] = df_predicted_chart["RealDiff"] / df_predicted_chart["Close"]
  df_predicted_chart["Profit"] = np.where(df_predicted_chart["Should-Trade"] > 0, df_predicted_chart["Percent"] * budget, 0)
  return df_predicted_chart

def get_all_products():
  response = connect(PRODUCTS, {})
  response_text = response.text
  df_products = pd.read_json(response_text)
  return df_products

def predict_trade(product, bars):
  [df_full, df_features, npa_scaled, df_raw] = get_coin_data_frames(bars, product)
  predicted = model.predict(npa_scaled).flatten()
  
  #convert to data frames that have the correct shape for being unscaled
  df_scaled = pd.DataFrame(npa_scaled, columns = ["Close", "Volume", "Trend"])
  
  # I want to believe that scaling happens on a per column basis, we only care about
  # price here so we will dummy out volume and trend and use the scaler on it
  # this kinda sucks, if we add features we'll need to add them here for unscaling
  df_temp = pd.DataFrame(predicted, columns = ["Close"])
  df_temp["Volume"] = 0
  df_temp["Trend"] = 0
  
  # unscale them both
  df_temp = pd.DataFrame(sc.inverse_transform(df_temp), columns = ["Close", "Volume", "Trend"])
  df_trade = pd.DataFrame(sc.inverse_transform(df_scaled), columns = ["Close", "Volume", "Trend"])
  
  # add predicted
  df_trade["Predicted"] = df_temp["Close"]
  df_trade = df_trade.tail(1)

  # add the product, derive a move and percent
  df_trade["Product"] = row.id;
  df_trade["Move"] = df_trade["Predicted"] - df_trade["Close"]
  df_trade["Percent"] = (df_trade["Move"] / df_trade["Close"]) * 100
  df_trade["RawPercent"] = df_trade["Move"] / df_trade["Close"]
  df_trade["250Fees"] = (250 * 0.004) * 2
  df_trade["5kFees"] = (5000 * 0.004) * 2
  df_trade["10kFees"] = (10000 * 0.0025) * 2
  df_trade["250Profit"] = (250 * df_trade["RawPercent"]) - df_trade["250Fees"] 
  df_trade["5kProfit"] = (5000 * df_trade["RawPercent"]) - df_trade["5kFees"]
  df_trade["10k0Profit"] = (10000 * df_trade["RawPercent"]) - df_trade["10kFees"]
  return df_trade

# Training

## Scale

In [4]:
model = None
model_path = "/content/drive/My Drive/model_ohlc.h5"
file_exists = exists(model_path)
if file_exists:
  print("hello")
  model = keras.models.load_model(model_path)
else:
  print("nope")
  all_stocks_price_df = sort_date(pd.read_csv('/content/drive/My Drive/Colab Notebooks/stock.csv'))
  all_stocks_vol_df = sort_date(pd.read_csv("/content/drive/My Drive/Colab Notebooks/stock_volume.csv"))
  target_df = get_single_stock(all_stocks_price_df, all_stocks_vol_df, "sp500")
  target_df = append_price_dif(target_df)
  target_df = append_15d_slope(target_df)
  features = target_df[["Close", "Volume", "Trend", "Target"]]
  scaled_features = scale_data(features)

hello


## Separate

In [5]:
if model == None:
  # Read stock prices data
  num_features = 3

  X = []
  y = []
  for i in range(0, len(target_df)):
      X.append(scaled_features [i][0:num_features])
      y.append(scaled_features [i][num_features])

  scaled_features[0]

## Actually, train

In [6]:
if model == None:
  X = np.asarray(X)
  y = np.asarray(y)

  # Split the data
  #split = int(0.7 * len(X))
  #X_train = X[:split]
  #y_train = y[:split]
  #X_test = X[split:]
  #y_test = y[split:]

  # Reshape the 1D arrays to 3D arrays to feed in the model
  X_train = np.reshape(X, (X.shape[0], X.shape[1], 1))
  model = build_model(num_features, 1)
  history = model.fit(
      X_train, y,
      epochs = 20,
      batch_size = 32,
      validation_split = 0.2
  )

# Backtest the coin in question

In [7]:
budget = 5000
[btc_history, df_btc_features, df_history_scaled, df_raw] = get_coin_data_frames(90, "SYN-USD")
predicted = model.predict(df_history_scaled).flatten()
[df_profit, profit] = build_profit_estimate(predicted, btc_history)
df_chart = debug_prediction_frame(predicted, btc_history, df_history_scaled)
interactive_plot(df_chart[["Date","Original", "Predicted"]], "Wtf")
print("Profit:", profit)
df_chart




Profit: 6984.653938147559


Unnamed: 0,Date,Predicted,Original,Original-Target,Target-Date,Predicted-Diff,Actual-Diff,Should-Trade,Close,Target,RealDiff,Percent,Profit
74,2022-10-11,0.815351,0.892473,0.469388,2022-10-12,-0.077122,-0.423085,False,1.090,1.030,-0.060,-0.055046,0.000000
73,2022-10-12,0.757511,0.811828,0.420408,2022-10-13,-0.054317,-0.391420,False,1.030,0.966,-0.064,-0.062136,0.000000
72,2022-10-13,0.722392,0.725806,0.266667,2022-10-14,-0.003414,-0.459140,False,0.966,0.963,-0.003,-0.003106,0.000000
71,2022-10-14,0.685509,0.721774,0.212245,2022-10-15,-0.036265,-0.509529,False,0.963,0.973,0.010,0.010384,0.000000
70,2022-10-15,0.682548,0.735215,0.337415,2022-10-16,-0.052667,-0.397800,False,0.973,0.979,0.006,0.006166,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,2022-12-20,0.046811,0.051075,0.555102,2022-12-21,-0.004264,0.504027,False,0.464,0.426,-0.038,-0.081897,0.000000
3,2022-12-21,0.006309,0.000000,0.508844,2022-12-22,0.006309,0.508844,True,0.426,0.518,0.092,0.215962,1079.812207
2,2022-12-22,0.153569,0.123656,0.691156,2022-12-23,0.029913,0.567501,True,0.518,0.514,-0.004,-0.007722,-38.610039
1,2022-12-23,0.219369,0.118280,0.620408,2022-12-24,0.101090,0.502129,True,0.514,0.483,-0.031,-0.060311,-301.556420


In [8]:
#interactive_plot(df_predicted_chart[["Date", "Predicted", "Original"]], "Original Vs Prediction")
#df_predicted_chart[["Date", "Close", "Target", "Percent", "RealDiff", "Bad-Trade", "Profit"]]


# Which coins are most profitable based on the above trading signals?

In [9]:
# download all known products and check who has the highest profit in 90 days
"""
# Fetch the top 10 and see if they predict up
df_products = get_all_products()
df_products = df_products[df_products.id.str.endswith('USD')]
df_products = df_products[df_products.trading_disabled == False]
df_products = df_products[df_products.cancel_only == False]
df_profit = pd.DataFrame();
df_profit["Product"] = [];
df_profit["Profit"] = [];
for index, row in df_products.iterrows():
  try:
    print("fetching: ", row.id)
    [df_full, df_features, df_scaled] = get_coin_data_frames_test(90, row.id)
    predicted = model.predict(df_scaled.flatten())
    [df_chart, profit] = build_profit_estimate(predicted, df_full)
    df_profit.loc[len(df_profit.index)] = [row.id, profit] 
  except Exception as inst:
    print("Error: ", inst)
  time.sleep(1)

df_profit
"""

'\n# Fetch the top 10 and see if they predict up\ndf_products = get_all_products()\ndf_products = df_products[df_products.id.str.endswith(\'USD\')]\ndf_products = df_products[df_products.trading_disabled == False]\ndf_products = df_products[df_products.cancel_only == False]\ndf_profit = pd.DataFrame();\ndf_profit["Product"] = [];\ndf_profit["Profit"] = [];\nfor index, row in df_products.iterrows():\n  try:\n    print("fetching: ", row.id)\n    [df_full, df_features, df_scaled] = get_coin_data_frames_test(90, row.id)\n    predicted = model.predict(df_scaled.flatten())\n    [df_chart, profit] = build_profit_estimate(predicted, df_full)\n    df_profit.loc[len(df_profit.index)] = [row.id, profit] \n  except Exception as inst:\n    print("Error: ", inst)\n  time.sleep(1)\n\ndf_profit\n'

# What has a buy indicator for tomorrow?

In [35]:
# Fetch the top 10 and see if they predict up
df_products = get_all_products()
df_products = df_products[df_products.id.str.endswith('USD')]
df_products = df_products[df_products.trading_disabled == False]
df_products = df_products[df_products.cancel_only == False]
df_trades = pd.DataFrame();
bars = 90
counter = 0;
for index, row in df_products.iterrows():
  try:
    print("fetching: ", row.id)
    
    df_trade = predict_trade(row.id, bars)
   
    # we need to unscale the predicted values so that we have an entry and exit point
    # entry should be roughly close and exit should be roughly predicted

    # Stick this on the end of the main dataframe
    df_trades = df_trades.append(df_trade);
    
    #counter+=1
    #if counter > 5:
    #  break
  except Exception as inst:
    #raise inst
    print("Error: ", inst)
  time.sleep(1)
df_trades.reset_index()
df_buys = df_trades[df_trades['Move'] > 0] 
df_shorts = df_trades[df_trades['Move'] < 0] 




fetching:  BICO-USD
fetching:  SHPING-USD
fetching:  BUSD-USD
fetching:  FORT-USD
fetching:  BIT-USD
fetching:  WAMPL-USD
fetching:  RAI-USD
fetching:  CGLD-USD
fetching:  API3-USD
fetching:  CVC-USD
fetching:  QUICK-USD
fetching:  INJ-USD
fetching:  BAL-USD
fetching:  AGLD-USD
fetching:  UNI-USD
fetching:  ABT-USD
fetching:  SYN-USD
fetching:  DDX-USD
fetching:  GFI-USD
fetching:  OP-USD
fetching:  CLV-USD
fetching:  LOKA-USD
fetching:  PLA-USD
fetching:  RAD-USD
fetching:  AIOZ-USD
fetching:  METIS-USD
fetching:  INDEX-USD
fetching:  WCFG-USD
fetching:  POLY-USD
fetching:  EGLD-USD
fetching:  MATIC-USD
fetching:  FET-USD
fetching:  ATA-USD
fetching:  JUP-USD
fetching:  NMR-USD
fetching:  MNDE-USD
fetching:  BNT-USD
fetching:  ELA-USD
fetching:  DAI-USD
fetching:  SUPER-USD
fetching:  ASM-USD
fetching:  REP-USD
fetching:  GRT-USD
fetching:  GST-USD
fetching:  BCH-USD
fetching:  YFI-USD
fetching:  PYR-USD
fetching:  COTI-USD
fetching:  KRL-USD
fetching:  00-USD
fetching:  MAGIC-USD
fet

In [34]:
df_trade = predict_trade("SUKU-USD", 90)
df_trade



Unnamed: 0,Close,Volume,Trend,Predicted,Product,Move,Percent,RawPercent,250Fees,5kFees,10kFees,250Profit,5kProfit,10k0Profit
74,0.0425,10317798.0,-0.000547,0.042937,BIT-USD,0.000437,1.027079,0.010271,2.0,40.0,50.0,0.567697,11.353939,52.707878


In [12]:
path = '/content/drive/My Drive/output.csv'
with open(path, 'w', encoding = 'utf-8-sig') as f:
  df_trades.to_csv(f)

In [37]:
 model.save(model_path)

In [36]:
df_buys

Unnamed: 0,Close,Volume,Trend,Predicted,Product,Move,Percent,RawPercent,250Fees,5kFees,10kFees,250Profit,5kProfit,10k0Profit
74,0.2565,2434708.8,-0.002953,0.272061,FIS-USD,0.015561,6.066777,0.060668,2.0,40.0,50.0,13.166942,263.338833,556.677667
74,0.0423,10417589.8,-0.00056,0.04276,SUKU-USD,0.00046,1.088182,0.010882,2.0,40.0,50.0,0.720454,14.409082,58.818165
74,5.55,71510.38,-0.112667,5.63563,LPT-USD,0.08563,1.542889,0.015429,2.0,40.0,50.0,1.857222,37.144437,104.288874
74,0.3549,2023294.8,0.003113,0.366901,DREP-USD,0.012001,3.381448,0.033814,2.0,40.0,50.0,6.453619,129.072378,288.144756
74,0.00366,54447520.8,-3.9e-05,0.003679,XYO-USD,1.9e-05,0.527316,0.005273,2.0,40.0,50.0,-0.68171,-13.634197,2.731606
74,0.3556,511391.56,-0.000867,0.359113,POLS-USD,0.003513,0.987958,0.00988,2.0,40.0,50.0,0.469894,9.397878,48.795756
45,0.5499,462331.33,0.00708,0.551214,WAXL-USD,0.001314,0.238949,0.002389,2.0,40.0,50.0,-1.402627,-28.052548,-26.105096
