<a href="https://colab.research.google.com/github/j03m/lstm-price-predictor/blob/main/coin_charts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Current Todo:


Some ideas here: 

* Use pandas_ta to attach technical data to timeseries then use random forest to see if any features are super good.

* Use vectorbt to backtest your models

* Integrate the different time granularity models, 15m etc into the core configs

* Fix coinbase - we can't filter on volume amount without extra work

* Add TA fields, use random forest to check which fields are the best, verify with mean square error (this could take a while)

* Move to an "always on" model where we monitor exits and entries more closely. We can constantly check predictions and refine entry/exit points. Can we pick a better entry point? At some point we discussed trying to predict all 4 values, which would help. 

* Can we monitor and alarm/email the 15 min chart of an entry and see if it is
moving toward or away from our target?

* pandas_ta strategy/back testers look interesting?

Read me: https://www.kaggle.com/code/vuhuyduongnia/vn30-stock-prediction-by-lstm-model-accuracy-90


# IMPORT DATASETS AND LIBRARIES


In [1]:
# Mount Google Drive so files under /content/drive become reachable from this Colab session
from google.colab import drive
drive.mount('/content/drive')

import sys

# Put the shared notebook folder first on the module search path
sys.path.insert(0,'/content/drive/My Drive/ml-trde-notebooks')


import warnings
import pandas as pd

# Suppress warnings whose message starts with the text below
# (pandas' deprecation notice for DataFrame.append)
warnings.filterwarnings("ignore", message="The frame.append method is deprecated")


Mounted at /content/drive


# Library

In [None]:
# ***** WARNING : Install deps - This will BUILD TALib and takes a while!
# Runs installs.ipynb from the shared Drive folder inside this notebook's namespace (-i).
%run -i '/content/drive/My Drive/ml-trde-notebooks/installs.ipynb'

In [3]:
import yfinance as yf
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose
import plotly.graph_objs as go
import numpy as np
from scipy.stats import norm
from scipy.signal import find_peaks, peak_prominences

In [74]:
# Executes this notebook in our space, making all of its functions/globals available
%run -i '/content/drive/My Drive/ml-trde-notebooks/common.ipynb'
# Run-mode flags. Only load_models is read by the cells visible in this notebook;
# presumably train_models / save_models are consumed by common.ipynb or other cells — TODO confirm.
train_models = False
save_models = False
load_models = True


Modified
1680882497
1680882497.0
1680882497.0


# Load all models from disk

In [5]:
# Restore the saved models from disk unless loading has been switched off.
if not load_models:
  print ("Not loading")
else:
  load_all_models()
  


loading: lstm_cv
models loaded


In [162]:
def scale_to_price(series, df):
    """Min-max rescale `series` onto the value range of ``df['Close']``.

    The smallest value of `series` maps to the lowest close and the largest
    value maps to the highest close; everything else is placed linearly
    in between.

    Args:
        series: Array-like exposing ``.min()`` / ``.max()`` and supporting
            element-wise arithmetic (e.g. a pandas Series or NumPy array).
        df: DataFrame with a 'Close' column.

    Returns:
        The rescaled values, in the same container type arithmetic on
        `series` produces.
    """
    close = df['Close']
    lowest_close, highest_close = close.min(), close.max()

    # Position of every value inside the series' own range ...
    unit_scaled = (series - series.min()) / (series.max() - series.min())
    # ... stretched onto the close-price range.
    return unit_scaled * (highest_close - lowest_close) + lowest_close

def plot(df, trend, prob_above_trend, prob_below_trend, model, df_durations):
  """Draw the close price, its trend, the marker columns and a projected trend line.

  Reads the 'Close', 'high_prob_start', 'cross_over_positive' and
  'cross_over_negative' columns of `df`. As a side effect the probabilities,
  rescaled onto the close-price range, are written back to `df` as
  'scaled_prob_above_trend' and 'scaled_prob_below_trend'.

  Args:
    df: Price frame; its index is used as the x axis.
    trend: Trend values; drawn against the last ``len(trend)`` rows of `df`.
      Must support ``trend[-30:].index``.
    prob_above_trend: Probabilities to rescale and store on `df`.
    prob_below_trend: Probabilities to rescale and store on `df`.
    model: Object whose ``predict`` accepts an (n, 1) array.
    df_durations: Frame of durations; currently only iterated, not drawn.
  """
  # Stretch the probabilities onto the price range so they could share the price axis.
  # NOTE(review): wrapping a plain array in pd.Series gives it a fresh integer index,
  # and the column assignments further down align on labels — verify the stored
  # columns are not all-NaN when df has a date index.
  above_on_price_axis = pd.Series(scale_to_price(prob_above_trend, df))
  below_on_price_axis = pd.Series(scale_to_price(prob_below_trend, df))

  trend_dates = df.tail(len(trend)).index

  fig = go.Figure()
  fig.add_trace(go.Scatter(x=df.index, y=df["Close"], mode="lines", name="Value"))
  fig.add_trace(go.Scatter(x=trend_dates, y=trend, mode="lines", name="Trend"))

  # (column, legend label, colour) for each diamond-marker layer, in draw order.
  marker_layers = (
    ('high_prob_start', 'Window Start', 'blue'),
    ('cross_over_positive', 'Up Cross', 'green'),
    ('cross_over_negative', 'Down Cross', 'red'),
  )
  for column, label, colour in marker_layers:
    fig.add_trace(go.Scatter(
      x=df.index,
      y=df[column],
      mode='markers',
      name=label,
      marker=dict(symbol='diamond', size=8, color=colour)))

  # Projected line: model output for steps 1..59, drawn from the day after the
  # 30th-from-last trend point up to 31 days past the last row of df.
  # NOTE(review): 59 predictions vs. a date range whose length depends on the
  # trend index — confirm the two line up as intended.
  recent_trend = trend[-30:]
  steps = np.arange(1, 60).reshape(-1, 1)
  projected = model.predict(steps)
  projection_dates = pd.date_range(
    start=recent_trend.index[0] + pd.DateOffset(days=1),
    end=df.index[-1] + pd.DateOffset(days=31))

  fig.add_trace(go.Scatter(
    x=projection_dates,
    y=projected,
    mode='lines',
    name='Linear Regression Line'))

  fig.update_layout(
    title='Time Series with Trend, Scaled Seasonal Component, and Probabilities',
    xaxis_title='Date',
    yaxis_title='Close Price')

  df["scaled_prob_above_trend"] = above_on_price_axis
  df["scaled_prob_below_trend"] = below_on_price_axis

  # Placeholder: per-duration traces are disabled, the rows are only walked.
  for _position, _duration_row in df_durations.iterrows():
    pass

  fig.show()


def calc_durations(df_raw):
  """Measure how long each high-probability window lasts before a trend cross.

  Starting at the first row with a non-null 'high_prob_start', the next row
  (strictly later) with a non-null 'cross_over_positive' or
  'cross_over_negative' closes the window. The search then resumes at the
  first 'high_prob_start' after that cross. A window with no later cross
  produces no row.

  Args:
    df_raw: Frame indexed by timestamps (index differences must expose
      ``.days``) with the columns 'high_prob_start', 'cross_over_positive'
      and 'cross_over_negative'.

  Returns:
    DataFrame with columns 'start', 'end' and 'duration' (whole days), one
    row per closed window. Empty (but with those columns) when `df_raw` has
    no rows or no high-probability start.
  """
  columns = ['start', 'end', 'duration']
  if len(df_raw) == 0:
    return pd.DataFrame(columns=columns)

  index = df_raw.index
  last_index = index[-1]
  starts = df_raw['high_prob_start']

  # Row positions holding any cross, computed once so that every window can
  # find its closing cross with a binary search instead of rescanning rows.
  has_cross = (df_raw['cross_over_positive'].notna()
               | df_raw['cross_over_negative'].notna()).to_numpy()
  cross_positions = np.flatnonzero(has_cross)

  # Collect plain records and build the frame once at the end; concatenating
  # inside the loop is quadratic.
  records = []

  # first_valid_index() returns None when there is no start at all, so the
  # None check has to come before the comparison with the last index.
  start_index = starts.first_valid_index()
  while start_index is not None and start_index < last_index:
    start_pos = index.get_loc(start_index)

    # First cross strictly after the window start.
    nearest = np.searchsorted(cross_positions, start_pos, side='right')
    if nearest < len(cross_positions):
      end_pos = int(cross_positions[nearest])
      end_index = index[end_pos]
      records.append({
        'start': start_index,
        'end': end_index,
        'duration': (end_index - start_index).days,
      })
      # The window is closed; continue looking after the cross.
      start_pos = end_pos

    # Next high-probability start AFTER the exit (or after this start when
    # no cross was found).
    start_index = starts.iloc[start_pos + 1:].first_valid_index()

  return pd.DataFrame(records, columns=columns)

def attach_markers(df_raw, trend, prob_above_trend, threshold=0.8, threshold_low=0.2):
  """Add marker columns for extreme-probability rows and trend crossings.

  Three columns are written to `df_raw` in place; each holds the row's
  'Close' where the marker applies and NaN elsewhere:

  * 'high_prob_start': rows where `prob_above_trend` is above `threshold`
    or below `threshold_low`.
  * 'cross_over_positive': rows where sign(trend - Close) dropped from +1
    to -1 compared with the previous row.
  * 'cross_over_negative': rows where sign(trend - Close) rose from -1
    to +1 compared with the previous row.

  Args:
    df_raw: Price frame with a 'Close' column; mutated in place.
    trend: Series of trend values sharing `df_raw`'s index.
    prob_above_trend: Probabilities, one per row of `df_raw`.
    threshold: Upper probability bound; values above it are flagged.
    threshold_low: Lower probability bound; values below it are flagged.

  Returns:
    The same `df_raw` object, with the three marker columns set.
  """
  close = df_raw['Close']

  prob_above_trend = pd.Series(prob_above_trend, index=df_raw.index)
  high_prob_zones = (prob_above_trend > threshold) | (prob_above_trend < threshold_low)
  df_raw['high_prob_start'] = close.where(high_prob_zones)

  # Sign of (trend - Close) per row, then its row-to-row change: -2 means the
  # sign went from +1 to -1, +2 means it went from -1 to +1. The first row has
  # no predecessor and is treated as "no change".
  diff_sign = np.sign(trend - close)
  cross_over = diff_sign.diff().fillna(0)

  # A +/-2 jump can never occur on two consecutive rows (the sign has already
  # flipped), so the jump itself marks the crossing; no further edge detection
  # is needed.
  df_raw['cross_over_positive'] = close.where(cross_over == -2)
  df_raw['cross_over_negative'] = close.where(cross_over == 2)

  return df_raw

def estimate(current_btc, current_btc3s, start, end, est_btc):
    """Project the BTC3S price for an assumed BTC price at the end of a holding period.

    The token's NAV is moved by -3 times the relative BTC move, and the result
    is divided by (1 + fee), where the fee is 0.1% per day held.

    Args:
        current_btc: BTC price now.
        current_btc3s: BTC3S price now.
        start: Start of the holding period (``end - start`` must expose ``.days``).
        end: End of the holding period.
        est_btc: Estimated BTC price at `end`.

    Returns:
        The estimated BTC3S price at `end`, net of the management fee.
    """
    # Relative BTC move between now and the estimate.
    relative_btc_move = (est_btc - current_btc) / current_btc

    # The token moves three times as far, in the opposite direction.
    nav_before_fee = current_btc3s * (1 + 3 * -relative_btc_move)

    # 0.1% management fee for every whole day in the holding period.
    holding_days = (end - start).days
    fee_fraction = 0.1 / 100 * holding_days

    return nav_before_fee / (1 + fee_fraction)



# Visualize and Compare all Models

In [163]:
from scipy.stats import poisson

windows = [300, 600, 900, 1500]

# Only one window size is analysed here; `windows` lists the candidate sizes
# for a future loop.
window = 300
# NOTE(review): presumably read by get_coin_data_frames through the shared
# notebook namespace to pick the exchange — TODO confirm.
coin_base = False
ku_coin = True
df_raw = get_coin_data_frames(window, "BTC-USDT")

# Index by date, oldest first, before estimating the trend and probabilities.
df_raw = df_raw.set_index("Date").sort_index()
trend, prob_above_trend, prob_below_trend, volatility, model = generate_probability(df_raw)

df_raw = attach_markers(df_raw, trend, prob_above_trend)
df_durations = calc_durations(df_raw)
plot(df_raw, trend, prob_above_trend, prob_below_trend, model, df_durations)

# Forecast horizon: one row per day, starting at start_date.
start_date = pd.to_datetime('2023-03-17')
n_periods = 60
dates = pd.date_range(start=start_date, periods=n_periods, freq='D')

# Keep only windows that lasted at least 5 days.
df_durations = df_durations[df_durations["duration"]>=5]
durations = df_durations['duration'].values.tolist()

# Poisson distribution whose rate is the mean observed duration.
rate = np.mean(durations)
poisson_dist = poisson(rate)

# Day offsets 1..n_periods, derived from n_periods so every column below has
# exactly one value per forecast date.
numbers = np.arange(1, n_periods + 1)

# Probability mass of each day offset under the fitted distribution.
probabilities = poisson_dist.pmf(numbers).tolist()

# Model output for every day offset in one call; ravel() flattens the result
# whether predict returns shape (n,) or (n, 1).
trend_values = np.asarray(model.predict(numbers.reshape(-1, 1))).ravel().tolist()

# Percentage difference between each estimate and the most recent close.
last_close = df_raw.iloc[-1]["Close"]
percent = [(trend_value - last_close) / last_close * 100 for trend_value in trend_values]

df = pd.DataFrame({
  'date': dates,
  'probability': probabilities,
  'Est Close': trend_values,
  'Percent Change': percent,
})
df

Unnamed: 0,date,probability,Est Close,Percent Change
0,2023-03-17,0.0,23724.23841,-14.931823
1,2023-03-18,0.0,23789.038564,-14.699469
2,2023-03-19,0.0,23853.838718,-14.467115
3,2023-03-20,2e-06,23918.638872,-14.23476
4,2023-03-21,8e-06,23983.439026,-14.002406
5,2023-03-22,2.9e-05,24048.23918,-13.770052
6,2023-03-23,9.3e-05,24113.039334,-13.537697
7,2023-03-24,0.000263,24177.839488,-13.305343
8,2023-03-25,0.00066,24242.639642,-13.072988
9,2023-03-26,0.00149,24307.439796,-12.840634
