In [None]:
%pip install scikit-learn 
%pip install pandas
%pip install yfinance
%pip install plotly
%pip install tqdm
%pip install openpyxl
%pip install matplotlib
%pip install tensorflow
%pip install keras

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.callbacks import EarlyStopping
from keras.losses import MeanSquaredError
from sklearn.feature_selection import mutual_info_regression
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

In [2]:
btc_price_history = yf.download('BTC-USD', period='max')
nasdaq_price_history = yf.download('^IXIC', start='2010-01-01', end='2023-05-22')
sp500_price_history = yf.download('^GSPC',  start='2010-01-01', end='2023-05-22')
dji_price_history = yf.download('^DJI',  start='2010-01-01', end='2023-05-22')
dxy_price_history = yf.download('DX-Y.NYB', start='2010-01-01', end='2023-05-22')
gold_price_history = yf.download('GC=F', start='2010-01-01', end='2023-05-22')

btc_price_history.to_csv('btc_price_history.csv')
nasdaq_price_history.to_csv('nasdaq_price_history.csv')
sp500_price_history.to_csv('sp500_price_history.csv')
dji_price_history.to_csv('dji_price_history.csv')
dxy_price_history.to_csv('dxy_price_history.csv')
gold_price_history.to_csv('gold_price_history.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [3]:
btc = pd.read_csv('./btc_price_history.csv')
btc.columns = btc.columns.str.lower()
btc['date'] = pd.to_datetime(btc['date'])
btc['day_of_week'] = btc['date'].dt.dayofweek
btc['day_of_month'] = btc['date'].dt.day
btc['month'] = btc['date'].dt.month
btc['year'] = btc['date'].dt.year
btc['weekend'] = btc['day_of_week'].isin([5, 6]).astype(int)
btc.rename(columns={'open': 'btc_open', 'high': 'btc_high', 'low': 'btc_low', 'close': 'btc_close', 'volume': 'btc_volume'}, inplace=True)
btc = btc.drop('adj close', axis=1)

# other btc price downloaded from somewhere, has earlier dates like 2011.
#btc2 = pd.read_excel('./btc_price_history2.xlsx')
#btc2['date'] = pd.to_datetime(btc2['date'])

avg_transaction_fee = pd.read_excel('./avg_transaction_fee_usd.xlsx')
avg_transaction_fee['date'] = pd.to_datetime(avg_transaction_fee['date'], format='%d.%m.%Y')

ndaq = pd.read_csv('./nasdaq_price_history.csv')
ndaq.columns = ndaq.columns.str.lower()
ndaq = ndaq.drop(['adj close', 'open', 'high', 'low', 'volume'], axis=1)
ndaq['date'] = pd.to_datetime(ndaq['date'])
ndaq.rename(columns={'close': 'ndaq_close'}, inplace=True)

dji = pd.read_csv('./dji_price_history.csv')
dji.columns = dji.columns.str.lower()
dji = dji.drop(['adj close', 'open', 'high', 'low', 'volume'], axis=1)
dji['date'] = pd.to_datetime(dji['date'])
dji.rename(columns={'close': 'dji_close'}, inplace=True)

dxy = pd.read_csv('./dxy_price_history.csv')
dxy.columns = dxy.columns.str.lower()
dxy = dxy.drop(['adj close', 'open', 'high', 'low', 'volume'], axis=1)
dxy['date'] = pd.to_datetime(dxy['date'])
dxy.rename(columns={'close': 'dxy_close'}, inplace=True)

sp500 = pd.read_csv('./sp500_price_history.csv')
sp500.columns = sp500.columns.str.lower()
sp500 = sp500.drop(['adj close', 'open', 'high', 'low', 'volume'], axis=1)
sp500['date'] = pd.to_datetime(sp500['date'])
sp500.rename(columns={'close': 'sp500_close'}, inplace=True)

gold = pd.read_csv('./gold_price_history.csv')
gold.columns = gold.columns.str.lower()
gold = gold.drop(['adj close', 'open', 'high', 'low', 'volume'], axis=1)
gold['date'] = pd.to_datetime(gold['date'])
gold.rename(columns={'close': 'gold_close'}, inplace=True)

halvenings = pd.read_excel('./halvenings.xlsx')
halvenings['date'] = pd.to_datetime(halvenings['date'])

# halvening 0	Jan 3rd	    2009
# halvening 1	Nov 28th	2012
# halvening 2	July 9th	2016
# halvening 3	May 11th	2020
# halvening 4	April 8th	2024
# daph, dunh = days away from previous halvening, days until next halvening

# Manually lead it 2 months. (Because in May, only data until March is available)
m2 = pd.read_excel('./m2.xlsx')
m2['date'] = pd.to_datetime(m2['date'])

total_bitcoins = pd.read_csv('./total_bitcoins.csv')
total_bitcoins['date'] = pd.to_datetime(total_bitcoins['date'])

new_moons = pd.read_excel('./new_moons.xlsx')
new_moons['date'] = new_moons['date'].apply(lambda x: ' '.join(x.split(' ')[:3]))
new_moons['date'] = pd.to_datetime(new_moons['date'], format='%Y %b %d').dt.date
new_moons['date'] = pd.to_datetime(new_moons['date'])

full_moons = pd.read_excel('./full_moons.xlsx')
full_moons['date'] = full_moons['date'].apply(lambda x: ' '.join(x.split(' ')[:3]))
full_moons['date'] = pd.to_datetime(full_moons['date'], format='%Y %b %d').dt.date
full_moons['date'] = pd.to_datetime(full_moons['date'])

btc_google_trends = pd.read_csv('./btc_google_trends.csv')
btc_google_trends['date'] = pd.to_datetime(btc_google_trends['date'])

In [4]:
X = pd.merge(btc, ndaq, on='date', how='left')
X = pd.merge(X, dji, on='date', how='left')
X = pd.merge(X, dxy, on='date', how='left')
X = pd.merge(X, sp500, on='date', how='left')
X = pd.merge(X, gold, on='date', how='left')
X = pd.merge(X, halvenings, on='date', how='left')
X = pd.merge(X, total_bitcoins, on='date', how='left')
X = pd.merge(X, new_moons, on='date', how='left')
X = pd.merge(X, full_moons, on='date', how='left')
X = pd.merge(X, m2, on='date', how='left')
# other btc price, dropped later for now
#X = pd.merge(X, btc2, on='date', how='left')
X = pd.merge(X, avg_transaction_fee, on='date', how='left')
X = pd.merge(X, btc_google_trends, on='date', how='left')

X['ndaq_close'] = X['ndaq_close'].fillna(method='ffill')
X['dji_close'] = X['dji_close'].fillna(method='ffill')
X['dxy_close'] = X['dxy_close'].fillna(method='ffill')
X['sp500_close'] = X['sp500_close'].fillna(method='ffill')
X['gold_close'] = X['gold_close'].fillna(method='ffill')

X['m2_lead_billions'] = X['m2_lead_billions'].fillna(method='ffill')
# Manual fills, must be in this order, and be careful (For 2014 beginning price data)!
X['m2_lead_billions'] = X['m2_lead_billions'].fillna(11429.9)

X['new_moon'] = X['new_moon'].fillna(0)
X['full_moon'] = X['full_moon'].fillna(0)

# Fill and then backfilll, must be in that order!
# total bitcoins had to do alot of imputing - won't need to if i get the full btc supply data (that has every day).
X['total_bitcoins'] = X['total_bitcoins'].interpolate(method='linear')
X['total_bitcoins'] = X['total_bitcoins'].fillna(method='bfill')

X['btc_google_trends'] = X['btc_google_trends'].fillna(method='ffill')
# Manual fills, must be in this order, and be careful (For 2014 beginning price data)!
X['btc_google_trends'] = X['btc_google_trends'].fillna(method='bfill')

# Manual fill (will need to fix later)
X['avg_transaction_fee_usd'] = X['avg_transaction_fee_usd'].fillna(4.29)

X['days'] = X.index + 1

# how to deal with things that only started in like 2016 or 2012 (Twitter, ETH, etc.) Features that didn't start until later. How impute?

In [5]:
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 100)

In [6]:
X

Unnamed: 0,date,btc_open,btc_high,btc_low,btc_close,btc_volume,day_of_week,day_of_month,month,year,weekend,ndaq_close,dji_close,dxy_close,sp500_close,gold_close,dafph,dunh,block_rewards,total_bitcoins,new_moon,full_moon,m2_lead_billions,avg_transaction_fee_usd,btc_google_trends,days
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,2,17,9,2014,0,4562.189941,17156.849609,84.699997,2001.569946,1234.400024,658,661,25.00,1.328680e+07,0.0,0.0,11429.9,0.072079,3.0,1
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,3,18,9,2014,0,4593.430176,17265.990234,84.320000,2011.359985,1225.699951,659,660,25.00,1.328680e+07,0.0,0.0,11429.9,0.073564,3.0,2
2,2014-09-19,424.102997,427.834991,384.532013,394.795990,37919700,4,19,9,2014,0,4579.790039,17279.740234,84.800003,2010.400024,1215.300049,660,659,25.00,1.328680e+07,0.0,0.0,11429.9,0.067389,3.0,3
3,2014-09-20,394.673004,423.295990,389.882996,408.903992,36863600,5,20,9,2014,1,4579.790039,17279.740234,84.800003,2010.400024,1215.300049,661,658,25.00,1.329150e+07,0.0,0.0,11429.9,0.065692,3.0,4
4,2014-09-21,408.084991,412.425995,393.181000,398.821014,26580100,6,21,9,2014,1,4579.790039,17279.740234,84.800003,2010.400024,1215.300049,662,657,25.00,1.329620e+07,0.0,0.0,11429.9,0.064141,3.0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3167,2023-05-20,26888.841797,27155.158203,26843.277344,27129.585938,7044911360,5,20,5,2023,1,12657.900391,33426.628906,103.199997,4191.979980,1978.699951,1104,324,6.25,1.937625e+07,0.0,0.0,20818.1,5.398034,12.0,3168
3168,2023-05-21,27118.423828,27265.917969,26706.921875,26753.826172,8647416921,6,21,5,2023,1,12657.900391,33426.628906,103.199997,4191.979980,1978.699951,1105,323,6.25,1.937625e+07,0.0,0.0,20818.1,4.005610,12.0,3169
3169,2023-05-22,26749.892578,27045.734375,26549.734375,26851.277344,11056770492,0,22,5,2023,0,12657.900391,33426.628906,103.199997,4191.979980,1978.699951,1106,322,6.25,1.937625e+07,0.0,0.0,20818.1,4.290000,12.0,3170
3170,2023-05-23,26855.960938,27434.683594,26816.179688,27225.726562,13697203143,1,23,5,2023,0,12657.900391,33426.628906,103.199997,4191.979980,1978.699951,1107,321,6.25,1.937625e+07,0.0,0.0,20818.1,4.290000,12.0,3171


In [7]:
X = pd.get_dummies(X, columns=['day_of_week', 'day_of_month', 'month', 'year', 'block_rewards'])

In [8]:
X_train = X[X.index < 2500].copy()
X_test = X[X.index >= 2500].copy()

In [9]:
X_train

Unnamed: 0,date,btc_open,btc_high,btc_low,btc_close,btc_volume,weekend,ndaq_close,dji_close,dxy_close,sp500_close,gold_close,dafph,dunh,total_bitcoins,new_moon,full_moon,m2_lead_billions,avg_transaction_fee_usd,btc_google_trends,days,day_of_week_0,day_of_week_1,day_of_week_2,day_of_week_3,day_of_week_4,day_of_week_5,day_of_week_6,day_of_month_1,day_of_month_2,day_of_month_3,day_of_month_4,day_of_month_5,day_of_month_6,day_of_month_7,day_of_month_8,day_of_month_9,day_of_month_10,day_of_month_11,day_of_month_12,day_of_month_13,day_of_month_14,day_of_month_15,day_of_month_16,day_of_month_17,day_of_month_18,day_of_month_19,day_of_month_20,day_of_month_21,day_of_month_22,day_of_month_23,day_of_month_24,day_of_month_25,day_of_month_26,day_of_month_27,day_of_month_28,day_of_month_29,day_of_month_30,day_of_month_31,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12,year_2014,year_2015,year_2016,year_2017,year_2018,year_2019,year_2020,year_2021,year_2022,year_2023,block_rewards_6.25,block_rewards_12.5,block_rewards_25.0
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,0,4562.189941,17156.849609,84.699997,2001.569946,1234.400024,658,661,1.328680e+07,0.0,0.0,11429.9,0.072079,3.0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,0,4593.430176,17265.990234,84.320000,2011.359985,1225.699951,659,660,1.328680e+07,0.0,0.0,11429.9,0.073564,3.0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
2,2014-09-19,424.102997,427.834991,384.532013,394.795990,37919700,0,4579.790039,17279.740234,84.800003,2010.400024,1215.300049,660,659,1.328680e+07,0.0,0.0,11429.9,0.067389,3.0,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
3,2014-09-20,394.673004,423.295990,389.882996,408.903992,36863600,1,4579.790039,17279.740234,84.800003,2010.400024,1215.300049,661,658,1.329150e+07,0.0,0.0,11429.9,0.065692,3.0,4,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
4,2014-09-21,408.084991,412.425995,393.181000,398.821014,26580100,1,4579.790039,17279.740234,84.800003,2010.400024,1215.300049,662,657,1.329620e+07,0.0,0.0,11429.9,0.064141,3.0,5,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,2021-07-17,31397.308594,31935.945312,31223.990234,31533.068359,18895018942,1,14427.240234,34687.851562,92.690002,4327.160156,1814.500000,432,996,1.875900e+07,0.0,0.0,20432.4,2.178004,23.0,2496,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
2496,2021-07-18,31533.884766,32398.996094,31215.492188,31796.810547,18787986667,1,14427.240234,34687.851562,92.690002,4327.160156,1814.500000,433,995,1.876007e+07,0.0,0.0,20432.4,2.366778,23.0,2497,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
2497,2021-07-19,31800.011719,31885.859375,30563.734375,30817.832031,20434789545,0,14274.980469,33962.039062,92.889999,4258.490234,1808.699951,434,994,1.876088e+07,0.0,0.0,20432.4,2.673313,23.0,2498,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
2498,2021-07-20,30838.285156,31006.187500,29360.955078,29807.347656,23148267245,0,14498.879883,34511.988281,92.970001,4323.060059,1810.900024,435,993,1.876168e+07,0.0,0.0,20432.4,2.395533,23.0,2499,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0


In [None]:
# minmaxscaler/ maybe log1p too: prices/volumes
# dafph/dunh probably fine, avg_trans_fee probably fine. google trends probably fine.
# cyclically encode 'day_of_month', 'month' etc.? Could be better than One-hot encoding
# maybe get rid of 'day_of_week', just keep 'weekend'

In [None]:
# SCALING - EXPERIEMENT AND SEE WHICH ONES YOU NEED TO SCALE


# In many cases, you might want to apply both transformations. For example, you might first use log1p to make the distribution of data more symmetric, and then use MinMaxScaler to make sure that all input features are on a similar scale.
# You can Min-Max scaling for some columns, log1p scaling for others, and even both to a column if it helps. May need to log1p some of the data.

In [12]:
#y_train = X_train['btc_close']
#y_test = X_test['btc_close']
#y_train = np.array(y_train).reshape(-1, 1)
#y_test = np.array(y_test).reshape(-1, 1)

In [10]:
y_train = X_train['btc_close']
y_test = X_test['btc_close']

btc_close_scaler = MinMaxScaler()
btc_close_train = np.array(y_train).reshape(-1, 1)
btc_close_test = np.array(y_test).reshape(-1, 1)

btc_close_train_scaled = btc_close_scaler.fit_transform(btc_close_train)
btc_close_test_scaled = btc_close_scaler.transform(btc_close_test)

y_train = btc_close_train_scaled
y_test = btc_close_test_scaled

scaler = MinMaxScaler()

cols_to_scale = ["btc_open", "btc_high", "btc_low", "btc_volume", "ndaq_close", "dji_close", "dxy_close", "sp500_close", "gold_close", "total_bitcoins", "m2_lead_billions"] 

X_train.loc[:, cols_to_scale] = scaler.fit_transform(X_train[cols_to_scale])
X_test.loc[:, cols_to_scale] = scaler.transform(X_test[cols_to_scale])

drops = ['date', 'btc_close']
X_train = X_train.drop(drops, axis=1)
X_test = X_test.drop(drops, axis=1)

In [13]:
drops = ['btc_volume', 'weekend', 'ndaq_close', 'dji_close', 'dxy_close', 'sp500_close', 'gold_close', 'dafph', 'dunh', 'total_bitcoins', 'new_moon','full_moon','m2_lead_billions','avg_transaction_fee_usd','btc_google_trends', 'day_of_week_0', 'day_of_week_1', 'day_of_week_2','day_of_week_3','day_of_week_4','day_of_week_5','day_of_week_6','day_of_month_1','day_of_month_2','day_of_month_3','day_of_month_4','day_of_month_5','day_of_month_6','day_of_month_7','day_of_month_8','day_of_month_9','day_of_month_10','day_of_month_11','day_of_month_12','day_of_month_13','day_of_month_14','day_of_month_15','day_of_month_16','day_of_month_17','day_of_month_18','day_of_month_19','day_of_month_20','day_of_month_21','day_of_month_22','day_of_month_23','day_of_month_24','day_of_month_25','day_of_month_26','day_of_month_27','day_of_month_28','day_of_month_29','day_of_month_30','day_of_month_31', 'month_1','month_2','month_3','month_4','month_5','month_6','month_7','month_8','month_9','month_10','month_11','month_12','year_2014','year_2015','year_2016','year_2017','year_2018','year_2019','year_2020','year_2021','year_2022','year_2023', 'block_rewards_6.25','block_rewards_12.5','block_rewards_25.0']
X_train = X_train.drop(drops, axis=1)
X_test = X_test.drop(drops, axis=1)

In [16]:
X_train

Unnamed: 0,btc_open,btc_high,btc_low,days
0,0.004562,0.003967,0.004528,1
1,0.004420,0.003792,0.003894,2
2,0.003902,0.003343,0.003434,3
3,0.003438,0.003272,0.003520,4
4,0.003650,0.003104,0.003573,5
...,...,...,...,...
2495,0.492849,0.490697,0.500544,2496
2496,0.495005,0.497859,0.500407,2497
2497,0.499206,0.489922,0.489901,2498
2498,0.484024,0.476316,0.470513,2499


                            Making dataframes from 2D to 3D

In [17]:
def create_dataset(X, y, time_steps=1):
    features, target = [], []
    for i in range(len(X) - time_steps):
        slice = X[i:(i + time_steps)]
        features.append(slice)        
        target.append(y[i + time_steps])
    return np.array(features), np.array(target)

time_steps = 10

X_train, y_train = create_dataset(X_train, y_train, time_steps)
X_test, y_test = create_dataset(X_test, y_test, time_steps)

print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape)
print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)

X_train shape:  (2490, 10, 4)
y_train shape:  (2490, 1)
X_test shape:  (662, 10, 4)
y_test shape:  (662, 1)


                                    Actually making the LSTM model now

In [22]:
# Making/Trainig model here
##### Might not have needed to split data into 'train' and 'test', find out and ask GPT. #####

# Define the model
model = Sequential()

# First LSTM layer with dropout
model.add(LSTM(32, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2]), activation='tanh', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.2))

# Second LSTM layer
model.add(LSTM(16, activation='tanh', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.2))

# Dense layer
model.add(Dense(1, activation='linear'))

# Compile the model
optimizer = Adam(learning_rate=0.01)
model.compile(loss=MeanSquaredError(), optimizer=optimizer)

# Fit the model
history = model.fit(
    X_train, 
    y_train, 
    epochs=20, 
    batch_size=10,
    validation_split=0.2,
    verbose=1, 
    shuffle=False,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10)]
)


#It includes two LSTM layers with tanh activation function, and L2 regularization is applied to these layers.
#The model includes a dense layer with a linear activation function, which is suitable for regression problems.

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20


In [23]:
model.evaluate(X_test, y_test)



0.18471992015838623

In [24]:
# assuming I have a trained LSTM model called 'model' and the MinMaxScaler is called 'scaler':

# Get the most recent 10 days data
recent_data = X_test[-1].reshape(1, X_train.shape[1], X_train.shape[2])

# Use the model to predict the normalized 'btc_close' price
normalized_prediction = model.predict(recent_data)

# Reshape the normalized prediction to match the shape the scaler expects
normalized_prediction = normalized_prediction.reshape(-1, 1)

# Use the scaler to denormalize the predicted 'btc_close' price
predicted_price = btc_close_scaler.inverse_transform(normalized_prediction)

print("The forecasted 'btc_close' price for 5/25/2023 is: ", predicted_price[0,0])

The forecasted 'btc_close' price for 5/25/2023 is:  9399.054


In [25]:
# Predict the 'btc_close' prices for training and testing data
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Remember to inverse transform the predictions because the model's output is scaled
y_train_pred = btc_close_scaler.inverse_transform(y_train_pred)
y_test_pred = btc_close_scaler.inverse_transform(y_test_pred)

# Also inverse transform the actual 'btc_close' prices for the same reason
y_train_actual = btc_close_scaler.inverse_transform(y_train)
y_test_actual = btc_close_scaler.inverse_transform(y_test)


# Create traces
trace0 = go.Scatter(
    x = np.arange(len(y_train)),
    y = y_train_actual.flatten(),
    mode = 'lines',
    name = 'Train Actual'
)

trace1 = go.Scatter(
    x = np.arange(len(y_train)),
    y = y_train_pred.flatten(),
    mode = 'lines',
    name = 'Train Predicted'
)

trace2 = go.Scatter(
    x = np.arange(len(y_train), len(y_train) + len(y_test)),
    y = y_test_actual.flatten(),
    mode = 'lines',
    name = 'Test Actual'
)

trace3 = go.Scatter(
    x = np.arange(len(y_train), len(y_train) + len(y_test)),
    y = y_test_pred.flatten(),
    mode = 'lines',
    name = 'Test Predicted'
)

data = [trace0, trace1, trace2, trace3]

# Plot and edit the layout
layout = go.Layout(title = "Predicted vs Actual 'btc_close' prices",
                   xaxis = {'title' : "Date Index"},
                   yaxis = {'title' : "Price"})
fig = go.Figure(data=data, layout=layout)

fig.show()



In [None]:
# Also, note that this process assumes that the other features in your data remain constant or follow a known pattern. If there are other time-varying features in your data that the model is using to make predictions, and you don't have values for these features in the future, this could pose a problem. For example, if one of your features is "ndaq_close", and your model is using that to predict 'btc_close', you'd need to have a way to forecast or make reasonable assumptions about what "ndaq_close" will be on 5/24/2023 and onwards.

#Please note that this prediction will be more accurate if your other 80 features for the prediction day are also accurate. The prediction is not only based on the past 'btc_close' values but also on the past values of the other 80 features. If the other features for the prediction day are unknown or are just placeholders, the prediction accuracy might be affected.

# This all basically means I need to have lag features/prices/indicators and shit.

                                                            MISC BULLSHIT/EDA/Tuning

In [None]:
# Having a solid pipeline so you can change models/features/etc. is really important.

In [None]:
discrete_features = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1]
# btc_open, btc_high, btc_low, btc_volume, days, day_of_week, day_of_month, month, year, weekend, ndaq_close, dji_close, dxy_close, sp500_close, dafph, dunh,
# block_rewards, total_bitcoins, new_moon, full_moon, m2_lead, btc_price, avg_transaction_fee_usd, btc_google_trends

def make_mi_scores(X, y, discrete_features):
    mi_scores = mutual_info_regression(X, y, discrete_features=discrete_features, random_state=42)
    mi_scores = pd.Series(mi_scores, name="MI Scores", index=X.columns)
    mi_scores = mi_scores.sort_values(ascending=False)
    return mi_scores

def plot_mi_scores(scores):
    scores = scores.sort_values(ascending=True)
    width = np.arange(len(scores))
    ticks = list(scores.index)
    plt.barh(width, scores)
    plt.yticks(width, ticks)
    plt.title("Mutual Information Scores")

mi_scores = make_mi_scores(X, y, discrete_features)
plot_mi_scores(mi_scores)

In [None]:

fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=X['@@@@@@@@@@@@@@@FILL THIS HERE@@@@@@@@@@@'], y=X['btc_close'], mode='lines', name='Open'))
fig1.update_yaxes(type="log", dtick='D10')
fig1.update_layout(title='Bitcoin Price', xaxis_title='Days', yaxis_title='Close USD')
fig1.show()

fig2 = go.Figure()
fig2.add_trace(go.Scatter(x=X['@@@@@@@@@@@@@@@FILL THIS HERE@@@@@@@@@@@'], y=X['btc_volume'], mode='lines', name='Volume'))
fig2.update_yaxes(type="log", dtick='D10')
fig2.update_layout(title='Bitcoin Vol', xaxis_title='Days', yaxis_title='Amount')
fig2.show()

In [None]:
correlation_values = X.corr(method='spearman').values
feature_names = X.columns.values

data = [
    go.Heatmap(
        z=correlation_values,
        x=feature_names,
        y=feature_names,
        colorscale='Viridis',
        reversescale=False,
        text=correlation_values,
        opacity=1.0
    )
]

layout = go.Layout(
    title='Spearman Correlation of Features',
    xaxis=dict(ticks='', nticks=100),
    yaxis=dict(ticks=''),
    width=900,
    height=700
)

fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Tools/Resources used:

# ChatGPT-3.5 & ChatGPT-4
# https://www.blockchain.com/explorer/charts (no sentiment)
# https://www.lookintobitcoin.com/ (no sentiment)
# https://studio.glassnode.com/home/charts (no sentiment)
# https://data.bitcoinity.org/markets/volume/30d?c=e&t=b (Exchanges data and some basics)
# https://charts.woobull.com/ (interesting indicators)
# https://bitinfocharts.com/comparison/bitcoin-tweets.html (has tweets and google trends but can't download)
# https://charts.coinmetrics.io/ (price only?)
# https://blockchair.com/bitcoin/charts (no sentiment)
# https://fred.stlouisfed.org/
# https://aa.usno.navy.mil/data/MoonPhases
# https://www.epochconverter.com/batch
# https://github.com/cjhutto/vaderSentiment (pip install vaderSentiment)
# https://arxiv.org/ftp/arxiv/papers/2303/2303.09397.pdf (Sentiment project with twitter)
