In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

# Supress warnings
import warnings
warnings.filterwarnings('ignore')

# Scikit Learn
import sklearn
sklearn.set_config(display='diagram')
from sklearn.preprocessing import StandardScaler, PowerTransformer, OneHotEncoder, PolynomialFeatures
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet  
from sklearn.metrics import mean_squared_error
from sklearn.dummy import DummyRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.feature_selection import SelectKBest, f_regression

sns.set_style()
pd.set_option('display.max_columns', None)
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (16, 9)

from IPython import get_ipython
ipython = get_ipython()

if 'ipython' in globals():
    ipython.magic('matplotlib inline')

In [24]:
df = pd.read_csv('data/combined.csv', index_col = 't')

In [25]:
df.head()

Unnamed: 0_level_0,market_mvrv_more_155,addresses_accumulation_balance,addresses_new_non_zero_count,indicators_sopr_account_based,indicators_sopr_adjusted,transactions_rate,market_mvrv,addresses_accumulation_count,entities_receiving_count,market_marketcap_usd,indicators_nvts,addresses_min_1k_count,indicators_puell_multiple,transactions_transfers_volume_entity_adjusted_sum,entities_net_growth_count,indicators_sopr_more_155,market_price_drawdown_relative,entities_min_1k_count,market_price_realized_usd,market_mvrv_less_155,indicators_net_unrealized_profit_loss_account_based,transactions_transfers_volume_adjusted_sum,indicators_hodled_lost_coins,indicators_sopr,addresses_profit_relative,entities_new_count,market_mvrv_z_score,indicators_mvrv_account_based,transactions_size_sum,entities_profit_relative,transactions_transfers_volume_miners_net,indicators_nupl_more_155_account_based,indicators_liveliness,indicators_liveliness_account_based,transactions_entity_adjusted_count,transactions_count,addresses_min_10k_count,indicators_reserve_risk,indicators_realized_profit,addresses_count,indicators_nupl_more_155,addresses_active_count,market_marketcap_realized_usd,market_price_usd_close,addresses_non_zero_count,indicators_nvt,entities_active_count,indicators_net_unrealized_profit_loss,indicators_sopr_less_155
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1
2010-01-01,,70018.15,134.0,,,2.3e-05,,8.0,2.0,,,9.0,,6700.0,48.0,,,92.0,0.04951,,,100.0,1616534.0,,,50.0,,,316.0,,6500.0,,0.00896,0.00914,134.0,2.0,4.0,,,32745.0,,4.0,80758.2365,,29959.0,,4.0,,
2010-01-02,,70018.15,126.0,,,0.0,,8.0,0.0,,,9.0,,6300.0,37.0,,,96.0,0.04951,,,0.0,1622842.0,,,37.0,,,,,6300.0,,0.008891,0.009069,126.0,0.0,4.0,,,32871.0,,0.0,81067.674,,30085.0,,0.0,,
2010-01-03,,70018.15,186.0,,,0.0,,8.0,0.0,,,9.0,,9300.0,61.0,,,97.0,0.04951,,,0.0,1632199.0,,,61.0,,,,,9300.0,,0.008806,0.008983,186.0,0.0,4.0,,,33057.0,,0.0,81528.117,,30271.0,,0.0,,
2010-01-04,,90722.5,183.0,,,2.3e-05,,9.0,3.0,,,9.0,,29854.35,57.0,,,98.0,0.04951,,,20804.35,1639507.0,,,59.0,,,9649.0,,5050.0,,0.00981,0.009984,183.0,2.0,4.0,,,33240.0,,85.0,81976.1825,,30372.0,,5.0,,
2010-01-05,,90722.5,198.0,,,0.0,,9.0,0.0,,,9.0,,9900.0,62.0,,,98.0,0.04951,,,0.0,1647175.0,,,62.0,,,,,9900.0,,0.011092,0.011264,198.0,0.0,4.0,,,33438.0,,0.0,82466.3315,,30570.0,,0.0,,


In [42]:
((df.isnull().sum())/len(df)*100).sort_values(ascending = False)

market_mvrv_more_155                                   0.0
addresses_accumulation_balance                         0.0
addresses_new_non_zero_count                           0.0
indicators_sopr_account_based                          0.0
indicators_sopr_adjusted                               0.0
transactions_rate                                      0.0
market_mvrv                                            0.0
addresses_accumulation_count                           0.0
entities_receiving_count                               0.0
market_marketcap_usd                                   0.0
indicators_nvts                                        0.0
addresses_min_1k_count                                 0.0
indicators_puell_multiple                              0.0
transactions_transfers_volume_entity_adjusted_sum      0.0
entities_net_growth_count                              0.0
indicators_sopr_more_155                               0.0
market_price_drawdown_relative                         0

In [27]:
# df[df['indicators_sopr_more_155'].isnull() == False]

Unnamed: 0_level_0,market_mvrv_more_155,addresses_accumulation_balance,addresses_new_non_zero_count,indicators_sopr_account_based,indicators_sopr_adjusted,transactions_rate,market_mvrv,addresses_accumulation_count,entities_receiving_count,market_marketcap_usd,indicators_nvts,addresses_min_1k_count,indicators_puell_multiple,transactions_transfers_volume_entity_adjusted_sum,entities_net_growth_count,indicators_sopr_more_155,market_price_drawdown_relative,entities_min_1k_count,market_price_realized_usd,market_mvrv_less_155,indicators_net_unrealized_profit_loss_account_based,transactions_transfers_volume_adjusted_sum,indicators_hodled_lost_coins,indicators_sopr,addresses_profit_relative,entities_new_count,market_mvrv_z_score,indicators_mvrv_account_based,transactions_size_sum,entities_profit_relative,transactions_transfers_volume_miners_net,indicators_nupl_more_155_account_based,indicators_liveliness,indicators_liveliness_account_based,transactions_entity_adjusted_count,transactions_count,addresses_min_10k_count,indicators_reserve_risk,indicators_realized_profit,addresses_count,indicators_nupl_more_155,addresses_active_count,market_marketcap_realized_usd,market_price_usd_close,addresses_non_zero_count,indicators_nvt,entities_active_count,indicators_net_unrealized_profit_loss,indicators_sopr_less_155
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1
2010-12-23,3.231542,4.585587e+05,681.0,1.184698,,0.004329,2.290016,882.0,257.0,1.233581e+06,69.476566,346.0,,17729.700000,114.0,2.744538,-0.492342,522.0,0.108694,1.394285,0.556128,62713.540000,4.057768e+06,1.025355,0.939098,211.0,2.065642,2.257837,127731.0,0.945200,5061.980000,0.799337,0.181225,0.182175,506.0,374.0,57.0,0.014541,3.974164e+02,170925.0,0.690550,784.0,5.386780e+05,0.248753,64406.0,79.024402,324.0,0.559091,1.166872
2010-12-24,3.316539,4.608220e+05,615.0,0.967090,,0.004433,2.294736,904.0,236.0,1.240200e+06,69.275454,347.0,,19783.927481,69.0,2.744538,-0.493878,525.0,0.108890,1.385889,0.553989,60406.229187,4.065808e+06,0.993452,0.934750,160.0,2.068072,2.259420,116313.0,0.942328,4552.385992,0.798560,0.180826,0.181787,517.0,383.0,57.0,0.014409,1.050780e+02,171540.0,0.698481,707.0,5.404544e+05,0.248000,64589.0,82.165367,304.0,0.556993,0.968438
2010-12-25,3.447517,4.596881e+05,819.0,1.100668,,0.005394,2.283546,922.0,248.0,1.240606e+06,68.168427,348.0,,34114.017431,96.0,2.744538,-0.490000,526.0,0.109275,1.388628,0.556029,112597.040000,4.073744e+06,1.022911,0.937665,184.0,2.047710,2.248354,131257.0,0.945610,3204.827431,0.799899,0.180613,0.181586,624.0,466.0,58.0,0.014203,4.285243e+02,172359.0,0.709936,915.0,5.432803e+05,0.249900,64882.0,44.154802,316.0,0.558914,1.068515
2010-12-26,4.069384,4.581898e+05,720.0,1.151690,,0.005081,2.337016,945.0,260.0,1.275171e+06,69.395511,350.0,,24241.309038,79.0,2.744538,-0.459184,528.0,0.109589,1.455695,0.580054,70038.780164,4.081981e+06,1.032988,0.968602,169.0,2.129715,2.300469,134987.0,0.967997,2865.188874,0.811287,0.180152,0.181133,562.0,439.0,58.0,0.014386,4.337047e+02,173079.0,0.754263,885.0,5.456406e+05,0.265000,65091.0,71.088474,326.0,0.582816,1.111867
2010-12-27,4.131722,4.620030e+05,607.0,1.004255,,0.004167,2.334431,953.0,254.0,1.278616e+06,68.922773,353.0,,56188.234962,88.0,2.744538,-0.459184,523.0,0.109824,1.449247,0.579092,81517.658317,4.087583e+06,1.012033,0.967560,187.0,2.119549,2.313249,147845.0,0.967178,-9186.098555,0.811155,0.180393,0.181383,500.0,360.0,59.0,0.014215,7.554248e+02,173686.0,0.757970,985.0,5.477206e+05,0.265000,65003.0,61.179996,325.0,0.581680,1.006392
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-11-01,1.972435,2.339565e+06,390458.0,1.014785,1.029550,3.658067,1.605087,456787.0,179362.0,1.655956e+11,32.532021,2146.0,1.173380,304698.356350,7137.0,1.408601,-0.532584,1731.0,5723.786509,0.952407,0.366031,690371.906605,7.021816e+06,1.012579,0.766710,102675.0,1.113265,1.567522,161291108.0,0.786878,-209.692805,0.527332,0.610433,0.610556,251395.0,316057.0,106.0,0.002487,2.063249e+08,574223388.0,0.493013,795073.0,1.031692e+11,9260.417255,27609940.0,26.108609,236941.0,0.381607,0.997222
2019-11-02,1.986508,2.338577e+06,324716.0,1.017062,1.010385,3.290613,1.624258,457269.0,148390.0,1.676497e+11,32.942728,2147.0,1.223350,162664.966332,6241.0,2.169331,-0.529328,1731.0,5725.856679,0.959678,0.371021,411182.420190,7.015116e+06,1.003909,0.774575,83747.0,1.148417,1.586064,132554176.0,0.793231,523.135984,0.530911,0.610840,0.610587,199838.0,284309.0,107.0,0.002512,4.465687e+07,574548104.0,0.496604,630458.0,1.032162e+11,9324.929668,27642006.0,43.840213,194134.0,0.385700,1.002787
2019-11-03,1.967825,2.342505e+06,325031.0,1.007849,0.999692,3.405428,1.616042,457487.0,144228.0,1.668297e+11,33.118700,2147.0,1.252883,163448.145192,3208.0,1.360762,-0.533856,1730.0,5726.271278,0.951208,0.365203,419132.566159,7.017688e+06,0.999817,0.762535,82789.0,1.132852,1.577824,133757984.0,0.784628,220.005211,0.526418,0.610735,0.610503,207699.0,294229.0,107.0,0.002495,2.557810e+07,574873135.0,0.491825,626683.0,1.032335e+11,9235.205295,27667594.0,43.012764,193219.0,0.379741,0.996427
2019-11-04,2.003473,2.343007e+06,382722.0,1.015366,1.008352,3.597766,1.619373,457297.0,189985.0,1.672444e+11,33.464315,2146.0,1.199976,262682.754655,8474.0,1.603278,-0.524818,1728.0,5728.172351,0.968539,0.376605,584369.902107,7.020714e+06,1.003970,0.784500,108610.0,1.138840,1.581482,153737430.0,0.800264,-43.159573,0.534799,0.610603,0.610421,242273.0,310847.0,107.0,0.002497,6.010684e+07,575255857.0,0.500867,743527.0,1.032772e+11,9414.270633,27696503.0,30.853232,246589.0,0.391191,1.003646


In [28]:
df.dropna(inplace = True)
df.shape

(3035, 49)

In [48]:
market_cols = [col for col in df.columns if 'market' in col]
market_cols

['market_mvrv_more_155',
 'market_mvrv',
 'market_marketcap_usd',
 'market_price_drawdown_relative',
 'market_price_realized_usd',
 'market_mvrv_less_155',
 'market_mvrv_z_score',
 'market_marketcap_realized_usd',
 'market_price_usd_close']

In [58]:
indicator_cols = [col for col in df.columns if 'indicators' in col]
indicator_cols

['indicators_sopr_account_based',
 'indicators_sopr_adjusted',
 'indicators_nvts',
 'indicators_puell_multiple',
 'indicators_sopr_more_155',
 'indicators_net_unrealized_profit_loss_account_based',
 'indicators_hodled_lost_coins',
 'indicators_sopr',
 'indicators_mvrv_account_based',
 'indicators_nupl_more_155_account_based',
 'indicators_liveliness',
 'indicators_liveliness_account_based',
 'indicators_reserve_risk',
 'indicators_realized_profit',
 'indicators_nupl_more_155',
 'indicators_nvt',
 'indicators_net_unrealized_profit_loss',
 'indicators_sopr_less_155']

In [78]:
X = df.drop(columns = market_cols + indicator_cols)
y = df['market_price_realized_usd']

In [79]:
X.columns

Index(['addresses_accumulation_balance', 'addresses_new_non_zero_count',
       'transactions_rate', 'addresses_accumulation_count',
       'entities_receiving_count', 'addresses_min_1k_count',
       'transactions_transfers_volume_entity_adjusted_sum',
       'entities_net_growth_count', 'entities_min_1k_count',
       'transactions_transfers_volume_adjusted_sum',
       'addresses_profit_relative', 'entities_new_count',
       'transactions_size_sum', 'entities_profit_relative',
       'transactions_transfers_volume_miners_net',
       'transactions_entity_adjusted_count', 'transactions_count',
       'addresses_min_10k_count', 'addresses_count', 'addresses_active_count',
       'addresses_non_zero_count', 'entities_active_count'],
      dtype='object')

In [80]:
X.shape

(3035, 22)

In [81]:
y.shape

(3035,)

In [82]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [83]:
scaler = StandardScaler()

In [84]:
lr = LinearRegression()

In [85]:
lr.fit(X_train, y_train)

In [86]:
lr.score(X_train, y_train)

0.986703411537047

In [87]:
lr.score(X_test, y_test)

0.9860311443816832

In [None]:
# Time-Series
# SARIMA, SARIMAX // ARIMA, ARIMAX

# Recurrent Neural Net

# Bayesian Model (facebook profit library)

In [None]:
# sktime