## Ensure stationarity of explanatory variables and merge with targets

In [1]:
import numpy as np
import pandas as pd

from functions import unit_root_testing, HeskedTesting

In [2]:
# Read the raw numeric feature tables for both assets (paths are relative to the notebook's
# working directory).
btc_data, eth_data = map(
    pd.read_parquet,
    ('btc_numeric_data.parquet.gzip', 'eth_numeric_data.parquet.gzip'),
)

### Check stationarity

#### Perform heteroskedasticity tests

In [3]:
# Heteroskedasticity diagnostics on the raw BTC features; one row of p-values is printed per column.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
HeskedTesting.run_all_tests(
    btc_data,
    conf=0.01,
    tabsize=58,
)

Results of White, Breusch-Pagan and Goldfeld-Quandt tests by column (p-values):

btc_total_volume --                                        White: [31m0.0000[0m,                                  Breusch-Pagan: [31m0.0000[0m,                           Goldfeld-Quandt: [31m0.0000[0m
btc_twitter_followers --                                   White: [31m0.0000[0m,                                  Breusch-Pagan: [31m0.0000[0m,                           Goldfeld-Quandt: [31m0.0000[0m
btc_reddit_subscribers --                                  White: [31m0.0000[0m,                                  Breusch-Pagan: [31m0.0000[0m,                           Goldfeld-Quandt: [31m0.0000[0m
btc_reddit_accounts_active_48h --                          White: [31m0.0000[0m,                                  Breusch-Pagan: [32m0.2672[0m,                           Goldfeld-Quandt: [31m0.0000[0m
btc_forks --                                               White: [31m0.0000[0m,         

In [4]:
# Heteroskedasticity diagnostics on the raw ETH features; one row of p-values is printed per column.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
HeskedTesting.run_all_tests(
    eth_data,
    conf=0.01,
    tabsize=45,
)

Results of White, Breusch-Pagan and Goldfeld-Quandt tests by column (p-values):

eth_total_volume --                           White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_twitter_followers --                      White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_reddit_subscribers --                     White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_reddit_accounts_active_48h --             White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_forks --                                  White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_stars --                                  White: [31m0.0000[0m,          

#### Perform unit root tests

In [5]:
# Unit-root diagnostics on the raw BTC features; one row of p-values is printed per column.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
unit_root_testing(
    btc_data,
    conf=0.01,
    tabsize=58,
)

Results of ADF, PP and KPSS tests by column (p-values):

btc_total_volume --                                        ADF: [31m0.3517[0m,                                    PP: [32m0.0000[0m,                                      KPSS: [31m0.0001[0m
btc_twitter_followers --                                   ADF: [31m0.9971[0m,                                    PP: [31m0.9963[0m,                                      KPSS: [31m0.0001[0m
btc_reddit_subscribers --                                  ADF: [31m0.9990[0m,                                    PP: [31m1.0000[0m,                                      KPSS: [31m0.0001[0m
btc_reddit_accounts_active_48h --                          ADF: [31m0.2424[0m,                                    PP: [31m0.0107[0m,                                      KPSS: [31m0.0001[0m
btc_forks --                                               ADF: [31m0.9188[0m,                                    PP: [31m0.9859[0m,                       

  a = op(a[slice1], a[slice2])
  a = op(a[slice1], a[slice2])


btc_indicator_BBM --                                       ADF: [31m0.4233[0m,                                    PP: [31m0.6312[0m,                                      KPSS: [31m0.0001[0m
btc_indicator_BBW --                                       ADF: [32m0.0000[0m,                                    PP: [32m0.0000[0m,                                      KPSS: [31m0.0066[0m
btc_indicator_DCM --                                       ADF: [31m0.4593[0m,                                    PP: [31m0.5993[0m,                                      KPSS: [31m0.0001[0m
btc_indicator_DCW --                                       ADF: [32m0.0000[0m,                                    PP: [32m0.0000[0m,                                      KPSS: [31m0.0039[0m
btc_indicator_KCM --                                       ADF: [31m0.3718[0m,                                    PP: [31m0.6315[0m,                                      KPSS: [31m0.0001[0m
btc_indicator_KCW --

In [6]:
# Unit-root diagnostics on the raw ETH features; one row of p-values is printed per column.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
unit_root_testing(
    eth_data,
    conf=0.01,
    tabsize=45,
)

Results of ADF, PP and KPSS tests by column (p-values):



eth_total_volume --                           ADF: [31m0.0588[0m,                       PP: [32m0.0000[0m,                         KPSS: [31m0.0001[0m
eth_twitter_followers --                      ADF: [31m0.9882[0m,                       PP: [31m1.0000[0m,                         KPSS: [31m0.0001[0m
eth_reddit_subscribers --                     ADF: [31m0.9968[0m,                       PP: [31m1.0000[0m,                         KPSS: [31m0.0001[0m
eth_reddit_accounts_active_48h --             ADF: [31m0.0206[0m,                       PP: [32m0.0015[0m,                         KPSS: [31m0.0045[0m
eth_forks --                                  ADF: [31m0.9702[0m,                       PP: [31m0.9986[0m,                         KPSS: [31m0.0001[0m
eth_stars --                                  ADF: [31m0.8805[0m,                       PP: [31m0.9731[0m,                         KPSS: [31m0.0001[0m
eth_subscribers --                            ADF: [31m0.

### Log-difference non-stationary variables

In [7]:
# Google Trends series tracked for both assets; only the asset-specific search term differs.
_shared_gtrends_vars = [
    'gtrends_cryptocurrency_relative_change',
    'gtrends_blockchain_relative_change',
    'gtrends_investing_relative_change',
]

# Columns handed to log_difference_dataframe as `gtrends_vars` (asset term first, then shared terms).
btc_gtrends_vars = ['gtrends_bitcoin_relative_change', *_shared_gtrends_vars]
eth_gtrends_vars = ['gtrends_ethereum_relative_change', *_shared_gtrends_vars]

# Columns handed to log_difference_dataframe as `stationary_vars` (copied through untransformed).
btc_stationary_vars = ['btc_indicator_PSAR_down', 'btc_indicator_PSAR_up']
eth_stationary_vars = ['eth_indicator_PSAR_down', 'eth_indicator_PSAR_up']

In [8]:
def log_difference_dataframe(df: pd.DataFrame, gtrends_vars: list, stationary_vars: list,
                             offset: float = 0.01) -> pd.DataFrame:
    ''' Returns dataframe where all non-stationary variables are log-differenced once.

    The result shares the index of `df` and contains three groups of columns, in this order:

    1. every column of `df` that is in neither `gtrends_vars` nor `stationary_vars`, replaced by
       the first difference of log(value + offset) and renamed to '<name>_d'
       (the first row is NaN because it has no predecessor);
    2. every column in `gtrends_vars`, replaced by log(1 + value / 100) and renamed to '<name>_d';
    3. every column in `stationary_vars`, copied unchanged under its original name.

    Parameters:
        df: input frame; its columns must include everything in `gtrends_vars` and `stationary_vars`.
        gtrends_vars: names of columns holding relative changes expressed in percent.
        stationary_vars: names of columns that are passed through untouched.
        offset: constant added before taking the log in group 1 so that zeros do not map to -inf.

    Raises:
        KeyError: if a name in `gtrends_vars` or `stationary_vars` is not a column of `df`.
    '''

    passthrough = set(gtrends_vars) | set(stationary_vars)

    # Collect every output column first and build the frame in a single step; inserting the
    # columns one at a time fragments the frame and triggers pandas' PerformanceWarning on wide inputs.
    transformed = {}

    # log of non-positive values yields -inf/NaN; those are accepted here, so silence the warnings
    with np.errstate(divide='ignore', invalid='ignore'):
        # first order log difference of all non-stationary variables suffixed with '_d'
        for column in df.columns:
            if column in passthrough:
                continue
            transformed[column + '_d'] = np.diff(np.log(df[column] + offset), prepend=float('nan'))

        # Google Trends variables are already relative differences (in percent), so only the
        # conversion from a percentage change to a log change is applied
        for column in gtrends_vars:
            transformed[column + '_d'] = np.log(1 + (df[column] / 100))

    # variables that are already stationary are kept in unchanged form
    for column in stationary_vars:
        transformed[column] = df[column]

    return pd.DataFrame(transformed, index=df.index)

In [None]:
# First transformation pass: log-difference everything except the Google Trends and PSAR columns.
btc_differenced_data = log_difference_dataframe(
    df=btc_data,
    gtrends_vars=btc_gtrends_vars,
    stationary_vars=btc_stationary_vars,
)
eth_differenced_data = log_difference_dataframe(
    df=eth_data,
    gtrends_vars=eth_gtrends_vars,
    stationary_vars=eth_stationary_vars,
)

### Re-check stationarity

#### Perform heteroskedasticity tests

In [10]:
# Heteroskedasticity diagnostics on the once-differenced BTC features.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
HeskedTesting.run_all_tests(
    btc_differenced_data,
    conf=0.01,
    tabsize=60,
)

Results of White, Breusch-Pagan and Goldfeld-Quandt tests by column (p-values):

btc_total_volume_d --                                        White: [31m0.0000[0m,                                    Breusch-Pagan: [31m0.0000[0m,                             Goldfeld-Quandt: [31m0.0000[0m
btc_twitter_followers_d --                                   White: [32m0.6500[0m,                                    Breusch-Pagan: [32m0.4509[0m,                             Goldfeld-Quandt: [31m0.0000[0m
btc_reddit_subscribers_d --                                  White: [32m0.0220[0m,                                    Breusch-Pagan: [32m0.9727[0m,                             Goldfeld-Quandt: [31m0.0000[0m
btc_reddit_accounts_active_48h_d --                          White: [32m0.3457[0m,                                    Breusch-Pagan: [32m0.4697[0m,                             Goldfeld-Quandt: [31m0.0000[0m
btc_forks_d --                                               White:

In [11]:
# Heteroskedasticity diagnostics on the once-differenced ETH features.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
HeskedTesting.run_all_tests(
    eth_differenced_data,
    conf=0.01,
    tabsize=45,
)

Results of White, Breusch-Pagan and Goldfeld-Quandt tests by column (p-values):

eth_total_volume_d --                         White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_twitter_followers_d --                    White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_reddit_subscribers_d --                   White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_reddit_accounts_active_48h_d --           White: [31m0.0000[0m,                     Breusch-Pagan: [32m0.5520[0m,              Goldfeld-Quandt: [31m0.0009[0m
eth_forks_d --                                White: [32m0.1682[0m,                     Breusch-Pagan: [32m0.1442[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_stars_d --                                White: [32m0.1748[0m,          

#### Perform unit root tests

In [12]:
# Unit-root diagnostics on the once-differenced BTC features.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
unit_root_testing(
    btc_differenced_data,
    conf=0.01,
    tabsize=60,
)

Results of ADF, PP and KPSS tests by column (p-values):



btc_total_volume_d --                                        ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.5685[0m
btc_twitter_followers_d --                                   ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.4588[0m
btc_reddit_subscribers_d --                                  ADF: [32m0.0002[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.0902[0m
btc_reddit_accounts_active_48h_d --                          ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.7130[0m
btc_forks_d --                                               ADF: [31m0.0112[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [31m0

  return self.model.endog - self.model.predict(


btc_indicator_BBW_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.9979[0m
btc_indicator_DCM_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.2527[0m
btc_indicator_DCW_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.9992[0m
btc_indicator_KCM_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.2183[0m
btc_indicator_KCW_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0

In [13]:
# Unit-root diagnostics on the once-differenced ETH features.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
unit_root_testing(
    eth_differenced_data,
    conf=0.01,
    tabsize=45,
)

Results of ADF, PP and KPSS tests by column (p-values):



eth_total_volume_d --                         ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [32m0.1728[0m
eth_twitter_followers_d --                    ADF: [32m0.0061[0m,                       PP: [32m0.0000[0m,                         KPSS: [31m0.0045[0m
eth_reddit_subscribers_d --                   ADF: [32m0.0004[0m,                       PP: [32m0.0000[0m,                         KPSS: [31m0.0001[0m
eth_reddit_accounts_active_48h_d --           ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [32m0.3279[0m
eth_forks_d --                                ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [31m0.0001[0m
eth_stars_d --                                ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [31m0.0002[0m
eth_subscribers_d --                          ADF: [32m0.

### Difference non-stationary variables a second time

In [14]:
# Once-differenced columns that are differenced a second time (passed as `diff_vars` to
# second_order_difference_dataframe). The sub-lists only group related names for readability;
# the concatenated order is what determines the column order of the result.
btc_diff_vars = (
    # audience counters
    ['btc_twitter_followers_d', 'btc_reddit_subscribers_d']
    # repository counters
    + ['btc_forks_d', 'btc_stars_d', 'btc_subscribers_d']
    + ['btc_total_issues_d', 'btc_closed_issues_d']
    + ['btc_pull_requests_merged_d', 'btc_pull_request_contributors_d']
    # cumulative address and supply series
    + ['btc_zero_balance_addresses_all_time_d', 'btc_unique_addresses_all_time_d']
    + ['btc_current_supply_d']
)

eth_diff_vars = (
    # audience counters
    ['eth_twitter_followers_d', 'eth_reddit_subscribers_d']
    # repository counters
    + ['eth_forks_d', 'eth_stars_d', 'eth_subscribers_d']
    + ['eth_total_issues_d', 'eth_closed_issues_d']
    + ['eth_pull_requests_merged_d', 'eth_pull_request_contributors_d']
    # cumulative address and supply series
    + ['eth_zero_balance_addresses_all_time_d', 'eth_unique_addresses_all_time_d']
    + ['eth_current_supply_d']
    # ETH only
    + ['eth_staking_rate_d']
)

In [15]:
def second_order_difference_dataframe(df: pd.DataFrame, diff_vars: list) -> pd.DataFrame:
    ''' Returns dataframe where the variables in diff_vars are differenced one more time.

    The result shares the index of `df`. Its columns are, in this order:

    1. for every name in `diff_vars`: the first difference of that column, stored under the name
       with '2' appended (so 'x_d' becomes 'x_d2'); the first row is NaN because it has no predecessor;
    2. every remaining column of `df`, copied unchanged under its original name.

    Parameters:
        df: input frame; its columns must include everything in `diff_vars`.
        diff_vars: names of the columns to difference again.

    Raises:
        KeyError: if a name in `diff_vars` is not a column of `df`.
    '''

    # Collect every output column first and build the frame in a single step; inserting the
    # columns one at a time fragments the frame and triggers pandas' PerformanceWarning on wide inputs.
    columns = {}

    # difference the selected variables again; appending '2' turns the '_d' suffix into '_d2'
    for column in diff_vars:
        columns[column + '2'] = np.diff(df[column], prepend=float('nan'))

    # carry over all other variables untouched
    for column in df.columns:
        if column not in diff_vars:
            columns[column] = df[column]

    return pd.DataFrame(columns, index=df.index)

In [None]:
# Second transformation pass: difference the columns listed in *_diff_vars once more.
# NOTE: this cell overwrites its own inputs. Running it a second time raises KeyError because
# the '_d' columns named in *_diff_vars have been replaced by their '_d2' versions.
btc_differenced_data = second_order_difference_dataframe(
    df=btc_differenced_data,
    diff_vars=btc_diff_vars,
)
eth_differenced_data = second_order_difference_dataframe(
    df=eth_differenced_data,
    diff_vars=eth_diff_vars,
)

### Re-check stationarity after second differencing

#### Perform heteroskedasticity tests

In [17]:
# Heteroskedasticity diagnostics on the BTC features after the second differencing pass.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
HeskedTesting.run_all_tests(
    btc_differenced_data,
    conf=0.01,
    tabsize=60,
)

Results of White, Breusch-Pagan and Goldfeld-Quandt tests by column (p-values):

btc_twitter_followers_d2 --                                  White: [32m0.4204[0m,                                    Breusch-Pagan: [32m0.2853[0m,                             Goldfeld-Quandt: [31m0.0000[0m
btc_reddit_subscribers_d2 --                                 White: [32m0.1207[0m,                                    Breusch-Pagan: [32m0.1435[0m,                             Goldfeld-Quandt: [31m0.0000[0m
btc_forks_d2 --                                              White: [32m0.0419[0m,                                    Breusch-Pagan: [32m0.0120[0m,                             Goldfeld-Quandt: [31m0.0000[0m
btc_stars_d2 --                                              White: [31m0.0000[0m,                                    Breusch-Pagan: [31m0.0026[0m,                             Goldfeld-Quandt: [31m0.0000[0m
btc_subscribers_d2 --                                        White:

In [18]:
# Heteroskedasticity diagnostics on the ETH features after the second differencing pass.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
HeskedTesting.run_all_tests(
    eth_differenced_data,
    conf=0.01,
    tabsize=45,
)

Results of White, Breusch-Pagan and Goldfeld-Quandt tests by column (p-values):

eth_twitter_followers_d2 --                   White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_reddit_subscribers_d2 --                  White: [31m0.0000[0m,                     Breusch-Pagan: [31m0.0000[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_forks_d2 --                               White: [32m0.0292[0m,                     Breusch-Pagan: [32m0.0400[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_stars_d2 --                               White: [32m0.0312[0m,                     Breusch-Pagan: [32m0.0420[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_subscribers_d2 --                         White: [32m0.0223[0m,                     Breusch-Pagan: [32m0.0307[0m,              Goldfeld-Quandt: [31m0.0000[0m
eth_total_issues_d2 --                        White: [31m0.0017[0m,          

#### Perform unit root tests

In [19]:
# Unit-root diagnostics on the BTC features after the second differencing pass.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
unit_root_testing(
    btc_differenced_data,
    conf=0.01,
    tabsize=60,
)

Results of ADF, PP and KPSS tests by column (p-values):



btc_twitter_followers_d2 --                                  ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.5297[0m
btc_reddit_subscribers_d2 --                                 ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.9852[0m
btc_forks_d2 --                                              ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.1632[0m
btc_stars_d2 --                                              ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [36mInfeasibleTestException[0m
btc_subscribers_d2 --                                        ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                   

  return self.model.endog - self.model.predict(


btc_indicator_OBV_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.0292[0m
btc_indicator_VPT_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.7217[0m
btc_indicator_VWAP_d --                                      ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.4124[0m
btc_indicator_BBM_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0.2179[0m
btc_indicator_BBW_d --                                       ADF: [32m0.0000[0m,                                      PP: [32m0.0000[0m,                                        KPSS: [32m0

In [20]:
# Unit-root diagnostics on the ETH features after the second differencing pass.
# NOTE(review): conf looks like the p-value threshold and tabsize the print padding — confirm in functions.py.
unit_root_testing(
    eth_differenced_data,
    conf=0.01,
    tabsize=45,
)

Results of ADF, PP and KPSS tests by column (p-values):

eth_twitter_followers_d2 --                   ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [32m0.5120[0m
eth_reddit_subscribers_d2 --                  ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [32m0.9834[0m
eth_forks_d2 --                               ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [32m0.6356[0m
eth_stars_d2 --                               ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [32m0.4898[0m
eth_subscribers_d2 --                         ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: [36mInfeasibleTestException[0m
eth_total_issues_d2 --                        ADF: [32m0.0000[0m,                       PP: [32m0.0000[0m,                         KPSS: 

### Remove specific outliers

In [21]:
# Hand-picked (index label, column) cells of the BTC frame that are blanked out as outliers.
# The entries are applied in the listed order.
# NOTE(review): .loc with scalar labels appends a new row/column instead of raising when a label
# does not exist, so a mistyped label or column name here would go unnoticed.
btc_outlier_cells = [
    (1622851200, 'btc_twitter_followers_d2'),
    (1525392000, 'btc_twitter_followers_d2'),
    (1435276800, 'btc_total_issues_d2'),
    (1435190400, 'btc_total_issues_d2'),
    (1435363200, 'btc_total_issues_d2'),
    (1435276800, 'btc_closed_issues_d2'),
    (1435190400, 'btc_closed_issues_d2'),
    (1435363200, 'btc_closed_issues_d2'),
    (1533686400, 'btc_additions_d'),
    (1533686400, 'btc_deletions_d'),
    (1533859200, 'btc_additions_d'),
    (1533859200, 'btc_deletions_d'),
    (1438905600, 'btc_ETH_volumefrom_d'),
    (1438905600, 'btc_ETH_volumeto_d'),
    (1315180800, 'btc_EUR_volumefrom_d'),
    (1315180800, 'btc_EUR_volumeto_d'),
    (1515715200, 'btc_exchange_Kraken_volumeto_d'),
    (1515715200, 'btc_exchange_Kraken_volumefrom_d'),
    (1515715200, 'btc_exchange_Kraken_volumetotal_d'),
    (1515801600, 'btc_exchange_Kraken_volumeto_d'),
    (1515801600, 'btc_exchange_Kraken_volumefrom_d'),
    (1515801600, 'btc_exchange_Kraken_volumetotal_d'),
    (1464307200, 'btc_exchange_Coinbase_volumeto_d'),
    (1647734400, 'btc_exchange_BTSE_volumeto_d'),
    (1647734400, 'btc_exchange_BTSE_volumefrom_d'),
    (1647734400, 'btc_exchange_BTSE_volumetotal_d'),
    (1518048000, 'btc_exchange_Binance_volumeto_d'),
    (1518048000, 'btc_exchange_Binance_volumefrom_d'),
    (1518048000, 'btc_exchange_Binance_volumetotal_d'),
    (1518134400, 'btc_exchange_Binance_volumeto_d'),
    (1518134400, 'btc_exchange_Binance_volumefrom_d'),
    (1518134400, 'btc_exchange_Binance_volumetotal_d'),
    (1502928000, 'btc_balance_distribution_from_100000.0_totalVolume_d'),
    (1503014400, 'btc_balance_distribution_from_100000.0_totalVolume_d'),
    (1502928000, 'btc_balance_distribution_from_100000.0_addressesCount_d'),
    (1503014400, 'btc_balance_distribution_from_100000.0_addressesCount_d'),
]

for row_label, column_name in btc_outlier_cells:
    btc_differenced_data.loc[row_label, column_name] = float('nan')

In [22]:
# Hand-picked (index label, column) cells of the ETH frame that are blanked out as outliers.
# The entries are applied in the listed order.
# NOTE(review): .loc with scalar labels appends a new row/column instead of raising when a label
# does not exist, so a mistyped label or column name here would go unnoticed.
# NOTE(review): label 1678924800 (BTSE entries) is larger than the upper bound 1678838400 used when
# the ETH frame is later restricted to its final timeframe, so those entries cannot affect the
# saved data — confirm whether a different label was intended.
eth_outlier_cells = [
    (1465516800, 'eth_forks_d2'),
    (1465430400, 'eth_forks_d2'),
    (1465516800, 'eth_stars_d2'),
    (1465430400, 'eth_stars_d2'),
    (1465516800, 'eth_subscribers_d2'),
    (1465430400, 'eth_subscribers_d2'),
    (1465516800, 'eth_total_issues_d2'),
    (1465430400, 'eth_total_issues_d2'),
    (1465516800, 'eth_closed_issues_d2'),
    (1465430400, 'eth_closed_issues_d2'),
    (1465516800, 'eth_pull_requests_merged_d2'),
    (1465430400, 'eth_pull_requests_merged_d2'),
    (1465516800, 'eth_pull_request_contributors_d2'),
    (1465430400, 'eth_pull_request_contributors_d2'),
    (1476403200, 'eth_zero_balance_addresses_all_time_d2'),
    (1476230400, 'eth_zero_balance_addresses_all_time_d2'),
    (1476403200, 'eth_unique_addresses_all_time_d2'),
    (1476230400, 'eth_unique_addresses_all_time_d2'),
    (1665964800, 'eth_current_supply_d2'),
    (1666051200, 'eth_current_supply_d2'),
    (1666051200, 'eth_staking_rate_d2'),
    (1635379200, 'eth_staking_rate_d2'),
    (1512518400, 'eth_total_volume_d'),
    (1512604800, 'eth_total_volume_d'),
    (1498694400, 'eth_exchange_Bitfinex_volumeto_d'),
    (1511654400, 'eth_exchange_Bitfinex_volumefrom_d'),
    (1511740800, 'eth_exchange_Bitfinex_volumefrom_d'),
    (1457913600, 'eth_exchange_Bitfinex_volumetotal_d'),
    (1511654400, 'eth_exchange_Bitfinex_volumetotal_d'),
    (1511740800, 'eth_exchange_Bitfinex_volumetotal_d'),
    (1469577600, 'eth_exchange_Kraken_volumeto_d'),
    (1515801600, 'eth_exchange_Kraken_volumeto_d'),
    (1515801600, 'eth_exchange_Kraken_volumefrom_d'),
    (1515801600, 'eth_exchange_Kraken_volumetotal_d'),
    (1515715200, 'eth_exchange_Kraken_volumeto_d'),
    (1515715200, 'eth_exchange_Kraken_volumefrom_d'),
    (1515715200, 'eth_exchange_Kraken_volumetotal_d'),
    (1557532800, 'eth_exchange_Coinbase_volumeto_d'),
    (1678924800, 'eth_exchange_BTSE_volumeto_d'),
    (1678924800, 'eth_exchange_BTSE_volumefrom_d'),
    (1678924800, 'eth_exchange_BTSE_volumetotal_d'),
    (1647734400, 'eth_exchange_BTSE_volumefrom_d'),
    (1647734400, 'eth_exchange_BTSE_volumetotal_d'),
    (1501113600, 'eth_exchange_Binance_volumeto_d'),
    (1518134400, 'eth_exchange_Binance_volumeto_d'),
    (1518134400, 'eth_exchange_Binance_volumefrom_d'),
    (1518134400, 'eth_exchange_Binance_volumetotal_d'),
    (1518048000, 'eth_exchange_Binance_volumeto_d'),
    (1518048000, 'eth_exchange_Binance_volumefrom_d'),
    (1518048000, 'eth_exchange_Binance_volumetotal_d'),
    (1476921600, 'eth_new_addresses_d'),
    (1476230400, 'eth_new_addresses_d'),
    (1476921600, 'eth_active_addresses_d'),
    (1476230400, 'eth_active_addresses_d'),
    (1479945600, 'eth_active_addresses_d'),
    (1480550400, 'eth_active_addresses_d'),
]

for row_label, column_name in eth_outlier_cells:
    eth_differenced_data.loc[row_label, column_name] = float('nan')

### Limit timeframe to where all data is available

In [23]:
# Keep only the rows whose index label lies inside each asset's window; label-based .loc slices
# include both end points.
# NOTE(review): the labels look like Unix epoch seconds — confirm against the data source.
btc_first_label, btc_last_label = 1314662400, 1678752000
eth_first_label, eth_last_label = 1445472000, 1678838400

btc_differenced_data = btc_differenced_data.loc[slice(btc_first_label, btc_last_label)]
eth_differenced_data = eth_differenced_data.loc[slice(eth_first_label, eth_last_label)]

### Save to parquet

In [24]:
# Persist the transformed feature tables as gzip-compressed parquet files next to the notebook.
for stationary_frame, target_path in (
    (btc_differenced_data, 'btc_numeric_stationary_data.parquet.gzip'),
    (eth_differenced_data, 'eth_numeric_stationary_data.parquet.gzip'),
):
    stationary_frame.to_parquet(target_path, compression='gzip')