In [8]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Load the log-transformed Bitcoin metrics. The merge below relies on every file
# providing a 'datetime' column and a 'log_value' column.
daa_df = pd.read_csv("data/daa/btc_daa_log_transformed.csv", parse_dates=['datetime'])
tv_df = pd.read_csv("data/tv/btc_tv_log_transformed.csv", parse_dates=['datetime'])
vel_df = pd.read_csv("data/vel/btc_vel_log_transformed.csv", parse_dates=['datetime'])
price_df = pd.read_csv("data/price/btc_price_log_transformed.csv", parse_dates=['datetime'])

# Merge all datasets into a single DataFrame on the 'datetime' column.
#
# Each 'log_value' column is given its final, readable name *before* merging
# instead of being disambiguated through merge suffixes and renamed afterwards:
#   * suffixes only apply to columns that clash in that particular merge, so a
#     chain of suffixes is easy to get subtly wrong;
#   * errors='raise' makes a missing 'log_value' column fail right here with a
#     clear KeyError, rather than being skipped silently by rename().
# rename() returns a new frame, so daa_df / tv_df / vel_df / price_df keep
# their original column names. The suffixes that remain only label any *other*
# same-named columns the files may share.
# merge() defaults to an inner join: only timestamps present in all four files
# are kept.
df = (
    daa_df.rename(columns={'log_value': 'Log(DAA)'}, errors='raise')
    .merge(
        tv_df.rename(columns={'log_value': 'Log(TV)'}, errors='raise'),
        on='datetime',
        suffixes=('_daa', '_tv'),
    )
    .merge(
        vel_df.rename(columns={'log_value': 'Log(VEL)'}, errors='raise'),
        on='datetime',
        suffixes=('', '_vel'),
    )
    .merge(
        # Only the log-transformed price is needed from the price file.
        price_df[['datetime', 'log_value']].rename(
            columns={'log_value': 'Log(Price)'}, errors='raise'
        ),
        on='datetime',
    )
)

# List of columns to perform the ADF test
columns_to_test = ['Log(DAA)', 'Log(TV)', 'Log(VEL)', 'Log(Price)']

# Function to interpret the ADF test
def interpret_adf_results(column_name, adf_result, alpha=0.05):
    """Print an Augmented Dickey-Fuller report and a stationarity verdict.

    Parameters
    ----------
    column_name : str
        Label of the tested series; used only in the printed text.
    adf_result : sequence
        Result of the ADF test. Only three positions are read:
        ``[0]`` the test statistic, ``[1]`` the p-value and ``[4]`` a mapping
        of significance label -> critical value (the layout returned by
        ``statsmodels.tsa.stattools.adfuller`` with default arguments).
    alpha : float, default 0.05
        Significance level for the verdict. H0 (the series has a unit root)
        is rejected when the p-value is strictly below ``alpha``.

    Returns
    -------
    None
        The report is written to stdout.

    Raises
    ------
    ValueError
        If ``alpha`` is not strictly between 0 and 1.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")

    adf_stat = adf_result[0]
    p_value = adf_result[1]
    critical_values = adf_result[4]
    # Human-readable level for the verdict line, e.g. 0.05 -> "5%", 0.025 -> "2.5%".
    level = f"{alpha * 100:g}%"

    print(f"Results for {column_name}:")
    print(f"ADF Statistic: {adf_stat}")
    print(f"p-value: {p_value}")
    print("Critical Values:")
    for key, value in critical_values.items():
        print(f"{key}: {value}")

    # Interpretation: a p-value below alpha rejects the unit-root null hypothesis.
    if p_value < alpha:
        print(f"The series '{column_name}' is stationary (Reject H0 at {level} significance level).")
    else:
        print(f"The series '{column_name}' is non-stationary (Fail to reject H0 at {level} significance level).")
    # Blank lines separate consecutive reports.
    print("\n")

# Run the Augmented Dickey-Fuller test on every column listed in
# columns_to_test and print one report per series. The loop runs at notebook
# top level, so `column`, `series` and `adf_result` still hold the values for
# the last tested column once the cell finishes.
print("ADF Test Results for Bitcoin Metrics:\n")
for column in columns_to_test:
    series = df[column].dropna()  # Drop missing values before the series is passed to adfuller
    adf_result = adfuller(series)  # ADF test with adfuller's default arguments
    interpret_adf_results(column, adf_result)

ADF Test Results for Bitcoin Metrics:

Results for Log(DAA):
ADF Statistic: -3.42579840412938
p-value: 0.01011022200753273
Critical Values:
1%: -3.4349056408696814
5%: -2.863552005375758
10%: -2.5678411776130114
The series 'Log(DAA)' is stationary (Reject H0 at 5% significance level).


Results for Log(TV):
ADF Statistic: -2.6713256742567615
p-value: 0.07913296525561686
Critical Values:
1%: -3.4349024693573584
5%: -2.8635506057382325
10%: -2.5678404322793846
The series 'Log(TV)' is non-stationary (Fail to reject H0 at 5% significance level).


Results for Log(VEL):
ADF Statistic: -3.085872043097689
p-value: 0.027616541356816755
Critical Values:
1%: -3.434880391815318
5%: -2.8635408625359315
10%: -2.5678352438452814
The series 'Log(VEL)' is stationary (Reject H0 at 5% significance level).


Results for Log(Price):
ADF Statistic: -1.7196386133024275
p-value: 0.4210235316297345
Critical Values:
1%: -3.4348647527922824
5%: -2.863533960720434
10%: -2.567831568508802
The series 'Log(Price)' 