In [None]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller

In [None]:
import numpy as np
from scipy.stats import ks_2samp
from statsmodels.tsa.stattools import acf, q_stat

#### ADF Test for Stationarity

In [5]:
# Augmented Dickey-Fuller stationarity report for a single column
def adf_test(series, column_name):
    """Run the Augmented Dickey-Fuller test on ``series`` and print a report.

    Missing values are dropped before the test is run. The printed report
    contains the test statistic, the p-value, every critical value returned
    by ``adfuller``, and a conclusion: a p-value of at most 0.05 is reported
    as stationary, anything else as non-stationary.

    Parameters
    ----------
    series : object with a ``dropna()`` method (e.g. a pandas Series)
        Observations to test.
    column_name : str
        Label used in the report header.

    Returns
    -------
    None
        The result is only printed.
    """
    outcome = adfuller(series.dropna())
    statistic, p_value = outcome[0], outcome[1]
    critical_values = outcome[4]

    report = [
        f"ADF Test for {column_name}:",
        f"ADF Statistic: {statistic}",
        f"p-value: {p_value}",
        "Critical Values:",
    ]
    report.extend(
        f"   {level}: {threshold}" for level, threshold in critical_values.items()
    )

    # Keep the `<=` form: a NaN p-value must fall through to "Non-stationary".
    is_stationary = p_value <= 0.05
    report.append(
        "Conclusion: Stationary (Reject H0)\n"
        if is_stationary
        else "Conclusion: Non-stationary (Fail to Reject H0)\n"
    )
    print("\n".join(report))

In [14]:
# Load ../data/snp_gpt_overall.csv and run the ADF report on each sentiment column.
snp_sentiment_score = "../data/snp_gpt_overall.csv"
df_snp = pd.read_csv(snp_sentiment_score)

snp_sentiment_columns = [
    "pos_sentiment",
    "neg_sentiment",
    "neutral_sentiment",
]

for col in snp_sentiment_columns:
    adf_test(series=df_snp[col], column_name=col)

ADF Test for pos_sentiment:
ADF Statistic: -16.900553910755857
p-value: 1.0284963196761243e-29
Critical Values:
   1%: -3.430894015362794
   5%: -2.8617804268847506
   10%: -2.5668979741857716
Conclusion: Stationary (Reject H0)

ADF Test for neg_sentiment:
ADF Statistic: -19.095872514042448
p-value: 0.0
Critical Values:
   1%: -3.4308938796240076
   5%: -2.8617803669006627
   10%: -2.5668979422565483
Conclusion: Stationary (Reject H0)

ADF Test for neutral_sentiment:
ADF Statistic: -17.254928702116622
p-value: 5.991101260474892e-30
Critical Values:
   1%: -3.4308938796240076
   5%: -2.8617803669006627
   10%: -2.5668979422565483
Conclusion: Stationary (Reject H0)



In [15]:
# Load ../data/union_sentiment.csv and run the ADF report on each sentiment column.
tesla_sentiment_score = "../data/union_sentiment.csv"
df_tesla = pd.read_csv(tesla_sentiment_score)

tesla_sentiment_columns = [
    "mean_neg_preamble_sentiment",
    "mean_pos_preamble_sentiment",
    "mean_neutral_preamble_sentiment",
]

for col in tesla_sentiment_columns:
    adf_test(series=df_tesla[col], column_name=col)

ADF Test for mean_neg_preamble_sentiment:
ADF Statistic: -7.128487376125445
p-value: 3.568433012879894e-10
Critical Values:
   1%: -3.4331536417276274
   5%: -2.8627785955546137
   10%: -2.5674293588855925
Conclusion: Stationary (Reject H0)

ADF Test for mean_pos_preamble_sentiment:
ADF Statistic: -7.059259647266067
p-value: 5.275614087638958e-10
Critical Values:
   1%: -3.4331524402158027
   5%: -2.862778064998932
   10%: -2.567429076405341
Conclusion: Stationary (Reject H0)

ADF Test for mean_neutral_preamble_sentiment:
ADF Statistic: -7.258340175794562
p-value: 1.7073535587539137e-10
Critical Values:
   1%: -3.4331512397333626
   5%: -2.8627775348975866
   10%: -2.567428794167024
Conclusion: Stationary (Reject H0)



#### Tests for IID (independent and identically distributed)

In [16]:
# Function to check identical distribution using KS test
def ks_test_identical_distribution(series, column_name):
    """Compare the first and second half of ``series`` with a two-sample KS test.

    Missing values are dropped first, so the halves are cut from the
    observations that are actually present and a NaN can never turn the
    p-value into NaN (which would otherwise fall into the "may not be
    identically distributed" branch without any real evidence). The halves
    are taken by position, with the second half receiving the extra
    observation when the count is odd.

    Parameters
    ----------
    series : pandas.Series or 1-D array-like
        Observations in their original order.
    column_name : str
        Label used in the report header.

    Returns
    -------
    None
        The statistic, p-value and a conclusion at the 0.05 level are printed.

    Raises
    ------
    ValueError
        If fewer than two non-missing observations are available, because
        the first half would then be empty.
    """
    # pd.Series(...) lets plain lists/arrays through while giving us dropna/iloc.
    observed = pd.Series(series).dropna()
    half_n = len(observed) // 2  # Split data into two halves
    if half_n == 0:
        raise ValueError(
            f"Need at least 2 non-missing observations in {column_name} "
            f"to run the KS test, got {len(observed)}."
        )

    # .iloc makes the split explicitly positional, whatever the index looks like.
    sample1, sample2 = observed.iloc[:half_n], observed.iloc[half_n:]
    ks_stat, p_value = ks_2samp(sample1, sample2)

    print(f"KS Test for identical distribution in {column_name}:")
    print(f"KS Statistic: {ks_stat}")
    print(f"p-value: {p_value}")
    if p_value > 0.05:
        print("Conclusion: Cannot reject identical distribution assumption.\n")
    else:
        print("Conclusion: Data may not be identically distributed.\n")

# Function to check independence using autocorrelation and Ljung-Box test
def independence_test(series, column_name, lags=10):
    """Print a Ljung-Box independence report for ``series``.

    Missing values are dropped, the autocorrelations for lags 1..``lags``
    are computed, and ``q_stat`` turns them into Ljung-Box Q statistics.
    Only the statistic and p-value at the largest lag are reported.

    Parameters
    ----------
    series : object with a ``dropna()`` method (e.g. a pandas Series)
        Observations in their original order.
    column_name : str
        Label used in the report header.
    lags : int, default 10
        Number of autocorrelation lags included in the test.

    Returns
    -------
    None
        The statistic, p-value and a conclusion at the 0.05 level are printed.
    """
    observed = series.dropna()
    acf_values = acf(observed, nlags=lags)
    # nobs must be the number of observations the ACF was estimated from.
    # Counting the dropped NaNs as well would inflate Q and shrink the p-value.
    lb_stat, lb_p_value = q_stat(acf_values[1:], nobs=len(observed))

    print(f"Ljung-Box Test for independence in {column_name}:")
    print(f"Test Statistic: {lb_stat[-1]}")
    print(f"p-value: {lb_p_value[-1]}")
    if lb_p_value[-1] > 0.05:
        print("Conclusion: Cannot reject independence assumption.\n")
    else:
        print("Conclusion: Data is likely dependent.\n")

In [17]:
# For every column in snp_sentiment_columns: print a separator, then the
# split-half KS report followed by the Ljung-Box report.
for col in snp_sentiment_columns:
    print("=" * 50)
    ks_test_identical_distribution(series=df_snp[col], column_name=col)
    independence_test(series=df_snp[col], column_name=col)

KS Test for identical distribution in pos_sentiment:
KS Statistic: 0.025892116182572613
p-value: 0.03521988246928709
Conclusion: Data may not be identically distributed.

Ljung-Box Test for independence in pos_sentiment:
Test Statistic: 46.638073547424426
p-value: 1.0987000782564873e-06
Conclusion: Data is likely dependent.

KS Test for identical distribution in neg_sentiment:
KS Statistic: 0.0595850622406639
p-value: 1.0145835124734815e-09
Conclusion: Data may not be identically distributed.





Ljung-Box Test for independence in neg_sentiment:
Test Statistic: 65.16542919610323
p-value: 3.7684040059789875e-10
Conclusion: Data is likely dependent.

KS Test for identical distribution in neutral_sentiment:
KS Statistic: 0.06456431535269709
p-value: 2.436773108659317e-11
Conclusion: Data may not be identically distributed.

Ljung-Box Test for independence in neutral_sentiment:
Test Statistic: 130.26701572588493
p-value: 4.12059112903116e-23
Conclusion: Data is likely dependent.



In [None]:
# For every column in tesla_sentiment_columns: print a separator, then the
# split-half KS report followed by the Ljung-Box report.
for col in tesla_sentiment_columns:
    print("=" * 50)
    ks_test_identical_distribution(series=df_tesla[col], column_name=col)
    independence_test(series=df_tesla[col], column_name=col)

KS Test for identical distribution in mean_neg_preamble_sentiment:
KS Statistic: 0.2757213093543796
p-value: 3.884699155055812e-40
Conclusion: Data may not be identically distributed.

Ljung-Box Test for independence in mean_neg_preamble_sentiment:
Test Statistic: 39.62574305241431
p-value: 1.9718355013306428e-05
Conclusion: Data is likely dependent.

KS Test for identical distribution in mean_pos_preamble_sentiment:
KS Statistic: 0.24793418726010263
p-value: 1.9634519254659202e-32
Conclusion: Data may not be identically distributed.

Ljung-Box Test for independence in mean_pos_preamble_sentiment:
Test Statistic: 23.577504532620313
p-value: 0.008804368203267748
Conclusion: Data is likely dependent.

KS Test for identical distribution in mean_neutral_preamble_sentiment:
KS Statistic: 0.22479622202096003
p-value: 1.120301496275049e-26
Conclusion: Data may not be identically distributed.

Ljung-Box Test for independence in mean_neutral_preamble_sentiment:
Test Statistic: 69.34591013873639

