# Causality Analysis of Crypto Data

## Goal:
* Determine causal relationships between BTC and other major crypto currencies
* Analyze the stability of these relationships over time

In [1]:
import glob
import os
# Import causality analysis tools
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import seaborn as sns

# Add the project root to the python path
try:
    # This works when the script is run directly
    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
except NameError:
    # This works when run in an interactive environment like Jupyter
    # Assumes the notebook is in notebooks/python or notebooks/jupyter
    project_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))

if project_root not in sys.path:
    sys.path.insert(0, project_root)

from src.analysis.granger_causality import GrangerCausalityAnalyzer
from src.analysis.time_varying_granger import (plot_tvgc_results,
                                               rolling_granger_causality,
                                               summarize_tvgc_results)

# Set style for better visualizations
sns.set_theme()
sns.set_palette("husl")
plt.rcParams["figure.figsize"] = [12, 6]

## Data Loading
First, let's load our data files containing the cryptocurrency data.

In [2]:
def load_all_crypto_data(data_dir=os.path.join(project_root, "data", "processed")):
    all_data = {}
    for file in glob.glob(os.path.join(data_dir, "*.parquet")):
        symbol = os.path.basename(file).split("_")[0]
        # Get symbol from filename
        print(f"Loading {file}...")
        df = pq.read_table(file).to_pandas()
        all_data[symbol] = df
    return all_data

 Additional Analysis: Summary Statistics

In [3]:
# Load the data

crypto_data = load_all_crypto_data()

Loading /home/nfiroo/dev/crypto/crypto_causality/data/processed/SOLUSDT_1m_2024-01-01_2024-02-01.parquet...
Loading /home/nfiroo/dev/crypto/crypto_causality/data/processed/ADAUSDT_1m_2024-01-01_2024-02-01.parquet...
Loading /home/nfiroo/dev/crypto/crypto_causality/data/processed/BTCUSDT_1m_2024-01-01_2024-02-01.parquet...
Loading /home/nfiroo/dev/crypto/crypto_causality/data/processed/DOGEUSDT_1m_2024-01-01_2024-02-01.parquet...
Loading /home/nfiroo/dev/crypto/crypto_causality/data/processed/XRPUSDT_1m_2024-01-01_2024-02-01.parquet...
Loading /home/nfiroo/dev/crypto/crypto_causality/data/processed/BNBUSDT_1m_2024-01-01_2024-02-01.parquet...
Loading /home/nfiroo/dev/crypto/crypto_causality/data/processed/ETHUSDT_1m_2024-01-01_2024-02-01.parquet...


In [4]:
# Calculate returns for each crypto
returns_data = {}
for symbol, df in crypto_data.items():
    returns = pd.DataFrame()
    returns["timestamp"] = df["timestamp"]
    returns["returns"] = np.log(df["close"].astype(float)).diff()
    returns_data[symbol] = returns

In [5]:
# Create a combined returns dataframe
combined_returns = pd.DataFrame()
for symbol, returns in returns_data.items():
    combined_returns[symbol] = returns["returns"]
combined_returns.index = list(returns_data.values())[0]["timestamp"]

## Pairwise Granger Causality
We will now perform pairwise Granger causality tests to identify potential causal relationships between the cryptocurrencies.

In [6]:
# Initialize the analyzer
analyzer = GrangerCausalityAnalyzer(combined_returns, max_lags=10)

# Run pairwise causality analysis
pairwise_results = analyzer.run_pairwise_causality()

# Print summary of significant results
print("Pairwise Granger Causality Results (Significant at 5%):")
print(pairwise_results[pairwise_results["significant"]])



Pairwise Granger Causality Results (Significant at 5%):
       cause    effect    min_p_value  optimal_lag  significant
0    SOLUSDT   ADAUSDT   2.020423e-19            5         True
1    ADAUSDT   SOLUSDT   2.606874e-76            8         True
2    SOLUSDT   BTCUSDT   2.982025e-09           10         True
3    BTCUSDT   SOLUSDT   1.550221e-26           10         True
4    SOLUSDT  DOGEUSDT   3.778410e-36           10         True
5   DOGEUSDT   SOLUSDT  1.278802e-193            8         True
6    SOLUSDT   XRPUSDT   7.221960e-63            8         True
7    XRPUSDT   SOLUSDT  1.567474e-222            9         True
8    SOLUSDT   BNBUSDT   1.929234e-16           10         True
9    BNBUSDT   SOLUSDT   2.520596e-77            8         True
10   SOLUSDT   ETHUSDT   1.755186e-10           10         True
11   ETHUSDT   SOLUSDT   1.558438e-42            6         True
12   ADAUSDT   BTCUSDT   4.481836e-17           10         True
13   BTCUSDT   ADAUSDT   2.243102e-18           

In [7]:
# Get summary statistics
summary_stats = analyzer.get_summary_statistics()
print("\nCausality Summary Statistics:")


Causality Summary Statistics:


## Multivariate Granger Causality
Now, let's perform a multivariate Granger causality test to see which variables Granger-cause Bitcoin (BTC) in a multivariate context.

In [8]:
# Run multivariate causality analysis for BTC
target_crypto = "BTCUSDT"
test_stats, coef_pvals, optimal_lag = analyzer.run_multivariate_causality(
    target=target_crypto
)

print(
    f"\nMultivariate Granger Causality for {target_crypto} (Optimal Lag: {optimal_lag}):"
)

# Create a DataFrame for the results
multivariate_results = pd.DataFrame({"P-Value": test_stats})


Multivariate Granger Causality for BTCUSDT (Optimal Lag: 10):


## Time-Varying Granger Causality
Finally, let's analyze the time-varying Granger causality between Bitcoin (BTC) and Ethereum (ETH) to see if the relationship is stable over time.

In [None]:
# Define the pair to test
pair_to_test = ("BTCUSDT", "SOLUSDT")

# Run time-varying Granger causality analysis
tvgc_results = rolling_granger_causality(
    y=combined_returns[pair_to_test[1]],
    x=combined_returns[pair_to_test[0]],
    window_size=500,
)

# Plot the results
plot_tvgc_results({f"{pair_to_test[0]} -> {pair_to_test[1]}": tvgc_results})

# Summarize the results
summary = summarize_tvgc_results(
    {f"{pair_to_test[0]} -> {pair_to_test[1]}": tvgc_results}
)
print("\nTime-Varying Granger Causality Summary:")
print(summary)