In [1]:
# Import libraries
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import sys


In [2]:
# Load the data
data_path = Path("../data/processed/fear_and_greed_history_5min.parquet")
assert data_path.exists(), f"File not found: {data_path}"

df = pl.read_parquet(data_path).drop_nulls()
df = df.with_columns([
    pl.col("open_actual_value").cast(pl.Float64),
    pl.col("close_bitcoin_price_usd").cast(pl.Float64)
])
df = df.filter(~pl.col("open_actual_value").is_null() & ~pl.col("close_bitcoin_price_usd").is_null())
df = df.sort("date")
df.head()


avg_actual_value,close_actual_value,close_bitcoin_price_usd,high_bitcoin_price_usd,interval_end_time,low_bitcoin_price_usd,open_actual_value,open_bitcoin_price_usd,date
f64,f64,f64,i64,str,i64,f64,i64,date
55.71,55.72,103319.0,103319,"""2025-06-20T23:55:00Z""",103295,55.72,103302,2025-06-20
55.73,55.74,103302.0,103302,"""2025-06-20T23:50:00Z""",103284,55.74,103284,2025-06-20
55.75,55.76,103287.0,103287,"""2025-06-20T23:45:00Z""",103272,55.76,103272,2025-06-20
55.73,55.74,103270.0,103276,"""2025-06-20T23:40:00Z""",103243,55.74,103243,2025-06-20
55.73,55.75,103234.0,103234,"""2025-06-20T23:35:00Z""",103196,55.75,103196,2025-06-20


# Fear &amp; Greed Index vs BTC Price Analysis

This notebook loads the processed dataset, fits a linear regression model (using the same logic as the training script), and visualizes the relationship between the fear &amp; greed index, BTC price, and the model's predicted price.

## Fit Linear Regression Model (reuse training logic)

We use the same features as in the training script to fit a model and generate predictions.

In [3]:
# Add src directory to sys.path for train import
src_path = Path().resolve().parent / "python_train" / "src"
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

from train import train_model

# Fit model using the reusable function from train.py
feature_cols = [
    "open_actual_value",
    "open_bitcoin_price_usd",
    "high_bitcoin_price_usd",
    "low_bitcoin_price_usd",
    "avg_actual_value",
]

model = train_model(df)
X = df.select(feature_cols).to_numpy()
df = df.with_columns([
    pl.Series("predicted_price", model.predict(X))
])
df.head()


avg_actual_value,close_actual_value,close_bitcoin_price_usd,high_bitcoin_price_usd,interval_end_time,low_bitcoin_price_usd,open_actual_value,open_bitcoin_price_usd,date,predicted_price
f64,f64,f64,i64,str,i64,f64,i64,date,f64
55.71,55.72,103319.0,103319,"""2025-06-20T23:55:00Z""",103295,55.72,103302,2025-06-20,103312.139325
55.73,55.74,103302.0,103302,"""2025-06-20T23:50:00Z""",103284,55.74,103284,2025-06-20,103301.585732
55.75,55.76,103287.0,103287,"""2025-06-20T23:45:00Z""",103272,55.76,103272,2025-06-20,103286.572535
55.73,55.74,103270.0,103276,"""2025-06-20T23:40:00Z""",103243,55.74,103243,2025-06-20,103275.792595
55.73,55.75,103234.0,103234,"""2025-06-20T23:35:00Z""",103196,55.75,103196,2025-06-20,103234.366962


## Visualize: Fear &amp; Greed Index vs BTC Price vs Predicted Price

In [4]:
plt.figure(figsize=(14, 6))
plt.plot(df["timestamp"].to_list(), df["close_bitcoin_price_usd"].to_list(), label="BTC Close Price", color="blue")
plt.plot(df["timestamp"].to_list(), df["predicted_price"].to_list(), label="Predicted Price", color="orange", linestyle="--")
plt.plot(df["timestamp"].to_list(), df["open_actual_value"].to_list(), label="Fear & Greed Index (Open)", color="green", alpha=0.5)
plt.legend()
plt.title("BTC Price, Predicted Price, and Fear & Greed Index Over Time")
plt.xlabel("Timestamp")
plt.ylabel("Value")
plt.tight_layout()
plt.show()


ColumnNotFoundError: "timestamp" not found

<Figure size 1400x600 with 0 Axes>

## Correlation Analysis

Let's check the correlation between the fear &amp; greed index and BTC price.

In [None]:
cor_fg_btc = np.corrcoef(df["open_actual_value"].to_numpy(), df["close_bitcoin_price_usd"].to_numpy())[0, 1]
cor_pred_btc = np.corrcoef(df["predicted_price"].to_numpy(), df["close_bitcoin_price_usd"].to_numpy())[0, 1]
print(f"Correlation (Fear & Greed, BTC Close): {cor_fg_btc:.3f}")
print(f"Correlation (Predicted, BTC Close): {cor_pred_btc:.3f}")
