In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [1]:
pip install nbformat ipython jupyterlab

Collecting nbformat
  Using cached nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)
Collecting jupyterlab
  Using cached jupyterlab-4.5.1-py3-none-any.whl.metadata (16 kB)
Collecting fastjsonschema>=2.15 (from nbformat)
  Using cached fastjsonschema-2.21.2-py3-none-any.whl.metadata (2.3 kB)
Collecting jsonschema>=2.6 (from nbformat)
  Using cached jsonschema-4.25.1-py3-none-any.whl.metadata (7.6 kB)
Collecting async-lru>=1.0.0 (from jupyterlab)
  Using cached async_lru-2.0.5-py3-none-any.whl.metadata (4.5 kB)
Collecting httpx<1,>=0.25.0 (from jupyterlab)
  Using cached httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab)
  Using cached jupyter_lsp-2.3.0-py3-none-any.whl.metadata (1.8 kB)
Collecting jupyter-server<3,>=2.4.0 (from jupyterlab)
  Using cached jupyter_server-2.17.0-py3-none-any.whl.metadata (8.5 kB)
Collecting jupyterlab-server<3,>=2.28.0 (from jupyterlab)
  Using cached jupyterlab_server-2.28.0-py3-none-any.whl.metadata (5.9 kB)

In [3]:
btc_price = pd.read_csv(
    "../data/raw/btc_prices.csv",
    parse_dates = ["date"]
)

nifty_price = pd.read_csv(
    "../data/raw/nifty_prices.csv",
    parse_dates = ["date"]
)

sent = pd.read_csv(
    "../data/processed/text_with_sentiment.csv",
    parse_dates = ["timestamp"]
)

In [4]:
btc_price["date"] = btc_price["date"].dt.tz_localize(None)
nifty_price["date"] = nifty_price["date"].dt.tz_localize(None)
sent["date"] = sent["timestamp"].dt.tz_localize(None).dt.to_period("M").dt.to_timestamp()

In [5]:
btc_price = pd.read_csv(
    "../data/raw/btc_prices.csv",
    parse_dates=["date"]
)

nifty_price = pd.read_csv(
    "../data/raw/nifty_prices.csv",
    parse_dates=["date"]
)

btc_price["date"] = pd.to_datetime(btc_price["date"]).dt.tz_localize(None)
nifty_price["date"] = pd.to_datetime(nifty_price["date"]).dt.tz_localize(None)

btc_price.head(), nifty_price.head()

(        date        open        high         low       close    volume  \
 0 2015-01-02  314.079010  315.838989  313.565002  315.032013   7860650   
 1 2015-01-03  314.846008  315.149994  281.082001  281.082001  33054400   
 2 2015-01-04  281.145996  287.230011  257.612000  264.195007  55629100   
 3 2015-01-05  265.084015  278.341003  265.084015  274.473999  43962800   
 4 2015-01-06  274.610992  287.553009  272.696014  286.188995  23245700   
 
      return  
 0  0.002492  
 1 -0.107767  
 2 -0.060079  
 3  0.038907  
 4  0.042682  ,
         date         open         high          low        close  volume  \
 0 2010-01-05  5277.149902  5288.350098  5242.399902  5277.899902       0   
 1 2010-01-06  5278.149902  5310.850098  5260.049805  5281.799805       0   
 2 2010-01-07  5281.799805  5302.549805  5244.750000  5263.100098       0   
 3 2010-01-08  5264.250000  5276.750000  5234.700195  5244.750000       0   
 4 2010-01-11  5263.799805  5287.200195  5227.799805  5249.399902       

In [6]:
btc_monthly_price = (
    btc_price
    .set_index("date")
    .resample("ME")
    .agg(close_price=("close", "last"))
    .reset_index()
)

nifty_monthly_price = (
    nifty_price
    .set_index("date")
    .resample("ME")
    .agg(close_price=("close", "last"))
    .reset_index()
)

In [7]:
btc_monthly_sent = (
    sent[sent["asset"] == "BTC"]
    .set_index("date")
    .resample("ME")
    .agg(sentiment=("finbert_score", "mean"))
    .reset_index()
)

nifty_monthly_sent = (
    sent[sent["asset"] == "NIFTY"]
    .set_index("date")
    .resample("ME")
    .agg(sentiment=("finbert_score", "mean"))
    .reset_index()
)

In [8]:
btc_plot_df = pd.merge(
    btc_monthly_price,
    btc_monthly_sent,
    on="date",
    how="inner"
)

nifty_plot_df = pd.merge(
    nifty_monthly_price,
    nifty_monthly_sent,
    on="date",
    how="inner"
)

btc_plot_df.head()

Unnamed: 0,date,close_price,sentiment
0,2024-10-31,70215.1875,0.56058
1,2024-11-30,96449.054688,0.28603
2,2024-12-31,93429.203125,0.310285
3,2025-01-31,102405.023438,0.144828


In [9]:
from sklearn.preprocessing import MinMaxScaler

def scale_series(series):
    scaler = MinMaxScaler()
    return scaler.fit_transform(series.values.reshape(-1, 1)).flatten()

In [25]:
import plotly.graph_objects as go

import plotly.graph_objects as go

def plot_price_sentiment_daily(df, title):
    fig = go.Figure()

    # PRICE (left axis)
    fig.add_trace(go.Scatter(
        x=df["date"],
        y=df["price_smooth"],
        name="Price",
        line=dict(color="#2ecc71", width=2),
        fill="tozeroy",
        yaxis="y1"
    ))

    # SENTIMENT (right axis)
    fig.add_trace(go.Scatter(
        x=df["date"],
        y=df["sentiment_smooth"],
        name="Sentiment",
        line=dict(color="#f1c40f", width=2, dash="dash"),
        yaxis="y2"
    ))

    fig.update_layout(
        title=title,
        template="plotly_white",
        hovermode="x unified",
        xaxis=dict(title="Date"),
        yaxis=dict(
            title="Price",
            side="left",
            showgrid=False
        ),
        yaxis2=dict(
            title="Sentiment",
            overlaying="y",
            side="right",
            showgrid=False
        ),
        legend=dict(
            orientation="h",
            y=1.1,
            x=0.5,
            xanchor="center"
        )
    )

    fig.show()


In [26]:
btc_plot_df = btc_plot_df.sort_values("date").copy()

btc_plot_df["price_smooth"] = (
    btc_plot_df["close_price"]
    .rolling(window=7, min_periods=1)
    .mean()
)

btc_plot_df["sentiment_smooth"] = (
    btc_plot_df["sentiment"]
    .rolling(window=7, min_periods=1)
    .mean()
)

In [27]:
plot_price_sentiment_daily(
    btc_plot_df,
    title="Bitcoin — Price vs Public Sentiment",
)

In [19]:
plot_daily_price_sentiment(
    nifty_plot_df,
    title="NIFTY 50 — Price vs Public Sentiment",
)

In [13]:
btc_corr = btc_plot_df[["price_scaled", "sentiment_scaled"]].corr()
nifty_corr = nifty_plot_df[["price_scaled", "sentiment_scaled"]].corr()

btc_corr, nifty_corr
#END

KeyError: "None of [Index(['price_scaled', 'sentiment_scaled'], dtype='object')] are in the [columns]"

In [None]:
def pre_event_stats(df, lag):
    events = df[df["stress"] == 1]
    rows = []

    for _, row in events.iterrows():
        d = row["date"]
        prev = df[df["date"] == d - pd.Timedelta(days=lag)]
        if not prev.empty:
            rows.append(prev.iloc[0])

    return pd.DataFrame(rows)

In [None]:
btc_t1 = pre_event_stats(btc_me, 1)
btc_t3 = pre_event_stats(btc_me, 3)
btc_t7 = pre_event_stats(btc_me, 7)

In [None]:
btc_normal = btc_m[btc_m["stress"] == 0].sample(len(btc_t3), random_state=42)

In [None]:
comparison = pd.DataFrame({
    "Pre-Stress (t-3)": btc_t3["sent_surprise"].mean(),
    "Normal Days": btc_normal["sent_surprise"].mean()
}, index=["Sentiment Surprise"])
comparison

In [None]:
plt.figure(figsize=(10,4))
plt.hist(btc_t3["sent_surprise"], bins=30, alpha=0.6, label="Before Stress")
plt.hist(btc_normal["sent_surprise"], bins=30, alpha=0.6, label="Normal")
plt.legend()
plt.title("BTC — Sentiment Surprise Before Stress Events")
plt.show()