In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [5]:
btc_price = pd.read_csv(
    "../data/raw/btc_prices.csv",
    parse_dates = ["date"]
)

nifty_price = pd.read_csv(
    "../data/raw/nifty_prices.csv",
    parse_dates = ["date"]
)

sent = pd.read_csv(
    "../data/processed/text_with_sentiment.csv",
    parse_dates = ["timestamp"]
)

In [7]:
btc_price["date"] = btc_price["date"].dt.tz_localize(None)
nifty_price["date"] = nifty_price["date"].dt.tz_localize(None)
sent["date"] = sent["timestamp"].dt.tz_localize(None).dt.date
sent["date"] = pd.to_datetime(sent["date"])

In [8]:
btc_price = pd.read_csv(
    "../data/raw/btc_prices.csv",
    parse_dates=["date"]
)

nifty_price = pd.read_csv(
    "../data/raw/nifty_prices.csv",
    parse_dates=["date"]
)

btc_price["date"] = pd.to_datetime(btc_price["date"]).dt.tz_localize(None)
nifty_price["date"] = pd.to_datetime(nifty_price["date"]).dt.tz_localize(None)

btc_price.head(), nifty_price.head()

(        date        open        high         low       close    volume  \
 0 2015-01-02  314.079010  315.838989  313.565002  315.032013   7860650   
 1 2015-01-03  314.846008  315.149994  281.082001  281.082001  33054400   
 2 2015-01-04  281.145996  287.230011  257.612000  264.195007  55629100   
 3 2015-01-05  265.084015  278.341003  265.084015  274.473999  43962800   
 4 2015-01-06  274.610992  287.553009  272.696014  286.188995  23245700   
 
      return  
 0  0.002492  
 1 -0.107767  
 2 -0.060079  
 3  0.038907  
 4  0.042682  ,
         date         open         high          low        close  volume  \
 0 2010-01-05  5277.149902  5288.350098  5242.399902  5277.899902       0   
 1 2010-01-06  5278.149902  5310.850098  5260.049805  5281.799805       0   
 2 2010-01-07  5281.799805  5302.549805  5244.750000  5263.100098       0   
 3 2010-01-08  5264.250000  5276.750000  5234.700195  5244.750000       0   
 4 2010-01-11  5263.799805  5287.200195  5227.799805  5249.399902       

In [6]:
btc_monthly_price = (
    btc_price
    .set_index("date")
    .resample("ME")
    .agg(close_price=("close", "last"))
    .reset_index()
)

nifty_monthly_price = (
    nifty_price
    .set_index("date")
    .resample("ME")
    .agg(close_price=("close", "last"))
    .reset_index()
)

In [9]:
btc_daily_sent = (
    sent[sent["asset"] == "BTC"]
    .groupby("date", as_index=False)
    .agg(sentiment=("finbert_score", "mean"))
)

nifty_daily_sent = (
    sent[sent["asset"] == "NIFTY"]
    .groupby("date", as_index=False)
    .agg(sentiment=("finbert_score", "mean"))
)

In [10]:
btc_plot_df = pd.merge(
    btc_price[["date", "close"]],
    btc_daily_sent,
    on="date",
    how="inner"
).rename(columns={"close": "close_price"})

nifty_plot_df = pd.merge(
    nifty_price[["date", "close"]],
    nifty_daily_sent,
    on="date",
    how="inner"
).rename(columns={"close": "close_price"})

In [11]:
for df in [btc_plot_df, nifty_plot_df]:
    df.sort_values("date", inplace=True)

    df["price_smooth"] = df["close_price"].rolling(7, min_periods=1).mean()
    df["sentiment_smooth"] = df["sentiment"].rolling(7, min_periods=1).mean()

In [12]:
from sklearn.preprocessing import MinMaxScaler

def scale_series(series):
    scaler = MinMaxScaler()
    return scaler.fit_transform(series.values.reshape(-1, 1)).flatten()

In [13]:
import plotly.graph_objects as go

import plotly.graph_objects as go

def plot_price_sentiment_daily(df, title):
    fig = go.Figure()

    # PRICE (left axis)
    fig.add_trace(go.Scatter(
        x=df["date"],
        y=df["price_smooth"],
        name="Price",
        line=dict(color="#2ecc71", width=2),
        fill="tozeroy",
        yaxis="y1"
    ))

    # SENTIMENT (right axis)
    fig.add_trace(go.Scatter(
        x=df["date"],
        y=df["sentiment_smooth"],
        name="Sentiment",
        line=dict(color="#f1c40f", width=2, dash="dash"),
        yaxis="y2"
    ))

    fig.update_layout(
        title=title,
        template="plotly_white",
        hovermode="x unified",
        xaxis=dict(title="Date"),
        yaxis=dict(
            title="Price",
            side="left",
            showgrid=False
        ),
        yaxis2=dict(
            title="Sentiment",
            overlaying="y",
            side="right",
            showgrid=False
        ),
        legend=dict(
            orientation="h",
            y=1.1,
            x=0.5,
            xanchor="center"
        )
    )

    fig.show()


In [14]:
btc_plot_df = btc_plot_df.sort_values("date").copy()

btc_plot_df["price_smooth"] = (
    btc_plot_df["close_price"]
    .rolling(window=7, min_periods=1)
    .mean()
)

btc_plot_df["sentiment_smooth"] = (
    btc_plot_df["sentiment"]
    .rolling(window=7, min_periods=1)
    .mean()
)

nifty_plot_df = nifty_plot_df.sort_values("date").copy()

nifty_plot_df["price_smooth"] = (
    nifty_plot_df["close_price"]
    .rolling(window=7, min_periods=1)
    .mean()
)

nifty_plot_df["sentiment_smooth"] = (
    nifty_plot_df["sentiment"]
    .rolling(window=7, min_periods=1)
    .mean()
)

btc_plot_df.head()
nifty_plot_df.head()

Unnamed: 0,date,close_price,sentiment,price_smooth,sentiment_smooth
0,2024-10-03,25250.099609,0.397322,25250.099609,0.397322
1,2024-10-04,25014.599609,0.277918,25132.349609,0.33762
2,2024-10-07,24795.75,0.143285,25020.14974,0.272841
3,2024-10-08,25013.150391,0.827009,25018.399902,0.411383
4,2024-10-09,24981.949219,0.385222,25011.109766,0.406151


In [15]:
print(
    btc_plot_df["date"].min(),
    btc_plot_df["date"].max(),
    btc_plot_df["date"].nunique()
)

2024-10-03 00:00:00 2025-01-01 00:00:00 82


In [16]:
plot_price_sentiment_daily(
    btc_plot_df,
    title="Bitcoin — Price vs Public Sentiment",
)

In [17]:
plot_price_sentiment_daily(
    nifty_plot_df,
    title="NIFTY 50 — Price vs Public Sentiment",
)

In [18]:
btc_corr = btc_plot_df[["price_scaled", "sentiment_scaled"]].corr()
nifty_corr = nifty_plot_df[["price_scaled", "sentiment_scaled"]].corr()

btc_corr, nifty_corr
#END

KeyError: "None of [Index(['price_scaled', 'sentiment_scaled'], dtype='object')] are in the [columns]"