In [1]:
import pandas as pd

# Load the filtered volatility table; the "timestamp" column is parsed into
# datetimes at read time so later cells can index by it.
volatility_df = pd.read_csv(
    "../data/filtered_volatility_df.csv",
    parse_dates=["timestamp"],
)
volatility_df.head()

Unnamed: 0,timestamp,price,tweet_id,handle,ticker,sentiment_label,sentiment_score,top_topic,Technology,Healthcare and Pharmaceuticals,...,Energy,Agriculture,Automotive,minutes_since_tweet,log_return,volatility,price_change_pct,average_volatility,price_spike,weighted_volatility
0,2016-12-05 12:15:00,99.002,805384490533212160,@realDonaldTrump,WYNN,Neutral,0.949813,Technology,0.464619,0.449575,...,0.318766,0.122376,0.381047,1441.658,,0.017418,,0.321163,0,0.017418
1,2016-12-05 12:15:00,27.458544,805384490533212160,@realDonaldTrump,FOX,Neutral,0.949813,Technology,0.464619,0.449575,...,0.318766,0.122376,0.381047,1441.658,-1.282463,0.004898,,0.05888,1,0.007542
2,2016-12-05 12:15:00,61.312195,805384490533212160,@realDonaldTrump,LVS,Neutral,0.949813,Technology,0.464619,0.449575,...,0.318766,0.122376,0.381047,1441.658,,0.016306,,0.12518,1,0.03406
3,2016-12-05 12:15:00,29.001725,805384490533212160,@realDonaldTrump,MGM,Neutral,0.949813,Technology,0.464619,0.449575,...,0.318766,0.122376,0.381047,1441.658,-0.748623,0.007139,,0.062956,1,0.011124
4,2016-12-05 12:16:00,98.930269,805384490533212160,@realDonaldTrump,WYNN,Neutral,0.949813,Technology,0.464619,0.449575,...,0.318766,0.122376,0.381047,1442.658,1.22706,0.036447,,0.321163,1,0.140217


# Apply **ARIMA** forecasting

In [12]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import matplotlib.pyplot as plt

In [3]:
def arima_forecast(df, column='weighted_volatility', steps=10):
    """Fit an ARIMA(1, 1, 1) model to one column and forecast future values.

    The caller's DataFrame is never modified: the function works on a Series
    built from the requested column and the timestamps.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. Timestamps are taken from a ``'timestamp'`` column if one
        exists, otherwise from an index named ``'timestamp'``.
    column : str, default 'weighted_volatility'
        Name of the column to model.
    steps : int, default 10
        Number of forecast steps to produce.

    Returns
    -------
    pandas.Series
        ``steps`` forecast values named ``f'{column}_ARIMA_Forecast'``,
        indexed by consecutive one-second timestamps that follow the latest
        timestamp in the data.

    Raises
    ------
    KeyError
        If no ``'timestamp'`` column/index is available or ``column`` is
        missing from ``df``.
    ValueError
        If there are no rows, or ``column`` holds no non-missing values.

    Notes
    -----
    Only the first row per timestamp is kept, so when several rows share a
    timestamp the remaining rows are ignored. After the missing values are
    dropped the observations are no longer evenly spaced; the model is fitted
    on them as a plain sequence (statsmodels may warn that the date index
    carries no frequency).
    """
    # Read the timestamps without touching the caller's frame (the previous
    # implementation re-indexed ``df`` in place, which leaked into later cells).
    if 'timestamp' in df.columns:
        timestamps = pd.DatetimeIndex(df['timestamp'], name='timestamp')
    elif df.index.name == 'timestamp':
        timestamps = pd.DatetimeIndex(df.index, name='timestamp')
    else:
        raise KeyError("'timestamp' must be a column or the name of the index")

    # Only the modelled column is carried forward, so the per-second
    # re-gridding below does not have to expand every other column as well.
    series = pd.Series(df[column].to_numpy(), index=timestamps, name=column)
    if series.empty:
        raise ValueError("cannot forecast from an empty DataFrame")

    # Keep the first observation for each timestamp.
    series = series[~series.index.duplicated(keep='first')]

    # Sort onto a one-second grid; the last grid point is the latest timestamp
    # and anchors the forecast dates.
    series = series.asfreq('s')
    last_timestamp = series.index[-1]

    observed = series.dropna()
    if observed.empty:
        raise ValueError(f"column {column!r} has no non-missing values to fit")

    # Fit ARIMA(1, 1, 1) and forecast the requested number of steps.
    model_fit = ARIMA(observed, order=(1, 1, 1)).fit()
    forecast = model_fit.forecast(steps=steps)

    # One-second steps strictly after the last observed timestamp.
    forecast_dates = pd.date_range(last_timestamp, periods=steps + 1, freq='s')[1:]

    return pd.Series(forecast.values, index=forecast_dates, name=f'{column}_ARIMA_Forecast')

In [4]:
# Forecast the next 10 steps of weighted volatility with the ARIMA helper.
arima_forecasted = arima_forecast(
    volatility_df,
    column='weighted_volatility',
    steps=10,
)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


In [13]:
# Smooth the ARIMA forecast with a 3-point trailing mean. The first two
# entries are NaN because the window is not yet full.
forecasted_values_smoothed = arima_forecasted.rolling(3).mean()
forecasted_values_smoothed

2020-12-30 16:00:01            NaN
2020-12-30 16:00:02            NaN
2020-12-30 16:00:03    1142.064110
2020-12-30 16:00:04    1370.719516
2020-12-30 16:00:05    1429.671086
2020-12-30 16:00:06    1444.869885
2020-12-30 16:00:07    1448.788416
2020-12-30 16:00:08    1449.798685
2020-12-30 16:00:09    1450.059151
2020-12-30 16:00:10    1450.126304
Freq: s, Name: weighted_volatility_ARIMA_Forecast, dtype: float64

# Apply **GARCH** forecasting

In [None]:
!pip install arch

Collecting arch
  Downloading arch-7.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading arch-7.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (985 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m985.3/985.3 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: arch
Successfully installed arch-7.2.0


In [9]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
import matplotlib.pyplot as plt

In [26]:
def garch_forecast(df, column='weighted_volatility', steps=5, scale=0.0001):
    """Fit a GARCH(1, 1) model to one column and forecast its volatility.

    The caller's DataFrame is never modified: the function works on a Series
    built from the requested column and the timestamps.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. Timestamps are taken from a ``'timestamp'`` column if one
        exists, otherwise from an index named ``'timestamp'``.
    column : str, default 'weighted_volatility'
        Name of the column to model.
    steps : int, default 5
        Forecast horizon (number of steps ahead).
    scale : float, default 0.0001
        Factor the observations are multiplied by before fitting.

    Returns
    -------
    pandas.Series
        Square roots of the forecast variances for horizons ``1..steps``,
        named ``f'{column}_GARCH_Forecast'`` and indexed by consecutive
        one-second timestamps that follow the latest timestamp in the data.
        The values are in the *scaled* units (they are not divided back by
        ``scale``).

    Raises
    ------
    KeyError
        If no ``'timestamp'`` column/index is available or ``column`` is
        missing from ``df``.
    ValueError
        If there are no rows, or ``column`` holds no non-missing values.

    Notes
    -----
    Only the first row per timestamp is kept, so when several rows share a
    timestamp the remaining rows are ignored.
    """
    # Read the timestamps without touching the caller's frame (the previous
    # implementation re-indexed ``df`` in place, which leaked into later cells).
    if 'timestamp' in df.columns:
        timestamps = pd.DatetimeIndex(df['timestamp'], name='timestamp')
    elif df.index.name == 'timestamp':
        timestamps = pd.DatetimeIndex(df.index, name='timestamp')
    else:
        raise KeyError("'timestamp' must be a column or the name of the index")

    # Only the modelled column is carried forward, so the per-second
    # re-gridding below does not have to expand every other column as well.
    series = pd.Series(df[column].to_numpy(), index=timestamps, name=column)
    if series.empty:
        raise ValueError("cannot forecast from an empty DataFrame")

    # Keep the first observation for each timestamp.
    series = series[~series.index.duplicated(keep='first')]

    # Sort onto a one-second grid; the last grid point is the latest timestamp
    # and anchors the forecast dates.
    series = series.asfreq('s')
    last_timestamp = series.index[-1]

    observed = series.dropna()
    if observed.empty:
        raise ValueError(f"column {column!r} has no non-missing values to fit")

    # Rescale before fitting.
    # NOTE(review): presumably done to help the optimizer converge - confirm
    # the factor suits the magnitude of the data.
    observed_scaled = observed * scale

    # Fit GARCH(1, 1) model
    model_fit = arch_model(observed_scaled, vol='Garch', p=1, q=1).fit()

    # The last row of the variance table holds the forecasts made from the
    # final observation, one column per horizon.
    variance_forecast = model_fit.forecast(horizon=steps)
    forecast_variance = variance_forecast.variance.values[-1, :]

    # One-second steps strictly after the last observed timestamp.
    forecast_dates = pd.date_range(last_timestamp, periods=steps + 1, freq='s')[1:]

    return pd.Series(np.sqrt(forecast_variance), index=forecast_dates, name=f'{column}_GARCH_Forecast')


In [None]:
# Forecast 5 steps of GARCH volatility and attach the result to the table.
# NOTE(review): the forecast Series is indexed by timestamps that come after
# the last timestamp in the data, so the index alignment performed by this
# assignment appears to leave 'GARCH_Forecast' all-NaN - confirm, and consider
# keeping/saving the forecast as its own Series instead.
garch_forecasted = garch_forecast(
    volatility_df,
    column='weighted_volatility',
    steps=5,
)
volatility_df['GARCH_Forecast'] = garch_forecasted

volatility_df.head()

In [None]:
# Display the forecast Series returned by garch_forecast.
garch_forecasted

In [None]:
# Persist the table next to the input data (to_csv also writes the index by
# default).
volatility_df.to_csv(path_or_buf='../data/garch_forecasted.csv')