In [1]:
import pandas as pd
import warnings
from __future__ import annotations
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from arch.univariate.base import DataScaleWarning

warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DataScaleWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=ConvergenceWarning)

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

In [2]:
from typing import Optional
import pandas as pd
import numpy as np
from zoneinfo import ZoneInfo
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
import statsmodels.api as sm

###############################################################################
# -----------------------------  CONFIG  --------------------------------------
###############################################################################
MARKET_TZ       = ZoneInfo('US/Eastern')
TRADING_START   = '09:30'
TRADING_END     = '16:00'
TOPIC_THRESH    = 0.80
ARIMA_ORDER     = (1, 0, 0)        # AR(1)
KERNEL          = np.array([1.0])  # single‑minute impulse
RET_SCALE = 1000        # 1 ≤ RET_SCALE ≤ 1000 recommended by arch
###############################################################################

# ────────────────────────  helpers  ───────────────────────────────────────────
def to_market_time(utc_ts: pd.Timestamp) -> pd.Timestamp:
    """UTC → naive NY   (tz info stripped for speed)."""
    return utc_ts.tz_convert(MARKET_TZ).tz_localize(None)

def first_session_minute(ts_naive: pd.Timestamp,
                         price_index: pd.DatetimeIndex) -> Optional[pd.Timestamp]:
    """Map a timestamp to first trading minute at/after ts in price_index."""
    if ts_naive > price_index[-1]:
        return None
    if ts_naive in price_index:
        return ts_naive
    pos = price_index.get_indexer([ts_naive], method='bfill')[0]
    return price_index[pos] if pos != -1 else None

def prepare_price_df(raw: pd.DataFrame) -> pd.Series:
    """Return minutely log‑returns on a *regular* grid (NaNs for gaps)."""
    ts = (pd.to_datetime(raw['timestamp'])
            .dt.tz_localize(MARKET_TZ, nonexistent='shift_forward',
                            ambiguous='NaT')
            .dt.tz_localize(None))

    px = (raw.assign(timestamp=ts)
              .set_index('timestamp')
              .between_time(TRADING_START, TRADING_END)
              ['avg_price']
              .resample('1min')          # regular grid
              .last()
              .asfreq('1min')            # keep NaNs instead of dropping minutes
           )

    # convert to returns, drop NaNs from both resample and diff
    returns = np.log(px).diff().dropna()

    return returns

def fit_baseline_garch(returns: pd.Series) -> pd.Series:
    """ARIMA(1) + GARCH(1,1) → squared standardised residuals."""
    ar_res   = ARIMA(returns, order=ARIMA_ORDER, trend='n').fit()
    resid    = ar_res.resid
    garch    = arch_model(resid, mean='Zero', vol='Garch', p=1, q=1).fit(disp='off')
    return garch.std_resid ** 2

def impulse_series(event_ts: pd.Timestamp,
                   base_index: pd.DatetimeIndex,
                   kernel: np.ndarray = KERNEL) -> pd.Series:
    """Series aligned to base_index, populated with kernel starting at event_ts."""
    s = pd.Series(0.0, index=base_index, name='impulse')
    if event_ts in s.index:
        start = s.index.get_loc(event_ts)
        end   = min(start + len(kernel), len(s))
        s.iloc[start:end] = kernel[:end - start]
    return s

# ──────────────────────  MAIN FUNCTION  ──────────────────────────────────────
def run_event_study(tweets_df: pd.DataFrame,
                    prices_df: pd.DataFrame,
                    tickers_topics_df: pd.DataFrame,
                    use_finbert:  bool = True
                   ) -> pd.DataFrame:
    """
    Parameters
    ----------
    tweets_df            : tweets + topic columns (FinBERT, zero‑shot, etc.)
    prices_df            : *single big* TAQ‑style table with at least
                           ['timestamp', 'avg_price', <ticker column>]
    tickers_topics_df    : mapping table with columns ['ticker', 'topic']
    """
    topic_to_tickers = (tickers_topics_df.groupby('topic')['ticker']
                                        .apply(list)
                                        .to_dict())
    topic_cols = [c for c in tweets_df.columns if c in topic_to_tickers]

    results = []

    # 3) iterate over *each ticker* present in prices_df
    for ticker, raw_px in prices_df.groupby('ticker', sort=False):
        print(f'Evaluating stock {ticker}...')
        
        returns = prepare_price_df(raw_px)
        if returns.empty:
            continue
        returns_scaled = returns * RET_SCALE
        price_idx   = returns.index
        std_resid2  = fit_baseline_garch(returns)

        # select tweets whose high‑score topics map to THIS ticker
        def tweet_relevant(row) -> bool:
            high_topics = [t for t in topic_cols if row[t] >= TOPIC_THRESH]
            return any(ticker in topic_to_tickers.get(t, []) for t in high_topics)

        for _, tw in tweets_df.loc[tweets_df.apply(tweet_relevant, axis=1)].iterrows():
            evt_ts = first_session_minute(
                to_market_time(pd.to_datetime(tw['timestamp'])),
                price_idx
            )
            if evt_ts is None:
                continue

            imp = None
            if use_finbert:
                # Use signed FinBERT sentiment to weight the impulse
                if 'sentiment_label' in tw and 'sentiment_score' in tw:
                    label = tw['sentiment_label']
                    score = tw['sentiment_score']
                    if label == 'Positive':
                        weight = score
                    elif label == 'Negative':
                        weight = -score
                    else:
                        weight = 0.0
                else:
                    weight = 0.0
                
                imp = impulse_series(evt_ts, std_resid2.index, kernel=np.array([weight]))
            else:
                imp = impulse_series(evt_ts, std_resid2.index)

            df_reg = pd.concat({'y': std_resid2, 'impulse': imp}, axis=1).dropna()

            X   = sm.add_constant(df_reg['impulse'])
            ols = sm.OLS(df_reg['y'], X).fit()

            results.append({
                'handle'  : tw['handle'],
                'tweet_id': tw['id'],
                'ticker'  : ticker,
                'gamma'   : ols.params['impulse'],
                'pvalue'  : ols.pvalues['impulse']
            })

    return pd.DataFrame(results)


In [3]:
# --- 1. tweets ---------------------------------------------------------------
# tweets_df = pd.read_parquet('tweets.parquet')           # already has topic columns
tweets_df = pd.read_csv('../data/tweets_with_sentiment_and_topic.csv')

# --- 2. prices ---------------------------------------------------------------
prices_df = pd.read_parquet(
    '../data/taq/',
    engine='pyarrow' #,
    # filters=[
        # ('SYM_ROOT', '==', 'TSLA')
        # ('year', '==', 2024)
    # ]
)
prices_df.columns = ['timestamp', 'avg_price', 'ticker', 'year']
del prices_df['year']

# --- 3. ticker‑to‑topic map --------------------------------------------------
tickers_topics_df = pd.read_csv('../data/05_people_stock_link_simplified.csv', sep=';')   # columns: ticker,topic
tickers_topics_df = tickers_topics_df[['industry', 'ticker']]
tickers_topics_df.columns = ['topic', 'ticker']

## Evaluation with FinBERT sentiment

In [4]:
# --- 4. run study ------------------------------------------------------------
result_df = run_event_study(tweets_df, prices_df, tickers_topics_df)

# e.g. keep only strongly significant results
sig = result_df[result_df['pvalue'] < 0.05]

sig.sort_values('pvalue').head(20)

Evaluating stock AAL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock AAPL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock ABNB...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock ADBE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock AMC...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock AMD...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock AMZN...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock ATVI...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock BA...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock BAC...
Evaluating stock BAH...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock BYND...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock C...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock CAT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock COIN...
Evaluating stock CVX...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock DAL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock DELL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock DIS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock DWAC...
Evaluating stock F...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock FOX...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock GE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock GM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock GME...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock GS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock HD...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock JNJ...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock JPM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock KO...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock LDOS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock LMT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock LVS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MGM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MMS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MRNA...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MSFT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MSTR...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NFLX...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NIO...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NKE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NOC...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NOK...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NVAX...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NVDA...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PEP...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PFE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PINS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PLTR...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PYPL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SHOP...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SNAP...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SPCE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SPOT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SQ...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TGT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TSLA...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TSM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TTWO...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TWTR...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock WFC...
Evaluating stock WKHS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock WMT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock WYNN...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock XOM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock ZM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Unnamed: 0,handle,tweet_id,ticker,gamma,pvalue
1425,elonmusk,1131393271400259584,XOM,124.701105,0.0
1173,richardbranson,1009057004554420224,SHOP,310.601282,0.0
1403,richardbranson,1174626364500119554,WYNN,287.054224,0.0
872,elonmusk,1127276856476987392,MSFT,74.658648,3.859427e-114
1252,elonmusk,1120872301178671105,SNAP,83.537864,2.022775e-51
1265,richardbranson,1152835756152500224,SNAP,75.517671,3.466923e-41
1352,elonmusk,1146392791062212608,TSLA,150.137612,9.571517000000001e-32
1147,elonmusk,1131393271400259584,SHOP,64.846419,5.268009e-30
550,elonmusk,1283498475238363138,FOX,125.367764,1.1016500000000001e-17
549,elonmusk,1283498482150641667,FOX,123.512248,1.1016500000000001e-17


In [5]:
sig.head()

Unnamed: 0,handle,tweet_id,ticker,gamma,pvalue
8,richardbranson,1187256464664154113,AAL,35.683927,6.843743e-12
54,elonmusk,1143678128423936000,AAPL,133.506942,1.011428e-05
62,elonmusk,1127276856476987392,AAPL,79.570779,0.0002182925
406,richardbranson,1174626364500119554,DAL,17.956193,0.0005099097
549,elonmusk,1283498482150641667,FOX,123.512248,1.1016500000000001e-17


In [6]:
len(sig)

39

In [7]:
sig.to_csv('../data/06_tweet_influence_over_stock.csv')

### Sentiment vs Volatility Impact

#### What it shows
Plots FinBERT sentiment score vs. γ (tweet impact on volatility), colored by sentiment polarity.

#### Why it matters
Shows whether stronger emotions lead to greater volatility.

In [20]:
import altair as alt
import altair as alt

def dark_theme():
    return {
        'config': {
            'background': '#1e1e1e',
            'title': {
                'color': '#ffffff'
            },
            'axis': {
                'labelColor': '#ffffff',
                'titleColor': '#ffffff',
                'gridColor': '#444444'
            },
            'legend': {
                'labelColor': '#ffffff',
                'titleColor': '#ffffff'
            },
            'header': {
                'labelColor': '#ffffff',
                'titleColor': '#ffffff'
            },
            'view': {
                'stroke': '#444444'
            }
        }
    }

alt.themes.register('dark_custom', dark_theme)
alt.themes.enable('dark_custom')

viz_df = result_df.merge(
    tweets_df[['id', 'sentiment_score', 'sentiment_label']],
    left_on='tweet_id', right_on='id'
).dropna(subset=['sentiment_score', 'gamma', 'pvalue'])

chart = alt.Chart(viz_df).mark_circle(size=60).encode(
    x=alt.X('sentiment_score:Q', title='FinBERT Sentiment Score'),
    y=alt.Y('gamma:Q', title='Estimated γ (Volatility Impact)'),
    color=alt.Color('sentiment_label:N', title='Sentiment'),
    tooltip=['handle', 'tweet_id', 'ticker', 'sentiment_score', 'gamma', 'pvalue']
).properties(
    title='Tweet Sentiment vs Estimated Volatility Impact (γ)',
    width=600,
    height=400
).interactive()

chart


### Volcano Plot – γ vs –log(p-value)

#### What it shows
Which tweets are strongly impactful (high γ) and statistically significant (low p).

#### Why it matters
Helps visually surface high-confidence volatility drivers.

In [21]:
viz_df['neglog_p'] = -np.log10(viz_df['pvalue'])

volcano = alt.Chart(viz_df).mark_circle(size=60).encode(
    x=alt.X('gamma:Q', title='Volatility Impact (γ)'),
    y=alt.Y('neglog_p:Q', title='–log10(p-value)'),
    color=alt.Color('sentiment_label:N'),
    tooltip=['handle', 'tweet_id', 'ticker', 'gamma', 'pvalue', 'sentiment_label']
).properties(
    title='Tweet Volatility Impact vs Significance',
    width=600,
    height=400
).interactive()

volcano

### Leaderboard of Most Volatile Tweets

#### What it shows
Ranks tweets by volatility impact (γ) — with tweet IDs and tickers.

#### Why it matters
Highlights which specific tweets had the greatest modeled impact.

In [22]:
top_tweets = viz_df.sort_values('gamma', ascending=False).head(10)

leaderboard = alt.Chart(top_tweets).mark_bar().encode(
    x=alt.X('gamma:Q', title='Estimated Volatility Impact (γ)'),
    y=alt.Y('tweet_id:N', title='Tweet ID', sort='-x'),
    color=alt.Color('ticker'),
    tooltip=['handle', 'ticker', 'gamma', 'pvalue']
).properties(
    title='Top 10 Tweets by Volatility Impact (γ)',
    width=600,
    height=400
)

leaderboard

### Volatility Around Tweet – Time Series View

#### What it shows
Rolling 5-minute volatility around the tweet (±30 minutes).

#### Why it matters
Gives a direct look at how volatility evolves before and after specific tweets.

In [25]:
def volatility_window_around_tweet(
    prices_df, tweet_ts, ticker, minutes_before=30, minutes_after=30, window=5
):
    ts = pd.to_datetime(prices_df['timestamp'])
    prices_df = prices_df.assign(
        timestamp=ts.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.tz_localize(None)
    )
    px = (
        prices_df[prices_df['ticker'] == ticker]
        .set_index('timestamp')
        .between_time('09:30', '16:00')
        ['avg_price']
        .resample('1min').last().asfreq('1min').ffill()
    )

    log_ret = np.log(px).diff()
    rolling_vol = log_ret.rolling(window).std()

    tweet_time = pd.to_datetime(tweet_ts).tz_convert('US/Eastern').tz_localize(None)
    event_ts = rolling_vol.index[rolling_vol.index.get_indexer([tweet_time], method='bfill')[0]]

    start = event_ts - pd.Timedelta(minutes=minutes_before)
    end = event_ts + pd.Timedelta(minutes=minutes_after)

    segment = rolling_vol.loc[start:end].copy()
    df_plot = segment.reset_index().rename(columns={0: 'volatility', 'avg_price': 'volatility'})
    df_plot['minute_offset'] = (df_plot['timestamp'] - event_ts).dt.total_seconds() // 60
    df_plot['minute_offset'] = df_plot['minute_offset'].astype(int)

    return df_plot

def plot_volatility_window(df_plot, tweet_info):
    return alt.Chart(df_plot).mark_line().encode(
        x=alt.X('minute_offset:Q', title='Minutes from Tweet'),
        y=alt.Y('volatility:Q', title='5-min Rolling Volatility'),
        tooltip=['minute_offset', 'volatility']
    ).properties(
        title=f"Volatility Around Tweet {tweet_info['tweet_id']} ({tweet_info['ticker']})",
        width=600,
        height=300
    ) + alt.Chart(pd.DataFrame({'x': [0]})).mark_rule(color='red').encode(x='x:Q')

tweet_row = result_df[result_df['pvalue'] < 0.05].iloc[0]
tweet_ts = tweets_df.loc[tweets_df['id'] == tweet_row['tweet_id'], 'timestamp'].iloc[0]
vol_df = volatility_window_around_tweet(prices_df, tweet_ts, tweet_row['ticker'])
plot_volatility_window(vol_df, tweet_row)


## Evaluation without FinBERT sentiment

In [160]:
# --- 4. run study ------------------------------------------------------------
result_nosentiment_df = run_event_study(tweets_df, prices_df, tickers_topics_df, use_finbert=False)

# e.g. keep only strongly significant results
sig_nosentiment = result_nosentiment_df[result_nosentiment_df['pvalue'] < 0.05]

sig_nosentiment.sort_values('pvalue').head(20)

Evaluating stock AAL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock AAPL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock ABNB...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock ADBE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock AMC...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock AMD...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock AMZN...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock ATVI...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock BA...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock BAC...
Evaluating stock BAH...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock BYND...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock C...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock CAT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock COIN...
Evaluating stock CVX...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock DAL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock DELL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock DIS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock DWAC...
Evaluating stock F...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock FOX...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock GE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock GM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock GME...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock GS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock HD...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock JNJ...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock JPM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock KO...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock LDOS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock LMT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock LVS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MGM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MMS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MRNA...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MSFT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock MSTR...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NFLX...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NIO...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NKE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NOC...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NOK...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NVAX...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock NVDA...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PEP...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PFE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PINS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PLTR...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock PYPL...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SHOP...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SNAP...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SPCE...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SPOT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock SQ...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TGT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TSLA...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TSM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TTWO...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock TWTR...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock WFC...
Evaluating stock WKHS...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock WMT...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock WYNN...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock XOM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Evaluating stock ZM...


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Unnamed: 0,handle,tweet_id,ticker,gamma,pvalue
1174,richardbranson,1009013331603152896,SHOP,252.310519,0.0
858,elonmusk,1158963624418746369,MSFT,118.901431,0.0
1173,richardbranson,1009057004554420224,SHOP,252.310519,0.0
1152,elonmusk,1124484529274494977,SHOP,258.05792,0.0
1097,realDonaldTrump,1325173848396898305,SHOP,276.944257,0.0
1403,richardbranson,1174626364500119554,WYNN,251.539964,0.0
874,elonmusk,1124484529274494977,MSFT,132.003911,0.0
995,realDonaldTrump,1161441560153743366,PINS,144.079196,0.0
1425,elonmusk,1131393271400259584,XOM,124.511086,0.0
938,realDonaldTrump,952183452366929920,NIO,257.486599,0.0


In [161]:
len(sig_nosentiment)

214

In [177]:
top_volatility_df = pd.read_csv('../data/top_volatility_tweets_v2.csv').rename(columns={'ticker': 'SYM_ROOT'})

top_volatility_df['timestamp'] = pd.to_datetime(top_volatility_df['timestamp'])

top_volatility_df['ticker_list'] = top_volatility_df['SYM_ROOT'].apply(lambda x: [t.strip() for t in x.split(',')])

top_volatility_df = top_volatility_df.explode('ticker_list').rename(columns={'ticker_list': 'ticker'})
del top_volatility_df['SYM_ROOT']

top_volatility_df.head()

Unnamed: 0,tweet_id,score,handle,timestamp,ticker
0,1243075957294055424,19.945234,@elonmusk,2020-03-26 06:56:00,AAPL
0,1243075957294055424,19.945234,@elonmusk,2020-03-26 06:56:00,BAC
0,1243075957294055424,19.945234,@elonmusk,2020-03-26 06:56:00,DIS
0,1243075957294055424,19.945234,@elonmusk,2020-03-26 06:56:00,KO
0,1243075957294055424,19.945234,@elonmusk,2020-03-26 06:56:00,LDOS


In [193]:
combined_df = result_df.merge(
    top_volatility_df[['tweet_id', 'score', 'ticker']],
    on=['tweet_id', 'ticker'],
    how='inner'
)

combined_df.head()

Unnamed: 0,handle,tweet_id,ticker,gamma,pvalue,score
0,richardbranson,1335873162907701253,AAL,0.0,,5.223565
1,richardbranson,1246087905593917441,AAL,-0.976909,0.844835,-0.044711
2,richardbranson,1187256464664154113,AAL,35.683927,6.843743e-12,0.468481
3,richardbranson,1181940457766559745,AAL,0.0,,4.520996
4,richardbranson,1174626364500119554,AAL,-0.526822,0.926312,1.702097


In [194]:
len(combined_df)

175