In [134]:
import polars as pl
import hvplot.polars

In [136]:
df_news = pl.read_csv("data/processed/financial_sentiment_analysis.csv")
df_news = df_news.unique()

In [137]:
df = pl.read_csv('data/processed/final_news_data.csv')
df = df.unique()

In [138]:
df

ITI(13D),ITI(impatient),ITI(patient),ITI(insider),ITI(short),date,permco,ret,prc,vol,on_rdq,vol_missing_flag,comnam,Article_title
f64,f64,f64,f64,f64,str,i64,f64,f64,f64,i64,i64,str,str
0.381397,0.633154,0.2785957,0.356651,0.3885045,"""2010-09-23""",655,-0.025629,85.92,85000.0,0,0,"""BIO RAD LABORATORIES INC""",
0.347359,0.386328,0.252111,,0.428968,"""2009-11-09""",51435,0.008023,13.82,347500.0,0,0,"""VENOCO INC""",
0.3701721,0.4467906,0.173399,0.464883,0.366852,"""2010-11-09""",21287,-0.003663,14.28,1.4192997e7,0,0,"""NEWS CORP""",
0.021718,0.316221,0.116282,0.320383,0.394667,"""2010-02-02""",14873,0.03525,30.25,506100.0,0,0,"""LAMAR ADVERTISING CO""",
0.8462791,0.7915054,0.793213,0.2627674,0.79369,"""2018-05-18""",20384,-0.123661,34.37,3.0688221e7,1,0,"""CAMPBELL SOUP CO""","""24 Stocks Moving In Friday's P…"
…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.151294,0.441002,0.146399,0.701226,0.439076,"""2016-07-05""",45380,-0.018958,59.51,248977.0,0,0,"""ENERSYS""",
0.312932,0.654783,0.2328684,0.4907559,0.477454,"""2016-08-22""",55341,0.000525,38.11,1.0704362e7,0,0,"""PAYPAL HOLDINGS INC""",
0.09741,0.301976,0.121458,0.6671246,0.4642193,"""2014-04-08""",53988,0.037204,66.63,332801.0,0,0,"""PROTO LABS INC""",
0.221794,0.375491,0.168288,0.297351,0.424511,"""2009-06-04""",1689,0.013673,49.67,1.5347e6,0,0,"""M & T BANK CORP""",


In [139]:
df_articles = df.filter(pl.col('Article_title').is_not_null()).select('date', 'comnam', 'Article_title')

In [140]:
news_df = df_articles.join(df_news, left_on=['date', 'comnam', 'Article_title'], right_on=['date', 'comnam', 'Headline'], how='inner').unique()

In [141]:
final_df = df.join(news_df, on=['date', 'comnam', 'Article_title'], how='left').unique(subset=['date', 'comnam', 'Article_title'])

In [142]:
filter_df = final_df.with_columns(
    pl.col("ret").shift(-1).alias("next_day_ret")
).select(['ITI(13D)', 'next_day_ret', 'permco', 'Positive', 'Negative', 'Neutral', 'date']).sort(['permco', 'date']).drop_nulls(subset=['ITI(13D)', 'next_day_ret'])

In [143]:
filter_df = filter_df.with_columns(pl.col('Positive').fill_null(0), pl.col('Negative').fill_null(0), pl.col('Neutral').fill_null(0))

In [144]:
filter_df = filter_df.with_columns(
    (
        (pl.col('Positive') - pl.col('Negative')) 
    ).alias('sentiment_score')
)


In [145]:
filter_df = filter_df.with_columns(
    pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").alias("date")
)

In [146]:
filter_df

ITI(13D),next_day_ret,permco,Positive,Negative,Neutral,date,sentiment_score
f64,f64,i64,f64,f64,f64,date,f64
0.152583,-0.015152,7,0.0,0.0,0.0,2009-05-27,0.0
0.261909,0.001487,7,0.0,0.0,0.0,2009-05-28,0.0
0.096691,-0.047904,7,0.0,0.0,0.0,2009-05-29,0.0
0.089552,-0.00382,7,0.0,0.0,0.0,2009-06-01,0.0
0.018411,-0.007244,7,0.0,0.0,0.0,2009-06-02,0.0
…,…,…,…,…,…,…,…
0.319228,0.014065,58620,0.0,0.0,0.0,2009-08-25,0.0
0.114453,-0.005307,58620,0.0,0.0,0.0,2009-08-26,0.0
0.215733,0.005515,58620,0.0,0.0,0.0,2009-08-27,0.0
0.225597,0.051969,58620,0.0,0.0,0.0,2009-08-28,0.0


In [147]:
n = 10 # EMA window size

filter_df = (
    filter_df
    .sort(["permco", "date"])
    .with_columns([
        pl.col("sentiment_score")
        .ewm_mean(alpha=2/(n+1), adjust=False)
        .over("permco")
        .alias(f"ema_sentiment_{n}")
    ])
)

In [148]:
filter_df

ITI(13D),next_day_ret,permco,Positive,Negative,Neutral,date,sentiment_score,ema_sentiment_10
f64,f64,i64,f64,f64,f64,date,f64,f64
0.152583,-0.015152,7,0.0,0.0,0.0,2009-05-27,0.0,0.0
0.261909,0.001487,7,0.0,0.0,0.0,2009-05-28,0.0,0.0
0.096691,-0.047904,7,0.0,0.0,0.0,2009-05-29,0.0,0.0
0.089552,-0.00382,7,0.0,0.0,0.0,2009-06-01,0.0,0.0
0.018411,-0.007244,7,0.0,0.0,0.0,2009-06-02,0.0,0.0
…,…,…,…,…,…,…,…,…
0.319228,0.014065,58620,0.0,0.0,0.0,2009-08-25,0.0,0.0
0.114453,-0.005307,58620,0.0,0.0,0.0,2009-08-26,0.0,0.0
0.215733,0.005515,58620,0.0,0.0,0.0,2009-08-27,0.0,0.0
0.225597,0.051969,58620,0.0,0.0,0.0,2009-08-28,0.0,0.0


In [149]:
filter_df = filter_df.filter(
    pl.col("sentiment_score").is_not_null() & (pl.col("sentiment_score") != 0)
)

In [150]:
filter_df.filter(
    pl.col("ema_sentiment_10").is_not_null() & (pl.col("ema_sentiment_10") != 0)
)


ITI(13D),next_day_ret,permco,Positive,Negative,Neutral,date,sentiment_score,ema_sentiment_10
f64,f64,i64,f64,f64,f64,date,f64,f64
0.030262,-0.001409,37,0.148186,0.012093,0.839721,2010-08-04,0.136093,0.024744
0.1824342,-0.005078,37,0.023458,0.032603,0.943939,2010-09-10,-0.009145,-0.001529
0.1824342,0.005797,37,0.032024,0.028116,0.9398598,2010-09-10,0.003908,-0.00054
0.481816,0.001097,37,0.068105,0.288555,0.6433402,2010-09-13,-0.220451,-0.040524
0.509262,0.023719,37,0.7095045,0.007535,0.282961,2010-09-28,0.70197,0.123174
…,…,…,…,…,…,…,…,…
0.082986,-0.024239,56668,0.035469,0.150262,0.8142691,2019-07-31,-0.114793,-0.056249
0.082986,-0.009885,56668,0.026923,0.497259,0.475818,2019-07-31,-0.470337,-0.131537
0.082986,0.007519,56668,0.069916,0.012613,0.9174706,2019-07-31,0.057303,-0.097203
0.082986,-0.016281,56668,0.142438,0.012201,0.845361,2019-07-31,0.130237,-0.05585


In [151]:
valid_permcos = (
    filter_df.group_by("permco")
    .agg(pl.len().alias("n_obs"))
    .filter(pl.col("n_obs") >= 2)
    .select("permco")
)

filter_df = filter_df.join(valid_permcos, on="permco", how="inner")

In [152]:
filter_df.group_by("permco").agg([
    pl.corr("ema_sentiment_10", "next_day_ret").alias("corr_sentiment_ret"),
    pl.corr("ITI(13D)", "next_day_ret").alias("corr_ITI_ret"),
])

permco,corr_sentiment_ret,corr_ITI_ret
i64,f64,f64
37,-0.051404,-0.132245
90,-0.002293,-0.010368
92,-0.044901,-0.003928
116,0.021265,0.040208
137,0.005138,0.014423
…,…,…
56518,0.223855,0.433345
56550,-0.212958,0.01007
56616,0.322841,-0.106389
56662,0.021158,0.240828


In [153]:
filter_df.describe()

statistic,ITI(13D),next_day_ret,permco,Positive,Negative,Neutral,date,sentiment_score,ema_sentiment_10
str,f64,f64,f64,f64,f64,f64,str,f64,f64
"""count""",1046008.0,1046008.0,1046008.0,1046008.0,1046008.0,1046008.0,"""1046008""",1046008.0,1046008.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,"""0""",0.0,0.0
"""mean""",0.350087,0.000812,26929.516742,0.276517,0.199291,0.524192,"""2015-09-13 02:30:26.170354""",0.077226,0.047095
"""std""",0.195996,0.035191,18347.567803,0.329549,0.301612,0.369577,,0.512403,0.173392
"""min""",0.000663,-0.928571,37.0,0.00607,0.005645,0.008015,"""2009-05-27""",-0.96954,-0.927976
"""25%""",0.200453,-0.010526,12305.0,0.034653,0.018544,0.103952,"""2013-09-10""",-0.083208,-0.047886
"""50%""",0.315906,0.000442,21068.0,0.088214,0.04014,0.6284781,"""2015-10-21""",0.033937,0.041899
"""75%""",0.4669854,0.011655,47220.0,0.4989567,0.21436,0.8982955,"""2018-01-22""",0.384807,0.155274
"""max""",0.9997168,4.148082,56668.0,0.961015,0.9773637,0.9589839,"""2019-07-31""",0.946633,0.926176


In [154]:
corr_global = filter_df.select(
    pl.corr("ITI(13D)", "next_day_ret").alias("corr_ITI_ret_global")
)
print(corr_global)

shape: (1, 1)
┌─────────────────────┐
│ corr_ITI_ret_global │
│ ---                 │
│ f64                 │
╞═════════════════════╡
│ 0.000351            │
└─────────────────────┘


In [155]:
corr_by_firm = (
    filter_df
    .group_by("permco")
    .agg(pl.corr("ITI(13D)", "next_day_ret").alias("corr_ITI_ret"))
    .drop_nulls()  # supprime les corrélations NaN
)

# Compter combien d'entreprises ont une corrélation proche de -1, 0, ou 1
corr_distribution = corr_by_firm.select([
    pl.col("corr_ITI_ret").mean().alias("mean_corr"),
    pl.col("corr_ITI_ret").std().alias("std_corr"),
    pl.col("corr_ITI_ret").min().alias("min_corr"),
    pl.col("corr_ITI_ret").max().alias("max_corr"),
    ((pl.col("corr_ITI_ret").abs() < 0.1).sum()).alias("near_zero"),   # |corr| < 0.1
    ((pl.col("corr_ITI_ret") > 0.7).sum()).alias("near_pos1"),         # corr > 0.7
    ((pl.col("corr_ITI_ret") < -0.7).sum()).alias("near_neg1"),        # corr < -0.7
    pl.len().alias("total_firms")
])

print(corr_distribution)



shape: (1, 8)
┌───────────┬──────────┬──────────┬──────────┬───────────┬───────────┬───────────┬─────────────┐
│ mean_corr ┆ std_corr ┆ min_corr ┆ max_corr ┆ near_zero ┆ near_pos1 ┆ near_neg1 ┆ total_firms │
│ ---       ┆ ---      ┆ ---      ┆ ---      ┆ ---       ┆ ---       ┆ ---       ┆ ---         │
│ f64       ┆ f64      ┆ f64      ┆ f64      ┆ u32       ┆ u32       ┆ u32       ┆ u32         │
╞═══════════╪══════════╪══════════╪══════════╪═══════════╪═══════════╪═══════════╪═════════════╡
│ NaN       ┆ NaN      ┆ -1.0     ┆ 1.0      ┆ 1677      ┆ 16        ┆ 6         ┆ 2037        │
└───────────┴──────────┴──────────┴──────────┴───────────┴───────────┴───────────┴─────────────┘


In [156]:
filter_df = filter_df.with_columns(pl.col("ITI(13D)").qcut(10).alias("ITI_decile"))

iti_decile_stats = (
    filter_df.group_by("ITI_decile")
    .agg(pl.col("next_day_ret").mean().alias("avg_next_day_ret"))
    .sort("ITI_decile")
)
print(iti_decile_stats)


shape: (10, 2)
┌─────────────────────────────────┬──────────────────┐
│ ITI_decile                      ┆ avg_next_day_ret │
│ ---                             ┆ ---              │
│ cat                             ┆ f64              │
╞═════════════════════════════════╪══════════════════╡
│ (-inf, 0.12405825950000002]     ┆ 0.000795         │
│ (0.12405825950000002, 0.177019… ┆ 0.000749         │
│ (0.17701939, 0.22324158]        ┆ 0.000691         │
│ (0.22324158, 0.268507752000000… ┆ 0.000972         │
│ (0.26850775200000004, 0.315905… ┆ 0.000851         │
│ (0.31590566, 0.367987279999999… ┆ 0.000728         │
│ (0.3679872799999999, 0.4300912… ┆ 0.000829         │
│ (0.4300912249999999, 0.51051]   ┆ 0.000924         │
│ (0.51051, 0.6314921000000001]   ┆ 0.000706         │
│ (0.6314921000000001, inf]       ┆ 0.000874         │
└─────────────────────────────────┴──────────────────┘


In [157]:
filter_df = filter_df.with_columns([
    # Colonne indiquant si le signe du retour a changé par rapport à la veille
    (pl.col("next_day_ret").sign() != pl.col("next_day_ret").shift(1).sign())
        .cast(pl.Int8)
        .alias("ret_sign_change")
])

In [158]:
filter_df.select([
    pl.col("next_day_ret"),
    pl.col("ret_sign_change"),
    pl.col("ITI(13D)")
]).head(10)

next_day_ret,ret_sign_change,ITI(13D)
f64,i8,f64
-0.001409,,0.030262
-0.005078,0.0,0.1824342
0.005797,1.0,0.1824342
0.001097,0.0,0.481816
0.023719,0.0,0.509262
-0.055327,1.0,0.509262
-0.015453,0.0,0.185055
0.003088,1.0,0.174374
0.002114,0.0,0.2680761
-0.003624,1.0,0.311135


In [159]:
corr_sign_iti = filter_df.select(pl.corr("ret_sign_change", "ITI(13D)")).item()
print(f"Corrélation entre changement de signe et ITI(13D): {corr_sign_iti:.4f}")

Corrélation entre changement de signe et ITI(13D): -0.0002


In [160]:
# Corrélation ITI - changement de signe du return, par firme
corr_by_firm_sign = (
    filter_df
    .group_by("permco")
    .agg(pl.corr("ITI(13D)", "ret_sign_change").alias("corr_ITI_sign"))
    .drop_nulls()
)

# Statistiques de distribution des corrélations
corr_sign_distribution = corr_by_firm_sign.select([
    pl.col("corr_ITI_sign").mean().alias("mean_corr"),
    pl.col("corr_ITI_sign").std().alias("std_corr"),
    pl.col("corr_ITI_sign").min().alias("min_corr"),
    pl.col("corr_ITI_sign").max().alias("max_corr"),
    ((pl.col("corr_ITI_sign").abs() < 0.1).sum()).alias("near_zero"),   # |corr| < 0.1
    ((pl.col("corr_ITI_sign") > 0.7).sum()).alias("near_pos1"),         # corr > 0.7
    ((pl.col("corr_ITI_sign") < -0.7).sum()).alias("near_neg1"),        # corr < -0.7
    pl.len().alias("total_firms")
])

print(corr_sign_distribution)


shape: (1, 8)
┌───────────┬──────────┬──────────┬──────────┬───────────┬───────────┬───────────┬─────────────┐
│ mean_corr ┆ std_corr ┆ min_corr ┆ max_corr ┆ near_zero ┆ near_pos1 ┆ near_neg1 ┆ total_firms │
│ ---       ┆ ---      ┆ ---      ┆ ---      ┆ ---       ┆ ---       ┆ ---       ┆ ---         │
│ f64       ┆ f64      ┆ f64      ┆ f64      ┆ u32       ┆ u32       ┆ u32       ┆ u32         │
╞═══════════╪══════════╪══════════╪══════════╪═══════════╪═══════════╪═══════════╪═════════════╡
│ NaN       ┆ NaN      ┆ -1.0     ┆ 1.0      ┆ 1662      ┆ 23        ┆ 7         ┆ 2037        │
└───────────┴──────────┴──────────┴──────────┴───────────┴───────────┴───────────┴─────────────┘


In [161]:
# Corrélation entre ema_sentiment_10 et next_day_ret, par entreprise
corr_by_firm_sent = (
    filter_df
    .group_by("permco")
    .agg(pl.corr("ema_sentiment_10", "next_day_ret").alias("corr_sentiment_ret"))
    .drop_nulls()  # supprime les corrélations NaN
)

# Statistiques globales sur la distribution des corrélations
corr_sent_distribution = corr_by_firm_sent.select([
    pl.col("corr_sentiment_ret").mean().alias("mean_corr"),
    pl.col("corr_sentiment_ret").std().alias("std_corr"),
    pl.col("corr_sentiment_ret").min().alias("min_corr"),
    pl.col("corr_sentiment_ret").max().alias("max_corr"),
    ((pl.col("corr_sentiment_ret").abs() < 0.1).sum()).alias("near_zero"),   # |corr| < 0.1
    ((pl.col("corr_sentiment_ret") > 0.7).sum()).alias("near_pos1"),         # corr > 0.7
    ((pl.col("corr_sentiment_ret") < -0.7).sum()).alias("near_neg1"),        # corr < -0.7
    pl.len().alias("total_firms")
])

print(corr_sent_distribution)


shape: (1, 8)
┌───────────┬──────────┬──────────┬──────────┬───────────┬───────────┬───────────┬─────────────┐
│ mean_corr ┆ std_corr ┆ min_corr ┆ max_corr ┆ near_zero ┆ near_pos1 ┆ near_neg1 ┆ total_firms │
│ ---       ┆ ---      ┆ ---      ┆ ---      ┆ ---       ┆ ---       ┆ ---       ┆ ---         │
│ f64       ┆ f64      ┆ f64      ┆ f64      ┆ u32       ┆ u32       ┆ u32       ┆ u32         │
╞═══════════╪══════════╪══════════╪══════════╪═══════════╪═══════════╪═══════════╪═════════════╡
│ 0.003919  ┆ 0.141728 ┆ -1.0     ┆ 1.0      ┆ 1664      ┆ 15        ┆ 9         ┆ 2037        │
└───────────┴──────────┴──────────┴──────────┴───────────┴───────────┴───────────┴─────────────┘


In [162]:
import polars as pl

# 1. Détecter le changement de signe du sentiment (EMA)
filter_df = filter_df.sort(["permco", "date"]).with_columns([
    pl.col("ema_sentiment_10").shift(1).over("permco").alias("prev_sentiment"),
])

filter_df = filter_df.with_columns([
    (
        (pl.col("prev_sentiment") * pl.col("ema_sentiment_10") < 0)
    ).cast(pl.Int8).alias("sentiment_regime_change")
])

# 2. Définir si l’ITI est "haut"
high_threshold = 0.8  # ajustable selon la distribution de l'ITI
filter_df = filter_df.with_columns([
    (pl.col("ITI(13D)") > high_threshold).cast(pl.Int8).alias("high_ITI")
])

# 3. Créer une variable d’interaction
filter_df = filter_df.with_columns([
    (pl.col("high_ITI") * pl.col("sentiment_regime_change")).alias("signal_change_high_ITI")
])

# 4. Corrélation par firme
corr_by_firm = (
    filter_df
    .group_by("permco")
    .agg(pl.corr("signal_change_high_ITI", "next_day_ret").alias("corr_signal_ret"))
    .drop_nulls()
    .sort("corr_signal_ret", descending=True)
)

# 5. Statistiques descriptives
corr_stats = corr_by_firm.select([
    pl.col("corr_signal_ret").mean().alias("mean_corr"),
    pl.col("corr_signal_ret").std().alias("std_corr"),
    pl.col("corr_signal_ret").min().alias("min_corr"),
    pl.col("corr_signal_ret").max().alias("max_corr"),
    ((pl.col("corr_signal_ret").abs() < 0.1).sum()).alias("near_zero"),
    ((pl.col("corr_signal_ret") > 0.7).sum()).alias("near_pos1"),
    ((pl.col("corr_signal_ret") < -0.7).sum()).alias("near_neg1"),
    pl.len().alias("total_firms")
])

print(corr_stats)


shape: (1, 8)
┌───────────┬──────────┬───────────┬──────────┬───────────┬───────────┬───────────┬─────────────┐
│ mean_corr ┆ std_corr ┆ min_corr  ┆ max_corr ┆ near_zero ┆ near_pos1 ┆ near_neg1 ┆ total_firms │
│ ---       ┆ ---      ┆ ---       ┆ ---      ┆ ---       ┆ ---       ┆ ---       ┆ ---         │
│ f64       ┆ f64      ┆ f64       ┆ f64      ┆ u32       ┆ u32       ┆ u32       ┆ u32         │
╞═══════════╪══════════╪═══════════╪══════════╪═══════════╪═══════════╪═══════════╪═════════════╡
│ NaN       ┆ NaN      ┆ -0.716175 ┆ 0.428936 ┆ 1184      ┆ 745       ┆ 1         ┆ 2037        │
└───────────┴──────────┴───────────┴──────────┴───────────┴───────────┴───────────┴─────────────┘


In [163]:
filter_df.describe()

statistic,ITI(13D),next_day_ret,permco,Positive,Negative,Neutral,date,sentiment_score,ema_sentiment_10,ITI_decile,ret_sign_change,prev_sentiment,sentiment_regime_change,high_ITI,signal_change_high_ITI
str,f64,f64,f64,f64,f64,f64,str,f64,f64,str,f64,f64,f64,f64,f64
"""count""",1046008.0,1046008.0,1046008.0,1046008.0,1046008.0,1046008.0,"""1046008""",1046008.0,1046008.0,"""1046008""",1046007.0,1043971.0,1043971.0,1046008.0,1043971.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,"""0""",0.0,0.0,"""0""",1.0,2037.0,2037.0,0.0,2037.0
"""mean""",0.350087,0.000812,26929.516742,0.276517,0.199291,0.524192,"""2015-09-13 02:30:26.170354""",0.077226,0.047095,,0.514347,0.047122,0.171295,0.02921,0.004646
"""std""",0.195996,0.035191,18347.567803,0.329549,0.301612,0.369577,,0.512403,0.173392,,0.499794,0.173399,0.376767,0.168395,0.068001
"""min""",0.000663,-0.928571,37.0,0.00607,0.005645,0.008015,"""2009-05-27""",-0.96954,-0.927976,,0.0,-0.927976,0.0,0.0,0.0
"""25%""",0.200453,-0.010526,12305.0,0.034653,0.018544,0.103952,"""2013-09-10""",-0.083208,-0.047886,,0.0,-0.047843,0.0,0.0,0.0
"""50%""",0.315906,0.000442,21068.0,0.088214,0.04014,0.6284781,"""2015-10-21""",0.033937,0.041899,,1.0,0.041936,0.0,0.0,0.0
"""75%""",0.4669854,0.011655,47220.0,0.4989567,0.21436,0.8982955,"""2018-01-22""",0.384807,0.155274,,1.0,0.1553,0.0,0.0,0.0
"""max""",0.9997168,4.148082,56668.0,0.961015,0.9773637,0.9589839,"""2019-07-31""",0.946633,0.926176,,1.0,0.926176,1.0,1.0,1.0


In [164]:
corr_global = filter_df.select(
    pl.corr("ITI(13D)", "sentiment_score").alias("corr_ITI_sentiment")
)

print(corr_global)

shape: (1, 1)
┌────────────────────┐
│ corr_ITI_sentiment │
│ ---                │
│ f64                │
╞════════════════════╡
│ -0.075102          │
└────────────────────┘


In [165]:
corr_by_firm = (
    filter_df
    .group_by("permco")
    .agg(pl.corr("ITI(13D)", "sentiment_score").alias("corr_ITI_sentiment"))
    .drop_nulls()
    .sort("corr_ITI_sentiment", descending=True)
)

print(corr_by_firm)


shape: (2_037, 2)
┌────────┬────────────────────┐
│ permco ┆ corr_ITI_sentiment │
│ ---    ┆ ---                │
│ i64    ┆ f64                │
╞════════╪════════════════════╡
│ 3133   ┆ NaN                │
│ 11064  ┆ NaN                │
│ 14750  ┆ NaN                │
│ 15912  ┆ NaN                │
│ 41584  ┆ NaN                │
│ …      ┆ …                  │
│ 36338  ┆ -0.974238          │
│ 12954  ┆ -1.0               │
│ 8586   ┆ -1.0               │
│ 21776  ┆ -1.0               │
│ 37958  ┆ -1.0               │
└────────┴────────────────────┘


In [166]:
corr_by_firm.describe()

statistic,permco,corr_ITI_sentiment
str,f64,f64
"""count""",2037.0,2037.0
"""null_count""",0.0,0.0
"""mean""",30806.594502,
"""std""",20036.920525,
"""min""",37.0,-1.0
"""25%""",12289.0,-0.127218
"""50%""",21869.0,-0.071801
"""75%""",53667.0,-0.018128
"""max""",56668.0,1.0
