In [1]:

import pandas as pd
import scipy.stats as stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd


In [4]:
# Load your dataset

df = pd.read_csv("vader.csv", usecols=["Holiday", "Source", "content_compound"])
df = df.dropna()
print(df.head())

  Holiday Source  content_compound
0     MLK    FOX            0.9916
1     MLK    FOX           -0.8316
2     MLK    FOX            0.6543
3     MLK    FOX            0.8745
4     MLK    FOX            0.9109


In [11]:
def run_anova_for_holiday(holiday_name, df):
    print(f"\nRunning ANOVA for {holiday_name}...\n")
    
    # Filter data for the given holiday
    holiday_df = df[df["Holiday"] == holiday_name]
    print(holiday_df)
    
    # Ensure at least 2 sources exist
    sources = holiday_df["Source"].unique()
    if len(sources) < 2:
        print(f"Not enough sources for {holiday_name} ANOVA.")
        return
    print(sources)
    
    # Run ANOVA
    f_stat, p_anova = stats.f_oneway(
        *[holiday_df[holiday_df["Source"] == source]["content_compound"] for source in sources]
    )
    print(f"ANOVA F-statistic: {f_stat:.4f}, p-value: {p_anova:.4f}")





In [12]:
run_anova_for_holiday("MLK", df)



Running ANOVA for MLK...

   Holiday Source  content_compound
0      MLK    FOX            0.9916
1      MLK    FOX           -0.8316
2      MLK    FOX            0.6543
3      MLK    FOX            0.8745
4      MLK    FOX            0.9109
5      MLK    FOX            0.5859
6      MLK    FOX            0.9827
7      MLK    FOX           -0.3532
8      MLK    FOX            0.9955
9      MLK     AP            0.9618
10     MLK     AP           -0.9713
11     MLK     AP            0.8829
12     MLK     AP            0.9785
13     MLK     AP           -0.9884
14     MLK     AP            0.9879
15     MLK     AP           -0.9958
16     MLK     AP            0.8842
17     MLK     AP           -0.4782
18     MLK    CNN           -0.2071
19     MLK    CNN            0.9975
20     MLK    CNN            0.9912
21     MLK    CNN            0.9997
22     MLK    CNN            0.9804
23     MLK    CNN            0.9218
24     MLK    CNN            0.9981
25     MLK    CNN            0.8555
2

In [13]:
run_anova_for_holiday("Veterans", df)



Running ANOVA for Veterans...

      Holiday Source  content_compound
62   Veterans    FOX           -0.9881
63   Veterans    FOX            0.9997
64   Veterans    FOX            0.9998
65   Veterans    FOX            0.9987
66   Veterans    FOX            0.9715
67   Veterans    FOX            0.9280
68   Veterans    FOX            0.7579
69   Veterans    FOX            0.9983
70   Veterans    FOX            0.8591
71   Veterans     AP            0.9001
72   Veterans     AP            0.9636
73   Veterans     AP            0.9690
74   Veterans    CNN            0.7994
75   Veterans    CNN            0.8495
76   Veterans    CNN            0.9887
77   Veterans    CNN            0.9877
78   Veterans    CNN           -0.9809
79   Veterans    CNN            0.9840
80   Veterans    CNN            0.8442
81   Veterans    CNN            0.9969
82   Veterans    CNN            0.9535
83   Veterans    CNN            0.9934
84   Veterans    CNN            0.9933
121  Veterans    WSJ            

In [14]:
run_anova_for_holiday("July4th", df=df)


Running ANOVA for July4th...

     Holiday Source  content_compound
27   July4th    FOX            0.9600
28   July4th    FOX            0.9608
29   July4th    FOX            0.9959
30   July4th    FOX           -0.9968
31   July4th    FOX            0.9986
32   July4th    FOX           -0.9360
33   July4th    FOX            0.9978
34   July4th    FOX            0.8461
35   July4th    FOX           -0.9999
36   July4th    FOX           -0.9934
37   July4th    FOX            0.5919
38   July4th    FOX            0.7457
39   July4th     AP            0.9180
40   July4th     AP           -0.9856
41   July4th     AP            0.9008
42   July4th     AP            0.9596
43   July4th     AP           -0.9946
44   July4th     AP           -0.9990
45   July4th     AP            0.9950
46   July4th     AP           -0.8760
47   July4th     AP            0.9778
48   July4th    CNN           -0.9515
49   July4th    CNN            0.8338
50   July4th    CNN           -0.9988
51   July4th    CNN