In [None]:
import pandas as pd

# Load the scored-articles table from a CSV path given relative to the
# current working directory.
articles = pd.read_csv(filepath_or_buffer="Data/scored_articles.csv")

In [None]:
# Assemble a datetime column from the separate "annee" / "mois" / "jour"
# columns. pd.to_datetime builds dates from a frame whose columns are named
# year / month / day, hence the rename.
articles["date"] = pd.to_datetime(
    articles[["annee", "mois", "jour"]].rename(
        columns={"annee": "year", "mois": "month", "jour": "day"}
    )
)

# Boolean flag: True for articles dated on or after 2017-10-01, the cutoff
# this notebook uses for the post-MeToo period. Rows with a missing date
# (NaT) compare as False.
articles["post_metoo"] = articles["date"].ge(pd.Timestamp("2017-10-01"))

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

# Base names of the scored dimensions; the columns actually compared below
# are the corresponding "<name>_final" columns of `articles`.
label_cols = [
    "score",
    "voc_violence",
    "hierar",
    "portrait_victime",
    "portrait_auteur",
    "relation",
    "meanisme_violence",
    "stat",
]
vars_to_compare = [f"{name}_final" for name in label_cols]

# Split the corpus on the post_metoo flag.
pre = articles[articles["post_metoo"].eq(False)]
post = articles[articles["post_metoo"].eq(True)]


def _pre_post_row(column):
    """Return one summary row comparing `column` before vs. after the cutoff.

    Missing values are dropped within each group. The row holds the two group
    means, their difference (post - pre) and the p-value of Welch's t-test
    (unequal variances), rounded for display.
    """
    before = pre[column].dropna()
    after = post[column].dropna()
    before_mean = before.mean()
    after_mean = after.mean()
    _, p_value = ttest_ind(after, before, equal_var=False)
    return {
        "variable": column,
        "mean_pre": round(before_mean, 3),
        "mean_post": round(after_mean, 3),
        "diff": round(after_mean - before_mean, 3),
        "p_value": round(p_value, 4),
    }


# One row per compared variable, collected into a single summary table.
results = [_pre_post_row(column) for column in vars_to_compare]
mean_year_results = pd.DataFrame(results)


In [None]:
# Display the pre/post comparison table (one row per "*_final" variable).
mean_year_results

In [None]:
# Copy of `articles` without the identifier, text, raw-label and "*_pred"
# columns listed below. Despite its name, `mean_year` is not aggregated here:
# this statement only removes columns.
mean_year = articles.drop(
    labels=[
        "Unnamed: 0.1",
        "Unnamed: 0",
        "identifiant",
        "journal_clean",
        "titre",
        "texte",
        "keywords",
        "theme_final",
        "theme",
        "score",
        "voc_violence",
        "hierar",
        "portrait_victime",
        "portrait_auteur",
        "relation",
        "meanisme_violence",
        "stat",
        "texte_total",
        "index",
        "voc_violence_pred",
        "hierar_pred",
        "portrait_victime_pred",
        "portrait_auteur_pred",
        "relation_pred",
        "meanisme_violence_pred",
        "stat_pred",
        "score_pred",
        "post_metoo",
    ],
    axis="columns",
)

In [None]:
# Display the frame currently bound to `mean_year`. When the cells are run top
# to bottom this is still the column-reduced copy of `articles`; no per-year
# aggregation has been applied at this point.
mean_year

In [None]:
# Cast the year column to integers, overwriting it in place on `articles`.
# Presumably done so the per-year grouping and the 2017 reference level used
# in later cells operate on whole-number years - TODO confirm.
# NOTE: astype(int) raises if the column contains NaN / missing values.
articles['annee'] = articles['annee'].astype(int)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Remove identifier, text, raw-label and "*_pred" columns before averaging.
articles_bis = articles.drop(
    labels=[
        "Unnamed: 0.1",
        "Unnamed: 0",
        "identifiant",
        "journal_clean",
        "titre",
        "texte",
        "keywords",
        "theme_final",
        "theme",
        "score",
        "voc_violence",
        "hierar",
        "portrait_victime",
        "portrait_auteur",
        "relation",
        "meanisme_violence",
        "stat",
        "texte_total",
        "index",
        "voc_violence_pred",
        "hierar_pred",
        "portrait_victime_pred",
        "portrait_auteur_pred",
        "relation_pred",
        "meanisme_violence_pred",
        "stat_pred",
        "score_pred",
        "post_metoo",
    ],
    axis="columns",
)

# Mean of every remaining numeric column for each value of "annee".
mean_year = articles_bis.groupby("annee").mean(numeric_only=True).reset_index()

# Line colour per plotted column.
colors = {
    "score_final": "#1b1b1b",
    "portrait_victime_final": "#005f73",
    "relation_final": "#0a9396",
    "meanisme_violence_final": "#ae2012",
    "stat_final": "#6a4c93",
    "portrait_auteur_final": "#ca6702",
    "hierar_final": "#94a1b2",
}

# Legend label per plotted column; dict order is the drawing / legend order.
series_labels = {
    "score_final": "Score final",
    "portrait_victime_final": "Portrait victime",
    "relation_final": "Relation de pouvoir",
    "meanisme_violence_final": "Mécanisme de violence",
    "stat_final": "Statistiques",
    "portrait_auteur_final": "Portrait auteur",
    "hierar_final": "Hiérarchie de l'information",
}

fig, ax = plt.subplots(figsize=(14, 6))
for column, legend_label in series_labels.items():
    # Only the overall score is drawn with a thicker line; the other series
    # keep matplotlib's default width.
    line_style = {"linewidth": 3} if column == "score_final" else {}
    ax.plot(
        mean_year["annee"],
        mean_year[column],
        label=legend_label,
        color=colors[column],
        **line_style,
    )

# Vertical marker at the year 2017.
ax.axvline(x=2017, color="red", linestyle="--", label="Début MeToo (2017)")
ax.set_title("Évolution annuelle des dimensions du traitement médiatique des VSS")
ax.set_xlabel("Année")
ax.set_ylabel("Score moyen (-1 à 1)")
ax.grid(True)
# Legend is placed outside the axes, to the right of the plot area.
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig("plot.png")
plt.show()


In [None]:
import statsmodels.formula.api as smf

# OLS of the final score on year dummies, with 2017 as the reference year, so
# each coefficient is the difference in mean score_final relative to 2017.
# Standard errors are clustered by journal.
#
# The formula interface silently drops rows with missing values, whereas the
# `groups` vector is passed as-is. If the groups came from the unfiltered
# frame, any missing score/year would make the cluster labels a different
# length from the estimation sample and the clustered fit would fail. Both
# the model data and the groups are therefore taken from the same filtered
# frame. With no missing values this is identical to fitting on `articles`.
reg_data = articles.dropna(subset=["score_final", "annee", "journal_clean"])

# NOTE(review): cluster-robust standard errors assume a reasonably large
# number of clusters - confirm how many distinct journals are in the data.
model = smf.ols(
    "score_final ~ C(annee, Treatment(reference=2017))",
    data=reg_data,
).fit(cov_type="cluster", cov_kwds={"groups": reg_data["journal_clean"]})

print(model.summary())