In [179]:
import random
import numpy as np
import pandas as pd
import time
import re
import datetime
import os
import matplotlib.pyplot as plt
import scipy
from scipy import stats

In [214]:
def f(p,n=4):
    """Format the number `p` as fixed-point text with `n` digits after the decimal point.

    Args:
        p: numeric value to format.
        n: number of decimal places (default 4).

    Returns:
        str: `p` rendered with the `f` (fixed-point) presentation type.
    """
    decimals = n
    return f'{p:.{decimals}f}'

In [38]:
# Load the per-article LIWC-22 result tables for each outlet.
# index_col=0 makes the first CSV column the row index.
# Plain string paths: the originals carried an `f` prefix without any
# placeholders, which is a no-op and easy to confuse with the helper `f()`.
LIWC_RESULTS_DIR = 'LIWCresults'
dfbbc = pd.read_csv(
    LIWC_RESULTS_DIR + '/LIWC-22 Results - BBC_BUCHA_07-13-2022 - LIWC Analysis.csv',
    index_col=0,
)
dfsputnik = pd.read_csv(
    LIWC_RESULTS_DIR + '/LIWC-22 Results - Sputnik_BUCHA_07-13-2022 - LIWC Analysis.csv',
    index_col=0,
)

In [296]:
# Character length of every article body, one list per outlet.
bbcchars = list(map(len, dfbbc['text']))
sputnikchars = list(map(len, dfsputnik['text']))

# Corpus totals first, then the per-article averages.
bbc_total = sum(bbcchars)
sputnik_total = sum(sputnikchars)
print(bbc_total,'chars in BBC Bucha dataset')
print(sputnik_total,'chars in Sputnik Bucha dataset')

print(bbc_total/len(bbcchars),'average chars in BBC Bucha dataset')
print(sputnik_total/len(sputnikchars),'average chars in Sputnik Bucha dataset')

37148 chars in BBC Bucha dataset
43901 chars in Sputnik Bucha dataset
3377.090909090909 average chars in BBC Bucha dataset
3991.0 average chars in Sputnik Bucha dataset


## BBC/Sputnik stats
- 11 articles from each outlet, every article containing Bucha in title or text
- 37,148 chars in BBC Bucha dataset (average 3,377)
- 43,901 chars in Sputnik Bucha dataset (average 3,991)

## Stats on LIWC

In [119]:
# Column names to run the per-category tests on: everything after the first
# 8 columns of the Sputnik table.
# NOTE(review): presumably the skipped leading columns are non-category fields
# (text/metadata) — confirm against the CSV header.
LIWCcolumns = dfsputnik.columns[8:].tolist()

### independent t-test

The independent t-test is a parametric test of whether the difference between the means of two groups is statistically significant.

In [280]:
# Run one independent two-sample t-test (BBC vs. Sputnik) per LIWC column and
# collect a record with both group means, the raw value arrays and the result.
allCoeffs = []
for category in LIWCcolumns:
    bbc_series = dfbbc[category]
    sputnik_series = dfsputnik[category]
    bbca = np.array(bbc_series)
    sputnika = np.array(sputnik_series)
    coeff = stats.ttest_ind(bbca, sputnika, axis=0)
    record = dict(
        cat=category,
        bbcAvg=bbc_series.mean(),
        sputnikAvg=sputnik_series.mean(),
        bbcArray=bbca,
        sputnikArray=sputnika,
        t_statistic=coeff.statistic,
        pvalue=coeff.pvalue,
    )
    allCoeffs.append(record)

In [248]:
# One row per LIWC category (means, raw arrays, t statistic, p-value),
# written to a spreadsheet in the working directory.
df = pd.DataFrame.from_records(data=allCoeffs)
df.to_excel(excel_writer='sputnik_bbc_preanalysis.xlsx')

#### Statistically significant LIWC Categories

In [246]:
# Bucket category names by p-value: p <= .005 first, then .005 < p <= .05.
mostsig = [entry['cat'] for entry in allCoeffs if entry['pvalue'] <= .005]
print('Most significant (<= .005):', mostsig)

sig = [entry['cat'] for entry in allCoeffs if .005 < entry['pvalue'] <= .05]
print('\nSignificant (<= .05):', sig)
print()

Most significant (<= .005): ['WPS', 'BigWords', 'auxverb', 'verb', 'quantity', 'socrefs', 'male', 'Culture', 'politic', 'Physical', 'focuspast', 'Period']

Significant (<= .05): ['Analytic', 'Clout', 'Dic', 'Linguistic', 'pronoun', 'ppron', 'shehe', 'article', 'prep', 'tentat', 'differ', 'Social', 'home', 'death', 'reward', 'motion', 'visual', 'Comma']



#### Printing news outlet averages of significant LIWC categories

In [247]:
# Average comparisons
# Each tier is (heading, p-value predicate); tiers are printed in this order.
tiers = (
    ('Most significant', lambda p: p <= .005),
    ('Significant', lambda p: p > .005 and p <= .05),
)
for heading, in_tier in tiers:
    print(heading.upper())
    for c in allCoeffs:
        if not in_tier(c['pvalue']):
            continue
        cat = c['cat']
        # Means are recomputed from the frames and formatted with the helper f().
        bbc_mean = f(dfbbc[cat].mean())
        sputnik_mean = f(dfsputnik[cat].mean())
        print(f"\t{cat} (t-statistic: {c['t_statistic']}, p-value: {c['pvalue']}):")
        print(f"\t\tAverages: BBC: {bbc_mean}\tSputnik: {sputnik_mean}\n")

MOST SIGNIFICANT
	WPS (t-statistic: -5.9262308656569, p-value: 8.519590191218455e-06):
		Averages: BBC: 19.8345	Sputnik: 27.5255

	BigWords (t-statistic: -5.558932635602999, p-value: 1.9289863297549646e-05):
		Averages: BBC: 25.6455	Sputnik: 32.6755

	auxverb (t-statistic: 3.3833152777556443, p-value: 0.0029527688862996822):
		Averages: BBC: 7.6536	Sputnik: 4.4718

	verb (t-statistic: 4.10580598801902, p-value: 0.0005493144425936519):
		Averages: BBC: 14.7809	Sputnik: 9.2773

	quantity (t-statistic: 3.3813128025212107, p-value: 0.0029664248532812686):
		Averages: BBC: 3.9127	Sputnik: 2.2518

	socrefs (t-statistic: 3.7238175925217027, p-value: 0.001340731761145559):
		Averages: BBC: 7.8100	Sputnik: 4.3609

	male (t-statistic: 3.672683037206621, p-value: 0.0015102751903958787):
		Averages: BBC: 1.6200	Sputnik: 0.4691

	Culture (t-statistic: -4.80200319035267, p-value: 0.00010850272592156059):
		Averages: BBC: 4.1209	Sputnik: 7.5382

	politic (t-statistic: -5.962067452637259, p-value: 7.8

In [237]:
print('RANDOM BBC ARTICLE:')
# Draw from a plain list rather than the Series itself: random.choice picks a
# positional integer and indexes its argument with it, which on a Series is a
# label lookup (KeyError / deprecated positional fallback depending on the
# index), and on Python 3.11 the Series truthiness check raises ValueError.
print('\t',random.choice(dfbbc['text'].tolist()))

RANDOM BBC ARTICLE:
	 Ukraine refugees face frustrating visa wait A family hoping to house three generations of Ukraine refugees say they are racing against time to get visas. Sarah O'Brien, from Denbighshire, wants to help eight of the Palirua family. They have fled Bucha, where scores of atrocities by Russian soldiers against civilians have been uncovered. The family, which includes grandparents, two of their daughters and four children, escaped to Poland and have been staying in Warsaw. Mrs O'Brien, from Llangollen, said they had approached friends in Ukraine offering to help those fleeing the invasion, when the Homes for Ukraine scheme was launched by the UK government on 14 March. They were then put in touch with the Palirua family. But after three weeks and hours working on applications, only five of the family have received their visa papers to come to Wales. Mrs O'Brien said the Ukraine family have been told they must leave their current accommodation in Warsaw by Sunday, and n

In [281]:
print('RANDOM SPUTNIK ARTICLE:')
# Draw from a plain list rather than the Series itself: random.choice picks a
# positional integer and indexes its argument with it, which on a Series is a
# label lookup (KeyError / deprecated positional fallback depending on the
# index), and on Python 3.11 the Series truthiness check raises ValueError.
print('\t',random.choice(dfsputnik['text'].tolist()))

RANDOM SPUTNIK ARTICLE:
	 NATO allies will discuss increasing arms deliveries to Ukraine when the bloc's ministers of foreign affairs meet on 6 and 7 April, NATO secretary-general Jens Stoltenberg told the press on Tuesday. Does this mean the alliance is determined to fuel the conflict to the last Ukrainian? "Everything about NATO is hypocritical," says Bruce Gagnon, coordinator of the Global Network Against Weapons and Nuclear Power in Space. "They declare they are a 'peace alliance' yet their history is nothing but war. Yugoslavia, Iraq, Afghanistan, Libya, Syria and now Ukraine all reveal that NATO is in fact the pirate forces of corporate globalisation. NATO's job is to force submission to western corporate demands." NATO's Foreign Ministers meeting in Brussels this week will be focused on the Russian special operation launched on 24 February to demilitarise and de-Nazify Ukraine. According to the alliance's official website, the summit will be joined by foreign ministers from Ukra