# Compare Experiments

In [1]:
# load packages
import pickle
import numpy as np
from sklearn.svm import SVC

## Train Classifier

In [2]:
# load ICA embeddings
embedded_tweets = np.load('tweet_embed_250.npy')

In [3]:
# instantiate classification algorithm

# round 1 winner
svc = SVC(C = 1, kernel = 'rbf', probability = True)

class1_train_indices = list(range(100))
class0_train_indices = list(range(280,380))

train_X = embedded_tweets[[class1_train_indices + class0_train_indices],:][0]

hundred_ones = [1]*100
hundred_zeros = [0]*100
train_Y = hundred_ones + hundred_zeros

# fit SVC model on training subset of tweet embeddings
svc.fit(train_X, train_Y)

SVC(C=1, probability=True)

## Disaggregate correct/incorrect predictions

In [4]:
# get test set
test_indices = list(range(100,280))
test_X = embedded_tweets[[test_indices],:][0]

# model predictions for test set
y_hat = svc.predict(test_X)

In [5]:
# correct or not
y_bool = y_hat==1

## Load list of explanations

In [6]:
# read in explanations list
with open('explanation_list', 'rb') as f:
    explanations = pickle.load(f)

In [7]:
explanations[0]

[('cases', -0.20405363785689484),
 ('new', -0.155064717978664),
 ('deaths', -0.10207417820647408),
 ('coronavirus', -0.06698967143404387),
 ('60', 0.05479825610293619),
 ('buy', 0.037730810152627504),
 ('stocks', 0.030866492206280807),
 ('back', 0.02862044153289094),
 ('portfolio', 0.019228230730050148),
 ('virus', -0.01764508279082087),
 ('Reasons', 0.01762324495375404),
 ('markets', 0.017075460328202865),
 ('1500', 0.015832371859270025),
 ('recovered', 0.015536797813199194),
 ('2700', 0.015119529843808394),
 ('to', -0.002003116629506429),
 ('today', 0.0003091368069083442)]

In [8]:
y_bool

array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True, False,  True,  True, False,  True,  True,  True,
        True,  True,

In [9]:
y_boolF = y_bool==False
y_boolF

array([ True, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False,  True, False, False, False,
       False, False,  True, False, False,  True, False, False, False,
       False, False,

In [10]:
# separate by correctness of prediction
from itertools import compress
correctPred = list(compress(explanations, y_bool))
wrongPred = list(compress(explanations, y_boolF))

In [11]:
wrongPred[0]

[('cases', -0.20405363785689484),
 ('new', -0.155064717978664),
 ('deaths', -0.10207417820647408),
 ('coronavirus', -0.06698967143404387),
 ('60', 0.05479825610293619),
 ('buy', 0.037730810152627504),
 ('stocks', 0.030866492206280807),
 ('back', 0.02862044153289094),
 ('portfolio', 0.019228230730050148),
 ('virus', -0.01764508279082087),
 ('Reasons', 0.01762324495375404),
 ('markets', 0.017075460328202865),
 ('1500', 0.015832371859270025),
 ('recovered', 0.015536797813199194),
 ('2700', 0.015119529843808394),
 ('to', -0.002003116629506429),
 ('today', 0.0003091368069083442)]

In [12]:
# get unique words from all explanations
vocab_list = []
for subList in explanations:
    for el in subList:
        if el[0] not in vocab_list:
            vocab_list.append(el[0])
len(vocab_list)

1659

In [13]:
# unique words from explanations for correctly predicted unreliable tweets
vocab_correct = []
for subList in correctPred:
    for el in subList:
        if el[0] not in vocab_correct:
            vocab_correct.append(el[0])
len(vocab_correct)

1615

In [14]:
# unique words from explanations for incorrectly predicted unreliable tweets
vocab_wrong = []
for subList in wrongPred:
    for el in subList:
        if el[0] not in vocab_wrong:
            vocab_wrong.append(el[0])
len(vocab_wrong)

120

## Experiment 1: Table 1 words

In [15]:
# define dictionary
t1 = [
    'blame',
    'accuse',
    'refuse',
    'catastrophe',
    'chaos',
    'evil',
    'fight',
    'danger',
    'hysteria',
    'panic',
    'paranoia',
    'laugh',
    'stupidity',
    'hear',
    'see',
    'feel',
    'suppose',
    'perceive',
    'look',
    'appear',
    'suggest',
    'believe',
    'pretend',
    'martial',
    'kill',
    'die',
    'weapon',
    'weaponizing',
    'ussr',
    'japan',
    'fukushima',
    'chernobyl',
    'wuhan',
    'china',
    'foreigners',
    'cats',
    'dogs',
    'i',
    'me',
    'mine',
    'my',
    'you',
    'your',
    'we',
    'our',
    'propaganda',
    'fake',
    'conspiracy',
    'claim',
    'misleading',
    'hoax',
    'cure',
    'breakthrough',
    'bitch',
    'wtf',
    'dogbreath',
    'zombie',
    'junkies',
    'hell',
    'screwed',
    'secular',
    'bible',
    'maga',
    'magat',
    'genetic',
    'hillary',
    'chinese',
    'fundamentalist',
    'market',
    'communist',
    'nazi',
    'stock',
    'economy',
    'money',
    'cost',
    'costs',
    'election',
    'campaign',
    'presidential',
    'impeachment',
    'rallies',
    'base',
    'trump',
    'war',
    'iran'
]

### Disaggregated by correctness of prediction

#### Correctly predicted

In [16]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_correct if el.lower() in t1]

In [17]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [18]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(correctPred), len(vocab_correct)))

for i in range(len(correctPred)):
    expl = correctPred[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [19]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [20]:
totalNonZero

228

In [21]:
np.array(tweet_scores).mean()

0.5341130604288499

In [22]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [23]:
np.array(tweet_scores).mean()

0.3313840155945419

#### Incorrectly predicted

In [24]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_wrong if el.lower() in t1]

In [25]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [26]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(wrongPred), len(vocab_wrong)))

for i in range(len(wrongPred)):
    expl = wrongPred[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [27]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [28]:
np.array(tweet_scores).mean()

0.2777777777777778

In [29]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [30]:
np.array(tweet_scores).mean()

0.2222222222222222

### Aggregated

In [31]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_list if el.lower() in t1]

In [32]:
filtered_vocab

['you',
 'we',
 'Trump',
 'You',
 'your',
 'TRUMP',
 'look',
 'We',
 'My',
 'fight',
 'evil',
 'Iran',
 'chaos',
 'refuse',
 'me',
 'Misleading',
 'junkies',
 'Zombie',
 'market',
 'Market',
 'stock',
 'Wuhan',
 'China',
 'conspiracy',
 'panic',
 'Hell',
 'Stupidity',
 'Paranoia',
 'hoax',
 'weaponizing',
 'campaign',
 'my',
 'I',
 'our',
 'base',
 'rallies',
 'screwed',
 'weapon',
 'Rallies',
 'laugh',
 'cure',
 'DIE',
 'Hoax',
 'Chinese',
 'economy',
 'see',
 'pretend',
 'suggest',
 'cost',
 'Communist',
 'kill',
 'hysteria',
 'Fake',
 'war',
 'impeachment',
 'Cure',
 'Conspiracy',
 'FAKE',
 'fake',
 'STOCK',
 'MARKET',
 'ECONOMY',
 'money',
 'Japan',
 'Fukushima',
 'Chernobyl',
 'USSR',
 'KILL',
 'MY',
 'feel',
 'believe',
 'MAGA',
 'Propaganda']

In [33]:
len(filtered_vocab)

73

In [34]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [35]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(explanations), len(vocab_list)))

for i in range(len(explanations)):
    expl = explanations[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [36]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [37]:
totalNonZero

232

In [38]:
np.array(tweet_scores).mean()

0.5212962962962963

In [39]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [40]:
np.array(tweet_scores).mean()

0.3259259259259259

## Experiment 2:  Table 1 + manual additions

In [41]:
# define dictionary
t1PlusManual = [
    'blame',
    'accuse',
    'refuse',
    'catastrophe',
    'emergency',
    'chaos',
    'crisis',
    'evil',
    'fight',
    'danger',
    'hysteria',
    'panic',
    'paranoia',
    'fear',
    'fears',
    'laugh',
    'stupidity',
    'hear',
    'see',
    'feel',
    'suppose',
    'perceive',
    'look',
    'appear',
    'suggest',
    'believe',
    'believed',
    'pretend',
    'martial',
    'kill',
    'killing',
    'kills',
    'killed',
    'die',
    'death',
    'dies',
    'dying',
    'dead',
    'died',
    'threat',
    'weapon',
    'weaponize',
    'weaponizing',
    'knife',
    'ussr',
    'japan',
    'chernobyl',
    'wuhan',
    'china',
    'foreigners',
    'cat',
    'cats',
    'dog',
    'dogs',
    'i',
    'me',
    'mine',
    'my',
    'you',
    'yours',
    'your',
    'we',
    'our',
    'propaganda',
    'fake',
    'conspiracy',
    'claim',
    'claims',
    'claiming',
    'claimed',
    'misleading',
    'hoax',
    'cure',
    'breakthrough',
    'bitch',
    'wtf',
    'dogbreath',
    'zombie',
    'junkies',
    'hell',
    'screwed',
    'fuck',
    'fucking',
    'fucked',
    'fuckin',
    'wth',
    'secular',
    'bible',
    'maga',
    'magat',
    'genetic',
    'hillary',
    'clinton',
    'fundamentalist',
    'market',
    'communist',
    'nazi',
    'stock',
    'bank',
    'economy',
    'economic',
    'money',
    'cost',
    'costs',
    'election',
    'campaign',
    'presidential',
    'impeachment',
    'rally',
    'rallies',
    'base',
    'president',
    'trump',
    'war',
    'wwiii',
    'asteroid',
    'banknotes',
    'dangerous',
    'invent',
    'invented',
    'iran',
    'lie',
    'lies',
    'lying',
    'lied',
    'liar',
    'liars',
    'lmfao',
    'lmfaoooooo',
    'misinformation',
    'news',
    'media',
    'financial',
    'propagandawars',
    'antidote'
]

### Disaggregated by correctness of prediction

#### Correctly predicted

In [42]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_correct if el.lower() in t1PlusManual]

In [43]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [44]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(correctPred), len(vocab_correct)))

for i in range(len(correctPred)):
    expl = correctPred[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [45]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [46]:
totalNonZero

331

In [47]:
np.array(tweet_scores).mean()

0.5933723196881092

In [48]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [49]:
np.array(tweet_scores).mean()

0.35633528265107217

#### Incorrectly predicted

In [50]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_wrong if el.lower() in t1PlusManual]

In [51]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [52]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(wrongPred), len(vocab_wrong)))

for i in range(len(wrongPred)):
    expl = wrongPred[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [53]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [54]:
np.array(tweet_scores).mean()

0.31481481481481477

In [55]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [56]:
np.array(tweet_scores).mean()

0.07407407407407408

### Aggregated

In [57]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_list if el.lower() in t1PlusManual]

In [58]:
filtered_vocab

['you',
 'we',
 'Trump',
 'President',
 'You',
 'your',
 'TRUMP',
 'media',
 'look',
 'asteroid',
 'WWIII',
 'dying',
 'We',
 'BANKNOTES',
 'dangerous',
 'economic',
 'My',
 'believed',
 'knife',
 'fight',
 'EMERGENCY',
 'NEWS',
 'evil',
 'invented',
 'Iran',
 'chaos',
 'refuse',
 'lying',
 'me',
 'Misleading',
 'junkies',
 'Zombie',
 'market',
 'Market',
 'stock',
 'claims',
 'Wuhan',
 'China',
 'conspiracy',
 'killing',
 'lied',
 'LMFAOOOOOO',
 'threat',
 'crisis',
 'Bank',
 'panic',
 'Hell',
 'Stupidity',
 'Death',
 'died',
 'Claims',
 'Fears',
 'Paranoia',
 'hoax',
 'claiming',
 'weaponizing',
 'Fear',
 'campaign',
 'Kills',
 'death',
 'my',
 'fucked',
 'I',
 'our',
 'base',
 'rallies',
 'emergency',
 'screwed',
 'weapon',
 'Rallies',
 'Clinton',
 'DEAD',
 'laugh',
 'cure',
 'DIE',
 'lie',
 'Hoax',
 'lies',
 'economy',
 'dog',
 'see',
 'news',
 'pretend',
 'suggest',
 'misinformation',
 'cost',
 'Communist',
 'fuckin',
 'kill',
 'fear',
 'financial',
 'hysteria',
 'fucking',
 'kill

In [59]:
len(filtered_vocab)

125

In [60]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [61]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(explanations), len(vocab_list)))

for i in range(len(explanations)):
    expl = explanations[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [62]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [63]:
totalNonZero

341

In [64]:
np.array(tweet_scores).mean()

0.5794444444444445

In [65]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [66]:
np.array(tweet_scores).mean()

0.3422222222222222

## Experiment 3: Table 1 words + stemming

In [67]:
from nltk.stem.porter import *

In [68]:
stemmer = PorterStemmer()

In [69]:
# define dictionary
stems = [stemmer.stem(word) for word in t1PlusManual]
stems = list(set(stems))

In [70]:
stems

['die',
 'econom',
 'evil',
 'secular',
 'dog',
 'trump',
 'screw',
 'wuhan',
 'claim',
 'liar',
 'campaign',
 'stupid',
 'fundamentalist',
 'pretend',
 'stock',
 'money',
 'clinton',
 'japan',
 'fear',
 'blame',
 'wwiii',
 'we',
 'mislead',
 'invent',
 'economi',
 'junki',
 'breakthrough',
 'kill',
 'news',
 'appear',
 'believ',
 'hysteria',
 'martial',
 'panic',
 'fuckin',
 'hear',
 'media',
 'threat',
 'knife',
 'bank',
 'paranoia',
 'weapon',
 'propaganda',
 'wtf',
 'hell',
 'lie',
 'feel',
 'conspiraci',
 'emerg',
 'zombi',
 'cat',
 'maga',
 'ralli',
 'communist',
 'our',
 'fight',
 'danger',
 'fuck',
 'propagandawar',
 'magat',
 'lmfaoooooo',
 'misinform',
 'lmfao',
 'china',
 'base',
 'i',
 'wth',
 'hillari',
 'you',
 'look',
 'crisi',
 'laugh',
 'me',
 'ussr',
 'bibl',
 'iran',
 'cost',
 'catastroph',
 'suggest',
 'perceiv',
 'dead',
 'foreign',
 'fake',
 'mine',
 'my',
 'asteroid',
 'see',
 'dogbreath',
 'suppos',
 'bitch',
 'refus',
 'antidot',
 'hoax',
 'accus',
 'chernobyl'

### Disaggregated by correctness of prediction

#### Correctly predicted

In [71]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_correct if stemmer.stem(el).lower() in stems]

In [72]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [73]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(correctPred), len(vocab_correct)))

for i in range(len(correctPred)):
    expl = correctPred[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [74]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [75]:
totalNonZero

349

In [76]:
np.array(tweet_scores).mean()

0.618978000556948

In [77]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [78]:
np.array(tweet_scores).mean()

0.37830687830687837

#### Incorrectly predicted

In [79]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_wrong if stemmer.stem(el).lower() in stems]

In [80]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [81]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(wrongPred), len(vocab_wrong)))

for i in range(len(wrongPred)):
    expl = wrongPred[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [82]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [83]:
np.array(tweet_scores).mean()

0.40740740740740733

In [84]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [85]:
np.array(tweet_scores).mean()

0.14814814814814814

### Aggregated

In [86]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_list if stemmer.stem(el).lower() in stems]

In [87]:
filtered_vocab

['deaths',
 'stocks',
 'markets',
 'you',
 'we',
 'Trump',
 'President',
 'You',
 'your',
 'TRUMP',
 'media',
 'look',
 'asteroid',
 'WWIII',
 'dying',
 'We',
 'BANKNOTES',
 'dangerous',
 'suggests',
 'economic',
 'My',
 'refused',
 'believed',
 'knife',
 'fight',
 'EMERGENCY',
 'evil',
 'invented',
 'Iran',
 'chaos',
 'refuse',
 'lying',
 'me',
 'Misleading',
 'junkies',
 'Zombie',
 'market',
 'Market',
 'stock',
 'Fundamentalists',
 'claims',
 'Wuhan',
 'China',
 'conspiracy',
 'killing',
 'lied',
 'LMFAOOOOOO',
 'threat',
 'crisis',
 'Bank',
 'panic',
 'Hell',
 'Stupidity',
 'Death',
 'died',
 'Claims',
 'Impeach',
 'Fears',
 'Paranoia',
 'hoax',
 'claiming',
 'weaponizing',
 'Fear',
 'campaign',
 'Kills',
 'death',
 'my',
 'fucked',
 'I',
 'refuses',
 'our',
 'base',
 'rallies',
 'emergency',
 'screwed',
 'weapon',
 'Rallies',
 'Clinton',
 'DEAD',
 'laugh',
 'cure',
 'DIE',
 'lie',
 'Hoax',
 'lies',
 'economy',
 'dog',
 'see',
 'news',
 'pretend',
 'foreign',
 'suggest',
 'misinfor

In [88]:
len(filtered_vocab)

140

In [89]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [90]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(explanations), len(vocab_list)))

for i in range(len(explanations)):
    expl = explanations[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [91]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [92]:
totalNonZero

365

In [93]:
np.array(tweet_scores).mean()

0.6083994708994709

In [94]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [95]:
np.array(tweet_scores).mean()

0.3667989417989418

## Experiment 4: Table 1 words + slang dictionary

In [96]:
# define dictionary
slang = [
    
]

### Disaggregated by correctness of prediction

#### Correctly predicted

In [97]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_correct if el.lower() in slang]

In [98]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [99]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(correctPred), len(vocab_correct)))

for i in range(len(correctPred)):
    expl = correctPred[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [100]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [101]:
totalNonZero

0

In [102]:
np.array(tweet_scores).mean()

0.0

In [103]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [104]:
np.array(tweet_scores).mean()

0.0

#### Incorrectly predicted

In [105]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_wrong if el.lower() in slang]

In [106]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [107]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(wrongPred), len(vocab_wrong)))

for i in range(len(wrongPred)):
    expl = wrongPred[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [108]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [109]:
np.array(tweet_scores).mean()

0.0

In [110]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [111]:
np.array(tweet_scores).mean()

0.0

### Aggregated

In [112]:
# get filtered vocab based on analysis words
filtered_vocab = [el for el in vocab_list if el.lower() in slang]

In [113]:
# make dictionary from filtered vocab
myDict = dict()

for i in range(len(filtered_vocab)):
    myDict[filtered_vocab[i]] = i

In [114]:
# matrix of class associations of filtered vocab
my_matrix = np.zeros((len(explanations), len(vocab_list)))

for i in range(len(explanations)):
    expl = explanations[i]
    for j in range(len(expl)):
        word = expl[j][0]
        val = expl[j][1]
        
        if word in filtered_vocab:
            if val > 0:
                my_matrix[i, myDict[word]] = 1

            if val < 0:
                my_matrix[i, myDict[word]] = -1

In [115]:
# no penalty
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        if el > 0:
            rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [116]:
totalNonZero

0

In [117]:
np.array(tweet_scores).mean()

0.0

In [118]:
# penalty for wrong class association of word
tweet_scores = []
totalNonZero = 0
for row in my_matrix:
    rowSum = 0
    nonZero = 0
    for el in row:
        rowSum += el
        if el != 0:
            nonZero += 1
            totalNonZero += 1
    
    if nonZero > 0:
        score = rowSum/nonZero
    else:
        score = 0
    
    tweet_scores.append(score)

In [119]:
np.array(tweet_scores).mean()

0.0