## Computing final score by unifying output of all models with the possible labels limited to "positive", "negative", and "neutral"

In [13]:
import pickle
def save_final(model_name, data):
    """Pickle ``data`` to ``data/final_scores_<model_name>.pkl``.

    The file is opened in binary write mode, so an existing file with the
    same name is overwritten.
    """
    target_path = f'data/final_scores_{model_name}.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(data, handle)

def open_raw(model_name):
    """Load and return the object pickled at ``data/raw_scores_<model_name>.pkl``."""
    source_path = f'data/raw_scores_{model_name}.pkl'
    with open(source_path, 'rb') as handle:
        return pickle.load(handle)

## Defining the proper transformation for each model

In [14]:
def do_leia(raw_scores):
    """Map each item's highest-scoring emotion label to a sentiment.

    Every element of ``raw_scores`` is an iterable of dicts with a
    ``'label'`` and a ``'score'`` key.  The label of the entry with the
    largest score is looked up in a fixed emotion-to-sentiment table
    ('Happiness' and 'Affection' are positive; 'Sadness', 'Fear' and
    'Anger' are negative).

    Returns a list with one sentiment string per element, in input order.
    Raises ``KeyError`` for a winning label that is not in the table.
    """
    sentiment_of = {
        'Happiness': 'positive',
        'Affection': 'positive',
        'Sadness': 'negative',
        'Fear': 'negative',
        'Anger': 'negative',
    }

    def winning_label(candidates):
        return max(candidates, key=lambda entry: entry['score'])['label']

    return [sentiment_of[winning_label(candidates)] for candidates in raw_scores]

def do_pysentimiento_senti(raw_scores):
    """Translate each item's ``output`` code into a sentiment name.

    ``'POS'``, ``'NEU'`` and ``'NEG'`` become ``'positive'``, ``'neutral'``
    and ``'negative'``.  Returns one string per item, in input order; any
    other code raises ``KeyError``.
    """
    code_to_sentiment = {'POS': 'positive', 'NEU': 'neutral', 'NEG': 'negative'}
    return [code_to_sentiment[prediction.output] for prediction in raw_scores]

def do_pysentimiento_emo(raw_scores):
    """Convert each item's ``output`` emotion into a sentiment name.

    'joy' is positive; 'sadness', 'anger', 'disgust' and 'fear' are
    negative; 'surprise' and 'others' are neutral.  Returns one string per
    item, in input order; an emotion outside this set raises ``KeyError``.
    """
    emotions_by_sentiment = {
        'positive': ['joy'],
        'negative': ['sadness', 'anger', 'disgust', 'fear'],
        'neutral': ['surprise', 'others'],
    }
    # Invert the grouping so that each emotion points at its sentiment.
    sentiment_of = {
        emotion: sentiment
        for sentiment, emotions in emotions_by_sentiment.items()
        for emotion in emotions
    }
    return [sentiment_of[prediction.output] for prediction in raw_scores]

def do_hartmann(raw_scores):
    """Map each item's highest-scoring emotion label to a sentiment.

    Every element of ``raw_scores`` is an iterable of dicts with a
    ``'label'`` and a ``'score'`` key.  'joy' is positive; 'sadness',
    'anger', 'disgust' and 'fear' are negative; 'surprise' and 'neutral'
    are neutral.

    Returns one sentiment string per element, in input order.  A winning
    label outside this set raises ``KeyError``.
    """
    emotions_by_sentiment = {
        'positive': ['joy'],
        'negative': ['sadness', 'anger', 'disgust', 'fear'],
        'neutral': ['surprise', 'neutral'],
    }
    # Invert the grouping so that each emotion points at its sentiment.
    sentiment_of = {
        emotion: sentiment
        for sentiment, emotions in emotions_by_sentiment.items()
        for emotion in emotions
    }

    def winning_label(candidates):
        return max(candidates, key=lambda entry: entry['score'])['label']

    return [sentiment_of[winning_label(candidates)] for candidates in raw_scores]

def do_cardif(raw_scores):
    """Return, for each item, the label of its highest-scoring entry.

    Every element of ``raw_scores`` is an iterable of dicts with a
    ``'label'`` and a ``'score'`` key.  The winning label is returned
    unchanged, one per element and in input order.
    """
    def best_entry(candidates):
        return max(candidates, key=lambda entry: entry['score'])

    return [best_entry(candidates)['label'] for candidates in raw_scores]

def do_siebert(raw_scores):
    """Return, for each item, the lower-cased label of its highest-scoring entry.

    Every element of ``raw_scores`` is an iterable of dicts with a
    ``'label'`` and a ``'score'`` key.  Results are returned one per
    element, in input order.
    """
    def best_entry(candidates):
        return max(candidates, key=lambda entry: entry['score'])

    return [best_entry(candidates)['label'].lower() for candidates in raw_scores]

def do_vader(raw_scores):
    """Bucket each item's ``'compound'`` value into a sentiment name.

    A compound of 0.05 or more is 'positive', -0.05 or less is 'negative',
    and anything strictly in between is 'neutral'.  Returns one string per
    item, in input order.
    """
    def bucket(compound):
        # The two thresholds are inclusive; only the open interval is neutral.
        if compound <= -0.05:
            return 'negative'
        if compound >= 0.05:
            return 'positive'
        return 'neutral'

    return [bucket(scores['compound']) for scores in raw_scores]

def do_nrc(raw_scores):
    """Reduce each item's top-scoring emotion labels to a single sentiment.

    Every element of ``raw_scores`` is a sequence of ``(label, score)``
    entries.  All entries that share the highest score take part in a vote:
    each label is mapped to 'positive', 'negative' or 'neutral', and the
    polarity with more votes wins.  An equal number of positive and
    negative votes (including none at all) gives 'neutral'.

    The highest score is computed explicitly, so the entries do not have to
    be sorted, and an empty element yields 'neutral' instead of raising
    ``IndexError``.

    Returns one sentiment string per element, in input order.  A top-scoring
    label missing from the mapping raises ``KeyError``.
    """
    label_map = {'fear':'negative','anger':'negative','anticipation':'neutral',
            'anticip':'neutral','trust':'positive','surprise':'neutral','positive':'positive','negative':'negative',
            'sadness':'negative','disgust':'negative','joy':'positive'}

    result = []
    for item in raw_scores:
        # default=None matches no entry below, so an empty item ends up neutral.
        top_score = max((entry[1] for entry in item), default=None)
        top_polarities = [label_map[entry[0]] for entry in item if entry[1] == top_score]

        # NOTE(review): when many labels tie for the top score, the outcome
        # depends on how many tied labels map to each polarity, and label_map
        # holds more negative labels than positive ones - confirm this is the
        # intended tie-breaking.
        pos = top_polarities.count('positive')
        neg = top_polarities.count('negative')
        if pos > neg:
            result.append('positive')
        elif neg > pos:
            result.append('negative')
        else:
            result.append('neutral')
    return result

In [15]:
# Registry of (model name, conversion function) pairs, kept in the order in
# which the models are to be processed.
models = [
    ('nrc', do_nrc),
    ('vader', do_vader),
    ('pysentimiento_senti', do_pysentimiento_senti),
    ('pysentimiento_emo', do_pysentimiento_emo),
    ('hartmann', do_hartmann),
    ('cardif', do_cardif),
    ('siebert', do_siebert),
    ('leia', do_leia),
]

In [16]:
# For every registered model: load its raw scores, convert them to unified
# sentiment labels, and save the result under the same keys as the raw scores.
for model_name, model_function in models:
    print(model_name)
    raw_scores = open_raw(model_name)

    result = model_function(list(raw_scores.values()))

    # Iterating a dict and calling .values() on it walk the entries in the
    # same order, so zipping the keys with the converted values re-attaches
    # each label to the key it was computed from.
    final_scores = dict(zip(raw_scores, result))

    save_final(model_name, final_scores)

nrc
vader
pysentimiento_senti
pysentimiento_emo
hartmann
cardif
siebert
leia
