In [21]:
def create_gold_combinations(classes, sentiments):
    r = []
    for c in classes:
        for s in sentiments:
            r.append((c, s))
            
    return r
create_gold_combinations(['a', 'b'], ['n/a', 'neg', 'pos', 'neu'])

[('a', 'n/a'),
 ('a', 'neg'),
 ('a', 'pos'),
 ('a', 'neu'),
 ('b', 'n/a'),
 ('b', 'neg'),
 ('b', 'pos'),
 ('b', 'neu')]

In [22]:
# 'def create_eval_entries_from_gold_and_predictions(golds, predictions):
#     eval_entries = []
#     for gold_doc, pred_doc in zip(golds, predictions):
#         # only works for the small example where only sentiment is misclassified
#         for (g_cls, g_sent), (p_cls, p_sent) in zip(gold_doc, pred_doc):
#             if g_cls
#         eval_entries.extend(zip(gold_doc, pred_doc))
#     return eval_entries

# golds = [
#     [ # Doc 1
#         ('a', 'neg'),
#     ], 
#     [ # Doc 2
#         ('a', 'pos'),
#         ('b', 'pos')
#     ]
# ]

# predictions = [
#     [ # Doc 1
#         ('a', 'neg'),
#     ], 
#     [ # Doc 2
#         ('a', 'pos'),
#         ('b', 'negative')
#     ]
# ]

# eval_entries = create_eval_entries_from_gold_and_predictions(golds, predictions)

# eval_entries'
eval_entries = [
    (('a', 'neu'),('a', 'neu')),
    (('a', 'neu'), ('a', 'neu')),
    (('b', 'neu'), ('b', 'n/a')),
    (('b', 'n/a'), ('b', 'pos'))
]

In [23]:
def get_metrics(classes, sentiments, eval_entries):
    combinations = create_gold_combinations(classes, sentiments)
    
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    
    for cls, sent in combinations:
        for (g_cls, g_sent), (p_cls, p_sent) in eval_entries:
            if g_cls == cls and g_sent == sent:
                if g_cls == p_cls and g_sent == p_sent:
                    tp += 1
                else:
                    fn += 1
            else:
                if p_cls == cls and p_sent == sent:
                    fp += 1
                else:
                    tn += 1
                    
    return (tp, fp, fn, tn)

get_metrics(['a', 'b'], ['n/a', 'neg', 'pos', 'neu'], eval_entries)

(2, 2, 2, 26)

In [53]:
def calculate_micro_f1(classes, sentiments, eval_entries):
    tp, fp, fn, _ = get_metrics(classes, sentiments, eval_entries)
    if tp == 0 or (tp + fp) == 0 or (tp + fn) == 0:
        return 0.0
    micro_precission = tp / (tp + fp)
    micro_recall = tp / (tp + fn)
    
    micro_f1 = 2.0 * micro_precission * micro_recall / (micro_precission + micro_recall)
    return micro_f1

calculate_micro_f1(['a', 'b'], ['n/a', 'neg', 'pos', 'neu'], eval_entries)

0.0

## Adaption to tensors

In [25]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix


In [51]:
prediction = torch.LongTensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])


target = torch.LongTensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

# per aspect
target = torch.t(target)
prediction = torch.t(prediction)
print('PREDICTION:\n' + str(prediction))
print('\n\nTARGET:\n' + str(target))

PREDICTION:
tensor([[0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0]])


TARGET:
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0,

In [27]:
print('\t\t Samples')
for i, aspect in enumerate(prediction):
    print(f'Aspect {i} -\t {aspect}')

		 Samples
Aspect 0 -	 tensor([0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0])
Aspect 1 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 2 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 3 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 4 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 5 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 6 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 7 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 8 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 9 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 10 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 11 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 12 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 13 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 14 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 15 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Aspect 16 -	 tensor([0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0])
Aspect 17 -	 

In [28]:
def get_tensor_eval_entries(prediction, target):
    eval_entries = []
    # (('a', 'neu'),('a', 'neu')),
    for aspect_index, (prediction_aspect, target_aspect) in enumerate(zip(prediction, target)):
        #print(f'\n{aspect_index} Target:\t{target_aspect}\n{aspect_index} Prediction\t{prediction_aspect}')

        for y_hat, y in zip(prediction_aspect, target_aspect):
                # y is applicable
                if y != y_hat and y > 0:
                    eval_entries.append(((aspect_index, y), (aspect_index, 0)))

                    if y_hat > 0:
                        eval_entries.append(((aspect_index, 0), (aspect_index, y_hat)))

                elif y == y_hat and y > 0:
                    eval_entries.append(((aspect_index, y), (aspect_index, y_hat)))
                elif y_hat > 0:
                        #print(f'else Pred {y_hat} - Y {y}')
                        eval_entries.append(((aspect_index, 0), (aspect_index, y_hat)))


    return eval_entries
eval_entries = get_tensor_eval_entries(prediction, target)
eval_entries

[((0, 0), (0, tensor(1))),
 ((0, 0), (0, tensor(1))),
 ((0, 0), (0, tensor(1))),
 ((0, 0), (0, tensor(1))),
 ((0, 0), (0, tensor(1))),
 ((16, tensor(2)), (16, 0)),
 ((16, tensor(2)), (16, tensor(2))),
 ((17, tensor(1)), (17, 0)),
 ((19, 0), (19, tensor(2)))]

In [29]:
get_metrics(range(19), [0, 1, 2, 3], eval_entries)

(1, 7, 7, 669)

In [30]:
calculate_micro_f1(range(19), [0, 1, 2, 3], eval_entries)

0.125

## Sanity check with example from above

## Two samples, Two apsects

#### GOLD
```xml
<Documents>
    <Document id="1">
        <Opinions>
            <Opinion category="Allgemein" polarity="neutral"/>
        </Opinions>
        <text>COMMENT</text>
    </Document>
    <Document id="2">
        <Opinions>
            <Opinion category="Allgemein" polarity="neutral"/>
			<Opinion category="Ticketkauf" polarity="neutral"/>
        </Opinions>
        <text>COMMENT</text>
    </Document>
</Documents>
```

#### Prediction
```xml
<Documents>
    <Document id="1">
        <Opinions>
            <Opinion category="Allgemein" polarity="neutral"/>
        </Opinions>
        <text>COMMENT</text>
    </Document>
    <Document id="2">
        <Opinions>
            <Opinion category="Allgemein" polarity="neutral"/>
			<Opinion category="Ticketkauf" polarity="positive"/>
        </Opinions>
        <text>COMMENT</text>
    </Document>
</Documents>
```

In [31]:
target = torch.tensor(
       [[1., 1.], # aspect a
        [0., 1.]  # aspect b
       ])

prediction = torch.tensor(
       [[1., 1.], # aspect a
        [0., 2.]  # aspect b
       ])


# per aspect
print('PREDICTION:\n' + str(prediction))
print('\n\nTARGET:\n' + str(target))

PREDICTION:
tensor([[1., 1.],
        [0., 2.]])


TARGET:
tensor([[1., 1.],
        [0., 1.]])


In [32]:
eval_entries = get_tensor_eval_entries(prediction, target)
eval_entries

[((0, tensor(1.)), (0, tensor(1.))),
 ((0, tensor(1.)), (0, tensor(1.))),
 ((1, tensor(1.)), (1, 0)),
 ((1, 0), (1, tensor(2.)))]

In [33]:
get_metrics(range(2), [0, 1, 2, 3], eval_entries)

(2, 2, 2, 26)

In [34]:
calculate_micro_f1(range(19), [0, 1, 2, 3], eval_entries)

0.5

In [35]:
list(range(4))

[0, 1, 2, 3]

#### Only affecting one aspect at a time

In [36]:
target = torch.tensor(
       [[1., 1.], # aspect a
       ])

prediction = torch.tensor(
       [[1., 1.], # aspect a
       ])
eval_entries = get_tensor_eval_entries(prediction, target)


In [37]:
get_metrics(range(19), [0, 1, 2, 3], eval_entries)

(2, 0, 0, 150)

In [38]:
target = torch.tensor([[0., 1.]])

prediction = torch.tensor([[0., 2.]])
eval_entries = get_tensor_eval_entries(prediction, target)


In [39]:
get_metrics(range(19), [0, 1, 2, 3], eval_entries)

(0, 2, 2, 148)

# Case 2

In [40]:
prediction = torch.Tensor([[0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2],
        [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])


target = torch.Tensor([[0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

# per aspect
target = torch.t(target)
prediction = torch.t(prediction)
print('PREDICTION:\n' + str(prediction))
print('\n\nTARGET:\n' + str(target))

PREDICTION:
tensor([[0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 2., 0., 0., 0.],
  

In [41]:
eval_entries = get_tensor_eval_entries(prediction, target)
eval_entries

[((0, 0), (0, tensor(1.))),
 ((0, 0), (0, tensor(1.))),
 ((0, 0), (0, tensor(1.))),
 ((0, 0), (0, tensor(1.))),
 ((0, 0), (0, tensor(1.))),
 ((1, tensor(2.)), (1, tensor(2.))),
 ((1, tensor(2.)), (1, tensor(2.))),
 ((1, tensor(2.)), (1, tensor(2.))),
 ((1, tensor(2.)), (1, tensor(2.))),
 ((1, tensor(2.)), (1, tensor(2.))),
 ((1, tensor(2.)), (1, tensor(2.))),
 ((1, tensor(2.)), (1, tensor(2.))),
 ((1, tensor(1.)), (1, 0)),
 ((1, 0), (1, tensor(2.))),
 ((1, tensor(1.)), (1, 0)),
 ((1, 0), (1, tensor(2.))),
 ((1, tensor(1.)), (1, 0)),
 ((1, 0), (1, tensor(2.))),
 ((1, 0), (1, tensor(2.))),
 ((1, 0), (1, tensor(2.))),
 ((16, tensor(2.)), (16, 0)),
 ((16, tensor(2.)), (16, tensor(2.))),
 ((17, tensor(1.)), (17, 0)),
 ((19, 0), (19, tensor(2.)))]

In [42]:
get_metrics(range(20), [0, 1, 2, 3], eval_entries)

(8, 16, 16, 1880)

In [43]:
calculate_micro_f1(range(20), [0, 1, 2, 3], eval_entries)

0.3333333333333333

In [44]:
import pickle

In [45]:
with open('all_predictions.pkl', 'rb') as f:
    all_predictions = pickle.load(f)
    
all_predictions

FileNotFoundError: [Errno 2] No such file or directory: 'all_predictions.pkl'

In [46]:
with open('all_targets.pkl', 'rb') as f:
    all_targets = pickle.load(f)
    
all_targets

FileNotFoundError: [Errno 2] No such file or directory: 'all_targets.pkl'

In [47]:
predictions = None
targets = None
for preds, y in zip(all_predictions, all_targets):
    if predictions is None:
        predictions = preds
        targets = y
    else:
        predictions = torch.cat((predictions, preds))
        targets = torch.cat((targets, y))

predictions = torch.t(predictions)
targets = torch.t(targets)


predictions.shape

NameError: name 'all_predictions' is not defined

In [48]:
eval_entries = get_tensor_eval_entries(predictions, targets)
len(eval_entries)

TypeError: zip argument #1 must support iteration

In [49]:
get_metrics(range(20), [0, 1, 2, 3], eval_entries)

(8, 16, 16, 1880)

# Random

In [55]:
mean_score = 0.0
for i in range(1000):
    prediction = torch.randint(0, 4, (12, 20))
    prediction = torch.t(prediction)
    target = torch.randint(0, 4, (12, 20))
    target = torch.t(target)
    eval_entries = get_tensor_eval_entries(prediction, target)
    mean_score += calculate_micro_f1(range(20), [0, 1, 2, 3], eval_entries)
print(f'Mean micro f1: {mean_score/1000}')



Mean micro f1: 0.14301008196206047
