# Análise dos resultados da avaliação de descrições de alinhamentos
---
São Carlos, 01/12/2019

In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from app.eval_module.desc_models import DescEval

def print_metrics(df):
    """
    Print a text summary of an evaluation contingency table, then its LaTeX form.

    Three sections are written to stdout:

    1. Overall counts: the grand total, the "valid" total (columns 'Correct'
       through 'Incorrect') and each column's total, each with its share of
       the grand total.
    2. Valid descriptions excluding the 'Equal' row: per-column counts and
       their share of the valid, non-'Equal' total.
    3. Comparison with the baseline: totals of the 'Better', 'Equal' and
       'Worse' rows and their share of the grand total.

    :param df: pandas data-frame of counts with rows labelled 'Better',
        'Equal' and 'Worse' and columns 'Correct', 'Partially correct',
        'Incorrect' and 'Invalid'; the label slice 'Correct':'Incorrect'
        is taken as the set of valid columns, so column order matters
    :return: None
    """
    total = df.values.sum()
    # Label-based slice: every column from 'Correct' up to and including 'Incorrect'.
    valid_df: pd.DataFrame = df.loc[:, 'Correct':'Incorrect']
    valid = valid_df.values.sum()

    print(f"Total:\t\t\t{total}")
    print(f"Valid:\t\t\t{valid} |\t{valid * 100. / total:.2f} %")
    overall_rows = (
        ("Correct:\t\t", 'Correct'),
        ("Partially correct:\t", 'Partially correct'),
        ("Incorrect:\t\t", 'Incorrect'),
        ("Invalid:\t\t", 'Invalid'),
    )
    for label, column in overall_rows:
        count = df.loc[:, column].values.sum()
        print(f"{label}{count} |\t{count * 100. / total:.2f} %")
    print()

    print('Valid descriptions != alignment')
    # Drop the 'Equal' row from both the numerators and the denominator.
    valid_ne_total = valid - valid_df.loc['Equal'].sum()
    valid_ne_rows = (
        ("Valid NE. Correct:\t\t", 'Correct'),
        ("Valid NE. Partially Correct:\t", 'Partially correct'),
        ("Valid NE. Incorrect:\t\t", 'Incorrect'),
    )
    for label, column in valid_ne_rows:
        count = valid_df.loc[:, column].sum() - valid_df.loc['Equal', column].sum()
        print(f"{count}\t{label}{count * 100. / valid_ne_total:.2f} %")
    print()

    # This heading used to repeat the previous section's title by mistake.
    print('Comparison with baseline')
    for row in ('Better', 'Equal', 'Worse'):
        count = df.loc[row].sum()
        print(f"{count}\t{row}:\t{count * 100. / total:.2f} %")
    print()
    print(df.to_latex())

## 1. Análise geral

In [38]:
# Codes stored in DescEval.approval / DescEval.compare_baseline, listed in the
# order in which they become table columns / rows. Their display labels come
# from DescEval.APPROVAL_TABLE and DescEval.COMPARE_BASELINE_TABLE.
possible_approval = [2, 1, 0, -1]
possible_comparing = [2, 1, 0]

# One row per baseline-comparison code, one column per approval code; each cell
# holds the number of DescEval records with that (approval, compare_baseline) pair.
# NOTE(review): assumes DescEval.query is a SQLAlchemy Query, so .count() lets the
# database count the rows instead of loading every record just to take len().
df = pd.DataFrame(
    [
        [
            DescEval.query.filter_by(approval=pa, compare_baseline=pc).count()
            for pa in possible_approval
        ]
        for pc in possible_comparing
    ],
    columns=[DescEval.APPROVAL_TABLE[pa] for pa in possible_approval],
    index=[DescEval.COMPARE_BASELINE_TABLE[pc] for pc in possible_comparing],
)

print_metrics(df)
df

Total:			596
Valid:			318 |	53.36 %
Correct:		78 |	13.09 %
Partially correct:	65 |	10.91 %
Incorrect:		175 |	29.36 %
Invalid:		278 |	46.64 %

Valid descriptions != alignment
29	Valid NE. Correct:		10.90 %
62	Valid NE. Partially Correct:	23.31 %
175	Valid NE. Incorrect:		65.79 %

Valid descriptions != alignment
190	Better:	31.88 %
106	Equal:	17.79 %
300	Worse:	50.34 %

\begin{tabular}{lrrrr}
\toprule
{} &  Correct &  Partially correct &  Incorrect &  Invalid \\
\midrule
Better &       29 &                 62 &          0 &       99 \\
Equal  &       49 &                  3 &          0 &       54 \\
Worse  &        0 &                  0 &        175 &      125 \\
\bottomrule
\end{tabular}



Unnamed: 0,Correct,Partially correct,Incorrect,Invalid
Better,29,62,0,99
Equal,49,3,0,54
Worse,0,0,175,125


## 2. Análise por modelo

In [39]:
from collections import Counter

from app.desc_module.models import Description

# Run each (compare_baseline, approval) query once and tally its records by
# generating method, instead of re-running all the queries for every method.
# Counter yields 0 for a method with no records in a given cell.
method_counts = {
    (pc, pa): Counter(
        de.get_method()
        for de in DescEval.query.filter_by(approval=pa, compare_baseline=pc)
    )
    for pc in possible_comparing
    for pa in possible_approval
}

# One contingency table per method name, laid out like the overall table:
# rows are baseline-comparison labels, columns are approval labels.
df_modelos = {}
for method in Description.METHOD_TABLE.values():
    df_modelos[method] = pd.DataFrame(
        [
            [method_counts[pc, pa][method] for pa in possible_approval]
            for pc in possible_comparing
        ],
        columns=[DescEval.APPROVAL_TABLE[pa] for pa in possible_approval],
        index=[DescEval.COMPARE_BASELINE_TABLE[pc] for pc in possible_comparing],
    )

### 2.1. Model1

In [40]:
# Text summary followed by the rich table display for Model1.
model1_table = df_modelos['Model1']
print_metrics(model1_table)
model1_table

Total:			98
Valid:			54 |	55.10 %
Correct:		7 |	7.14 %
Partially correct:	13 |	13.27 %
Incorrect:		34 |	34.69 %
Invalid:		44 |	44.90 %

Valid descriptions != alignment
7	Valid NE. Correct:		13.21 %
12	Valid NE. Partially Correct:	22.64 %
34	Valid NE. Incorrect:		64.15 %

Valid descriptions != alignment
33	Better:	33.67 %
4	Equal:	4.08 %
61	Worse:	62.24 %

\begin{tabular}{lrrrr}
\toprule
{} &  Correct &  Partially correct &  Incorrect &  Invalid \\
\midrule
Better &        7 &                 12 &          0 &       14 \\
Equal  &        0 &                  1 &          0 &        3 \\
Worse  &        0 &                  0 &         34 &       27 \\
\bottomrule
\end{tabular}



Unnamed: 0,Correct,Partially correct,Incorrect,Invalid
Better,7,12,0,14
Equal,0,1,0,3
Worse,0,0,34,27


### 2.2. Model2

In [41]:
# Text summary followed by the rich table display for Model2.
model2_table = df_modelos['Model2']
print_metrics(model2_table)
model2_table

Total:			98
Valid:			52 |	53.06 %
Correct:		23 |	23.47 %
Partially correct:	8 |	8.16 %
Incorrect:		21 |	21.43 %
Invalid:		46 |	46.94 %

Valid descriptions != alignment
6	Valid NE. Correct:		17.14 %
8	Valid NE. Partially Correct:	22.86 %
21	Valid NE. Incorrect:		60.00 %

Valid descriptions != alignment
34	Better:	34.69 %
32	Equal:	32.65 %
32	Worse:	32.65 %

\begin{tabular}{lrrrr}
\toprule
{} &  Correct &  Partially correct &  Incorrect &  Invalid \\
\midrule
Better &        6 &                  8 &          0 &       20 \\
Equal  &       17 &                  0 &          0 &       15 \\
Worse  &        0 &                  0 &         21 &       11 \\
\bottomrule
\end{tabular}



Unnamed: 0,Correct,Partially correct,Incorrect,Invalid
Better,6,8,0,20
Equal,17,0,0,15
Worse,0,0,21,11


### 2.3. Model3

In [42]:
# Text summary followed by the rich table display for Model3.
model3_table = df_modelos['Model3']
print_metrics(model3_table)
model3_table

Total:			100
Valid:			53 |	53.00 %
Correct:		4 |	4.00 %
Partially correct:	12 |	12.00 %
Incorrect:		37 |	37.00 %
Invalid:		47 |	47.00 %

Valid descriptions != alignment
4	Valid NE. Correct:		7.69 %
11	Valid NE. Partially Correct:	21.15 %
37	Valid NE. Incorrect:		71.15 %

Valid descriptions != alignment
30	Better:	30.00 %
4	Equal:	4.00 %
66	Worse:	66.00 %

\begin{tabular}{lrrrr}
\toprule
{} &  Correct &  Partially correct &  Incorrect &  Invalid \\
\midrule
Better &        4 &                 11 &          0 &       15 \\
Equal  &        0 &                  1 &          0 &        3 \\
Worse  &        0 &                  0 &         37 &       29 \\
\bottomrule
\end{tabular}



Unnamed: 0,Correct,Partially correct,Incorrect,Invalid
Better,4,11,0,15
Equal,0,1,0,3
Worse,0,0,37,29


### 2.4. Model4

In [43]:
# Text summary followed by the rich table display for Model4.
model4_table = df_modelos['Model4']
print_metrics(model4_table)
model4_table

Total:			100
Valid:			53 |	53.00 %
Correct:		20 |	20.00 %
Partially correct:	10 |	10.00 %
Incorrect:		23 |	23.00 %
Invalid:		47 |	47.00 %

Valid descriptions != alignment
4	Valid NE. Correct:		10.81 %
10	Valid NE. Partially Correct:	27.03 %
23	Valid NE. Incorrect:		62.16 %

Valid descriptions != alignment
32	Better:	32.00 %
31	Equal:	31.00 %
37	Worse:	37.00 %

\begin{tabular}{lrrrr}
\toprule
{} &  Correct &  Partially correct &  Incorrect &  Invalid \\
\midrule
Better &        4 &                 10 &          0 &       18 \\
Equal  &       16 &                  0 &          0 &       15 \\
Worse  &        0 &                  0 &         23 &       14 \\
\bottomrule
\end{tabular}



Unnamed: 0,Correct,Partially correct,Incorrect,Invalid
Better,4,10,0,18
Equal,16,0,0,15
Worse,0,0,23,14


### 2.5. Model5

In [44]:
# Text summary followed by the rich table display for Model5.
model5_table = df_modelos['Model5']
print_metrics(model5_table)
model5_table

Total:			100
Valid:			53 |	53.00 %
Correct:		4 |	4.00 %
Partially correct:	12 |	12.00 %
Incorrect:		37 |	37.00 %
Invalid:		47 |	47.00 %

Valid descriptions != alignment
4	Valid NE. Correct:		7.69 %
11	Valid NE. Partially Correct:	21.15 %
37	Valid NE. Incorrect:		71.15 %

Valid descriptions != alignment
30	Better:	30.00 %
4	Equal:	4.00 %
66	Worse:	66.00 %

\begin{tabular}{lrrrr}
\toprule
{} &  Correct &  Partially correct &  Incorrect &  Invalid \\
\midrule
Better &        4 &                 11 &          0 &       15 \\
Equal  &        0 &                  1 &          0 &        3 \\
Worse  &        0 &                  0 &         37 &       29 \\
\bottomrule
\end{tabular}



Unnamed: 0,Correct,Partially correct,Incorrect,Invalid
Better,4,11,0,15
Equal,0,1,0,3
Worse,0,0,37,29


### 2.6. Model6

In [45]:
# Text summary followed by the rich table display for Model6.
model6_table = df_modelos['Model6']
print_metrics(model6_table)
model6_table

Total:			100
Valid:			53 |	53.00 %
Correct:		20 |	20.00 %
Partially correct:	10 |	10.00 %
Incorrect:		23 |	23.00 %
Invalid:		47 |	47.00 %

Valid descriptions != alignment
4	Valid NE. Correct:		10.81 %
10	Valid NE. Partially Correct:	27.03 %
23	Valid NE. Incorrect:		62.16 %

Valid descriptions != alignment
31	Better:	31.00 %
31	Equal:	31.00 %
38	Worse:	38.00 %

\begin{tabular}{lrrrr}
\toprule
{} &  Correct &  Partially correct &  Incorrect &  Invalid \\
\midrule
Better &        4 &                 10 &          0 &       17 \\
Equal  &       16 &                  0 &          0 &       15 \\
Worse  &        0 &                  0 &         23 &       15 \\
\bottomrule
\end{tabular}



Unnamed: 0,Correct,Partially correct,Incorrect,Invalid
Better,4,10,0,17
Equal,16,0,0,15
Worse,0,0,23,15
