# 3.4.4 Analyse der Testkampagne
In diesem Notebook werden die Ergebnisse der Toloka-Testkampagne ausgewertet.

---

### Package- und Datenimport

In [19]:
import pandas as pd
import numpy as np
from irrCAC.raw import CAC

In [7]:
# Load the raw results of the Toloka test campaign from a tab-separated file.
toloka_res = pd.read_csv('data/results_test_campaign_toloka.tsv', sep="\t")

---

### Vorverarbeitung der Toloka-Ergebnisse
Dieser Abschnitt beinhaltet die Vorverarbeitung der Toloka-Daten.


In [8]:
def one_hot_encode(df, field):
    """Replace column ``field`` with one indicator column per distinct value.

    Args:
        df: Input DataFrame; it is not modified.
        field: Label of the column to encode.

    Returns:
        A new DataFrame in which ``field`` has been removed and the indicator
        columns produced by ``pd.get_dummies`` are joined (on the index)
        after the remaining columns.
    """
    indicators = pd.get_dummies(df[field])
    # The original column is redundant once its indicators exist.
    remaining = df.drop(columns=[field])
    return remaining.join(indicators)

In [9]:
def preprocess_toloka_results(df):
    """Aggregate raw Toloka assignment rows into per-sentence vote counts.

    Steps:
      1. Drop the bookkeeping columns that are not used in the analysis.
      2. Drop rows that have no ``OUTPUT:senti_score``.
      3. Blank out the score of every row whose ``ASSIGNMENT:status`` is
         ``'REJECTED'``.
      4. One-hot encode the score and the assignment status.
      5. Group by ``INPUT:sentence`` and sum, so every indicator column
         becomes a count per sentence.

    Args:
        df: Raw campaign export. It must contain every column named in the
            drop list below (``DataFrame.drop`` raises ``KeyError``
            otherwise) as well as ``OUTPUT:senti_score``,
            ``ASSIGNMENT:status`` and ``INPUT:sentence``.

    Returns:
        DataFrame indexed by sentence with one count column per score value
        and one count column per assignment status that occurs in the data.
    """
    # drop unnecessary columns
    df = df.drop(columns=[
        'ASSIGNMENT:submitted',
        'ASSIGNMENT:started',
        'ASSIGNMENT:link',
        'HINT:text',
        'GOLDEN:senti_score',
        'ASSIGNMENT:task_id',
        'ASSIGNMENT:assignment_id',
        'ASSIGNMENT:worker_id',
        'ACCEPT:verdict',
        'ACCEPT:comment',
        'HINT:default_language',
    ])
    df = df.dropna(subset=['OUTPUT:senti_score'])

    # Convert to whole numbers, then back to float: the score column has to
    # hold NaN for rejected rows anyway, and an explicit float dtype keeps the
    # one-hot column labels at -1.0 / 0.0 / 1.0 whether or not any row was
    # rejected.
    scores = df['OUTPUT:senti_score'].astype(int).astype(float)

    # delete score if result was rejected (vectorised; unlike a row-wise
    # apply this also works on an empty frame)
    rejected = df['ASSIGNMENT:status'] == 'REJECTED'
    df = df.assign(**{'OUTPUT:senti_score': scores.mask(rejected)})

    # one hot encode scores and rejection status
    df = one_hot_encode(df, 'OUTPUT:senti_score')
    df = one_hot_encode(df, 'ASSIGNMENT:status')

    # group results by sentence
    return df.groupby(by=['INPUT:sentence'], as_index=True).sum()

In [10]:
# Per-sentence vote counts derived from the raw campaign export.
grp = preprocess_toloka_results(toloka_res)

---

### Überprüfung valider Ergebnisse

In diesem Abschnitt wird ermittelt, wie viele Sätze übereinstimmende Antworten für jede Klasse erhalten haben.

In [15]:
# sentences that received the negative label (-1) from at least two workers
has_neg_majority = grp[-1.0] > 1
neg = grp.loc[has_neg_majority]
len(neg)

67

In [16]:
# sentences that received the positive label (1) from at least two workers
has_pos_majority = grp[1.0] > 1
pos = grp.loc[has_pos_majority]
len(pos)

134

In [17]:
# sentences that received the neutral label (0) from at least two workers
has_neu_majority = grp[0.0] > 1
neu = grp.loc[has_neu_majority]
len(neu)

115

In [18]:
# All sentences on which at least two workers agreed, regardless of class.
accepted = pd.concat([neg, pos, neu])
# NOTE(review): the numerator counts sentences while the denominator counts
# rows of the raw export (presumably one row per assignment) - confirm that
# this ratio is intended; len(grp) would give the share of sentences.
n_accepted = len(accepted)
n_raw_rows = len(toloka_res)
n_accepted / n_raw_rows

0.09572856710087853

In [28]:
def create_evaluation_df(df, n_raters=3):
    """Expand per-sentence vote counts into a sentences x raters rating table.

    Each row of ``df`` describes one sentence: the columns ``-1.0``, ``0.0``
    and ``1.0`` hold how many ratings of that score the sentence received and
    the column ``'REJECTED'`` holds how many of its assignments were rejected.
    A count column that is absent from ``df`` is treated as zero.

    For every sentence the counts are unrolled into a flat list of ratings
    (``-1``, ``0``, ``1``; rejected assignments become ``NaN``). Only
    sentences with exactly ``n_raters`` entries are kept.

    Args:
        df: DataFrame of per-sentence counts as described above.
        n_raters: Number of ratings a sentence must have to be included;
            also the number of columns of the result. Defaults to 3.

    Returns:
        DataFrame with one row per retained sentence and the columns
        ``'1'`` .. ``str(n_raters)``, one per rating slot.
    """
    # (count column in ``df``, rating value written to the output)
    count_columns = [(-1.0, -1), (0.0, 0), (1.0, 1), ('REJECTED', np.nan)]

    ratings_list = []
    for _, row in df.iterrows():
        ratings = []
        for column, rating in count_columns:
            # A class that never occurs in the data has no count column.
            count = row[column] if column in row.index else 0
            if count > 0:
                ratings.extend([rating] * int(count))
        # Sentences with too few or too many ratings are left out.
        if len(ratings) == n_raters:
            ratings_list.append(ratings)

    rater_columns = [str(i) for i in range(1, n_raters + 1)]
    return pd.DataFrame(ratings_list, columns=rater_columns)

In [29]:
# Gwet's AC2 with ordinal weights over all sentences in the campaign results
ratings_all = create_evaluation_df(grp)
cac_obj = CAC(ratings_all, weights='ordinal')
cac_obj.gwet()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.ratings.replace(to_replace="", value=np.nan, inplace=True)


{'est': {'coefficient_value': 0.22242,
  'coefficient_name': 'AC2',
  'confidence_interval': (0.15549, 0.28934),
  'p_value': 1.1445444592084186e-10,
  'z': 6.52222,
  'se': 0.0341,
  'pa': 0.70321,
  'pe': 0.61832},
 'weights': array([[1.        , 0.66666667, 0.        ],
        [0.66666667, 1.        , 0.66666667],
        [0.        , 0.66666667, 1.        ]]),
 'categories': [-1.0, 0.0, 1.0]}

In [30]:
# Gwet's AC2 with ordinal weights over the consolidated results only.
# `accepted` contains just the sentences on which at least two workers chose
# the same label, so the rows entering this coefficient were pre-filtered on
# agreement.
ratings_accepted = create_evaluation_df(accepted)
cac_obj = CAC(ratings_accepted, weights='ordinal')
cac_obj.gwet()

{'est': {'coefficient_value': 0.71537,
  'coefficient_name': 'AC2',
  'confidence_interval': (0.6627, 0.76804),
  'p_value': 0.0,
  'z': 26.72306,
  'se': 0.02677,
  'pa': 0.88924,
  'pe': 0.61087},
 'weights': array([[1.        , 0.66666667, 0.        ],
        [0.66666667, 1.        , 0.66666667],
        [0.        , 0.66666667, 1.        ]]),
 'categories': [-1.0, 0.0, 1.0]}