# Replication Experiment

This notebook contains the code used to conduct the replication experiment reported in the following paper:

> C. van Son, R. Morante, L. Aroyo, and P. Vossen (2018). Scoring and Classifying Implicit Positive Interpretations: A Challenge of Class Imbalance. In *Proceedings of the 27th International Conference on Computational Linguistics (COLING 2018)*, Santa Fe, New Mexico.

The work that is being replicated is the following:
> E. Blanco and Z. Sarabi (2016). Automatic generation and scoring of positive interpretations from negated statements. In *Proceedings of NAACL-HLT*, pages 1431–1441.





In [2]:
import csv
import os
from itertools import combinations
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.svm import SVR, SVC
from scipy.stats import pearsonr
from scipy.stats.mstats import spearmanr
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, mean_squared_error, classification_report
from sklearn.metrics import precision_recall_fscore_support as score
from math import sqrt
import utils.experiment as utils

In [3]:
# Create output directories: one per prediction task plus one for figures and tables.
figures_tables = "../results/figures_and_tables"
results_regr = "../results/regression_5_scores"
results_tertiary = "../results/classification_3_classes"
results_binary = "../results/classification_2_classes"

# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
for outdir in [results_regr, results_tertiary, results_binary, figures_tables]:
    os.makedirs(outdir, exist_ok=True)

## 1. Read data

First, we read the two datasets necessary for this experiment:

1. The dataset with positive interpretations 
2. The CoNLL-2011 dataset with gold annotations

In [None]:
# Input files: the gold CoNLL-2011 annotations and the positive interpretations TSV.
ontonotes, pos_interpretations = (
    "../data/conll2011-gold/all_gold_conll",
    "../data/all.tsv",
)

In [None]:
# Positive interpretations dataset, split into train and test rows by its "dataset" column
df = pd.read_csv(pos_interpretations, sep="\t")
df_train, df_test = (df.loc[df["dataset"] == split] for split in ("train", "test"))
df.head()

In [None]:
# OntoNotes dataset (CoNLL-2011), read as a tab-separated table
df_onto_all = pd.read_csv(ontonotes, delimiter="\t")
df_onto_all.head()

## 2. Extract features

In [None]:
# Build one feature dictionary per row of the positive interpretations dataset and
# write each value back into `df`, one column per feature name.
for index, row in tqdm(df.iterrows(), total=len(df)):
    
    features = dict()
    # Restrict the OntoNotes table to the file this row refers to
    df_onto = df_onto_all.loc[df_onto_all["file_id"] == row["file_id"]]
    
    # Get basic info: sentence, predicate, role, semantic roles
    sent_id_file = row["sent_id_file"]
    sent_df = df_onto.loc[df_onto["sent_id_file"] == sent_id_file]
    sent_tokens = list(sent_df.word_form)
    # Spans are stored as "start-end" strings of token ids
    pred_span = tuple(map(int, row["verb_span"].split('-')))
    pred_start, pred_end = pred_span
    role_span = tuple(map(int, row["role_span"].split('-')))
    role_start, role_end = role_span
    pred_df = sent_df.loc[(sent_df["token_id"] >= pred_start) & (sent_df["token_id"] <= pred_end)]
    # Take the first column whose printed representation for the predicate tokens contains "(V*"
    semrole_col = [col for col in pred_df.columns if "(V*" in str(pred_df[col])][0]
    ptree = utils.get_ptree(sent_df)
    all_semroles = utils.get_semroles(sent_df, semrole_col)
    
    # Get sentence features
    semrole_feat, semrole_tokens = utils.get_semrole_features(all_semroles, role_span)
    semrole_synt_feat = utils.get_syntactic_features_semrole(ptree, role_start)
    verbarg_struct_feat = utils.get_verbarg_struct_features(all_semroles)
    predicate_feat = utils.get_predicate_features(pred_df)
    verb_semrole_feat = utils.get_verb_semrole_features(ptree, role_start, pred_start)
     
    # Add each feature set to all features and update the dataframe
    feature_dictionaries = [predicate_feat, semrole_feat, semrole_synt_feat, verb_semrole_feat, verbarg_struct_feat]    
    for feat_dict in feature_dictionaries:
        features.update(feat_dict)
    for name, value in features.items():
        # DataFrame.set_value was deprecated in pandas 0.21 and removed in 1.0;
        # the .at accessor is the supported scalar setter.
        df.at[index, name] = value
    
# Rows that never received a "*_present" value get 0 in those columns
role_present_cols = [col for col in df.columns if col.endswith("_present")]
df[role_present_cols] = df[role_present_cols].fillna(value=0)

In [None]:
# Print every column name (original and extracted features), then preview the frame
print(df.columns.tolist())
df.head()

In [None]:
# Re-split after feature extraction so the train/test frames carry the new columns
df_train = df[df["dataset"] == "train"]
df_test = df[df["dataset"] == "test"]

# Write the full, train and test tables as tab-separated files
for frame, path in (
    (df, "../results/all_features.tsv"),
    (df_train, "../results/train_features.tsv"),
    (df_test, "../results/test_features.tsv"),
):
    frame.to_csv(path, sep="\t", index=False)

## 3. Loading and encoding features

In [4]:
# Reload the saved feature table and split it by the "dataset" column
df = pd.read_csv("../results/all_features.tsv", sep="\t")
df_train, df_test = (df.loc[df["dataset"] == split] for split in ("train", "test"))

In [5]:
# Column-name prefixes of the feature groups passed to the encoder
feature_categories = ("predicate_", "semrole_", "verb-semrole_", "verbarg-struct_")
# One-element tuple: without the trailing comma this was a plain string, so any
# helper that iterates over the prefixes would have iterated over characters.
# NOTE(review): utils.encode_features is not visible here - confirm it accepts a tuple.
continuous_prefixes = ("semrole_num_tokens",)
df = pd.read_csv("../results/all_features.tsv", sep="\t")
df_enc = utils.encode_features(df, feature_categories, continuous_prefixes)
# The split mask comes from the unencoded frame; this assumes df_enc keeps df's row index
df_train = df_enc.loc[df["dataset"] == "train"]
df_test = df_enc.loc[df["dataset"] == "test"]

In [6]:
# Preview the first five rows of the encoded feature table
df_enc.head(n=5)

Unnamed: 0,file_id,sent_id_file,part_id,sent_id_part,predicate,negation,positive_interpretation,verb_wf,verb_pos,verb_span,...,verbarg-struct_ARG3_head_pos#NN,verbarg-struct_ARG3_head_pos#PRP,verbarg-struct_ARGM-PRD_present#0.0,verbarg-struct_ARGM-PRD_present#1.0,verbarg-struct_ARGM-PRD_head_wf#business,verbarg-struct_ARGM-PRD_head_wf#indicted,verbarg-struct_ARGM-PRD_head_wf#remote,verbarg-struct_ARGM-PRD_head_pos#JJ,verbarg-struct_ARGM-PRD_head_pos#NN,verbarg-struct_ARGM-PRD_head_pos#VBN
0,bc/phoenix/00/phoenix_0002,208,5,27,represent.01,"I really wrote that I represented the dead, I ...","I really wrote that I represented the dead, I ...",represent,VB,12-12,...,0,0,1,0,0,0,0,0,0,0
1,bc/phoenix/00/phoenix_0002,208,5,27,represent.01,"I really wrote that I represented the dead, I ...","I really wrote that I represented the dead, {s...",represent,VB,12-12,...,0,0,1,0,0,0,0,0,0,0
2,bc/phoenix/00/phoenix_0002,208,5,27,represent.01,"I really wrote that I represented the dead, I ...","I really wrote that I represented the dead, I ...",represent,VB,12-12,...,0,0,1,0,0,0,0,0,0,0
3,mz/sinorama/10/ectb_1060,77,4,7,forget.01,Chang Kwang-chih taught for decades at Harvard...,Chang Kwang-chih taught for decades at Harvard...,forgot,VBD,11-11,...,0,0,1,0,0,0,0,0,0,0
4,mz/sinorama/10/ectb_1060,77,4,7,forget.01,Chang Kwang-chih taught for decades at Harvard...,{someone / some people / something} taught for...,forgot,VBD,11-11,...,0,0,1,0,0,0,0,0,0,0


## 4. Prediction & Evaluation

In [7]:
# Number of training and test instances
print(df_train.shape[0], df_test.shape[0])

1510 378


### 4.1 Regression Task: score from 0-5

In [None]:
# Train and evaluate an SVR on every combination of feature categories.
model = SVR()
results = list()
feature_categories = ("predicate_", "semrole_", "semrole_label", "verb-semrole_", "verbarg-struct_")

# Gold scores and the unencoded test rows are the same for every feature combination
y_train = pd.to_numeric(df_train.label)
y_test = pd.to_numeric(df_test.label)
df_test_rows = df.loc[df["dataset"] == "test"]

for r in range(1, len(feature_categories) + 1):
    for comb in combinations(feature_categories, r):
        # "semrole_" is a prefix of "semrole_label", so combining both adds nothing
        if "semrole_" in comb and "semrole_label" in comb:
            continue
        print(comb)
        # str.startswith accepts a tuple: keep columns matching any selected prefix
        final_features = [column for column in df_train.columns if column.startswith(comb)]

        # Get features of train + test set
        X_train = df_train[final_features]
        X_test = df_test[final_features]

        # Fit model and predict
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Evaluation
        pearson_r, pearson_p = pearsonr(y_test, y_pred)
        spearman_r, spearman_p = spearmanr(y_test, y_pred, use_ties=True)
        rmse = sqrt(mean_squared_error(y_test, y_pred))

        # Collect the scores for the summary table
        results.append({"Features": comb,
                        "Pearson's R": pearson_r,
                        "Pearson's P": pearson_p,
                        "Spearman's R": spearman_r,
                        "Spearman's P": spearman_p,
                        "RMSE": rmse})

        # assign() returns a new frame, so no column is written into a slice of `df`
        # (the direct assignment raised SettingWithCopyWarning)
        df_pred = df_test_rows.assign(prediction=y_pred)
        features = "#".join(comb)
        df_pred.to_csv(os.path.join(results_regr, f"{features}.csv"), sep="\t", index=False)

Summarize the results of the regression task and write them to a LaTeX table:

In [20]:
# Keep the reported columns only, rounded to three decimals
#df_results["Pearson's R"] = df_results.apply (lambda row: utils.mark_significance(row, "Pearson's R", "Pearson's P"),axis=1)
#df_results["Spearman's R"] = df_results.apply (lambda row: utils.mark_significance(row, "Spearman's R", "Spearman's P"),axis=1)
#df_results["Pearson's R: Blanco"] = ""
#df_results = df_results[["Features", "Pearson's R: Blanco", "Pearson's R", "Spearman's R", "RMSE"]].round(3)
df_results = pd.DataFrame(results)[["Features", "Pearson's R", "Spearman's R", "RMSE"]].round(3)

# Export the table as LaTeX and as a tab-separated file
latex_file = os.path.join(figures_tables, "latex_results_regression.tex")
csv_file = os.path.join(figures_tables, "results_regression.csv")
df_results.to_latex(latex_file, index=False)
df_results.to_csv(csv_file, index=False, sep="\t")
df_results

Unnamed: 0,Features,Pearson's R,Spearman's R,RMSE
0,"(predicate_,)",0.078,0.061,1.76
1,"(semrole_,)",0.641,0.636,1.39
2,"(semrole_label,)",0.656,0.639,1.282
3,"(verb-semrole_,)",0.603,0.595,1.357
4,"(verbarg-struct_,)",0.148,0.14,1.747
5,"(predicate_, semrole_)",0.641,0.633,1.397
6,"(predicate_, semrole_label)",0.633,0.637,1.383
7,"(predicate_, verb-semrole_)",0.585,0.605,1.436
8,"(predicate_, verbarg-struct_)",0.141,0.136,1.749
9,"(semrole_, verb-semrole_)",0.636,0.643,1.395


### 4.2 Classification Task: 3 classes

In [None]:
# Train and evaluate an SVC on every combination of feature categories (3 classes).
model = SVC()
results = list()
feature_categories = ("predicate_", "semrole_", "semrole_label", "verb-semrole_", "verbarg-struct_")

# Gold classes and the unencoded test rows are the same for every feature combination
y_train = pd.to_numeric(df_train.class_tertiary)
y_test = pd.to_numeric(df_test.class_tertiary)
df_test_rows = df.loc[df["dataset"] == "test"]

# The loop variable was `r`, which the recall value returned by score() overwrote
# inside the loop body; distinct names keep the two apart.
for n_categories in range(1, len(feature_categories) + 1):
    for comb in combinations(feature_categories, n_categories):
        # "semrole_" is a prefix of "semrole_label", so combining both adds nothing
        if "semrole_" in comb and "semrole_label" in comb:
            continue
        print(comb)
        # str.startswith accepts a tuple: keep columns matching any selected prefix
        final_features = [column for column in df_train.columns if column.startswith(comb)]

        # Get features of train + test set
        X_train = df_train[final_features]
        X_test = df_test[final_features]

        # Fit model and predict
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Evaluation: support-weighted average over the three classes
        precision, recall, f1, _support = score(y_test, y_pred, labels=[0, 1, 2], average="weighted")

        # Collect the scores for the summary table
        results.append({"Features": comb,
                        "Precision": precision,
                        "Recall": recall,
                        "F1": f1})

        # assign() returns a new frame, so no column is written into a slice of `df`
        # (the direct assignment raised SettingWithCopyWarning)
        df_pred = df_test_rows.assign(prediction=y_pred)
        features = "#".join(comb)
        csv_file = os.path.join(results_tertiary, f"{features}.csv")
        df_pred.to_csv(csv_file, sep="\t", index=False)

Summarize the results of the tertiary (three-class) classification task:

In [13]:
# Three-class scores per feature combination, best F1 first
df_results_3 = (
    pd.DataFrame(results)[["Features", "Precision", "Recall", "F1"]]
    .round(3)
    .sort_values(["F1"], ascending=False)
)
df_results_3

Unnamed: 0,Features,Precision,Recall,F1
2,"(semrole_label,)",0.702,0.759,0.721
11,"(semrole_label, verb-semrole_)",0.681,0.759,0.716
3,"(verb-semrole_,)",0.681,0.759,0.716
6,"(predicate_, semrole_label)",0.681,0.759,0.716
7,"(predicate_, verb-semrole_)",0.681,0.759,0.716
9,"(semrole_, verb-semrole_)",0.681,0.759,0.716
16,"(predicate_, semrole_label, verb-semrole_)",0.681,0.759,0.716
14,"(predicate_, semrole_, verb-semrole_)",0.619,0.714,0.663
21,"(predicate_, semrole_, verb-semrole_, verbarg-...",0.424,0.651,0.513
20,"(semrole_label, verb-semrole_, verbarg-struct_)",0.424,0.651,0.513


### 4.3 Classification Task: 2 classes

In [14]:
# Train and evaluate an SVC on every combination of feature categories (2 classes).
model = SVC()
results = list()
feature_categories = ("predicate_", "semrole_", "semrole_label", "verb-semrole_", "verbarg-struct_")

# Gold classes and the unencoded test rows are the same for every feature combination
y_train = pd.to_numeric(df_train.class_binary)
y_test = pd.to_numeric(df_test.class_binary)
df_test_rows = df.loc[df["dataset"] == "test"]

# The loop variable was `r`, which the recall value returned by score() overwrote
# inside the loop body; distinct names keep the two apart.
for n_categories in range(1, len(feature_categories) + 1):
    for comb in combinations(feature_categories, n_categories):
        # "semrole_" is a prefix of "semrole_label", so combining both adds nothing
        if "semrole_" in comb and "semrole_label" in comb:
            continue
        print(comb)
        # str.startswith accepts a tuple: keep columns matching any selected prefix
        final_features = [column for column in df_train.columns if column.startswith(comb)]

        # Get features of train + test set
        X_train = df_train[final_features]
        X_test = df_test[final_features]

        # Fit model and predict
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Evaluation: support-weighted average over the two classes
        precision, recall, f1, _support = score(y_test, y_pred, labels=[0, 1], average="weighted")

        # Collect the scores for the summary table
        results.append({"Features": comb,
                        "Precision": precision,
                        "Recall": recall,
                        "F1": f1})

        # assign() returns a new frame, so no column is written into a slice of `df`
        # (the direct assignment raised SettingWithCopyWarning)
        df_pred = df_test_rows.assign(prediction=y_pred)
        features = "#".join(comb)
        csv_file = os.path.join(results_binary, f"{features}.csv")
        df_pred.to_csv(csv_file, sep="\t", index=False)

('predicate_',)


  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


('semrole_',)
('semrole_label',)
('verb-semrole_',)
('verbarg-struct_',)
('predicate_', 'semrole_')
('predicate_', 'semrole_label')
('predicate_', 'verb-semrole_')
('predicate_', 'verbarg-struct_')
('semrole_', 'verb-semrole_')
('semrole_', 'verbarg-struct_')
('semrole_label', 'verb-semrole_')
('semrole_label', 'verbarg-struct_')
('verb-semrole_', 'verbarg-struct_')
('predicate_', 'semrole_', 'verb-semrole_')
('predicate_', 'semrole_', 'verbarg-struct_')
('predicate_', 'semrole_label', 'verb-semrole_')
('predicate_', 'semrole_label', 'verbarg-struct_')
('predicate_', 'verb-semrole_', 'verbarg-struct_')
('semrole_', 'verb-semrole_', 'verbarg-struct_')
('semrole_label', 'verb-semrole_', 'verbarg-struct_')
('predicate_', 'semrole_', 'verb-semrole_', 'verbarg-struct_')
('predicate_', 'semrole_label', 'verb-semrole_', 'verbarg-struct_')


Summarize the results of the binary classification task:

In [16]:
# Binary scores per feature combination, best F1 first
df_results_2 = (
    pd.DataFrame(results)[["Features", "Precision", "Recall", "F1"]]
    .round(3)
    .sort_values(["F1"], ascending=False)
)
df_results_2

Unnamed: 0,Features,Precision,Recall,F1
2,"(semrole_label,)",0.853,0.828,0.834
11,"(semrole_label, verb-semrole_)",0.832,0.825,0.828
3,"(verb-semrole_,)",0.832,0.825,0.828
7,"(predicate_, verb-semrole_)",0.832,0.825,0.828
16,"(predicate_, semrole_label, verb-semrole_)",0.832,0.825,0.828
9,"(semrole_, verb-semrole_)",0.77,0.78,0.772
14,"(predicate_, semrole_, verb-semrole_)",0.514,0.717,0.599
21,"(predicate_, semrole_, verb-semrole_, verbarg-...",0.514,0.717,0.599
20,"(semrole_label, verb-semrole_, verbarg-struct_)",0.514,0.717,0.599
19,"(semrole_, verb-semrole_, verbarg-struct_)",0.514,0.717,0.599


Merge the results of the binary and tertiary classification tasks and write them to a LaTeX table:

In [18]:
# Join the three-class and binary tables on the feature combination; shared column
# names get the "_tertiary" / "_binary" suffixes.
df_merged = pd.merge(df_results_3, df_results_2, on=["Features"], suffixes=["_tertiary", "_binary"])
df_merged = df_merged.round(3).sort_values(["F1_tertiary", "F1_binary"], ascending=False)

# Export the merged table as LaTeX and as a tab-separated file
latex_file = os.path.join(figures_tables, "latex_results_binary_tertiary.tex")
df_merged.to_latex(latex_file, index=False)
csv_file = os.path.join(figures_tables, "results_binary_tertiary.csv")
# Write to csv_file: passing latex_file here overwrote the LaTeX table just written
df_merged.to_csv(csv_file, index=False, sep="\t")
df_merged

Unnamed: 0,Features,Precision_tertiary,Recall_tertiary,F1_tertiary,Precision_binary,Recall_binary,F1_binary
0,"(semrole_label,)",0.702,0.759,0.721,0.853,0.828,0.834
1,"(semrole_label, verb-semrole_)",0.681,0.759,0.716,0.832,0.825,0.828
2,"(verb-semrole_,)",0.681,0.759,0.716,0.832,0.825,0.828
4,"(predicate_, verb-semrole_)",0.681,0.759,0.716,0.832,0.825,0.828
6,"(predicate_, semrole_label, verb-semrole_)",0.681,0.759,0.716,0.832,0.825,0.828
5,"(semrole_, verb-semrole_)",0.681,0.759,0.716,0.77,0.78,0.772
3,"(predicate_, semrole_label)",0.681,0.759,0.716,0.514,0.717,0.599
7,"(predicate_, semrole_, verb-semrole_)",0.619,0.714,0.663,0.514,0.717,0.599
8,"(predicate_, semrole_, verb-semrole_, verbarg-...",0.424,0.651,0.513,0.514,0.717,0.599
9,"(semrole_label, verb-semrole_, verbarg-struct_)",0.424,0.651,0.513,0.514,0.717,0.599


### 4.4 Baseline

#### Regression: 5 scores

In [21]:
# Apply baseline and save results to file
y_pred = utils.apply_baseline(df_test, df_train, approach="mean", pred_label="label")
y_test = df_test.label
df_test["prediction"] = y_pred
csv_file = os.path.join(results_regr, "BASELINE_MEAN.csv")
df_test.to_csv(csv_file, sep="\t", index=False)

# Evaluation
pearson_r, pearson_p = pearsonr(y_test, y_pred)
spearman_r, spearman_p = spearmanr(y_test, y_pred)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print("Pearson", pearson_r, pearson_p)
print("Spearman", spearman_r, spearman_p)
print("RMSE", rmse)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Pearson 0.6787137458008529 2.3570488898291746e-52
Spearman 0.6251030687973844 2.277078525582571e-42
RMSE 1.2385736855161693


#### Classification: 3 classes

In [22]:
# Apply baseline and save results to file
y_pred = utils.apply_baseline(df_test, df_train, approach="mf", pred_label="class_tertiary")
y_test = df_test.class_tertiary
df_test["prediction"] = y_pred
csv_file = os.path.join(results_tertiary, "BASELINE_MF.csv")
df_test.to_csv(csv_file, sep="\t", index=False)

# Evaluation
scores = score(y_test, y_pred, labels= [0,1,2], average="weighted")
class_report = classification_report(y_test, y_pred)
print(scores)
print(class_report)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(0.7114668856048166, 0.7619047619047619, 0.7254723536851448, None)
             precision    recall  f1-score   support

          0       0.54      0.91      0.68        86
          1       0.00      0.00      0.00        46
          2       0.91      0.85      0.88       246

avg / total       0.71      0.76      0.73       378



#### Classification: 2 classes

In [23]:
# Apply baseline and save results to file
y_pred = utils.apply_baseline(df_test, df_train, approach="mf", pred_label="class_binary")
y_test = df_test.class_binary
df_test["prediction"] = y_pred
csv_file = os.path.join(results_binary, "BASELINE_MF.csv")
df_test.to_csv(csv_file, sep="\t", index=False)

# Evaluation
scores = score(y_test, y_pred, labels= [0,1], average="weighted")
class_report = classification_report(y_test, y_pred)
print(scores)
print(class_report)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(0.8640371595024406, 0.8333333333333334, 0.8397179505313311, None)
             precision    recall  f1-score   support

          0       0.65      0.89      0.75       107
          1       0.95      0.81      0.87       271

avg / total       0.86      0.83      0.84       378

