In [2]:
import ast
import json

import evaluate
import pandas as pd
from pandas import read_parquet

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the train / dev / test splits (in that order) from the merged dataset.
train_data, dev_data, test_data = map(
    read_parquet,
    (
        "data/merge/train.parquet",
        "data/merge/dev.parquet",
        "data/merge/test.parquet",
    ),
)

# tags2idx is whatever mapping the JSON file stores (tag -> index, judging by
# how it is inverted below).
with open("data/merge/tags_2_idx.json", "r") as f:
    tags2idx = json.loads(f.read())

# Reverse lookup: index -> tag.
idx2tags = dict(zip(tags2idx.values(), tags2idx.keys()))

In [4]:
# Decode the dev split's "ner_tags" column: every row is turned into a plain
# Python list (via .tolist()) and each index is looked up in idx2tags.
dev_tags = [
    [idx2tags[idx] for idx in row.tolist()]
    for row in dev_data["ner_tags"].values.tolist()
]

# Same decoding for the test split.
test_tags = [
    [idx2tags[idx] for idx in row.tolist()]
    for row in test_data["ner_tags"].values.tolist()
]

In [5]:
# seqeval metric object obtained through evaluate.load; the cells below call
# seqeval.compute(predictions=..., references=...) on it.
seqeval = evaluate.load("seqeval")
# Directory prefix for the per-model prediction CSV files read below.
result_path = "model_prediction_files"

# BiLSTM Model

In [9]:
# Evaluate the predictions stored in BiLSTM_dev.csv against the dev gold tags.
dev_pred_df = pd.read_csv(result_path + "/BiLSTM_dev.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.7927988083620114,
  'recall': 0.8008197571858462,
  'f1': 0.7967890973853341,
  'number': 19274},
 'ORG': {'precision': 0.6547136756815942,
  'recall': 0.7437189054726369,
  'f1': 0.6963838583823444,
  'number': 16080},
 'PER': {'precision': 0.8282334480053111,
  'recall': 0.8289338568408335,
  'f1': 0.8285835044076801,
  'number': 16555},
 'overall_precision': 0.7571633765468474,
 'overall_recall': 0.7920977094530813,
 'overall_f1': 0.77423667535989,
 'overall_accuracy': 0.9060073944565635}

In [42]:
# Evaluate the predictions stored in BiLSTM_test.csv against the test gold tags.
test_pred_df = pd.read_csv(result_path + "/BiLSTM_test.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
test_predictions = [ast.literal_eval(row) for row in test_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
test_references = [test_tags[i][:len(pred)] for i, pred in enumerate(test_predictions)]
test_result = seqeval.compute(predictions=test_predictions, references=test_references)
test_result

{'LOC': {'precision': 0.7791982000409081,
  'recall': 0.7786805662016455,
  'f1': 0.7789392971246006,
  'number': 19569},
 'ORG': {'precision': 0.6256896013323618,
  'recall': 0.7112767719796473,
  'f1': 0.6657437146970872,
  'number': 16902},
 'PER': {'precision': 0.8182443272977309,
  'recall': 0.8302325581395349,
  'f1': 0.8241948516680134,
  'number': 17200},
 'overall_precision': 0.7388566753228274,
 'overall_recall': 0.7739747722233609,
 'overall_f1': 0.7560081169865231,
 'overall_accuracy': 0.9041126316132109}

# BiLSTM + CharCNN

In [18]:
# Evaluate the predictions stored in BiLSTM_CNN_dev.csv against the dev gold tags.
dev_pred_df = pd.read_csv(result_path + "/BiLSTM_CNN_dev.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.7975469489541452,
  'recall': 0.8130642316073466,
  'f1': 0.8052308403771548,
  'number': 19274},
 'ORG': {'precision': 0.6985315798785162,
  'recall': 0.7366293532338308,
  'f1': 0.7170747949269001,
  'number': 16080},
 'PER': {'precision': 0.8134045485871813,
  'recall': 0.8555119299305346,
  'f1': 0.8339270468395795,
  'number': 16555},
 'overall_precision': 0.771576141286238,
 'overall_recall': 0.8029243483788938,
 'overall_f1': 0.7869381744031267,
 'overall_accuracy': 0.9068956114461696}

In [44]:
# Evaluate the predictions stored in BiLSTM_CNN_test.csv against the test gold tags.
test_pred_df = pd.read_csv(result_path + "/BiLSTM_CNN_test.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
test_predictions = [ast.literal_eval(row) for row in test_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
test_references = [test_tags[i][:len(pred)] for i, pred in enumerate(test_predictions)]
test_result = seqeval.compute(predictions=test_predictions, references=test_references)
test_result

{'LOC': {'precision': 0.7767096677790507,
  'recall': 0.7968726046297716,
  'f1': 0.7866619583312314,
  'number': 19569},
 'ORG': {'precision': 0.6679731879118382,
  'recall': 0.6957164832564193,
  'f1': 0.6815626267895438,
  'number': 16902},
 'PER': {'precision': 0.8112227990721308,
  'recall': 0.8539534883720931,
  'f1': 0.8320398799070979,
  'number': 17200},
 'overall_precision': 0.7535985086131177,
 'overall_recall': 0.783309422220566,
 'overall_f1': 0.7681667854336822,
 'overall_accuracy': 0.9059699120568137}

# CharCNN + Attention (Encoder)

In [47]:
# Evaluate the predictions stored in BiLSTM_CNN_Attention_dev.csv against the
# dev gold tags.
dev_pred_df = pd.read_csv(result_path + "/BiLSTM_CNN_Attention_dev.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.7457205559619605,
  'recall': 0.7933485524540832,
  'f1': 0.7687976067774455,
  'number': 19274},
 'ORG': {'precision': 0.6221147504946142,
  'recall': 0.7039800995024875,
  'f1': 0.660520480802894,
  'number': 16080},
 'PER': {'precision': 0.7863664719559916,
  'recall': 0.8375717305949865,
  'f1': 0.8111618111618112,
  'number': 16555},
 'overall_precision': 0.7185181240458692,
 'overall_recall': 0.7797684409254657,
 'overall_f1': 0.7478913186072078,
 'overall_accuracy': 0.9060535095598614}

In [48]:
# Evaluate the predictions stored in BiLSTM_CNN_Attention_test.csv against the
# test gold tags.
test_pred_df = pd.read_csv(result_path + "/BiLSTM_CNN_Attention_test.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
test_predictions = [ast.literal_eval(row) for row in test_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
test_references = [test_tags[i][:len(pred)] for i, pred in enumerate(test_predictions)]
test_result = seqeval.compute(predictions=test_predictions, references=test_references)
test_result

{'LOC': {'precision': 0.7301221036438804,
  'recall': 0.7761255046246615,
  'f1': 0.7524212925119517,
  'number': 19569},
 'ORG': {'precision': 0.6006036217303823,
  'recall': 0.6711040113596024,
  'f1': 0.6338996311612831,
  'number': 16902},
 'PER': {'precision': 0.7724093403637147,
  'recall': 0.8346511627906976,
  'f1': 0.8023249315374728,
  'number': 17200},
 'overall_precision': 0.7016336616672959,
 'overall_recall': 0.7618080527659258,
 'overall_f1': 0.7304837196837732,
 'overall_accuracy': 0.9040915696700359}

# BiLSTM + CRF

In [17]:
# Evaluate the predictions stored in BiLSTM_CRF_dev.csv against the dev gold tags.
dev_pred_df = pd.read_csv(result_path + "/BiLSTM_CRF_dev.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.8778469058993446,
  'recall': 0.8199128359447961,
  'f1': 0.8478914046571521,
  'number': 19274},
 'ORG': {'precision': 0.7683598716473936,
  'recall': 0.7892412935323383,
  'f1': 0.778660612939841,
  'number': 16080},
 'PER': {'precision': 0.9007547169811321,
  'recall': 0.8651162790697674,
  'f1': 0.8825758742874749,
  'number': 16555},
 'overall_precision': 0.8492036732184296,
 'overall_recall': 0.824828064497486,
 'overall_f1': 0.8368384020014072,
 'overall_accuracy': 0.9023663063005262}

In [46]:
# Evaluate the predictions stored in BiLSTM_CRF_test.csv against the test gold tags.
test_pred_df = pd.read_csv(result_path + "/BiLSTM_CRF_test.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
test_predictions = [ast.literal_eval(row) for row in test_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
test_references = [test_tags[i][:len(pred)] for i, pred in enumerate(test_predictions)]
test_result = seqeval.compute(predictions=test_predictions, references=test_references)
test_result

{'LOC': {'precision': 0.8680806752164858,
  'recall': 0.8093924063569932,
  'f1': 0.8377099034774559,
  'number': 19569},
 'ORG': {'precision': 0.748600388944546,
  'recall': 0.7515678617915039,
  'f1': 0.7500811903988662,
  'number': 16902},
 'PER': {'precision': 0.8839637274788211,
  'recall': 0.861453488372093,
  'f1': 0.8725634532713031,
  'number': 17200},
 'overall_precision': 0.8341958943378802,
 'overall_recall': 0.8078664455665071,
 'overall_f1': 0.8208200817810086,
 'overall_accuracy': 0.9026153189256877}

# Attention + CRF

In [49]:
# Evaluate the predictions stored in ATTENTION_CRF_dev.csv against the dev gold tags.
dev_pred_df = pd.read_csv(result_path + "/ATTENTION_CRF_dev.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.8439826647405267,
  'recall': 0.7881083324686106,
  'f1': 0.8150890749087787,
  'number': 19274},
 'ORG': {'precision': 0.7308071559374559,
  'recall': 0.645273631840796,
  'f1': 0.6853821256357752,
  'number': 16080},
 'PER': {'precision': 0.8187373917170998,
  'recall': 0.8335246149199638,
  'f1': 0.8260648328294771,
  'number': 16555},
 'overall_precision': 0.8025484199796127,
 'overall_recall': 0.7583463368587335,
 'overall_f1': 0.7798215117027705,
 'overall_accuracy': 0.8665569260875144}

In [50]:
# Evaluate the predictions stored in ATTENTION_CRF_test.csv against the test gold tags.
test_pred_df = pd.read_csv(result_path + "/ATTENTION_CRF_test.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
test_predictions = [ast.literal_eval(row) for row in test_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
test_references = [test_tags[i][:len(pred)] for i, pred in enumerate(test_predictions)]
test_result = seqeval.compute(predictions=test_predictions, references=test_references)
test_result

{'LOC': {'precision': 0.827580530099325,
  'recall': 0.7706576728499157,
  'f1': 0.7981054191363252,
  'number': 19569},
 'ORG': {'precision': 0.709853392430958,
  'recall': 0.6159034433794817,
  'f1': 0.6595495295720214,
  'number': 16902},
 'PER': {'precision': 0.808072255151002,
  'recall': 0.8322674418604651,
  'f1': 0.819991407704425,
  'number': 17200},
 'overall_precision': 0.786633203565006,
 'overall_recall': 0.7416668219336326,
 'overall_f1': 0.7634885014481079,
 'overall_accuracy': 0.8690789612249626}

# Attention (Encoder) + CRF

In [14]:
# Evaluate the predictions stored in Attention_Encoder_CRF_dev.csv against the
# dev gold tags.
dev_pred_df = pd.read_csv(result_path + "/Attention_Encoder_CRF_dev.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.8261148883773222,
  'recall': 0.800612223721075,
  'f1': 0.8131636497773562,
  'number': 19274},
 'ORG': {'precision': 0.7881140598531903,
  'recall': 0.6944029850746268,
  'f1': 0.7382967468923566,
  'number': 16080},
 'PER': {'precision': 0.8471808320419597,
  'recall': 0.8585925702204772,
  'f1': 0.8528485284852848,
  'number': 16555},
 'overall_precision': 0.8223879093198992,
 'overall_recall': 0.7862027779383152,
 'overall_f1': 0.8038883526700417,
 'overall_accuracy': 0.8892275118696266}

In [52]:
# Evaluate the predictions stored in Attention_Encoder_CRF_test.csv against the
# test gold tags.
test_pred_df = pd.read_csv(result_path + "/Attention_Encoder_CRF_test.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
test_predictions = [ast.literal_eval(row) for row in test_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
test_references = [test_tags[i][:len(pred)] for i, pred in enumerate(test_predictions)]
test_result = seqeval.compute(predictions=test_predictions, references=test_references)
test_result

{'LOC': {'precision': 0.8056837248322147,
  'recall': 0.7852215238387246,
  'f1': 0.7953210320643876,
  'number': 19569},
 'ORG': {'precision': 0.7616485013623978,
  'recall': 0.6615193468228612,
  'f1': 0.7080615540497751,
  'number': 16902},
 'PER': {'precision': 0.840104998858708,
  'recall': 0.8559302325581395,
  'f1': 0.8479437852781937,
  'number': 17200},
 'overall_precision': 0.8048404711755988,
 'overall_recall': 0.7689254904883457,
 'overall_f1': 0.7864731721726205,
 'overall_accuracy': 0.889286938340204}

# Multi-Channels

In [8]:
# Evaluate the predictions stored in Channel_dev.csv against the dev gold tags.
dev_pred_df = pd.read_csv(result_path + "/Channel_dev.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.7254286909242994,
  'recall': 0.7199335892912733,
  'f1': 0.7226706942346751,
  'number': 19274},
 'ORG': {'precision': 0.5612784204228161,
  'recall': 0.7000621890547264,
  'f1': 0.6230352003542174,
  'number': 16080},
 'PER': {'precision': 0.770806432679892,
  'recall': 0.7932950770160072,
  'f1': 0.7818890840353646,
  'number': 16555},
 'overall_precision': 0.6806232435701327,
 'overall_recall': 0.7371746710589686,
 'overall_f1': 0.7077711294633362,
 'overall_accuracy': 0.8871142371358912}

In [7]:
# Evaluate the predictions stored in Channel_test.csv against the test gold tags.
test_pred_df = pd.read_csv(result_path + "/Channel_test.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
test_predictions = [ast.literal_eval(row) for row in test_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
test_references = [test_tags[i][:len(pred)] for i, pred in enumerate(test_predictions)]
test_result = seqeval.compute(predictions=test_predictions, references=test_references)
test_result

{'LOC': {'precision': 0.6968503937007874,
  'recall': 0.6964586846543002,
  'f1': 0.6966544841158279,
  'number': 19569},
 'ORG': {'precision': 0.5338313665778455,
  'recall': 0.663767601467282,
  'f1': 0.5917506197584261,
  'number': 16902},
 'PER': {'precision': 0.7674640610961365,
  'recall': 0.7945930232558139,
  'f1': 0.780792961608775,
  'number': 17200},
 'overall_precision': 0.6597067589325477,
 'overall_recall': 0.7176128635576009,
 'overall_f1': 0.6874425495078221,
 'overall_accuracy': 0.8855972688403868}

# Dilated CNN

In [9]:
# Evaluate the predictions stored in DCNN_dev.csv against the dev gold tags.
dev_pred_df = pd.read_csv(result_path + "/DCNN_dev.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.6770175121863152,
  'recall': 0.7782504928919788,
  'f1': 0.724112961622013,
  'number': 19274},
 'ORG': {'precision': 0.5287422037422037,
  'recall': 0.6326492537313433,
  'f1': 0.5760475651189128,
  'number': 16080},
 'PER': {'precision': 0.6784286630804915,
  'recall': 0.8303835699184536,
  'f1': 0.7467543049595307,
  'number': 16555},
 'overall_precision': 0.6312136103407451,
 'overall_recall': 0.7497736423356258,
 'overall_f1': 0.685404339250493,
 'overall_accuracy': 0.9023121711792634}

In [10]:
# Evaluate the predictions stored in DCNN_test.csv against the test gold tags.
test_pred_df = pd.read_csv(result_path + "/DCNN_test.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of tag strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
test_predictions = [ast.literal_eval(row) for row in test_pred_df["predictions"].values.tolist()]
# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
test_references = [test_tags[i][:len(pred)] for i, pred in enumerate(test_predictions)]
test_result = seqeval.compute(predictions=test_predictions, references=test_references)
test_result

{'LOC': {'precision': 0.6582594964470567,
  'recall': 0.7668761817159794,
  'f1': 0.7084287299077112,
  'number': 19569},
 'ORG': {'precision': 0.5075318066157761,
  'recall': 0.5900485149686427,
  'f1': 0.5456883344276647,
  'number': 16902},
 'PER': {'precision': 0.6760729816600491,
  'recall': 0.8315697674418605,
  'f1': 0.7458024820106373,
  'number': 17200},
 'overall_precision': 0.6176183887805798,
 'overall_recall': 0.7319222671461311,
 'overall_f1': 0.6699296525261139,
 'overall_accuracy': 0.9019221895230236}

# SKWEAK Aggregation

In [7]:
# Lookup table from dash-less labels ("BLOC") to dashed labels ("B-LOC");
# "O" maps to itself. Built from ordered pairs so key order is preserved.
skweak_tags2tags = dict(
    [
        ("BLOC", "B-LOC"),
        ("ILOC", "I-LOC"),
        ("BPER", "B-PER"),
        ("IPER", "I-PER"),
        ("BORG", "B-ORG"),
        ("IORG", "I-ORG"),
        ("BMISC", "B-MISC"),
        ("IMISC", "I-MISC"),
        ("O", "O"),
    ]
)

## BiLSTM & BiLSTM + CharCNN & BiLSTM + CRF & Attention (Encoder) + CharCNN

In [11]:
# Evaluate the predictions stored in skweak/skweak_hmm_output.csv against the
# dev gold tags.
dev_pred_df = pd.read_csv("skweak/skweak_hmm_output.csv")
# The "predictions" column is parsed from text into Python objects (presumably
# one list of label strings per row -- confirm against the CSV writer).
# ast.literal_eval only accepts literals, so file contents are never executed
# as code, unlike eval().
dev_predictions = [ast.literal_eval(row) for row in dev_pred_df["predictions"].values.tolist()]
# Map every predicted label through skweak_tags2tags (e.g. "BLOC" -> "B-LOC").
dev_predictions = [[skweak_tags2tags[label] for label in labels] for labels in dev_predictions]

# Keep only the first len(prediction) gold tags of each sentence, so a gold
# sequence is never longer than its prediction (presumably predictions were
# cut to a maximum length upstream -- confirm).
dev_references = [dev_tags[i][:len(pred)] for i, pred in enumerate(dev_predictions)]
dev_result = seqeval.compute(predictions=dev_predictions, references=dev_references)
dev_result

{'LOC': {'precision': 0.8439731316443645,
  'recall': 0.8279028743384871,
  'f1': 0.835860768445038,
  'number': 19274},
 'ORG': {'precision': 0.772856221792392,
  'recall': 0.7453211465522601,
  'f1': 0.7588389833190896,
  'number': 16083},
 'PER': {'precision': 0.8804778371581263,
  'recall': 0.8459075807913017,
  'f1': 0.8628465804066543,
  'number': 16555},
 'overall_precision': 0.8335916696474703,
 'overall_recall': 0.8080597934966867,
 'overall_f1': 0.8206271886065301,
 'overall_accuracy': 0.9096865110264607}