# Preprocess

## Set Working Directory, Imports, and Constants 

In [2]:
# Make sure our working directory is the root of project, e.g /amr-parser
%cd ..
%pwd

/home/banditelol/Works/MasterDegree/AMRResearch/amr_parser


'/home/banditelol/Works/MasterDegree/AMRResearch/amr_parser'

In [3]:
# Add current working directory as module path
import os
import sys
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
module_path = os.path.abspath(os.path.join('.'))
if module_path not in sys.path:
    sys.path.append(module_path)
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [4]:
import penman
from importlib import reload
reload(penman)

<module 'penman' from '/home/banditelol/miniconda3/envs/amr_parser/lib/python3.8/site-packages/penman.py'>

In [5]:
import os
import pickle
import string
import time
import logging
from collections import Counter, defaultdict

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import stanfordnlp
import stanza

from IPython.display import HTML, display
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import \
    StopWordRemoverFactory
from wordcloud import STOPWORDS, WordCloud

from stog.data.dataset_builder import dataset_from_params, iterator_from_params
from utils.amr_parsing.io import AMRIO
from utils.ner.entity_recognizer import get_entities

[2021-08-11 00:29:44,987 INFO] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .


In [6]:
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

logger.debug("test debug")
logger.info("test info")
logger.warning("test warning")



In [7]:
AMR_FILEPATH = "data/raw/amr_simple.txt"
AMR_TEST_FILEPATH = "data/raw/amr_simple_test.txt"
EDGE_PREDICTION_DATADIR = 'data/raw/edge_prediction/'

# Dependency feature
DEPENDENCY_FEATURES_FILEPATH = EDGE_PREDICTION_DATADIR + 'dependency_parser_features.csv'
TEST_DEPENDENCY_FEATURES_FILEPATH = EDGE_PREDICTION_DATADIR + 'test_dependency_parser_features.csv'

# Filtered dependency features
FILTERED_DEPENDENCY_FEATURES_FILEPATH = EDGE_PREDICTION_DATADIR + 'dependency_parser_features_filtered.csv'
FILTERED_TEST_DEPENDENCY_FEATURES_FILEPATH = EDGE_PREDICTION_DATADIR + 'test_dependency_parser_features_filtered.csv'

# Labeled (preprocess result)
LABELED_TEST_DEPENDENCY_FEATURES_FILEPATH = EDGE_PREDICTION_DATADIR + 'test_dependency_parser_features_labeled.csv'
LABELED_DEPENDENCY_FEATURES_FILEPATH = EDGE_PREDICTION_DATADIR + 'dependency_parser_features_labeled.csv'
LABELED_DEPENDENCY_FEATURES_FILEPATH_NO_UNK = EDGE_PREDICTION_DATADIR + 'dependency_parser_features_labeled_no_unk.csv'

# AMR (preprocess result dari gold dataset)
AMR_DF_FILEPATH = EDGE_PREDICTION_DATADIR + 'amr.csv'
TEST_AMR_DF_FILEPATH = EDGE_PREDICTION_DATADIR + 'amr.test.csv'

# ANNOTATOR 
ANNOTATOR_PARAMS={
    'pos_tagger': 'nltk'
}

# FILTER TYPE
FILTER_TYPE = ['preposition', 'determiner', 'sc']

# Severina Dataset
TEST_BERITA_DIR = "data/raw/test/"
TEST_BERITA_FILE_NAMES = ["b-salah-darat.txt","c-gedung-roboh.txt","d-indo-fuji.txt", "f-bunuh-diri.txt", "g-gempa-dieng.txt"]
TEST_BERITA_FILE_PATHS = [TEST_BERITA_DIR + filename for filename in TEST_BERITA_FILE_NAMES]

## Function Definitions

In [8]:
def show_world_cloud(all_sentence):

    wordcloud = WordCloud(width = 800, height = 800, 
                    background_color ='white',
                    min_font_size = 10).generate(all_sentences) 

    # plot the WordCloud image                        
    plt.figure(figsize = (8, 8), facecolor = None) 
    plt.imshow(wordcloud) 
    plt.axis("off") 
    plt.tight_layout(pad = 0) 
    
    plt.show() 

In [9]:
class FeatureAnnotator:
    word_dict = {}
    
    def __init__(self, params):
        self.nlp = stanza.Pipeline(lang="id",use_gpu=True,verbose=False, tokenize_pretokenized=True)
        factory = StemmerFactory()
        self.stemmer = factory.create_stemmer()
        self.ner = get_entities
        self.params = params
        self.pos_tagger = nltk.tag.CRFTagger()
        self.pos_tagger.set_model_file('pretrained/pos_tagger/all_indo_man_tag_corpus_model.crf.tagger')
        
    def annotate(self, sentence):
        annotation = defaultdict(list)
        sentence = sentence.translate(str.maketrans('', '', string.punctuation))
        doc = self.nlp(sentence)
        
        annotation['ner_tags'] = self.ner(sentence)
        
        word_dict = defaultdict(int)
        
        for sent in doc.sentences:
            for idx, word in enumerate(sent.words):
                annotation['tokens'].append(word.text)
                stemmed_word = self.stemmer.stem(word.text)                
                if (annotation['ner_tags'][idx] in ['PER', 'ORG']):
                    stemmed_word = word.text.lower()
                word_dict[stemmed_word] += 1
                annotation['lemmas'].append(stemmed_word+'_{}'.format(word_dict[stemmed_word]))
                annotation['pos_tags'].append(word.upos)
#                 annotation['dependency'].append(dict(relation=word.dependency_relation, head=word.governor))
                annotation['dependency'].append(dict(relation=word.deprel, head=word.head))
        
        if self.params['pos_tagger'] == 'nltk':
            annotation['pos_tags'] = [tag[1] for tag in self.pos_tagger.tag(annotation['tokens'])]
        else:
            logger.warning("No Pos Tagger set, using default pos tag from stanfordnlp pipeline")
            
        return annotation

# Preprocess

### Features

In [10]:
def dump_amr_features(amr, annotation, f):
    amr.tokens = annotation['tokens']
    amr.lemmas = annotation['lemmas']
    amr.pos_tags = annotation['pos_tags']
    amr.ner_tags = annotation['ner_tags']
    AMRIO.dump([amr], f)

def create_dataset(annotation, sentence_id):
    edge_labels_dataset = {
        'sentence_id': [],
        'parent': [], 
        'parent_position': [], 
        'child': [], 
        'child_position': [], 
        'is_root' : [],
        'sequence': [],
        'parent_ner': [],
        'child_ner': [],
        'parent_pos': [],
        'dependency_role': [],
        'child_pos': []
    }

    for idx, dependency in enumerate(annotation['dependency']):
        head = dependency['head']
        if (head != 0):
            head_idx = head-1
            edge_labels_dataset['sentence_id'].append(sentence_id)
            edge_labels_dataset['sequence'].append(idx+1)
            edge_labels_dataset['parent'].append(annotation['lemmas'][head_idx])
            edge_labels_dataset['parent_position'].append(head_idx)
            edge_labels_dataset['child'].append(annotation['lemmas'][idx])
            edge_labels_dataset['child_position'].append(annotation['lemmas'].index(annotation['lemmas'][idx]))
            edge_labels_dataset['is_root'].append(1 if annotation['dependency'][head_idx]['head'] == 0 else 0)
            edge_labels_dataset['parent_ner'].append(annotation['ner_tags'][head_idx])
            edge_labels_dataset['child_ner'].append(annotation['ner_tags'][idx])
            edge_labels_dataset['parent_pos'].append(annotation['pos_tags'][head_idx])
            edge_labels_dataset['child_pos'].append(annotation['pos_tags'][idx])
            edge_labels_dataset['dependency_role'].append(dependency['relation'])
            
    return edge_labels_dataset

def create_dependency_parser_feature_df(filepath, params):
    sentence_dataset = []
    word_dict = defaultdict(int)
    amrs = []
    sentence_ids = []
    all_sentences = ''
    annotator = FeatureAnnotator(params)
    
    with open(filepath + '.features', 'w', encoding='utf-8') as f:
        for i, amr in enumerate(AMRIO.read(filepath), 1):
            if i % 100 == 0:
                print('{} processed.'.format(i))

            for word in nltk.tokenize.word_tokenize(amr.sentence):
                word_dict[word.lower()] += 1
                all_sentences += word.lower() + ' '

            annotation = annotator.annotate(amr.sentence)
            dump_amr_features(amr, annotation, f)
            sentence_data = create_dataset(annotation, i)
            sentence_dataset.append(sentence_data)
            sentence_ids.append(i)
            amrs.append(amr)

    dataset_dict = {
        'sentence_id': sum([sum([sentence_data['sentence_id'] for sentence_data in sentence_dataset], [])],[]),
        'sequence': sum([sum([sentence_data['sequence'] for sentence_data in sentence_dataset], [])],[]),
        'parent': sum([sum([sentence_data['parent'] for sentence_data in sentence_dataset], [])],[]),
        'parent_position': sum([sum([sentence_data['parent_position'] for sentence_data in sentence_dataset], [])],[]), 
        'child': sum([sum([sentence_data['child'] for sentence_data in sentence_dataset], [])],[]), 
        'child_position': sum([sum([sentence_data['child_position'] for sentence_data in sentence_dataset], [])],[]), 
        'is_root' : sum([sum([sentence_data['is_root'] for sentence_data in sentence_dataset], [])],[]),
        'parent_ner': sum([sum([sentence_data['parent_ner'] for sentence_data in sentence_dataset], [])],[]),
        'child_ner': sum([sum([sentence_data['child_ner'] for sentence_data in sentence_dataset], [])],[]),
        'parent_pos': sum([sum([sentence_data['parent_pos'] for sentence_data in sentence_dataset], [])],[]),
        'dependency_role': sum([sum([sentence_data['dependency_role'] for sentence_data in sentence_dataset], [])],[]),
        'child_pos': sum([sum([sentence_data['child_pos'] for sentence_data in sentence_dataset], [])],[])
    }

    amr_dict = {
        'sentence_id': sentence_ids,
        'amr': [str(amr.graph) for amr in amrs]
    }

    sentence_dataset = pd.DataFrame(dataset_dict)
    amr_df = pd.DataFrame(amr_dict)
    
    return sentence_dataset, amr_df, all_sentences, amrs

def filter_dataset(data, mode='all'):
    if ('preposition' in mode):
        data = data[data['child_pos'] != 'IN']
    
    if ('determiner' in mode):
        data = data[data['dependency_role'] != 'det']
        
    if ('case' in mode):
        data = data[data['dependency_role'] != 'case']
    
    if ('sc' in mode):
        data = data[data['child_pos'] != 'SC']
    
    return data

### Pair Label Generation

In [11]:
def create_pair_labels(train_data):
    pairs = []
    for sentence_id, data in enumerate(train_data):
        tokens =  data.fields['tgt_tokens'].tokens[1:-1]
        head_indices = data.fields['head_indices'].labels
        labels = data.fields['head_tags'].labels
        
        for i, index in enumerate(head_indices):
            if (index != 0):
                parent = tokens[index-1]
                child = tokens[i]
                label = labels[i]
                pairs.append((sentence_id+1, str(parent), str(child),str(label)))
                
    return pairs

def find_label(data, pair_labels):
    for pair_label in pair_labels:
        sentence_id, label_parent, label_child, label = pair_label
        if (data['sentence_id'] == sentence_id and data['parent'] == label_parent and data['child']==label_child):
            return label
    return "unk"
    
def create_labels(pair_labels, sentence_dataset, change_unk_to_mod=False, with_flipped=False):
    labels = []
    unk_pairs = []
    pair_labels_dict = dict()
    not_found_count = 0
    found_count = 0
    
    for idx, data in sentence_dataset.iterrows():
        found = False
        flipped_found = False
        flipped_data = None
        
        if (data['sentence_id'] not in pair_labels_dict):
            pair_labels_dict[data['sentence_id']] = [pair_label for pair_label in pair_labels if pair_label[0] == data['sentence_id']]    
        cur_pair_labels = pair_labels_dict[data['sentence_id']]
            
        for pair_label in cur_pair_labels:
            _, label_parent, label_child, label = pair_label
            if (data['parent'].split('_')[0] == label_parent and data['child'].split('_')[0]==label_child):
                labels.append(label)
                found = True
                break

            elif with_flipped and (data['parent'].split('_')[0] == label_child and data['child'].split('_')[0]==label_parent):
                flipped_found = True
                flipped_data = (data['child'], data['parent'], label)
        
        if flipped_found and not found:
            sentence_dataset.loc[idx,'parent'] = flipped_data[0]
            sentence_dataset.loc[idx,'child'] = flipped_data[1]
            labels.append(flipped_data[2])
        
        if not flipped_found and not found:
            not_found_count += 1
            dependency_role = data['dependency_role'] 
            if (dependency_role in ['nsubj']):
                labels.append("ARG0")
            elif (dependency_role in ['obl', 'obj']):
                labels.append('ARG1')
            elif (dependency_role in ['case'] and data['child'] in ['di', 'ke', 'dari']):
                labels.append('location')
            else:
                if change_unk_to_mod:
                    labels.append("mod")
                else: 
                    labels.append("unk")

            unk_pairs.append((data['parent'], data['child'], dependency_role, data['sentence_id']))
        else:
            found_count += 1
    
    logger.info("Found pairs: ", found_count)
    logger.info("AMR Pair labels: ", len(pair_labels))
    logger.info("Dependency Parser Pair labels: ", len(sentence_dataset))
    logger.info("Not found: ", not_found_count)
    
    precision = found_count/len(sentence_dataset)
    recall = found_count/len(pair_labels)
    logger.info("Pair Precision: ", precision)
    logger.info("Pair Recall: ", recall)
    logger.info("Pair F1: ", 2*((precision*recall)/(precision+recall)))
    
    return labels, unk_pairs

#### Dev data

In [12]:
%ls pretrained

[0m[34;42mner_tagger[0m/  [34;42mpos_tagger[0m/  [34;42mword2vec[0m/


In [14]:
sentence_dataset, amr_df, all_sentences, amrs = create_dependency_parser_feature_df(AMR_FILEPATH, ANNOTATOR_PARAMS)
test_sentence_dataset, test_amr_df, test_all_sentences, test_amrs = create_dependency_parser_feature_df(AMR_TEST_FILEPATH, ANNOTATOR_PARAMS)

100 processed.
200 processed.
300 processed.
400 processed.
500 processed.
600 processed.
700 processed.
100 processed.
200 processed.
300 processed.


### [Checkpoint 1 ] Save Dependency Features & AMR

In [38]:
sentence_dataset.to_csv(DEPENDENCY_FEATURES_FILEPATH, index=None)
test_sentence_dataset.to_csv(TEST_DEPENDENCY_FEATURES_FILEPATH, index=None)
amr_df.to_csv(AMR_DF_FILEPATH, index=None)
test_amr_df.to_csv(TEST_AMR_DF_FILEPATH, index=None)

### Load Dependency Features

In [39]:
sentence_dataset = pd.read_csv(DEPENDENCY_FEATURES_FILEPATH)
test_sentence_dataset = pd.read_csv(TEST_DEPENDENCY_FEATURES_FILEPATH)

## Filtering

In [40]:
# FILTER_TYPE = []
filtered_sentence_dataset = filter_dataset(sentence_dataset, mode=FILTER_TYPE)
filtered_test_sentence_dataset = filter_dataset(test_sentence_dataset, mode=FILTER_TYPE)

### Save Filtered Dependency Features Data

In [41]:
# Do Filter
filtered_sentence_dataset.to_csv(FILTERED_DEPENDENCY_FEATURES_FILEPATH, index=None)
filtered_test_sentence_dataset.to_csv(FILTERED_TEST_DEPENDENCY_FEATURES_FILEPATH, index=None)

### [Checkpoint 2] Load Filtered Dependency Parser Features dataset

In [42]:
filtered_sentence_dataset = pd.read_csv(FILTERED_DEPENDENCY_FEATURES_FILEPATH)
filtered_test_sentence_dataset = pd.read_csv(FILTERED_TEST_DEPENDENCY_FEATURES_FILEPATH)

## Create Label Features

In [43]:
data_params = dict(
    data_dir="data/raw",
    train_data="amr_simple.txt.features",
    test_data="amr_simple_test.txt.features",
    dev_data="amr_simple.txt.features",
    data_type="AMR"
)

amr_dataset = dataset_from_params(data_params)
amr_data = amr_dataset['train']

Building train datasets ...
False


0it [00:00, ?it/s][2021-07-13 15:05:28,234 INFO] Reading instances from lines in file at: data/raw\amr_simple.txt.features
700it [00:00, 1111.10it/s]


Building dev datasets ...
False


0it [00:00, ?it/s][2021-07-13 15:05:28,878 INFO] Reading instances from lines in file at: data/raw\amr_simple.txt.features
700it [00:01, 465.35it/s]


Building test datasets ...
False


0it [00:00, ?it/s][2021-07-13 15:05:30,399 INFO] Reading instances from lines in file at: data/raw\amr_simple_test.txt.features
306it [00:00, 687.64it/s]


#### Create Pair Labels

In [44]:
pair_labels = create_pair_labels(amr_data)
labels_no_unk, unk_pairs = create_labels(pair_labels,filtered_sentence_dataset, change_unk_to_mod=True)
dependency_features_dict = filtered_sentence_dataset.to_dict()
dependency_features_dict['label'] = pd.Series(labels_no_unk)
dependency_features_df = pd.DataFrame(dependency_features_dict)
dependency_features_df.to_csv(LABELED_DEPENDENCY_FEATURES_FILEPATH_NO_UNK, index=None)

In [45]:
display(dependency_features_df['label'].value_counts())

mod         693
ARG0        611
ARG1        582
name        216
location    120
time         58
Name: label, dtype: int64

In [46]:
dependency_features_df.tail()

Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
2275,699,3,saji_1,1,makan_1,2,1,O,O,VB,obj,NN,ARG1
2276,700,1,laku_1,3,hari_1,0,1,O,O,VB,obl,NN,time
2277,700,3,laku_1,3,murid_1,2,1,O,O,VB,nsubj,NN,ARG0
2278,700,5,laku_1,3,upacara_1,4,1,O,O,VB,obj,NN,ARG1
2279,700,6,upacara_1,4,bendera_1,5,0,O,O,NN,compound,JJ,mod


### Analyze undetected pairs

In [63]:
start_idx = 350
for unk_pair in unk_pairs[start_idx:start_idx+50]:
    print(amrs[unk_pair[-1]-1].sentence)
    print("Label salah: ", unk_pair)
    print("Hasil Dependency Parser: ")
    display(dependency_features_df[dependency_features_df.sentence_id == unk_pair[-1]])
    
    print("Gold AMR:")
    print(amrs[unk_pair[-1]-1].graph)
    print()
    print('----------------------------------------------------------------------------------------------------------------')    

Di Bandung, aku bertemu dengan seorang kawan lama
Label salah:  ('bandung_1', 'di_1', 'case', 256)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
999,256,1,bandung_1,1,di_1,0,0,GPE,O,NNP,case,IN,mod
1000,256,2,temu_1,3,bandung_1,1,1,O,GPE,VB,obl,NNP,location
1001,256,3,temu_1,3,aku_1,2,1,O,O,VB,nsubj,VB,ARG0
1002,256,5,kawan_1,6,dengan_1,4,0,O,O,NN,case,IN,mod
1003,256,6,kawan_1,6,orang_1,5,0,O,O,NN,det,NND,mod
1004,256,7,temu_1,3,kawan_1,6,1,O,O,VB,obl,NN,ARG1
1005,256,8,kawan_1,6,lama_1,7,0,O,O,NN,amod,JJ,mod


Gold AMR:
(t / temu
      :ARG0 (a / aku)
      :ARG1 (o / orang
            :mod (k / kawan
                  :mod (l / lama)))
      :location (b / bandung))

----------------------------------------------------------------------------------------------------------------
Di Bandung, aku bertemu dengan seorang kawan lama
Label salah:  ('kawan_1', 'dengan_1', 'case', 256)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
999,256,1,bandung_1,1,di_1,0,0,GPE,O,NNP,case,IN,mod
1000,256,2,temu_1,3,bandung_1,1,1,O,GPE,VB,obl,NNP,location
1001,256,3,temu_1,3,aku_1,2,1,O,O,VB,nsubj,VB,ARG0
1002,256,5,kawan_1,6,dengan_1,4,0,O,O,NN,case,IN,mod
1003,256,6,kawan_1,6,orang_1,5,0,O,O,NN,det,NND,mod
1004,256,7,temu_1,3,kawan_1,6,1,O,O,VB,obl,NN,ARG1
1005,256,8,kawan_1,6,lama_1,7,0,O,O,NN,amod,JJ,mod


Gold AMR:
(t / temu
      :ARG0 (a / aku)
      :ARG1 (o / orang
            :mod (k / kawan
                  :mod (l / lama)))
      :location (b / bandung))

----------------------------------------------------------------------------------------------------------------
Di Bandung, aku bertemu dengan seorang kawan lama
Label salah:  ('kawan_1', 'orang_1', 'det', 256)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
999,256,1,bandung_1,1,di_1,0,0,GPE,O,NNP,case,IN,mod
1000,256,2,temu_1,3,bandung_1,1,1,O,GPE,VB,obl,NNP,location
1001,256,3,temu_1,3,aku_1,2,1,O,O,VB,nsubj,VB,ARG0
1002,256,5,kawan_1,6,dengan_1,4,0,O,O,NN,case,IN,mod
1003,256,6,kawan_1,6,orang_1,5,0,O,O,NN,det,NND,mod
1004,256,7,temu_1,3,kawan_1,6,1,O,O,VB,obl,NN,ARG1
1005,256,8,kawan_1,6,lama_1,7,0,O,O,NN,amod,JJ,mod


Gold AMR:
(t / temu
      :ARG0 (a / aku)
      :ARG1 (o / orang
            :mod (k / kawan
                  :mod (l / lama)))
      :location (b / bandung))

----------------------------------------------------------------------------------------------------------------
Di Bandung, aku bertemu dengan seorang kawan lama
Label salah:  ('temu_1', 'kawan_1', 'obl', 256)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
999,256,1,bandung_1,1,di_1,0,0,GPE,O,NNP,case,IN,mod
1000,256,2,temu_1,3,bandung_1,1,1,O,GPE,VB,obl,NNP,location
1001,256,3,temu_1,3,aku_1,2,1,O,O,VB,nsubj,VB,ARG0
1002,256,5,kawan_1,6,dengan_1,4,0,O,O,NN,case,IN,mod
1003,256,6,kawan_1,6,orang_1,5,0,O,O,NN,det,NND,mod
1004,256,7,temu_1,3,kawan_1,6,1,O,O,VB,obl,NN,ARG1
1005,256,8,kawan_1,6,lama_1,7,0,O,O,NN,amod,JJ,mod


Gold AMR:
(t / temu
      :ARG0 (a / aku)
      :ARG1 (o / orang
            :mod (k / kawan
                  :mod (l / lama)))
      :location (b / bandung))

----------------------------------------------------------------------------------------------------------------
Sepulang dari kantor, aku mampir ke rumah makan
Label salah:  ('kantor_1', 'dari_1', 'case', 257)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1006,257,1,mampir_1,4,pulang_1,0,1,O,O,NN,nsubj,NN,time
1007,257,2,kantor_1,2,dari_1,1,0,O,O,NN,case,IN,mod
1008,257,3,pulang_1,0,kantor_1,2,0,O,O,NN,nmod,NN,mod
1009,257,4,mampir_1,4,aku_1,3,1,O,O,NN,nsubj,NN,ARG0
1010,257,6,rumah_1,6,ke_1,5,0,O,O,NN,case,IN,mod
1011,257,7,mampir_1,4,rumah_1,6,1,O,O,NN,obl,NN,location
1012,257,8,rumah_1,6,makan_1,7,0,O,O,NN,compound,VB,mod


Gold AMR:
(m1 / mampir
      :ARG0 (a / aku)
      :location (r / rumah
            :mod (m2 / makan))
      :time (p / pulang
            :mod (d / dari
                  :mod (k / kantor))))

----------------------------------------------------------------------------------------------------------------
Sepulang dari kantor, aku mampir ke rumah makan
Label salah:  ('pulang_1', 'kantor_1', 'nmod', 257)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1006,257,1,mampir_1,4,pulang_1,0,1,O,O,NN,nsubj,NN,time
1007,257,2,kantor_1,2,dari_1,1,0,O,O,NN,case,IN,mod
1008,257,3,pulang_1,0,kantor_1,2,0,O,O,NN,nmod,NN,mod
1009,257,4,mampir_1,4,aku_1,3,1,O,O,NN,nsubj,NN,ARG0
1010,257,6,rumah_1,6,ke_1,5,0,O,O,NN,case,IN,mod
1011,257,7,mampir_1,4,rumah_1,6,1,O,O,NN,obl,NN,location
1012,257,8,rumah_1,6,makan_1,7,0,O,O,NN,compound,VB,mod


Gold AMR:
(m1 / mampir
      :ARG0 (a / aku)
      :location (r / rumah
            :mod (m2 / makan))
      :time (p / pulang
            :mod (d / dari
                  :mod (k / kantor))))

----------------------------------------------------------------------------------------------------------------
Sepulang dari kantor, aku mampir ke rumah makan
Label salah:  ('rumah_1', 'ke_1', 'case', 257)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1006,257,1,mampir_1,4,pulang_1,0,1,O,O,NN,nsubj,NN,time
1007,257,2,kantor_1,2,dari_1,1,0,O,O,NN,case,IN,mod
1008,257,3,pulang_1,0,kantor_1,2,0,O,O,NN,nmod,NN,mod
1009,257,4,mampir_1,4,aku_1,3,1,O,O,NN,nsubj,NN,ARG0
1010,257,6,rumah_1,6,ke_1,5,0,O,O,NN,case,IN,mod
1011,257,7,mampir_1,4,rumah_1,6,1,O,O,NN,obl,NN,location
1012,257,8,rumah_1,6,makan_1,7,0,O,O,NN,compound,VB,mod


Gold AMR:
(m1 / mampir
      :ARG0 (a / aku)
      :location (r / rumah
            :mod (m2 / makan))
      :time (p / pulang
            :mod (d / dari
                  :mod (k / kantor))))

----------------------------------------------------------------------------------------------------------------
Kami naik kereta api dari Stasiun Gambir
Label salah:  ('stasiun_1', 'dari_1', 'case', 258)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1013,258,1,naik_1,1,kami_1,0,1,O,O,VB,nsubj,PRP,ARG0
1014,258,3,naik_1,1,kereta_1,2,1,O,O,VB,obj,NN,ARG1
1015,258,4,kereta_1,2,api_1,3,0,O,O,NN,compound,NN,mod
1016,258,5,stasiun_1,5,dari_1,4,0,LOC,O,NNP,case,IN,mod
1017,258,6,naik_1,1,stasiun_1,5,1,O,LOC,VB,obl,NNP,location
1018,258,7,stasiun_1,5,gambir_1,6,0,LOC,O,NNP,flat,NNP,name


Gold AMR:
(n / naik
      :ARG0 (k1 / kami)
      :ARG1 (k2 / kereta
            :mod (a / api))
      :location (s / stasiun
            :name (g / gambir)))

----------------------------------------------------------------------------------------------------------------
Dodol merupakan makanan khas dari Garut
Label salah:  ('garut_1', 'dari_1', 'case', 259)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1019,259,1,rupa_1,1,dodol_1,0,1,O,PER,VB,nsubj,NNP,ARG0
1020,259,3,rupa_1,1,makan_1,2,1,O,O,VB,obj,NN,ARG1
1021,259,4,makan_1,2,khas_1,3,0,O,O,NN,amod,VB,mod
1022,259,5,garut_1,5,dari_1,4,0,GPE,O,NNP,case,IN,mod
1023,259,6,makan_1,2,garut_1,5,0,O,GPE,NN,nmod,NNP,mod


Gold AMR:
(r / rupa
      :ARG0 (d / dodol)
      :ARG1 (m / makan
            :mod (k / khas
                  :location (g / garut))))

----------------------------------------------------------------------------------------------------------------
Dodol merupakan makanan khas dari Garut
Label salah:  ('makan_1', 'garut_1', 'nmod', 259)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1019,259,1,rupa_1,1,dodol_1,0,1,O,PER,VB,nsubj,NNP,ARG0
1020,259,3,rupa_1,1,makan_1,2,1,O,O,VB,obj,NN,ARG1
1021,259,4,makan_1,2,khas_1,3,0,O,O,NN,amod,VB,mod
1022,259,5,garut_1,5,dari_1,4,0,GPE,O,NNP,case,IN,mod
1023,259,6,makan_1,2,garut_1,5,0,O,GPE,NN,nmod,NNP,mod


Gold AMR:
(r / rupa
      :ARG0 (d / dodol)
      :ARG1 (m / makan
            :mod (k / khas
                  :location (g / garut))))

----------------------------------------------------------------------------------------------------------------
Banyak wisatawan mancanegara yang berwisata ke Pulau Bali
Label salah:  ('wisatawan_1', 'banyak_1', 'det', 260)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1024,260,1,wisatawan_1,1,banyak_1,0,1,O,O,NN,det,CD,mod
1025,260,3,wisatawan_1,1,mancanegara_1,2,1,O,O,NN,amod,NN,mod
1026,260,5,wisatawan_1,1,wisata_1,4,1,O,O,NN,acl,VB,mod
1027,260,6,pulau_1,6,ke_1,5,0,LOC,O,NNP,case,IN,mod
1028,260,7,wisata_1,4,pulau_1,6,0,O,LOC,VB,obl,NNP,location
1029,260,8,pulau_1,6,bal_1,7,0,LOC,O,NNP,flat,NNP,mod


Gold AMR:
(w1 / wisata
      :ARG0 (b1 / banyak
            :mod (w2 / wisata
                  :mod (m / mancanegara)))
      :location (p / pulau
            :mod (b2 / bali)))

----------------------------------------------------------------------------------------------------------------
Banyak wisatawan mancanegara yang berwisata ke Pulau Bali
Label salah:  ('wisatawan_1', 'mancanegara_1', 'amod', 260)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1024,260,1,wisatawan_1,1,banyak_1,0,1,O,O,NN,det,CD,mod
1025,260,3,wisatawan_1,1,mancanegara_1,2,1,O,O,NN,amod,NN,mod
1026,260,5,wisatawan_1,1,wisata_1,4,1,O,O,NN,acl,VB,mod
1027,260,6,pulau_1,6,ke_1,5,0,LOC,O,NNP,case,IN,mod
1028,260,7,wisata_1,4,pulau_1,6,0,O,LOC,VB,obl,NNP,location
1029,260,8,pulau_1,6,bal_1,7,0,LOC,O,NNP,flat,NNP,mod


Gold AMR:
(w1 / wisata
      :ARG0 (b1 / banyak
            :mod (w2 / wisata
                  :mod (m / mancanegara)))
      :location (p / pulau
            :mod (b2 / bali)))

----------------------------------------------------------------------------------------------------------------
Banyak wisatawan mancanegara yang berwisata ke Pulau Bali
Label salah:  ('wisatawan_1', 'wisata_1', 'acl', 260)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1024,260,1,wisatawan_1,1,banyak_1,0,1,O,O,NN,det,CD,mod
1025,260,3,wisatawan_1,1,mancanegara_1,2,1,O,O,NN,amod,NN,mod
1026,260,5,wisatawan_1,1,wisata_1,4,1,O,O,NN,acl,VB,mod
1027,260,6,pulau_1,6,ke_1,5,0,LOC,O,NNP,case,IN,mod
1028,260,7,wisata_1,4,pulau_1,6,0,O,LOC,VB,obl,NNP,location
1029,260,8,pulau_1,6,bal_1,7,0,LOC,O,NNP,flat,NNP,mod


Gold AMR:
(w1 / wisata
      :ARG0 (b1 / banyak
            :mod (w2 / wisata
                  :mod (m / mancanegara)))
      :location (p / pulau
            :mod (b2 / bali)))

----------------------------------------------------------------------------------------------------------------
Banyak wisatawan mancanegara yang berwisata ke Pulau Bali
Label salah:  ('pulau_1', 'ke_1', 'case', 260)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1024,260,1,wisatawan_1,1,banyak_1,0,1,O,O,NN,det,CD,mod
1025,260,3,wisatawan_1,1,mancanegara_1,2,1,O,O,NN,amod,NN,mod
1026,260,5,wisatawan_1,1,wisata_1,4,1,O,O,NN,acl,VB,mod
1027,260,6,pulau_1,6,ke_1,5,0,LOC,O,NNP,case,IN,mod
1028,260,7,wisata_1,4,pulau_1,6,0,O,LOC,VB,obl,NNP,location
1029,260,8,pulau_1,6,bal_1,7,0,LOC,O,NNP,flat,NNP,mod


Gold AMR:
(w1 / wisata
      :ARG0 (b1 / banyak
            :mod (w2 / wisata
                  :mod (m / mancanegara)))
      :location (p / pulau
            :mod (b2 / bali)))

----------------------------------------------------------------------------------------------------------------
Banyak wisatawan mancanegara yang berwisata ke Pulau Bali
Label salah:  ('pulau_1', 'bal_1', 'flat', 260)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1024,260,1,wisatawan_1,1,banyak_1,0,1,O,O,NN,det,CD,mod
1025,260,3,wisatawan_1,1,mancanegara_1,2,1,O,O,NN,amod,NN,mod
1026,260,5,wisatawan_1,1,wisata_1,4,1,O,O,NN,acl,VB,mod
1027,260,6,pulau_1,6,ke_1,5,0,LOC,O,NNP,case,IN,mod
1028,260,7,wisata_1,4,pulau_1,6,0,O,LOC,VB,obl,NNP,location
1029,260,8,pulau_1,6,bal_1,7,0,LOC,O,NNP,flat,NNP,mod


Gold AMR:
(w1 / wisata
      :ARG0 (b1 / banyak
            :mod (w2 / wisata
                  :mod (m / mancanegara)))
      :location (p / pulau
            :mod (b2 / bali)))

----------------------------------------------------------------------------------------------------------------
Di rumahnya terdapat banyak koleksi mainan zaman dulu
Label salah:  ('rumah_1', 'di_1', 'case', 261)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1030,261,1,rumah_1,1,di_1,0,0,O,O,RB,case,IN,mod
1031,261,2,dapat_1,2,rumah_1,1,1,O,O,VB,obl,RB,location
1032,261,4,koleksi_1,4,banyak_1,3,0,O,O,NN,advmod,CD,mod
1033,261,5,dapat_1,2,koleksi_1,4,1,O,O,VB,obj,NN,ARG1
1034,261,6,koleksi_1,4,main_1,5,0,O,O,NN,compound,NN,mod
1035,261,7,main_1,5,zaman_1,6,0,O,O,NN,compound,NN,mod
1036,261,8,dapat_1,2,dulu_1,7,1,O,O,VB,obl,JJ,ARG1


Gold AMR:
(d1 / dapat
      :ARG1 (b / banyak
            :mod (k / koleksi
                  :mod (m / main
                        :mod (z / zaman
                              :mod (d2 / dulu)))))
      :location (r / rumah))

----------------------------------------------------------------------------------------------------------------
Di rumahnya terdapat banyak koleksi mainan zaman dulu
Label salah:  ('koleksi_1', 'banyak_1', 'advmod', 261)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1030,261,1,rumah_1,1,di_1,0,0,O,O,RB,case,IN,mod
1031,261,2,dapat_1,2,rumah_1,1,1,O,O,VB,obl,RB,location
1032,261,4,koleksi_1,4,banyak_1,3,0,O,O,NN,advmod,CD,mod
1033,261,5,dapat_1,2,koleksi_1,4,1,O,O,VB,obj,NN,ARG1
1034,261,6,koleksi_1,4,main_1,5,0,O,O,NN,compound,NN,mod
1035,261,7,main_1,5,zaman_1,6,0,O,O,NN,compound,NN,mod
1036,261,8,dapat_1,2,dulu_1,7,1,O,O,VB,obl,JJ,ARG1


Gold AMR:
(d1 / dapat
      :ARG1 (b / banyak
            :mod (k / koleksi
                  :mod (m / main
                        :mod (z / zaman
                              :mod (d2 / dulu)))))
      :location (r / rumah))

----------------------------------------------------------------------------------------------------------------
Di rumahnya terdapat banyak koleksi mainan zaman dulu
Label salah:  ('dapat_1', 'koleksi_1', 'obj', 261)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1030,261,1,rumah_1,1,di_1,0,0,O,O,RB,case,IN,mod
1031,261,2,dapat_1,2,rumah_1,1,1,O,O,VB,obl,RB,location
1032,261,4,koleksi_1,4,banyak_1,3,0,O,O,NN,advmod,CD,mod
1033,261,5,dapat_1,2,koleksi_1,4,1,O,O,VB,obj,NN,ARG1
1034,261,6,koleksi_1,4,main_1,5,0,O,O,NN,compound,NN,mod
1035,261,7,main_1,5,zaman_1,6,0,O,O,NN,compound,NN,mod
1036,261,8,dapat_1,2,dulu_1,7,1,O,O,VB,obl,JJ,ARG1


Gold AMR:
(d1 / dapat
      :ARG1 (b / banyak
            :mod (k / koleksi
                  :mod (m / main
                        :mod (z / zaman
                              :mod (d2 / dulu)))))
      :location (r / rumah))

----------------------------------------------------------------------------------------------------------------
Di rumahnya terdapat banyak koleksi mainan zaman dulu
Label salah:  ('dapat_1', 'dulu_1', 'obl', 261)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1030,261,1,rumah_1,1,di_1,0,0,O,O,RB,case,IN,mod
1031,261,2,dapat_1,2,rumah_1,1,1,O,O,VB,obl,RB,location
1032,261,4,koleksi_1,4,banyak_1,3,0,O,O,NN,advmod,CD,mod
1033,261,5,dapat_1,2,koleksi_1,4,1,O,O,VB,obj,NN,ARG1
1034,261,6,koleksi_1,4,main_1,5,0,O,O,NN,compound,NN,mod
1035,261,7,main_1,5,zaman_1,6,0,O,O,NN,compound,NN,mod
1036,261,8,dapat_1,2,dulu_1,7,1,O,O,VB,obl,JJ,ARG1


Gold AMR:
(d1 / dapat
      :ARG1 (b / banyak
            :mod (k / koleksi
                  :mod (m / main
                        :mod (z / zaman
                              :mod (d2 / dulu)))))
      :location (r / rumah))

----------------------------------------------------------------------------------------------------------------
Profesor Manaf merupakan jebolan dari Universitas Connecticut
Label salah:  ('universitas_1', 'dari_1', 'case', 262)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1037,262,1,rupa_1,2,profesor_1,0,1,O,O,VB,nsubj,NNP,ARG0
1038,262,2,profesor_1,0,manaf_1,1,0,O,PER,NNP,flat,NNP,name
1039,262,4,rupa_1,2,jebol_1,3,1,O,O,VB,obj,NN,ARG1
1040,262,5,universitas_1,5,dari_1,4,0,ORG,O,NNP,case,IN,mod
1041,262,6,jebol_1,3,universitas_1,5,0,O,ORG,NN,nmod,NNP,location
1042,262,7,universitas_1,5,connecticut_1,6,0,ORG,LOC,NNP,flat,NNP,location


Gold AMR:
(r / rupa
      :ARG0 (p / profesor
            :name (m / manaf))
      :ARG1 (j / jebol
            :location (u / universitas
                  :location (c / connecticut))))

----------------------------------------------------------------------------------------------------------------
Kita mencetak gol ke gawang lawan
Label salah:  ('gawang_1', 'ke_1', 'case', 269)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1063,269,1,cetak_1,1,kita_1,0,1,O,O,VB,nsubj,PRP,ARG0
1064,269,3,cetak_1,1,gol_1,2,1,O,O,VB,obj,NN,ARG1
1065,269,4,gawang_1,4,ke_1,3,0,O,O,NN,case,IN,mod
1066,269,5,gol_1,2,gawang_1,4,0,O,O,NN,nmod,NN,mod
1067,269,6,gawang_1,4,lawan_1,5,0,O,O,NN,compound,NN,mod


Gold AMR:
(c / cetak
      :ARG0 (k / kita)
      :ARG1 (g1 / gol)
      :location (g2 / gawang
            :mod (l / lawan)))

----------------------------------------------------------------------------------------------------------------
Kita mencetak gol ke gawang lawan
Label salah:  ('gol_1', 'gawang_1', 'nmod', 269)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1063,269,1,cetak_1,1,kita_1,0,1,O,O,VB,nsubj,PRP,ARG0
1064,269,3,cetak_1,1,gol_1,2,1,O,O,VB,obj,NN,ARG1
1065,269,4,gawang_1,4,ke_1,3,0,O,O,NN,case,IN,mod
1066,269,5,gol_1,2,gawang_1,4,0,O,O,NN,nmod,NN,mod
1067,269,6,gawang_1,4,lawan_1,5,0,O,O,NN,compound,NN,mod


Gold AMR:
(c / cetak
      :ARG0 (k / kita)
      :ARG1 (g1 / gol)
      :location (g2 / gawang
            :mod (l / lawan)))

----------------------------------------------------------------------------------------------------------------
gol dicetak kita ke gawang lawan
Label salah:  ('gawang_1', 'ke_1', 'case', 270)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1068,270,1,cetak_1,1,gol_1,0,1,O,O,NN,nsubj:pass,NN,ARG1
1069,270,3,cetak_1,1,kita_1,2,1,O,O,NN,obj,PRP,ARG0
1070,270,4,gawang_1,4,ke_1,3,0,O,O,NN,case,IN,mod
1071,270,5,cetak_1,1,gawang_1,4,1,O,O,NN,obl,NN,location
1072,270,6,gawang_1,4,lawan_1,5,0,O,O,NN,compound,NN,mod


Gold AMR:
(c / cetak
      :ARG0 (k / kita)
      :ARG1 (g1 / gol)
      :location (g2 / gawang
            :mod (l / lawan)))

----------------------------------------------------------------------------------------------------------------
Mbak Shinta memintal benang wol
Label salah:  ('mintal_1', 'mbak_1', 'nsubj', 271)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1073,271,1,mintal_1,2,mbak_1,0,1,O,ORG,NN,nsubj,NNP,ARG0
1074,271,2,mbak_1,0,shinta_1,1,0,ORG,PER,NNP,flat,NNP,name
1075,271,4,mintal_1,2,benang_1,3,1,O,O,NN,obj,NN,ARG1
1076,271,5,benang_1,3,wol_1,4,0,O,O,NN,compound,JJ,mod


Gold AMR:
(p / pintal
      :ARG0 (m / mbak
            :name (s / shinta))
      :ARG1 (b / benang
            :mod (w / wol)))

----------------------------------------------------------------------------------------------------------------
Mbak Shinta memintal benang wol
Label salah:  ('mintal_1', 'benang_1', 'obj', 271)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1073,271,1,mintal_1,2,mbak_1,0,1,O,ORG,NN,nsubj,NNP,ARG0
1074,271,2,mbak_1,0,shinta_1,1,0,ORG,PER,NNP,flat,NNP,name
1075,271,4,mintal_1,2,benang_1,3,1,O,O,NN,obj,NN,ARG1
1076,271,5,benang_1,3,wol_1,4,0,O,O,NN,compound,JJ,mod


Gold AMR:
(p / pintal
      :ARG0 (m / mbak
            :name (s / shinta))
      :ARG1 (b / benang
            :mod (w / wol)))

----------------------------------------------------------------------------------------------------------------
Saya mengetik cerita
Label salah:  ('etik_1', 'saya_1', 'nsubj', 272)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1077,272,1,etik_1,1,saya_1,0,1,O,O,NN,nsubj,PRP,ARG0
1078,272,3,etik_1,1,cerita_1,2,1,O,O,NN,obj,PRP,ARG1


Gold AMR:
(k / ketik
      :ARG0 (s / saya)
      :ARG1 (c / cerita))

----------------------------------------------------------------------------------------------------------------
Saya mengetik cerita
Label salah:  ('etik_1', 'cerita_1', 'obj', 272)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1077,272,1,etik_1,1,saya_1,0,1,O,O,NN,nsubj,PRP,ARG0
1078,272,3,etik_1,1,cerita_1,2,1,O,O,NN,obj,PRP,ARG1


Gold AMR:
(k / ketik
      :ARG0 (s / saya)
      :ARG1 (c / cerita))

----------------------------------------------------------------------------------------------------------------
Balonku ada lima
Label salah:  ('ada_1', 'balonku_1', 'nsubj', 274)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1081,274,1,ada_1,1,balonku_1,0,1,O,PER,VB,nsubj,NNP,ARG0
1082,274,3,ada_1,1,lima_1,2,1,O,CRD,VB,nummod,CD,mod


Gold AMR:
(a / ada
      :ARG1 (b / balon)
      :mod (l / lima))

----------------------------------------------------------------------------------------------------------------
Aku ada di depan gedung
Label salah:  ('depan_1', 'di_1', 'case', 275)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1083,275,1,ada_1,1,aku_1,0,1,O,O,VB,nsubj,NNP,ARG0
1084,275,3,depan_1,3,di_1,2,0,O,O,NN,case,IN,mod
1085,275,4,ada_1,1,depan_1,3,1,O,O,VB,obl,NN,location
1086,275,5,depan_1,3,gedung_1,4,0,O,O,NN,nmod,NN,mod


Gold AMR:
(a1 / ada
      :ARG0 (a2 / aku)
      :location (d / depan
            :mod (g / gedung)))

----------------------------------------------------------------------------------------------------------------
Kita kehausan
Label salah:  ('kita_1', 'haus_1', 'acl', 276)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1087,276,2,kita_1,0,haus_1,1,1,O,O,PRP,acl,NN,mod


Gold AMR:
(h / haus
      :ARG0 (k / kita))

----------------------------------------------------------------------------------------------------------------
Ibu Sukiyem menjual kue di pasar
Label salah:  ('pasar_1', 'di_1', 'case', 279)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1094,279,1,jual_1,2,ibu_1,0,1,O,O,VB,nsubj,NNP,ARG0
1095,279,2,ibu_1,0,sukiyem_1,1,0,O,O,NNP,flat,NNP,name
1096,279,4,jual_1,2,kue_1,3,1,O,O,VB,obj,FW,ARG1
1097,279,5,pasar_1,5,di_1,4,0,O,O,NN,case,IN,mod
1098,279,6,jual_1,2,pasar_1,5,1,O,O,VB,obl,NN,location


Gold AMR:
(j / jual
      :ARG0 (i / ibu
            :name (s / sukiyem))
      :ARG1 (k / kue)
      :location (p / pasar))

----------------------------------------------------------------------------------------------------------------
kue dijual Ibu Sukiyem di pasar
Label salah:  ('pasar_1', 'di_1', 'case', 280)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1099,280,1,jual_1,1,kue_1,0,1,O,O,VB,nsubj:pass,NN,ARG1
1100,280,3,jual_1,1,ibu_1,2,1,O,O,VB,obj,NNP,ARG0
1101,280,4,ibu_1,2,sukiyem_1,3,0,O,LOC,NNP,flat,NNP,name
1102,280,5,pasar_1,5,di_1,4,0,O,O,NN,case,IN,mod
1103,280,6,jual_1,1,pasar_1,5,1,O,O,VB,obl,NN,location


Gold AMR:
(j / jual
      :ARG0 (i / ibu
            :name (s / sukiyem))
      :ARG1 (k / kue)
      :location (p / pasar))

----------------------------------------------------------------------------------------------------------------
Mas Ridho bertanya pada guru
Label salah:  ('guru_1', 'pada_1', 'case', 281)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1104,281,1,tanya_1,2,mas_1,0,1,O,ORG,VB,nsubj,NNP,ARG0
1105,281,2,mas_1,0,ridho_1,1,0,ORG,PER,NNP,flat,NNP,name
1106,281,4,guru_1,4,pada_1,3,0,O,O,NN,case,IN,mod
1107,281,5,tanya_1,2,guru_1,4,1,O,O,VB,obl,NN,ARG1


Gold AMR:
(t / tanya
      :ARG0 (m / mas
            :name (r / ridho))
      :ARG1 (g / guru))

----------------------------------------------------------------------------------------------------------------
Anjing itu menggonggong
Label salah:  ('anjing_1', 'itu_1', 'det', 282)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1108,282,2,anjing_1,0,itu_1,1,1,O,O,NN,det,PR,mod
1109,282,3,anjing_1,0,gonggong_1,2,1,O,O,NN,amod,VB,mod


Gold AMR:
(g / gonggong
      :ARG0 (a / anjing))

----------------------------------------------------------------------------------------------------------------
Anjing itu menggonggong
Label salah:  ('anjing_1', 'gonggong_1', 'amod', 282)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1108,282,2,anjing_1,0,itu_1,1,1,O,O,NN,det,PR,mod
1109,282,3,anjing_1,0,gonggong_1,2,1,O,O,NN,amod,VB,mod


Gold AMR:
(g / gonggong
      :ARG0 (a / anjing))

----------------------------------------------------------------------------------------------------------------
Suara harimau mengaum
Label salah:  ('suara_1', 'harimau_1', 'compound', 283)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1110,283,2,suara_1,0,harimau_1,1,1,O,O,NN,compound,NN,mod
1111,283,3,suara_1,0,aum_1,2,1,O,O,NN,flat,NN,mod


Gold AMR:
(a / aum
      :ARG1 (h / harimau
            :mod (s / suara)))

----------------------------------------------------------------------------------------------------------------
Suara harimau mengaum
Label salah:  ('suara_1', 'aum_1', 'flat', 283)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1110,283,2,suara_1,0,harimau_1,1,1,O,O,NN,compound,NN,mod
1111,283,3,suara_1,0,aum_1,2,1,O,O,NN,flat,NN,mod


Gold AMR:
(a / aum
      :ARG1 (h / harimau
            :mod (s / suara)))

----------------------------------------------------------------------------------------------------------------
Kucing itu berlarian
Label salah:  ('kucing_1', 'itu_1', 'det', 284)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1112,284,2,kucing_1,0,itu_1,1,1,O,O,NN,det,PR,mod
1113,284,3,kucing_1,0,lari_1,2,1,O,O,NN,punct,NN,mod


Gold AMR:
(l / lari
      :ARG0 (k / kucing))

----------------------------------------------------------------------------------------------------------------
Kucing itu berlarian
Label salah:  ('kucing_1', 'lari_1', 'punct', 284)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1112,284,2,kucing_1,0,itu_1,1,1,O,O,NN,det,PR,mod
1113,284,3,kucing_1,0,lari_1,2,1,O,O,NN,punct,NN,mod


Gold AMR:
(l / lari
      :ARG0 (k / kucing))

----------------------------------------------------------------------------------------------------------------
Adik tidur
Label salah:  ('adik_1', 'tidur_1', 'compound', 285)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1114,285,2,adik_1,0,tidur_1,1,1,O,O,NN,compound,VB,mod


Gold AMR:
(t / tidur
      :ARG0 (a / adik))

----------------------------------------------------------------------------------------------------------------
Ibu Rina memasak di dapur
Label salah:  ('dapur_1', 'di_1', 'case', 286)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1115,286,1,masak_1,2,ibu_1,0,1,O,O,VB,nsubj,NNP,ARG0
1116,286,2,ibu_1,0,rina_1,1,0,O,PER,NNP,flat,NNP,name
1117,286,4,dapur_1,4,di_1,3,0,O,O,NN,case,IN,mod
1118,286,5,masak_1,2,dapur_1,4,1,O,O,VB,obl,NN,location


Gold AMR:
(m / masak
      :ARG0 (i / ibu
            :name (r / rina))
      :location (d / dapur))

----------------------------------------------------------------------------------------------------------------
Ibu Tina pergi ke pasar
Label salah:  ('pasar_1', 'ke_1', 'case', 287)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1119,287,1,pergi_1,2,ibu_1,0,1,O,O,VB,nsubj,NNP,ARG0
1120,287,2,ibu_1,0,tina_1,1,0,O,O,NNP,flat,NNP,name
1121,287,4,pasar_1,4,ke_1,3,0,O,O,NN,case,IN,mod
1122,287,5,pergi_1,2,pasar_1,4,1,O,O,VB,obl,NN,location


Gold AMR:
(p1 / pergi
      :ARG0 (i / ibu
            :name (t / tina))
      :location (p2 / pasar))

----------------------------------------------------------------------------------------------------------------
Jendela ditutup Pak Toni
Label salah:  ('pak_1', 'ton_1', 'flat', 291)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1132,291,1,tutup_1,1,jendela_1,0,1,O,PER,VB,nsubj:pass,NN,ARG1
1133,291,3,tutup_1,1,pak_1,2,1,O,LOC,VB,obj,NNP,ARG0
1134,291,4,pak_1,2,ton_1,3,0,LOC,O,NNP,flat,NNP,mod


Gold AMR:
(t1 / tutup
      :ARG0 (p / pak
            :name (t2 / toni))
      :ARG1 (j / jendela))

----------------------------------------------------------------------------------------------------------------
Bu Ayu mendesah
Label salah:  ('bu_1', 'desah_1', 'flat', 292)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1135,292,2,bu_1,0,ayu_1,1,1,ORG,PER,NNP,flat,NNP,name
1136,292,3,bu_1,0,desah_1,2,1,ORG,O,NNP,flat,VB,mod


Gold AMR:
(d / desah
      :ARG0 (b / bu
            :name (a / ayu)))

----------------------------------------------------------------------------------------------------------------
Susi menyiram tanaman
Label salah:  ('siram_1', 'susi_1', 'nsubj', 293)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1137,293,1,siram_1,1,susi_1,0,1,O,PER,NN,nsubj,NN,ARG0
1138,293,3,siram_1,1,tanam_1,2,1,O,O,NN,obj,NN,ARG1


Gold AMR:
(s1 / siram
      :ARG0 (b / bu
            :name (s2 / susi))
      :ARG1 (t / tanam))

----------------------------------------------------------------------------------------------------------------
Tanaman disiram susi
Label salah:  ('siram_1', 'sus_1', 'obj', 294)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1139,294,1,siram_1,1,tanam_1,0,1,O,O,NN,nsubj:pass,NN,ARG1
1140,294,3,siram_1,1,sus_1,2,1,O,O,NN,obj,NN,ARG1


Gold AMR:
(s1 / siram
      :ARG0 (b / bu
            :name (s2 / susi))
      :ARG1 (t / tanam))

----------------------------------------------------------------------------------------------------------------
Dia tergoda
Label salah:  ('dia_1', 'goda_1', 'punct', 295)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1141,295,2,dia_1,0,goda_1,1,1,O,O,PRP,punct,VB,mod


Gold AMR:
(g / goda
      :ARG0 (d / dia))

----------------------------------------------------------------------------------------------------------------
Ibu Dodi bersimpuh
Label salah:  ('ibu_1', 'simpuh_1', 'flat', 297)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1143,297,2,ibu_1,0,dodi_1,1,1,O,O,NNP,flat,NNP,name
1144,297,3,ibu_1,0,simpuh_1,2,1,O,O,NNP,flat,VB,mod


Gold AMR:
(s / simpuh
      :ARG0 (i / ibu
            :name (d / dodi)))

----------------------------------------------------------------------------------------------------------------
Kami bersujud
Label salah:  ('kami_1', 'sujud_1', 'flat', 298)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1145,298,2,kami_1,0,sujud_1,1,1,O,O,PRP,flat,VB,mod


Gold AMR:
(s / sujud
      :ARG0 (k / kami))

----------------------------------------------------------------------------------------------------------------
Balita merangkak
Label salah:  ('balita_1', 'rangkak_1', 'flat', 299)
Hasil Dependency Parser: 


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1146,299,2,balita_1,0,rangkak_1,1,1,O,O,PRP,flat,VB,mod


Gold AMR:
(r / rangkak
      :ARG0 (b / balita))

----------------------------------------------------------------------------------------------------------------


## Test 

#### Test data

In [64]:
test_amr_data = amr_dataset['test']
test_pair_labels = create_pair_labels(test_amr_data)

In [65]:
test_labels, unk_pairs = create_labels(test_pair_labels,filtered_test_sentence_dataset, change_unk_to_mod=True)
filtered_test_dependency_feature_dict = filtered_test_sentence_dataset.to_dict()
print(len(test_labels))
filtered_test_dependency_feature_dict['label'] = pd.Series(test_labels)
filtered_test_dependency_feature_df = pd.DataFrame(filtered_test_dependency_feature_dict)
print(len(filtered_test_dependency_feature_df))
display(filtered_test_dependency_feature_df.tail())
filtered_test_dependency_feature_df.to_csv(LABELED_TEST_DEPENDENCY_FEATURES_FILEPATH, index=None)

Found pairs:  799
AMR Pair labels:  1089
Dependency Parser Pair labels:  1265
Not found:  466
Pair Precision:  0.6316205533596838
Pair Recall:  0.7337006427915519
Pair F1:  0.6788445199660154
1265
1265


Unnamed: 0,sentence_id,sequence,parent,parent_position,child,child_position,is_root,parent_ner,child_ner,parent_pos,dependency_role,child_pos,label
1260,305,5,minggu_1,3,depan_1,4,0,CRD,O,NN,amod,NN,mod
1261,306,1,pilih_1,1,pasang_1,0,1,O,O,VB,nsubj:pass,NN,ARG1
1262,306,3,pilih_1,1,kakak_1,2,1,O,O,VB,obj,VB,ARG0
1263,306,4,kakak_1,2,minggu_1,3,0,O,CRD,VB,compound,NN,mod
1264,306,5,minggu_1,3,depan_1,4,0,CRD,O,NN,amod,NN,mod


### Test Berita

In [117]:
for filename in TEST_BERITA_FILE_NAMES:
    print(filename)
    berita_dependency_feature_df, amr_df, all_sentences, berita_amrs = create_dependency_parser_feature_df(TEST_BERITA_DIR+filename, ANNOTATOR_PARAMS)
    filtered_berita_dependency_features_df = filter_dataset(berita_dependency_feature_df, mode=FILTER_TYPE)
    
    # Berita pair label
    data_params = dict(
        data_dir=TEST_BERITA_DIR,
        train_data="{}.features".format(filename),
        test_data="{}.features".format(filename),
        dev_data="{}.features".format(filename),
        data_type="AMR"
    )
    
    berita_amr_data = dataset_from_params(data_params)['test']
    berita_pair_labels = create_pair_labels(berita_amr_data)

    berita_labels, unk_pairs = create_labels(berita_pair_labels, filtered_berita_dependency_features_df, change_unk_to_mod=True)
    filtered_berita_dependency_features_df.insert(len(filtered_berita_dependency_features_df.columns), "label", berita_labels)
    
    topic = filename.split('.')[0]
    filtered_berita_dependency_features_df.to_csv(TEST_BERITA_DIR+"labeled_df_"+topic+'.csv', index=None)

b-salah-darat.txt
Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tokenizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tagger.pt', 'pretrain_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd.pretrain.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_lemmatizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit classifier]
---
Loading: depparse
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_parser.pt', 'pretrain_path': '/home/adylanrff/sta

0it [00:00, ?it/s][2020-04-30 13:59:12,067 INFO] Reading instances from lines in file at: data/raw/test/b-salah-darat.txt.features
32it [00:00, 511.20it/s]
0it [00:00, ?it/s][2020-04-30 13:59:12,131 INFO] Reading instances from lines in file at: data/raw/test/b-salah-darat.txt.features
32it [00:00, 467.84it/s]
0it [00:00, ?it/s][2020-04-30 13:59:12,201 INFO] Reading instances from lines in file at: data/raw/test/b-salah-darat.txt.features


Building train datasets ...
False
Building dev datasets ...
False
Building test datasets ...
False


32it [00:00, 471.93it/s]


Found pairs:  292
AMR Pair labels:  461
Dependency Parser Pair labels:  442
Not found:  150
Pair Precision:  0.6606334841628959
Pair Recall:  0.6334056399132321
Pair F1:  0.6467331118493909
c-gedung-roboh.txt
Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tokenizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tagger.pt', 'pretrain_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd.pretrain.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_lemmatizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemm

0it [00:00, ?it/s][2020-04-30 13:59:18,664 INFO] Reading instances from lines in file at: data/raw/test/c-gedung-roboh.txt.features
29it [00:00, 581.03it/s]
0it [00:00, ?it/s][2020-04-30 13:59:18,715 INFO] Reading instances from lines in file at: data/raw/test/c-gedung-roboh.txt.features
29it [00:00, 592.54it/s]
0it [00:00, ?it/s][2020-04-30 13:59:18,765 INFO] Reading instances from lines in file at: data/raw/test/c-gedung-roboh.txt.features
29it [00:00, 595.38it/s]

Building train datasets ...
False
Building dev datasets ...
False
Building test datasets ...
False





Found pairs:  222
AMR Pair labels:  395
Dependency Parser Pair labels:  370
Not found:  148
Pair Precision:  0.6
Pair Recall:  0.5620253164556962
Pair F1:  0.580392156862745
d-indo-fuji.txt
Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tokenizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tagger.pt', 'pretrain_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd.pretrain.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_lemmatizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit cl

0it [00:00, ?it/s][2020-04-30 13:59:26,183 INFO] Reading instances from lines in file at: data/raw/test/d-indo-fuji.txt.features
27it [00:00, 420.03it/s]
0it [00:00, ?it/s][2020-04-30 13:59:26,248 INFO] Reading instances from lines in file at: data/raw/test/d-indo-fuji.txt.features
27it [00:00, 457.16it/s]
0it [00:00, ?it/s][2020-04-30 13:59:26,308 INFO] Reading instances from lines in file at: data/raw/test/d-indo-fuji.txt.features
27it [00:00, 472.95it/s]

Building train datasets ...
False
Building dev datasets ...
False
Building test datasets ...
False





Found pairs:  287
AMR Pair labels:  528
Dependency Parser Pair labels:  517
Not found:  230
Pair Precision:  0.5551257253384912
Pair Recall:  0.5435606060606061
Pair F1:  0.5492822966507177
f-bunuh-diri.txt
Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tokenizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tagger.pt', 'pretrain_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd.pretrain.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_lemmatizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmat

0it [00:00, ?it/s][2020-04-30 13:59:32,618 INFO] Reading instances from lines in file at: data/raw/test/f-bunuh-diri.txt.features
23it [00:00, 590.71it/s]
0it [00:00, ?it/s][2020-04-30 13:59:32,659 INFO] Reading instances from lines in file at: data/raw/test/f-bunuh-diri.txt.features
23it [00:00, 508.59it/s]
0it [00:00, ?it/s][2020-04-30 13:59:32,705 INFO] Reading instances from lines in file at: data/raw/test/f-bunuh-diri.txt.features
23it [00:00, 626.97it/s]

Building train datasets ...
False
Building dev datasets ...
False
Building test datasets ...
False





Found pairs:  183
AMR Pair labels:  298
Dependency Parser Pair labels:  298
Not found:  115
Pair Precision:  0.6140939597315436
Pair Recall:  0.6140939597315436
Pair F1:  0.6140939597315436
g-gempa-dieng.txt
Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tokenizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_tagger.pt', 'pretrain_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd.pretrain.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/home/adylanrff/stanfordnlp_resources/id_gsd_models/id_gsd_lemmatizer.pt', 'lang': 'id', 'shorthand': 'id_gsd', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemma

0it [00:00, ?it/s][2020-04-30 13:59:39,260 INFO] Reading instances from lines in file at: data/raw/test/g-gempa-dieng.txt.features
19it [00:00, 603.71it/s]
0it [00:00, ?it/s][2020-04-30 13:59:39,293 INFO] Reading instances from lines in file at: data/raw/test/g-gempa-dieng.txt.features
19it [00:00, 597.40it/s]
0it [00:00, ?it/s][2020-04-30 13:59:39,326 INFO] Reading instances from lines in file at: data/raw/test/g-gempa-dieng.txt.features
19it [00:00, 581.39it/s]


Building train datasets ...
False
Building dev datasets ...
False
Building test datasets ...
False
Found pairs:  144
AMR Pair labels:  267
Dependency Parser Pair labels:  265
Not found:  121
Pair Precision:  0.5433962264150943
Pair Recall:  0.5393258426966292
Pair F1:  0.5413533834586467


In [67]:
test_data = pd.read_csv(TEST_BERITA_DIR+"labeled_df_b-salah-darat.csv")
test_data['child_pos'].value_counts()

NNP    167
NN     125
IN      62
VB      56
CD      31
JJ      19
FW      14
MD      12
PR      12
RB      10
CC       7
PRP      5
NEG      5
WH       1
OD       1
Name: child_pos, dtype: int64

In [48]:
amr_df.head()

Unnamed: 0,sentence_id,amr
0,1,(t / tanam\n :ARG0 (b1 / bu\n ...
1,2,(t / tanam\n :ARG0 (b1 / bu\n ...
2,3,(b1 / berangkat\n :ARG0 (o / om\n ...
3,4,(p / pergi\n :ARG0 (i / ibu\n ...
4,5,(p / pergi\n :ARG0 (k1 / kami\n ...
