In [124]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

In [125]:
factory = StemmerFactory()
stemmer = factory.create_stemmer()

In [126]:
amr_file_paths = [
    "../data/AMR/b-salah-darat/b-salah-darat.amr", 
    "../data/AMR/c-gedung-roboh/c-gedung-roboh.amr",
    "../data/AMR/d-indo-fuji/d-indo-fuji.amr",
    "../data/AMR/f-bunuh-diri/f-bunuh-diri.amr",
    "../data/AMR/g-gempa-dieng/g-gempa-dieng.amr"
]

sentence_file_paths = [
    "../data/AMR/b-salah-darat/b-salah-darat.txt", 
    "../data/AMR/c-gedung-roboh/c-gedung-roboh.txt",
    "../data/AMR/d-indo-fuji/d-indo-fuji.txt",
    "../data/AMR/f-bunuh-diri/f-bunuh-diri.txt",
    "../data/AMR/g-gempa-dieng/g-gempa-dieng.txt"
]

srl_predicates_path = "../data/SRL/indonesia_srl_annotation_predicates.txt"

intersecting_predicates_path = "../data/srl_amr_intersecting_predicates.txt"

In [127]:
class AMR:
    def __init__(self, amr_string):
        self.predicate = amr_string.split('\n')[0]
        self.rest = amr_string.split('\n')[1:]

In [147]:
def get_unique_predicates_from_amr(amrs):
    unique_predicates = set()
    for amr in amrs:
        predicate = amr.predicate.replace('-01','').strip().split('/')[1]
        unique_predicates.add(stemmer.stem(predicate))
    return unique_predicates

def get_srl_predicates_path(name="Richard"):
    return '../data/SRL/indonesia_srl_annotation_predicates_{}.txt'.format(name)

def get_predicates(annotator="Richard"):
    predicates = []
    with open(get_srl_predicates_path(annotator), 'r') as fp:
        line = fp.readline()
        while line:
            predicates.append(line.strip())
            line = fp.readline()
    return predicates

def load_sentences():
    sentences = []
    for file_path in sentence_file_paths:
        with open(file_path, 'r') as fp:
            line = fp.readline()
            while line:
                components = line.split(' ')
                if (len(components) > 1):
                    if (components[1] == '::snt'):
                        sentences.append(line[7:].strip())
                line = fp.readline()
    return predicates

def is_contains(sentence, predicates):
    return any([predicate in sentence for predicate in predicates])

In [129]:
predicate_set = set()
amrs = []

for file_path in amr_file_paths:
    with open(file_path) as fp:
        current_amr = ""
        line = fp.readline()
        while line:
            line_content = line.strip()
            if line_content == '':
                if (current_amr != ''):
                    amrs.append(AMR(current_amr))
                    current_amr = ""
            else:
                current_amr += line_content + '\n'            
            line = fp.readline()
        if (current_amr != ''):
            amrs.append(AMR(current_amr))
            current_amr = ""
            
unique_predicates = get_unique_predicates_from_amr(amrs)

print("Number of AMRs: ", len(amrs))
print("Unique predicates: ", len(unique_predicates))

Number of AMRs:  142
Unique predicates:  69


### Check intersecting predicates with SRL

In [130]:
srl_predicates = get_predicates()

In [131]:
intersecting_predicates = [unique_predicate for unique_predicate in unique_predicates if unique_predicate in srl_predicates]

In [132]:
for intersecting_predicate in intersecting_predicates:
    print(intersecting_predicate)
    
print("\nIntersecting predicates: ", len(intersecting_predicates))
print("Intersecting/All Predicates: {0:.2f}%".format(len(intersecting_predicates)/len(unique_predicates)))

benar
tutur
minta
kembali
buat
naik
berangkat
sambang
alami
laku
lihat
kerja
nyata
terjun
lapor
catat
menandatangani
edar
akhir
ada
tunggu
sebut
tulis
ungkap
tambah
kata
tutup
fokus
diri
beri
timbul
pasti
bahas
blokir
jelas
aku
bagi
putus
imbau
kira
jadi
panggil
yakin
dapat
guna
bawa

Intersecting predicates:  46
Intersecting/All Predicates: 0.67%


### Write intersecting predicates to file

In [133]:
print("Writing {} predicates ".format(len(intersecting_predicates)))
with open(intersecting_predicates_path, 'w') as fp:
    fp.seek(0)
    for predicate in intersecting_predicates:
        fp.writelines(predicate + '\n')
    fp.truncate()

Writing 46 predicates 


### Intersecting Predicate Sentence Analysis

In [134]:
sentences = load_sentences()

In [144]:
intersecting_predicate_sentences = [sentence for sentence in sentences if is_contains(sentence, intersecting_predicates)]
print("Sentence with intersecting predicates: {}".format(len(intersecting_predicate_sentences)))
print("Sentence with intersecting predicates (%): {0:.2f}%".format(len(intersecting_predicate_sentences)/len(amrs)))

Sentence with intersecting predicates: 125
Sentence with intersecting predicates (%): 0.88%


In [146]:
for sentence in intersecting_predicate_sentences:
    print(sentence, end="\n\n")

Kantor Imigrasi Bandara Soekarno-Hatta bersama Kantor Otoritas Bandara Soekarno-Hatta dan PT Angkasa Pura II akan membahas insiden penumpang penerbangan pesawat Lion Air JT 161 yang tiba di Jakarta dari Singapura 10 Mei 2016, yang keluar melalui Terminal 1 Domestik tanpa melalui pemeriksaan Imigrasi.

Kami akan meeting soal ini," ujar Kepala Kantor Imigrasi Soekarno-Hatta Alif Suadi kepada Tempo, Sabtu malam ini, 14 Mei 2016.

Dalam tulisannya itu, anak temannya bernama Natalie berangkat dari Singapura dan menggunakan pesawat Lion Air JT 161, pada pukul 18.50.

Sekretaris Perusahaan PT Angkasa Pura II Agus Haryadi juga mengatakan selaku pengelola Bandara Internasional Soekarno-Hatta, pihaknya akan berkoordinasi dengan Kantor Otoritas Bandara Wilayah I terkait dengan peristiwa ini.

Agus mengatakan petugas Aviation Security Bandara Internasional Soekarno-Hatta yang mengetahui kesalahan prosedur ini lalu mengambil tindakan untuk mengarahkan penumpang ke jalur yang benar, yakni kembali ke