In [1]:
import os, sys

import random
import numpy as np
import pandas as pd

In [2]:
from nltk.corpus import wordnet

def get_synonyms(word):
    
    synonyms = set()
    
    for syn in wordnet.synsets(word, lang="ind"):
        for l in syn.lemmas():
            synonym = l.name().replace("_", " ").replace("-", " ").lower()
            synonym = "".join([char for char in synonym if char in ' qwertyuiopasdfghjklzxcvbnm'])
            synonyms.add(synonym) 
    if word in synonyms:
        synonyms.remove(word)
    
    return list(synonyms)

In [3]:
from nltk.corpus import stopwords
stop_words = []
for w in stopwords.words('indonesian'):
    stop_words.append(w)
print(stop_words)

['ada', 'adalah', 'adanya', 'adapun', 'agak', 'agaknya', 'agar', 'akan', 'akankah', 'akhir', 'akhiri', 'akhirnya', 'aku', 'akulah', 'amat', 'amatlah', 'anda', 'andalah', 'antar', 'antara', 'antaranya', 'apa', 'apaan', 'apabila', 'apakah', 'apalagi', 'apatah', 'artinya', 'asal', 'asalkan', 'atas', 'atau', 'ataukah', 'ataupun', 'awal', 'awalnya', 'bagai', 'bagaikan', 'bagaimana', 'bagaimanakah', 'bagaimanapun', 'bagi', 'bagian', 'bahkan', 'bahwa', 'bahwasanya', 'baik', 'bakal', 'bakalan', 'balik', 'banyak', 'bapak', 'baru', 'bawah', 'beberapa', 'begini', 'beginian', 'beginikah', 'beginilah', 'begitu', 'begitukah', 'begitulah', 'begitupun', 'bekerja', 'belakang', 'belakangan', 'belum', 'belumlah', 'benar', 'benarkah', 'benarlah', 'berada', 'berakhir', 'berakhirlah', 'berakhirnya', 'berapa', 'berapakah', 'berapalah', 'berapapun', 'berarti', 'berawal', 'berbagai', 'berdatangan', 'beri', 'berikan', 'berikut', 'berikutnya', 'berjumlah', 'berkali-kali', 'berkata', 'berkehendak', 'berkeinginan'

In [4]:
def synonym_replacement(words, n):
    
    words = words.split()
    
    new_words = words.copy()
    random_word_list = list(set([word for word in words if word not in stop_words]))
    random.shuffle(random_word_list)
    num_replaced = 0
    
    for random_word in random_word_list:
        synonyms = get_synonyms(random_word)
        
        if len(synonyms) >= 1:
            synonym = random.choice(list(synonyms))
            new_words = [synonym if word == random_word else word for word in new_words]
            num_replaced += 1
        
        if num_replaced >= n: #only replace up to n words
            break

    sentence = ' '.join(new_words)

    return sentence

In [9]:
print(f" Example of Synonym Replacement: {synonym_replacement('halo kawan apa kabarmu',3)}")

 Example of Synonym Replacement: aureole teammate apa kabarmu


In [22]:
for syn in wordnet.synsets("kawan", lang="ind"):
    print(syn)

Synset('flock.n.02')
Synset('covey.n.01')
Synset('friend.n.05')
Synset('acquaintance.n.03')
Synset('ally.n.02')
Synset('associate.n.01')
Synset('bedfellow.n.01')
Synset('brother.n.04')
Synset('buddy.n.01')
Synset('chap.n.01')
Synset('companion.n.01')
Synset('friend.n.01')
Synset('mate.n.08')
Synset('sister.n.03')
Synset('sweetheart.n.01')
Synset('teammate.n.01')


In [11]:
kawan_lemmas = wordnet.lemmas("kawan", lang="ind")
print(kawan_lemmas)

[Lemma('flock.n.02.kawan'), Lemma('covey.n.01.kawan'), Lemma('friend.n.05.kawan'), Lemma('acquaintance.n.03.kawan'), Lemma('ally.n.02.kawan'), Lemma('associate.n.01.kawan'), Lemma('bedfellow.n.01.kawan'), Lemma('brother.n.04.kawan'), Lemma('buddy.n.01.kawan'), Lemma('chap.n.01.kawan'), Lemma('companion.n.01.kawan'), Lemma('friend.n.01.kawan'), Lemma('mate.n.08.kawan'), Lemma('sister.n.03.kawan'), Lemma('sweetheart.n.01.kawan'), Lemma('teammate.n.01.kawan')]


In [19]:
hypernyms = []
for lem in kawan_lemmas:
    hypernyms.append(lem.synset().hypernyms())
    
print(hypernyms)

[[Synset('animal_group.n.01')], [Synset('gathering.n.01')], [Synset('christian.n.01')], [Synset('person.n.01')], [Synset('associate.n.01')], [Synset('peer.n.01')], [Synset('associate.n.01')], [Synset('friend.n.01')], [Synset('friend.n.01')], [Synset('male.n.02')], [Synset('friend.n.01')], [Synset('person.n.01')], [Synset('friend.n.01')], [Synset('member.n.01')], [Synset('lover.n.01')], [Synset('associate.n.01')]]


In [21]:
for hypernym in hypernyms:
    print(hypernym[0].lemmas(lang="ind"))

[]
[Lemma('gathering.n.01.kumpulan'), Lemma('gathering.n.01.perhimpunan'), Lemma('gathering.n.01.persatuan'), Lemma('gathering.n.01.pertemuan'), Lemma('gathering.n.01.rapat'), Lemma('gathering.n.01.rapat_umum')]
[Lemma('christian.n.01.Nasrani')]
[Lemma('person.n.01.individu'), Lemma('person.n.01.insan'), Lemma('person.n.01.manusia'), Lemma('person.n.01.orang'), Lemma('person.n.01.seorang'), Lemma('person.n.01.seseorang'), Lemma('person.n.01.sukma'), Lemma('person.n.01.unik')]
[Lemma('associate.n.01.bersekutu'), Lemma('associate.n.01.kawan'), Lemma('associate.n.01.rekan'), Lemma('associate.n.01.sekutu'), Lemma('associate.n.01.teman'), Lemma('associate.n.01.teman_sejawat'), Lemma('associate.n.01.kolega')]
[Lemma('peer.n.01.sesama'), Lemma('peer.n.01.sama'), Lemma('peer.n.01.setara'), Lemma('peer.n.01.tolok')]
[Lemma('associate.n.01.bersekutu'), Lemma('associate.n.01.kawan'), Lemma('associate.n.01.rekan'), Lemma('associate.n.01.sekutu'), Lemma('associate.n.01.teman'), Lemma('associate.n.0