The purpose of this notebook is to develop a lexicon for "confidential" to add to the features in <a href="http://nelatoolkit.science/">the NELA Toolkit</a>.

In [22]:
# import 
import phisherfolk

import numpy as np
import pandas as pd

# import requisite modules
import nltk
from gensim.models import Word2Vec
from nltk.corpus import wordnet as wn

import itertools  

In [100]:
def _lemma_names_of(words):
    """Return the set of lemma names of every WordNet synset of each word in ``words``."""
    return {
        name
        for word in words
        for synset in wn.synsets(word)
        for name in synset.lemma_names()
    }


def get_roots(root_word, level):
    """Expand ``root_word`` through WordNet synonyms, ``level`` steps deep.

    One step replaces the current words with the lemma names of all of
    their synsets. Level 1 is therefore the lemma names of the synsets of
    ``root_word``, level 2 is the same expansion applied to the level-1
    names, and so on.

    Parameters
    ----------
    root_word : str
        Word to start from.
    level : int
        Number of expansion steps. Levels above 3 are supported and simply
        keep applying the same step.

    Returns
    -------
    str, set of str, or None
        ``root_word`` itself (a plain string, not a set) when ``level`` is 0;
        the set of lemma names reached after ``level`` steps when ``level``
        is positive (an empty set if WordNet has no synsets for the word);
        ``None`` when ``level`` is negative.
    """
    if level == 0:
        # Level 0 needs no WordNet lookup at all.
        return root_word
    if level < 0:
        return None

    words = {root_word}
    for _ in range(level):
        expanded = _lemma_names_of(words)
        if expanded == words:
            # Fixed point: further steps would return the same set.
            break
        words = expanded
    return words

In [101]:
# Sanity check: at level 0 get_roots hands back the root word itself.
p = get_roots(root_word='confidential', level=0)
p

'confidential'

In [3]:
# Level-1 synonyms: every lemma name of every WordNet synset of "confidential".
confidential_synonyms = [
    lemma.name()
    for synset in wn.synsets("confidential")
    for lemma in synset.lemmas()
]

# Display the distinct names only; the list itself keeps any repeats.
print(set(confidential_synonyms))

{'confidential', 'secret'}


In [4]:
# Level-1 synonyms pinned by hand as a plain list of the two distinct names.
confidential_synonyms = [
    'confidential',
    'secret',
]

In [20]:
# Level-2 synonyms: lemma names of every synset of each level-1 synonym,
# collected straight into a set so each name appears once.
confidential_synonyms_v2 = {
    lemma.name()
    for word in confidential_synonyms
    for synset in wn.synsets(word)
    for lemma in synset.lemmas()
}
print(confidential_synonyms_v2)

{'confidential', 'private', 'hidden', 'mystical', 'occult', 'mystery', 'cloak-and-dagger', 'surreptitious', 'mystic', 'arcanum', 'clandestine', 'hugger-mugger', 'underground', 'enigma', 'unavowed', 'hole-and-corner', 'closed_book', 'secret', 'undercover', 'mysterious', 'hush-hush', 'secluded', 'orphic', 'privy'}


In [28]:
# Every WordNet synset of every level-2 synonym; a synset reachable from
# several words is listed once per word.
z = [synset for word in confidential_synonyms_v2 for synset in wn.synsets(word)]
print(z)

[Synset('confidential.s.01'), Synset('confidential.s.02'), Synset('confidential.s.03'), Synset('confidential.s.04'), Synset('private.n.01'), Synset('private.a.01'), Synset('private.s.02'), Synset('individual.s.04'), Synset('secret.s.05'), Synset('hide.v.01'), Synset('hide.v.02'), Synset('shroud.v.01'), Synset('obscure.v.05'), Synset('concealed.s.01'), Synset('hidden.s.02'), Synset('hidden.s.03'), Synset('mystic.a.03'), Synset('mystic.a.02'), Synset('mysterious.s.02'), Synset('supernatural.n.01'), Synset('occult.n.02'), Synset('eclipse.v.02'), Synset('occult.v.02'), Synset('occult.v.03'), Synset('occult.s.01'), Synset('mysterious.s.02'), Synset('mystery.n.01'), Synset('mystery.n.02'), Synset('clandestine.s.01'), Synset('furtive.s.01'), Synset('clandestine.s.01'), Synset('mystic.n.01'), Synset('mysterious.s.02'), Synset('mystic.a.02'), Synset('mystic.a.03'), Synset('secret.n.02'), Synset('clandestine.s.01'), Synset('hugger-mugger.n.01'), Synset('disorderly.s.02'), Synset('clandestine.s.0

In [35]:
# One list of lemma names per synset in `z`, in the same order as `z`.
o = [synset.lemma_names() for synset in z]
print(o)

[['confidential'], ['confidential', 'secret'], ['confidential'], ['confidential'], ['private', 'buck_private', 'common_soldier'], ['private'], ['private'], ['individual', 'private'], ['secret', 'private'], ['hide', 'conceal'], ['hide', 'hide_out'], ['shroud', 'enshroud', 'hide', 'cover'], ['obscure', 'blot_out', 'obliterate', 'veil', 'hide'], ['concealed', 'hidden', 'out_of_sight'], ['hidden', 'secret'], ['hidden', 'obscure'], ['mystic', 'mystical'], ['mystic', 'mystical'], ['mysterious', 'mystic', 'mystical', 'occult', 'secret', 'orphic'], ['supernatural', 'occult'], ['occult', 'occult_arts'], ['eclipse', 'occult'], ['occult'], ['occult'], ['occult'], ['mysterious', 'mystic', 'mystical', 'occult', 'secret', 'orphic'], ['mystery', 'enigma', 'secret', 'closed_book'], ['mystery', 'mystery_story', 'whodunit'], ['clandestine', 'cloak-and-dagger', 'hole-and-corner', 'hugger-mugger', 'hush-hush', 'secret', 'surreptitious', 'undercover', 'underground'], ['furtive', 'sneak', 'sneaky', 'stealth

In [78]:
# Flatten the per-synset name lists in `o` into one set of distinct words.
# Chaining the lists directly (instead of joining on "," and splitting again)
# keeps every name intact even if it contains a comma, and does not leave a
# stray '' entry when `o` is empty.
o_stripped = set(itertools.chain.from_iterable(o))
o_stripped

{'Orphic',
 'arcanum',
 'bathroom',
 'belowground',
 'blot_out',
 'brain-teaser',
 'buck_private',
 'can',
 'clandestine',
 'cloak-and-dagger',
 'cloistered',
 'closed_book',
 'common_soldier',
 'conceal',
 'concealed',
 'confidential',
 'conundrum',
 'cover',
 'cryptic',
 'cryptical',
 'deep',
 'disorderly',
 'earth-closet',
 'eclipse',
 'enigma',
 'enshroud',
 'furtive',
 'hidden',
 'hide',
 'hide_out',
 'higgledy-piggledy',
 'hole-and-corner',
 'hole-in-corner',
 'hugger-mugger',
 'hush-hush',
 'individual',
 'inscrutable',
 'jakes',
 'john',
 'jumbled',
 'lav',
 'lavatory',
 'metro',
 'mysterious',
 'mystery',
 'mystery_story',
 'mystic',
 'mystical',
 'mystifying',
 'obliterate',
 'obscure',
 'occult',
 'occult_arts',
 'orphic',
 'out_of_sight',
 'outhouse',
 'private',
 'privy',
 'reclusive',
 'religious_mystic',
 'resistance',
 'riddle',
 'seclude',
 'secluded',
 'secret',
 'sequester',
 'sequestered',
 'sequestrate',
 'shroud',
 'sneak',
 'sneaking',
 'sneaky',
 'stealthy',
 's

In [None]:
# collect words manually to ensure that there are no errors
confidential_bag = [
    'confidential', tic', 'religious_mystic'], ['mysterious', 'mystic', 'mystical', 'occult', 'secret', 'orphic'], ['mystic', 'mystical'], ['mystic
]

In [36]:
# For each synset in `z`, print its identifier, then its lemma names as a
# quoted, comma-separated list, then a blank separator.
for synset in z:
    print(synset.name())
    quoted_names = '", "'.join(synset.lemma_names())
    print('alternative names (lemmas): "%s"' % quoted_names)
    print('\n')

confidential.s.01
alternative names (lemmas): "confidential"


confidential.s.02
alternative names (lemmas): "confidential", "secret"


confidential.s.03
alternative names (lemmas): "confidential"


confidential.s.04
alternative names (lemmas): "confidential"


private.n.01
alternative names (lemmas): "private", "buck_private", "common_soldier"


private.a.01
alternative names (lemmas): "private"


private.s.02
alternative names (lemmas): "private"


individual.s.04
alternative names (lemmas): "individual", "private"


secret.s.05
alternative names (lemmas): "secret", "private"


hide.v.01
alternative names (lemmas): "hide", "conceal"


hide.v.02
alternative names (lemmas): "hide", "hide_out"


shroud.v.01
alternative names (lemmas): "shroud", "enshroud", "hide", "cover"


obscure.v.05
alternative names (lemmas): "obscure", "blot_out", "obliterate", "veil", "hide"


concealed.s.01
alternative names (lemmas): "concealed", "hidden", "out_of_sight"


hidden.s.02
alternative names (lemmas)