In [117]:
import os
import sys
import fnmatch
import pandas as pd
import operator
import shutil
import re
import javalang
def create_dev_test_train_split_and_vocabulary(root_path, 
                                               train_output, 
                                               vocabFile
                                              ):

    train_file = ''
    dev_file = ''
    test_file = ''

    word_counts = dict()
    
    for root, dirnames, filenames in os.walk(root_path):
        for filename in fnmatch.filter(filenames, '*.csv'):

            path = os.path.join(root, filename)

            if filename.endswith("test.csv"):
                test_file = path

            elif filename.endswith("dev.csv"):
                dev_file = path

            else:
                path = "csv/oneLineCode.csv"
                train_file = path
                dataframe = pd.read_csv(path, na_filter = False)
                for i,data in dataframe.iterrows():
                    d = splitComment(data["comment"])
                    c = tokenizeJavaCode(data["code"])
                    print(i)
                    print("comment:")
                    print(d)
                    
                    print("code:")
                    print(c)
                    s = []
                    s =  d+c
                    add_counts(word_counts, s)
                with open(path, 'r', encoding='utf-8') as text:
                    for line in text:
                        add_counts(word_counts, line)

    vocabulary = build_vocabulary(word_counts)
    write_vocabulary(vocabulary, vocabFile)

    write_processed_dataset(train_file, train_output,vocabFile)
#     write_processed_dataset(dev_txt_files, dev_output)
#     write_processed_dataset(test_txt_files, test_output)

def write_processed_dataset(input_file, output_file, vocabFile):
    names = [ 'comment', 'code','non-information']
    df = pd.DataFrame()
    word_vocabulary = read_vocabulary(vocabFile)
    dataframe = pd.read_csv(input_file, na_filter = False)
    for i,d in dataframe.iterrows():
        comment = []
        code = []
        label = 1 if d["non-information"] == "yes" else 0
        for token in splitComment(d["comment"]):
            comment.append(word_vocabulary.get(token,0))
        for token in tokenizeJavaCode(d["code"]):
            code.append(word_vocabulary.get(token,0))
        da = [{
            "comment":comment,
            "code":code,
            "label":label,
        }]
        print(da)
        df = df.append(da,ignore_index=True,sort=False)
    df.to_csv(output_file, index=False)

In [118]:
delimiters = "#", ".",",","<b>","</b>","-",":","<br>","_","?"," ",";"
def splitComment(string,delimiters = delimiters, maxsplit=0):
## replace all web https to https
    for l in string.split():
        if l.startswith("https"):
            string = string.replace(l,"https")
    ## split string by delimiters
    regexPattern = '|'.join(map(re.escape, delimiters))
    result =  re.split(regexPattern, string, maxsplit)
    ## split string by uppercase
    f = []
    for r in result:
        p =  re.sub(r'((?<=[a-z])[A-Z]|(?<!\A)[A-Z](?=[a-z]))', r' \1', r)
#         print(p)
        for q in p.split():
            if q != "":
                f.append(q)
    return f
def tokenizeJavaCode(code):
    result = []
    try:
        tokens = list(javalang.tokenizer.tokenize(code))
        for token in tokens:
            result.append(token.value)
        return result
    except:
        return list(code)
#TODO stopwords
STOPWORDS=["and","or","i"]
def iterable_to_dict(arr):
    return dict((x.strip(), i) for (i, x) in enumerate(arr))

def read_vocabulary(file_name):
    with open(file_name, 'r', encoding='utf-8') as f:
        return iterable_to_dict(f.readlines())
def add_counts(word_counts, line):
    for w in line:
        if w in STOPWORDS:
            continue
        word_counts[w] = word_counts.get(w, 0) + 1
END = "</S>"
UNK = "<UNK>"
NUM = "<NUM>"

def dump(d, path):
    with open(path, 'w') as f:
        for s in d:
            f.write("%s\n" % repr(s))
        
def write_vocabulary(vocabulary, file_name):
    if END not in vocabulary:
        vocabulary.append(END)
    if UNK not in vocabulary:
        vocabulary.append(UNK)

    print("Vocabulary size: %d" % len(vocabulary))

    with open(file_name, 'w', encoding='utf-8') as f:
        f.write("\n".join(vocabulary))
MAX_WORD_VOCABULARY_SIZE = 100000
MIN_WORD_COUNT_IN_VOCAB = 2
MAX_SEQUENCE_LEN = 50
def build_vocabulary(word_counts):
    return [wc[0] for wc in reversed(sorted(word_counts.items(), key=operator.itemgetter(1))) if wc[1] >= MIN_WORD_COUNT_IN_VOCAB and wc[0] != UNK][:MAX_WORD_VOCABULARY_SIZE] # Unk will be appended to end

In [119]:
root_path = "csv"
vocabFile = "csv/vocab.txt"
create_dev_test_train_split_and_vocabulary(root_path,"csv/test_out.csv",vocabFile)

0
comment:
['@impl', 'Note', 'taken', 'from', '{@link', 'com', 'sun', 'javafx', 'scene', 'control', 'behavior', 'Text', 'Area', 'Behavior', 'context', 'Menu', 'Requested(javafx', 'scene', 'input', 'Context', 'Menu', 'Event)}']
code:
['public', 'static', 'void', 'showContextMenu', '(', 'TextArea', 'textArea', ',', 'ContextMenu', 'contextMenu', ',', 'ContextMenuEvent', 'e', ')', '{']
1
comment:
['icon', 'set', 'Tool', 'Tip', 'Text(printed', 'View', 'Model', 'get', 'Localization())']
code:
['TABLE_ICONS', '.', 'put', '(', 'SpecialField', '.', 'PRINTED', ',', 'icon', ')', ';']
2
comment:
['Synchronize', 'changes', 'of', 'the', 'underlying', 'date', 'value', 'with', 'the', 'temporal', 'Accessor', 'Value']
code:
['BindingsHelper', '.', 'bindBidirectional', '(', 'valueProperty', '(', ')', ',', 'temporalAccessorValue', ',']
3
comment:
['Ask', 'if', 'the', 'user', 'really', 'wants', 'to', 'close', 'the', 'given', 'database']
code:
['private', 'boolean', 'confirmClose', '(', 'BasePanel', 'panel'

161
comment:
['for', 'each', 'column', 'get', 'either', 'actual', 'width', 'or', 'if', 'it', 'does', 'not', 'exist', 'default', 'value']
code:
['Map', '<', 'String', ',', 'Double', '>', 'columnWidths', '=', 'new', 'HashMap', '<', '>', '(', ')', ';']
162
comment:
['Opens', 'the', 'import', 'inspection', 'dialog', 'to', 'let', 'the', 'user', 'decide', 'which', 'of', 'the', 'given', 'entries', 'to', 'import']
code:
['private', 'void', 'addImportedEntries', '(', 'final', 'BasePanel', 'panel', ',', 'final', 'List', '<', 'BibEntry', '>', 'entries', ')', '{']
163
comment:
['Update', 'journal', 'abbreviation', 'loader']
code:
['journalAbbreviationLoader', '.', 'update', '(', 'abbreviationsPreferences', ')', ';']
164
comment:
['Generate', 'dialog', 'title']
code:
['String', 'dialogTitle', ';']
165
comment:
['Wrapper', 'around', 'one', 'of', 'our', 'actions', 'from', '{@link', 'Action}', 'to', 'convert', 'them', 'to', 'controlsfx', '{@link', 'org', 'controlsfx', 'control', 'action', 'Action}']
c

comment:
['This', 'class', 'represents', 'the', 'change', 'of', 'type', 'for', 'an', 'entry']
code:
['public', 'class', 'UndoableChangeType', 'extends', 'AbstractUndoableJabRefEdit', '{']
271
comment:
['General']
code:
[]
272
comment:
['Sets', 'the', 'title', 'of', 'the', 'main', 'window']
code:
['public', 'void', 'setWindowTitle', '(', ')', '{']
273
comment:
['check', 'mode', 'of', 'currently', 'used', 'DB']
code:
['if', '(', 'panel', '!=', 'null', ')', '{']
274
comment:
['No', 'parent', '=', 'root', '>', 'just', 'add']
code:
['this', '.', 'moveTo', '(', 'target', ')', ';']
275
comment:
['write', 'the', 'entries', 'using', 'sw', 'which', 'is', 'used', 'later', 'to', 'form', 'the', 'email', 'content']
code:
['BibEntryWriter', 'bibtexEntryWriter', '=', 'new', 'BibEntryWriter', '(']
276
comment:
['Opens', 'the', 'given', 'files', 'If', 'one', 'of', 'it', 'is', 'null', 'or', '404', 'nothing', 'happens']
code:
['public', 'void', 'openFiles', '(', 'List', '<', 'Path', '>', 'filesToOpen', ',

379
comment:
['Nothing', 'set', 'so', 'we', 'use', 'the', 'default', 'values']
code:
['while', '(', 'preferences', '.', 'get', '(', 'JabRefPreferences', '.', 'CUSTOM_TAB_NAME', '+', '"_def"', '+', 'i', ')', '!=', 'null', ')', '{']
380
comment:
['Run', 'the', 'actual', 'open', 'in', 'a', 'thread', 'to', 'prevent', 'the', 'program', 'locking', 'until', 'the', 'file', 'is', 'loaded']
code:
['BackgroundTask', '.', 'wrap', '(', '(', ')', '->', 'openIt', '(', 'file', ',', 'dialog', '.', 'importEntries', '(', ')', ',', 'dialog', '.', 'importStrings', '(', ')', ',', 'dialog', '.', 'importGroups', '(', ')', ',', 'dialog', '.', 'importSelectorWords', '(', ')', ')', ')']
381
comment:
['It', 'could', 'be', 'that', 'somehow', 'the', 'path', 'is', 'null', 'for', 'example', 'if', 'it', 'got', 'deleted', 'in', 'the', 'meantime']
code:
['if', '(', 'directory', '==', 'null', ')', '{']
382
comment:
['extract', 'url', 'parameter']
code:
['String', '[', ']', 'pairs', '=', 'query', '.', 'split', '(', '"&"',

503
comment:
['//', 'TODO', 'Show', 'detailed', 'description', 'of', 'the', 'changes', 'String', 'Builder', 'sb', '=', 'new', 'String', 'Builder(', '"<html>"', '+', 'Localization', 'lang("', 'Changes', 'have', 'been', 'made', 'to', 'the', 'following', 'metadata', 'elements")', '+', '"', '<p>', '&nbsp', '&nbsp', '")', 'sb', 'append(changes', 'stream()', 'map(unit', '>', 'unit', 'key)', 'collect(', 'Collectors', 'joining("', '&nbsp', '&nbsp', '")))', 'sb', 'append("</html>")', 'info', 'Pane', 'set', 'Text(sb', 'to', 'String())']
code:
['return', 'new', 'Label', '(', 'Localization', '.', 'lang', '(', '"Metadata change"', ')', ')', ';']
504
comment:
['Global', 'String', 'constants', 'for', 'GUI', 'actions']
code:
['@', 'Deprecated']
505
comment:
['Put', 'everything', 'together']
code:
['VBox', 'container', '=', 'new', 'VBox', '(', ')', ';']
506
comment:
['Check', 'if', 'we', 'should', 'italicize', 'the', '"et', 'al', '"', 'string', 'in', 'citations']
code:
['boolean', 'italicize', '=', 'st

624
comment:
['Set', 'entry', 'number', 'in', 'case', 'that', 'is', 'included', 'in', 'the', 'preview', 'layout']
code:
['ExporterFactory', '.', 'entryNumber', '=', '1', ';']
625
comment:
['For', 'some', 'reason', 'the', 'graphic', 'is', 'not', 'set', 'correctly', 'so', "let's", 'fix', 'this', 'To', 'DO', 'Find', 'a', 'way', 'to', 'reuse', 'Jab', 'Ref', 'Icon', 'View']
code:
['button', '.', 'graphicProperty', '(', ')', '.', 'unbind', '(', ')', ';']
626
comment:
['Pass', 'other', 'keys', 'to', 'children']
code:
['}']
627
comment:
['class', 'is', 'indirectly', 'constructed', 'by', 'log4j']
code:
628
comment:
['Different', 'actions', 'depending', 'on', 'where', 'the', 'user', 'releases', 'the', 'drop', 'in', 'the', 'target', 'row', 'Bottom', '+', 'top', '>', 'insert', 'source', 'row', 'before', '/', 'after', 'this', 'row', 'Center', '>', 'add', 'as', 'child']
code:
['switch', '(', 'mouseLocation', ')', '{']
629
comment:
['Opens', 'a', 'file', 'browser', 'of', 'the', 'folder', 'of', 'the',

742
comment:
['The', 'action', 'concerned', 'with', 'opening', 'an', 'existing', 'database']
code:
['public', 'class', 'OpenDatabaseAction', 'extends', 'SimpleCommand', '{']
743
comment:
['icon', 'set', 'Tool', 'Tip', 'Text(priority', 'View', 'Model', 'get', 'Localization())']
code:
['TABLE_ICONS', '.', 'put', '(', 'SpecialField', '.', 'PRIORITY', ',', 'icon', ')', ';']
744
comment:
['Check', 'if', 'the', 'type', 'has', 'changed']
code:
['if', '(', '!', 'identicalTypes', '&&', '!', 'typeRadioButtons', '.', 'isEmpty', '(', ')', '&&', 'typeRadioButtons', '.', 'get', '(', '0', ')', '.', 'isSelected', '(', ')', ')', '{']
745
comment:
['In', 'case', 'no', 'string', 'Converter', 'was', 'provided', 'use', 'the', 'default', 'strategy']
code:
['if', '(', 'this', '.', 'stringConverter', '==', 'null', ')', '{']
746
comment:
['Installs', 'the', 'base', 'css', 'file', 'as', 'a', 'stylesheet', 'in', 'the', 'given', 'scene', 'Changes', 'in', 'the', 'css', 'file', 'lead', 'to', 'a', 'redraw', 'of', 't

862
comment:
['Check', 'if', 'we', 'should', 'reset', 'all', 'preferences', 'to', 'default', 'values']
code:
['if', '(', 'cli', '.', 'isPreferencesReset', '(', ')', ')', '{']
863
comment:
['Check', 'if', 'we', 'should', 'import', 'preferences', 'from', 'a', 'file']
code:
['if', '(', 'cli', '.', 'isPreferencesImport', '(', ')', ')', '{']
864
comment:
['List', 'to', 'put', 'imported/loaded', 'database(s)', 'in']
code:
['List', '<', 'ParserResult', '>', 'loaded', '=', 'importAndOpenFiles', '(', ')', ';']
865
comment:
['enables', 'blanks', 'within', 'the', 'search', 'term', '$', 'stands', 'for', 'a', 'blank']
code:
['String', 'searchTerm', '=', 'data', '[', '0', ']', '.', 'replace', '(', '"\\\\$"', ',', '" "', ')', ';']
866
comment:
['export', 'matches']
code:
['if', '(', '!', 'matches', '.', 'isEmpty', '(', ')', ')', '{']
867
comment:
['read', 'in', 'the', 'export', 'format', 'take', 'default', 'format', 'if', 'no', 'format', 'entered']
code:
['switch', '(', 'data', '.', 'length', ')', '{

code:
['public', 'class', 'FXDialog', 'extends', 'Alert', '{']
970
comment:
['This', 'field', 'is', 'initialized', 'upon', 'startup', 'Only', 'GUI', 'code', 'is', 'allowed', 'to', 'access', 'it', 'logic', 'code', 'should', 'use', 'dependency', 'injection']
code:
['public', 'static', 'JournalAbbreviationLoader', 'journalAbbreviationLoader', ';']
971
comment:
['This', 'field', 'is', 'initialized', 'upon', 'startup', 'Only', 'GUI', 'code', 'is', 'allowed', 'to', 'access', 'it', 'logic', 'code', 'should', 'use', 'dependency', 'injection']
code:
['public', 'static', 'ProtectedTermsLoader', 'protectedTermsLoader', ';']
972
comment:
['Manager', 'for', 'the', 'state', 'of', 'the', 'GUI']
code:
[]
973
comment:
['Remote', 'listener']
code:
['public', 'static', 'final', 'RemoteListenerServerLifecycle', 'REMOTE_LISTENER', '=', 'new', 'RemoteListenerServerLifecycle', '(', ')', ';']
974
comment:
['In', 'the', 'main', 'program', 'this', 'field', 'is', 'initialized', 'in', 'Jab', 'Ref', 'java', 'Each'

0
comment:
['@impl', 'Note', 'taken', 'from', '{@link', 'com', 'sun', 'javafx', 'scene', 'control', 'behavior', 'Text', 'Area', 'Behavior', 'context', 'Menu', 'Requested(javafx', 'scene', 'input', 'Context', 'Menu', 'Event)}']
code:
['public', 'static', 'void', 'showContextMenu', '(', 'TextArea', 'textArea', ',', 'ContextMenu', 'contextMenu', ',', 'ContextMenuEvent', 'e', ')', '{']
1
comment:
['icon', 'set', 'Tool', 'Tip', 'Text(printed', 'View', 'Model', 'get', 'Localization())']
code:
['TABLE_ICONS', '.', 'put', '(', 'SpecialField', '.', 'PRINTED', ',', 'icon', ')', ';']
2
comment:
['Synchronize', 'changes', 'of', 'the', 'underlying', 'date', 'value', 'with', 'the', 'temporal', 'Accessor', 'Value']
code:
['BindingsHelper', '.', 'bindBidirectional', '(', 'valueProperty', '(', ')', ',', 'temporalAccessorValue', ',']
3
comment:
['Ask', 'if', 'the', 'user', 'really', 'wants', 'to', 'close', 'the', 'given', 'database']
code:
['private', 'boolean', 'confirmClose', '(', 'BasePanel', 'panel'

104
comment:
['Push', 'To', 'Application']
code:
['final', 'PushToApplicationAction', 'pushToApplicationAction', '=', 'pushToApplicationsManager', '.', 'getPushToApplicationAction', '(', ')', ';']
105
comment:
['Returns', 'the', 'default', 'context', 'menu', 'items', '(except', 'undo/redo)']
code:
['public', 'static', 'List', '<', 'MenuItem', '>', 'getDefaultContextMenuItems', '(', 'TextInputControl', 'textInputControl', ')', '{']
106
comment:
['use', 'preferences', 'value', 'if', 'no', 'DB', 'is', 'open']
code:
['mode', '=', 'Globals', '.', 'prefs', '.', 'getDefaultBibDatabaseMode', '(', ')', ';']
107
comment:
['We', 'have', 'to', 'use', 'supplier', 'for', 'the', 'localized', 'text', 'so', 'that', 'language', 'changes', 'are', 'correctly', 'reflected']
code:
['SearchDisplayMode', '(', 'Supplier', '<', 'String', '>', 'displayName', ',', 'Supplier', '<', 'String', '>', 'toolTipText', ')', '{']
108
comment:
['TODO', 'switch', 'Icon', 'color', 'search', 'Icon', 'set', 'Icon(', 'Icon', 'Th

222
comment:
['Read', 'all', 'saved', 'file', 'paths', 'and', 'read', 'their', 'abbreviations']
code:
['public', 'void', 'createFileObjects', '(', ')', '{']
223
comment:
['On', 'Linux', 'Java', 'FX', 'fonts', 'look', 'blurry', 'per', 'default', 'This', 'can', 'be', 'improved', 'by', 'using', 'a', 'non', 'default', 'rendering', 'setting', 'See', 'https']
code:
['if', '(', 'Globals', '.', 'prefs', '.', 'getBoolean', '(', 'JabRefPreferences', '.', 'FX_FONT_RENDERING_TWEAK', ')', ')', '{']
224
comment:
['Runs', 'the', 'specified', '{@link', 'Runnable}', 'on', 'the', 'Java', 'FX', 'application', 'thread', 'and', 'waits', 'for', 'completion']
code:
['public', 'static', 'void', 'runAndWaitInJavaFXThread', '(', 'Runnable', 'action', ')', '{']
225
comment:
['Jab', 'Ref', 'Main', 'Class']
code:
['public', 'class', 'JabRefMain', 'extends', 'Application', '{']
226
comment:
['Bootstraps', 'the', 'component', 'context', 'from', 'a', 'UNO', 'installation']
code:
['public', 'static', 'final', 'XCompon

346
comment:
['Decides', 'if', 'the', 'content', 'stored', 'in', 'the', 'given', '{@link', 'Dragboard}', 'can', 'be', 'droped', 'on', 'the', 'given', 'target', 'row', 'Currently', 'the', 'following', 'sources', 'are', 'allowed', 'another', 'group', '(will', 'be', 'added', 'as', 'subgroup', 'on', 'drop)', 'entries', 'if', 'the', 'group', 'implements', '{@link', 'Group', 'Entry', 'Changer}', '(will', 'be', 'assigned', 'to', 'group', 'on', 'drop)']
code:
['public', 'boolean', 'acceptableDrop', '(', 'Dragboard', 'dragboard', ')', '{']
347
comment:
['The', 'string', 'was', 'removed', 'or', 'renamed', 'locally', 'We', 'guess', 'that', 'it', 'was', 'removed']
code:
['BibtexString', 'bs', '=', 'new', 'BibtexString', '(', 'label', ',', 'disk', ')', ';']
348
comment:
['Returns', 'a', 'consent', 'dialog', 'used', 'to', 'ask', 'permission', 'to', 'send', 'data', 'to', 'Mr', 'D', 'Lib']
code:
['private', 'ScrollPane', 'getPrivacyDialog', '(', 'BibEntry', 'entry', ')', '{']
349
comment:
['Left', 'te

496
comment:
['A', 'new', 'or', 'modified', 'entry', 'type', 'Construct', 'it', 'from', 'the', 'string', 'array']
code:
['ExternalFileType', 'type', '=', 'CustomExternalFileType', '.', 'buildFromArgs', '(', 'val', ')', ';']
497
comment:
['To', 'account', 'for', 'numbering', 'and', 'for', 'uniqiefiers', 'we', 'must', 'refresh', 'the', 'cite', 'markers']
code:
['updateSortedReferenceMarks', '(', ')', ';']
498
comment:
['Open', 'a', 'http/pdf/ps', 'viewer', 'for', 'the', 'given', 'link', 'string']
code:
['public', 'static', 'void', 'openExternalViewer', '(', 'BibDatabaseContext', 'databaseContext', ',', 'String', 'initialLink', ',', 'Field', 'initialField', ')']
499
comment:
['General']
code:
[]
500
comment:
['Read', 'all', 'keybindings', 'from', 'the', 'keybinding', 'repository', 'and', 'create', 'table', 'keybinding', 'models', 'for', 'them']
code:
['private', 'void', 'populateTable', '(', ')', '{']
501
comment:
['Text', 'leading', 'up', 'to', 's1', 'Insert', 's2']
code:
['stringBuilder

607
comment:
['set', 'Bounds(', 'Graphics', 'Environment', 'get', 'Local', 'Graphics', 'Environment()', 'get', 'Maximum', 'Window', 'Bounds())', 'Window', 'Location', 'pw', '=', 'new', 'Window', 'Location(this', 'Jab', 'Ref', 'Preferences', 'POS', 'X', 'Jab', 'Ref', 'Preferences', 'POS', 'Y', 'Jab', 'Ref', 'Preferences', 'SIZE', 'X', 'Jab', 'Ref', 'Preferences', 'SIZE', 'Y)', 'pw', 'display', 'Window', 'At', 'Stored', 'Location()']
code:
[]
608
comment:
['css', 'file', 'find', '*']
code:
['AUTO_FILE_LINK', '(', 'MaterialDesignIcon', '.', 'FILE_FIND', ')']
609
comment:
['icon', 'set', 'Tool', 'Tip', 'Text(', 'Localization', 'lang("', 'Open', 'file"))']
code:
['TABLE_ICONS', '.', 'put', '(', 'StandardField', '.', 'FILE', ',', 'icon', ')', ';']
610
comment:
['Here', 'we', 'store', 'the', 'names', 'of', 'all', 'current', 'files', 'If', 'there', 'is', 'no', 'current', 'file', 'we', 'remove', 'any', 'previously', 'stored', 'filename']
code:
['if', '(', 'filenames', '.', 'isEmpty', '(', ')', 

719
comment:
['Enriches', 'a', 'suggestion', 'provider', 'by', 'a', 'given', 'set', 'of', 'content', 'selector', 'values']
code:
['public', 'class', 'ContentSelectorSuggestionProvider', 'implements', 'AutoCompleteSuggestionProvider', '<', 'String', '>', '{']
720
comment:
['Revert', 'the', 'change']
code:
['try', '{']
721
comment:
['Always', 'fill', 'out', 'all', 'the', 'available', 'space']
code:
['setPrefHeight', '(', 'Double', '.', 'POSITIVE_INFINITY', ')', ';']
722
comment:
['Good', 'bye!']
code:
['tearDownJabRef', '(', 'filenames', ')', ';']
723
comment:
['Enrich', 'auto', 'completion', 'by', 'content', 'selector', 'values']
code:
['try', '{']
724
comment:
['Name', 'the', 'reference']
code:
['XNamed', 'xNamed', '=', 'UnoRuntime', '.', 'queryInterface', '(', 'XNamed', '.', 'class', ',', 'bookmark', ')', ';']
725
comment:
['The', 'log', 'event', 'will', 'be', 'forwarded', 'to', 'the', '{@link', 'Log', 'Messages}', 'archive']
code:
['@', 'Override']
726
comment:
['To', 'Do', 'After', 

835
comment:
['backwards', 'compatibility', 'stub']
code:
['public', 'static', 'XComponentContext', 'createInitialComponentContext', '(', 'Hashtable', '<', 'String', ',', 'Object', '>', 'context_entries', ')', 'throws', 'Exception', '{']
836
comment:
['Swing']
code:
['requires', 'java', '.', 'desktop', ';']
837
comment:
['Preferences', 'and', 'XML']
code:
['requires', 'java', '.', 'prefs', ';']
838
comment:
['Annotations', '(@', 'Post', 'Construct)']
code:
['requires', 'java', '.', 'annotation', ';']
839
comment:
['Libre', 'Office']
code:
['requires', 'org', '.', 'jabref', '.', 'thirdparty', '.', 'libreoffice', ';']
840
comment:
['A', 'mocking', 'class', 'used', 'as', 'a', 'placeholder', 'for', 'the', 'real', 'Oracle', 'JDBC', 'drivers', 'to', 'prevent', 'build', 'errors']
code:
['public', 'class', 'DatabaseChangeEvent', '{']
841
comment:
['no', 'data']
code:
['}']
842
comment:
['A', 'mocking', 'class', 'used', 'as', 'a', 'placeholder', 'for', 'the', 'real', 'Oracle', 'JDBC', 'drivers'

954
comment:
['Create', 'and', 'display', 'a', 'new', 'confirmation', 'dialog', 'It', 'will', 'include', 'a', 'blue', 'question', 'icon', 'on', 'the', 'left', 'and', 'a', 'YES', '(with', 'given', 'label)', 'and', 'Cancel', '(also', 'with', 'given', 'label)', 'button', 'To', 'create', 'a', 'confirmation', 'dialog', 'with', 'custom', 'buttons', 'see', 'also', '{@link', 'show', 'Custom', 'Button', 'Dialog', 'And', 'Wait(', 'Alert', 'Alert', 'Type', 'String', 'String', 'Button', 'Type', ')}', 'Moreover', 'the', 'dialog', 'contains', 'a', 'opt', 'out', 'checkbox', 'with', 'the', 'given', 'text', 'to', 'support', '"', 'Do', 'not', 'ask', 'again"', 'behaviour']
code:
['boolean', 'showConfirmationDialogWithOptOutAndWait', '(', 'String', 'title', ',', 'String', 'content', ',']
955
comment:
['Shows', 'a', 'custom', 'dialog', 'and', 'returns', 'the', 'result', '@param', 'dialog', 'dialog', 'to', 'show', '@param', '<R>', 'type', 'of', 'result']
code:
['<', 'R', '>', 'Optional', '<', 'R', '>', 'sho

0
comment:
['@impl', 'Note', 'taken', 'from', '{@link', 'com', 'sun', 'javafx', 'scene', 'control', 'behavior', 'Text', 'Area', 'Behavior', 'context', 'Menu', 'Requested(javafx', 'scene', 'input', 'Context', 'Menu', 'Event)}']
code:
['public', 'static', 'void', 'showContextMenu', '(', 'TextArea', 'textArea', ',', 'ContextMenu', 'contextMenu', ',', 'ContextMenuEvent', 'e', ')', '{']
1
comment:
['icon', 'set', 'Tool', 'Tip', 'Text(printed', 'View', 'Model', 'get', 'Localization())']
code:
['TABLE_ICONS', '.', 'put', '(', 'SpecialField', '.', 'PRINTED', ',', 'icon', ')', ';']
2
comment:
['Synchronize', 'changes', 'of', 'the', 'underlying', 'date', 'value', 'with', 'the', 'temporal', 'Accessor', 'Value']
code:
['BindingsHelper', '.', 'bindBidirectional', '(', 'valueProperty', '(', ')', ',', 'temporalAccessorValue', ',']
3
comment:
['Ask', 'if', 'the', 'user', 'really', 'wants', 'to', 'close', 'the', 'given', 'database']
code:
['private', 'boolean', 'confirmClose', '(', 'BasePanel', 'panel'

115
comment:
['Show', 'progress', 'indicator']
code:
['ProgressIndicator', 'progress', '=', 'new', 'ProgressIndicator', '(', ')', ';']
116
comment:
['Fail', 'on', 'unsupported', 'Java', 'versions']
code:
['ensureCorrectJavaVersion', '(', ')', ';']
117
comment:
['Get', 'the', 'string', 'associated', 'with', 'this', 'file', "type's", 'icon']
code:
['public', 'String', 'getIconName', '(', ')', '{']
118
comment:
['things', 'to', 'be', 'appended', 'to', 'an', 'opened', 'tab', 'should', 'be', 'done', 'after', 'opening', 'all', 'tabs', 'add', 'them', 'to', 'the', 'list']
code:
['toOpenTab', '.', 'add', '(', 'pr', ')', ';']
119
comment:
['set', 'Title(FRAME', 'TITLE)']
code:
['return', ';']
120
comment:
['we', 'do', 'not', 'trim', 'the', 'value', 'at', 'the', 'assignment', 'to', 'enable', 'users', 'to', 'have', 'spaces', 'at', 'the', 'beginning', 'and', 'at', 'the', 'end', 'of', 'the', 'pattern']
code:
['if', '(', '!', 'text', '.', 'trim', '(', ')', '.', 'isEmpty', '(', ')', ')', '{']
121
comm

233
comment:
['Create', 'buttons']
code:
['ButtonType', 'replaceEntries', '=', 'new', 'ButtonType', '(', 'Localization', '.', 'lang', '(', '"Merge entries"', ')', ',', 'ButtonBar', '.', 'ButtonData', '.', 'OK_DONE', ')', ';']
234
comment:
['Time', 'stamp']
code:
[]
235
comment:
['This', 'class', 'can', 'be', 'used', 'to', 'wrap', 'an', '@see', 'Observable', 'List', 'inside', 'it', 'When', 'wrapped', 'any', 'Listener', 'listening', 'for', 'updates', 'to', 'the', 'wrapped', 'Observable', 'List', '(for', 'example', 'because', 'of', 'a', 'binding', 'to', 'it)', 'is', 'ensured', 'to', 'be', 'notified', 'on', 'the', 'Java', 'FX', 'Application', 'Thread', 'It', 'should', 'be', 'used', 'to', 'implement', 'bindings', 'where', 'updates', 'come', 'in', 'from', 'a', 'background', 'thread', 'but', 'should', 'be', 'reflected', 'in', 'the', 'UI', 'where', 'it', 'is', 'necessary', 'that', 'changes', 'to', 'the', 'UI', 'are', 'performed', 'on', 'the', 'Java', 'FX', 'Application', 'thread']
code:
['publ

348
comment:
['Returns', 'a', 'consent', 'dialog', 'used', 'to', 'ask', 'permission', 'to', 'send', 'data', 'to', 'Mr', 'D', 'Lib']
code:
['private', 'ScrollPane', 'getPrivacyDialog', '(', 'BibEntry', 'entry', ')', '{']
349
comment:
['Left', 'text', 'pane']
code:
['if', '(', 'leftString', '.', 'isPresent', '(', ')', ')', '{']
350
comment:
['Initializes', 'the', 'components', 'the', 'layout', 'the', 'data', 'structure', 'and', 'the', 'actions', 'in', 'this', 'dialog']
code:
['private', 'void', 'initialize', '(', ')', '{']
351
comment:
['Add', 'type', 'change', 'menu']
code:
['ContextMenu', 'typeMenu', '=', 'new', 'ChangeEntryTypeMenu', '(', ')', '.', 'getChangeEntryTypePopupMenu', '(', 'entry', ',', 'databaseContext', ',', 'undoManager', ')', ';']
352
comment:
['This', 'property', 'is', 'only', 'needed', 'to', 'get', 'proper', 'IDE', 'support', 'in', 'FXML', 'files', '(e', 'g', 'validation', 'that', 'parameter', 'passed', 'to', '"icon"', 'is', 'indeed', 'of', 'type', '{@link', 'Icon', '

443
comment:
['Ok', 'we', 'have', 'seen', 'this', 'exact', 'marker', 'before']
code:
['if', '(', '!', 'refKeys', '.', 'get', '(', 'marker', ')', '.', 'contains', '(', 'currentKey', ')', ')', '{']
444
comment:
['Execute', 'the', 'callables', 'and', 'wait', 'for', 'the', 'results']
code:
['List', '<', 'Future', '<', 'Boolean', '>', '>', 'futures', '=', 'JabRefExecutorService', '.', 'INSTANCE', '.', 'executeAll', '(', 'tasks', ')', ';']
445
comment:
['Stores', 'all', 'user', 'dialogs', 'related', 'to', '{@link', 'Backup', 'Manager}']
code:
['public', 'class', 'BackupUIManager', '{']
446
comment:
['Nothing', 'to', 'do', 'here']
code:
['}']
447
comment:
['The', 'minimum', 'number', 'of', 'selected', 'entries', 'to', 'ask', 'the', 'user', 'for', 'confirmation']
code:
448
comment:
['Exception', 'used', 'to', 'indicate', 'that', 'the', 'plugin', 'attempted', 'to', 'set', 'a', 'character', 'format', 'that', 'is', 'not', 'defined', 'in', 'the', 'current', 'Open', 'Office', 'document']
code:
['cl

554
comment:
['no', 'more', 'bps', 'to', 'check', 'we', 'found', 'a', 'matching', 'one']
code:
['break', ';']
555
comment:
['default', 'pattern']
code:
['protected', 'final', 'TextField', 'defaultPat', '=', 'new', 'TextField', '(', ')', ';']
556
comment:
['We', 'try', 'to', 'find', 'the', 'page', 'that', 'is', 'displayed', 'in', 'the', 'center', 'of', 'the', 'viewport']
code:
['Optional', '<', 'DocumentViewerPage', '>', 'inMiddleOfViewport', '=', 'Optional', '.', 'empty', '(', ')', ';']
557
comment:
['Function', 'to', 'get', 'the', 'command', 'name', 'in', 'case', 'it', 'is', 'different', 'from', 'the', 'application', 'name']
code:
['protected', 'String', 'getCommandName', '(', ')', '{']
558
comment:
['Check', 'if', 'we', 'are', 'running', 'an', 'acceptable', 'version', 'of', 'Java']
code:
['final', 'BuildInfo', 'buildInfo', '=', 'Globals', '.', 'BUILD_INFO', ';']
559
comment:
['{@inherit', 'Doc}']
code:
['@', 'Override']
560
comment:
['Alt', 'on', 'Windows']
code:
['LOGGER', '.', 'deb

692
comment:
['Updated', 'the', 'original', 'entry', 'with', 'the', 'new', 'fields']
code:
['Set', '<', 'Field', '>', 'jointFields', '=', 'new', 'TreeSet', '<', '>', '(', 'Comparator', '.', 'comparing', '(', 'Field', '::', 'getName', ')', ')', ';']
693
comment:
['GUI', 'for', 'tab', 'displaying', 'article', 'recommendations', 'based', 'on', 'the', 'currently', 'selected', 'Bib', 'Entry']
code:
['public', 'class', 'RelatedArticlesTab', 'extends', 'EntryEditorTab', '{']
694
comment:
['TODO', 'NULL', 'TODO', 'Null', 'TODO', 'NULL']
code:
['GroupTreeNode', 'newParent', '=', 'root', '.', 'getNode', '(', ')', '.', 'getDescendant', '(', 'pathToNewParent', ')', '.', 'get', '(', ')', ';']
695
comment:
['A', 'date', 'picker', 'with', 'configurable', 'datetime', 'format', 'where', 'both', 'date', 'and', 'time', 'can', 'be', 'changed', 'via', 'the', 'text', 'field', 'and', 'the', 'date', 'can', 'additionally', 'be', 'changed', 'via', 'the', 'Java', 'FX', 'default', 'date', 'picker', 'Also', 'suppo

809
comment:
['Since', 'the', 'directory', 'does', 'not', 'exist', 'we', 'cannot', 'move', 'it', 'to', 'there', 'So', 'this', 'option', 'is', 'not', 'checked', 'regardless', 'of', 'the', 'presets', 'stored', 'in', 'the', 'preferences']
code:
['cleanUpMovePDF', '.', 'setDisable', '(', 'true', ')', ';']
810
comment:
['TODO', 'reflective', 'access', 'should', 'be', 'removed']
code:
['Field', 'privatePopup', '=', 'AutoCompletionBinding', '.', 'class', '.', 'getDeclaredField', '(', '"autoCompletionPopup"', ')', ';']
811
comment:
['ensure', 'that', 'there', 'is', 'always', 'only', 'one', 'All', 'Entries', 'Group']
code:
['if', '(', 'newGroups', '.', 'getGroup', '(', ')', 'instanceof', 'AllEntriesGroup', ')', '{']
812
comment:
['Class', 'for', 'manipulating', 'the', 'Bibliography', 'of', 'the', 'currently', 'start', 'document', 'in', 'Open', 'Office']
code:
['class', 'OOBibBase', '{']
813
comment:
['Check', 'for', 'running', 'Jab', 'Ref']
code:
['if', '(', '!', 'handleMultipleAppInstances', '

930
comment:
['Ensure', 'that', 'the', 'suggestion', 'providers', 'are', 'in', 'sync', 'with', 'entries']
code:
['CoarseChangeFilter', 'changeFilter', '=', 'new', 'CoarseChangeFilter', '(', 'bibDatabaseContext', ')', ';']
931
comment:
['Create', 'empty', 'suggestion', 'providers', 'if', 'auto', 'completion', 'is', 'deactivated']
code:
['suggestionProviders', '=', 'new', 'SuggestionProviders', '(', ')', ';']
932
comment:
['Put', 'an', 'asterisk', 'behind', 'the', 'filename', 'to', 'indicate', 'the', 'database', 'has', 'changed']
code:
['frame', '.', 'setWindowTitle', '(', ')', ';']
933
comment:
['Run', 'the', 'search', 'operation']
code:
['FileFinder', 'fileFinder', '=', 'FileFinders', '.', 'constructFromConfiguration', '(', 'Globals', '.', 'prefs', '.', 'getAutoLinkPreferences', '(', ')', ')', ';']
934
comment:
['Automatically', 'add', 'new', 'entry', 'to', 'the', 'selected', 'group', '(or', 'set', 'of', 'groups)']
code:
['if', '(', 'Globals', '.', 'prefs', '.', 'getBoolean', '(', 'Jab

Vocabulary size: 4509
[{'comment': [2064, 668, 576, 105, 95, 4507, 2063, 464, 510, 667, 4506, 347, 2062, 4505, 304, 431, 4504, 510, 575, 4503, 431, 4502], 'code': [67, 122, 90, 2061, 16, 2060, 795, 9, 794, 2059, 9, 2058, 1, 17, 35], 'label': 1}]
[{'comment': [159, 123, 666, 665, 4501, 222, 406, 100, 2057], 'code': [574, 15, 221, 16, 793, 15, 2056, 9, 159, 17, 33], 'label': 1}]
[{'comment': [4500, 158, 70, 34, 1270, 287, 151, 116, 34, 1269, 4499, 979], 'code': [1268, 15, 4498, 16, 4497, 16, 17, 9, 4496, 9], 'label': 0}]
[{'comment': [1267, 72, 34, 137, 1266, 664, 51, 573, 34, 118, 155], 'code': [104, 153, 4495, 16, 572, 200, 17, 35], 'label': 1}]
[{'comment': [187, 272, 111], 'code': [4494, 16, 213, 15, 4493, 17, 9], 'label': 0}]
[{'comment': [239, 72, 173, 64, 4, 146, 125, 116, 34, 509, 150, 170, 145, 86, 64, 4, 978, 70, 89, 125, 145, 286, 34, 146, 172], 'code': [370, 792, 49, 110, 33], 'label': 0}]
[{'comment': [303, 88, 125], 'code': [76, 16, 346, 15, 195, 47, 1265, 9, 508, 45, 88, 6

[{'comment': [4234, 1191, 147, 150], 'code': [72, 16, 108, 4233, 15, 4232, 16, 150, 17, 17, 35], 'label': 1}]
[{'comment': [751, 1927, 934], 'code': [72, 16, 108, 4231, 16, 17, 17, 35], 'label': 1}]
[{'comment': [1926, 4230, 116, 34, 216, 252, 185, 195, 0, 750, 101, 51, 485, 198], 'code': [484, 33], 'label': 1}]
[{'comment': [187, 166, 75, 1190, 111], 'code': [4229, 16, 213, 15, 4228, 17], 'label': 1}]
[{'comment': [4227, 34, 163, 269, 51, 34, 152, 235], 'code': [67, 90, 1189, 16, 4226, 4225, 17, 35], 'label': 0}]
[{'comment': [253, 4, 146, 384, 749, 258, 101, 34, 51, 1188, 93, 70, 34, 764, 396], 'code': [67, 122, 47, 29, 45, 773, 47, 29, 45, 160, 16, 311, 47, 29, 45, 1925, 17, 35], 'label': 0}]
[{'comment': [98, 4224, 483, 318, 183, 72, 34, 1187, 296, 102, 748, 270, 4223, 1186, 64, 1924, 147, 34, 4222, 117, 148, 4221, 262], 'code': [], 'label': 0}]
[{'comment': [187, 4220], 'code': [4219, 16, 213, 15, 4218, 17], 'label': 1}]
[{'comment': [1923, 34, 400, 207, 34, 4217, 217], 'code': [3

[{'comment': [4012, 34, 95, 1818, 212, 222, 406, 726, 328, 1817, 93, 76, 303, 84, 75, 34, 386, 789, 226, 164, 113, 1135, 34, 763, 138, 116, 34, 178, 70, 34, 268, 226, 70, 34, 4011, 212, 1857, 763, 138, 113, 82, 614], 'code': [67, 90, 4010, 16, 17, 35], 'label': 0}]
[{'comment': [505, 34, 95, 2039, 89, 64, 504, 232, 34, 231, 64, 1816, 942, 98, 2038, 503, 502, 101, 34, 191, 190, 230], 'code': [67, 319, 47, 73, 45, 1815, 16, 2037, 47, 73, 45, 1815, 17, 35], 'label': 0}]
[{'comment': [1134, 908, 1814, 4009, 896, 190, 263, 895, 1133, 1813, 0, 182, 75, 345, 0, 894, 1812, 116, 0, 354, 978, 115, 1811, 460, 89, 34, 307, 532, 115, 1810, 111, 893, 70, 345, 378, 302, 1809, 34, 483, 725, 612, 86, 134, 70, 532, 0, 34, 307, 892, 111, 893, 75, 894, 891, 302, 1132, 34, 483, 725, 612, 86, 134, 70, 532, 0, 34, 307, 892, 75, 34, 1131, 1130, 366, 1808, 460, 116, 34, 1807, 111, 1806, 34, 150, 70, 896, 190, 180, 454, 1805, 1804, 34, 562, 70, 336, 1803, 477, 82, 144, 51, 1802, 0, 1801, 1800, 1799, 105, 86, 11

[{'comment': [964, 51, 491, 34, 227, 123, 75, 34, 163, 498, 226, 98, 163, 498, 1088, 113, 82, 123, 51, 34, 492, 0, 485, 227, 75, 34, 268, 226, 72, 3900, 3899, 97, 113, 82, 123, 51, 313, 1089, 72, 173, 115, 148, 268, 327], 'code': [67, 90, 3898, 16, 17, 35], 'label': 0}]
[{'comment': [149, 208, 51, 967, 34, 310, 866, 522, 51, 314, 533, 70, 706], 'code': [562, 49, 3897, 33], 'label': 0}]
[{'comment': [284, 4, 1087, 541, 34, 118, 134, 89, 3896, 158, 101, 34, 191, 190, 230], 'code': [67, 122, 47, 29, 45, 787, 47, 29, 45, 3895, 16, 787, 47, 29, 45, 134, 17, 35], 'label': 0}]
[{'comment': [3894, 34, 949, 230], 'code': [3893, 16, 17, 33], 'label': 1}]
[{'comment': [600, 181, 3892, 209, 72, 157, 181, 1712, 209, 381, 160, 157, 88], 'code': [72, 16, 108, 3891, 15, 276, 16, 17, 17, 35], 'label': 0}]
[{'comment': [961, 34, 179], 'code': [334, 15, 1711, 16, 206, 15, 1710, 16, 17, 17, 33], 'label': 0}]
[{'comment': [1709, 634, 1708, 351, 237, 399, 105, 34, 1707, 89, 135, 82, 337, 0, 105, 34, 134, 70

[{'comment': [187, 3633, 111], 'code': [1627, 16, 213, 15, 1627, 17, 9], 'label': 1}]
[{'comment': [3632, 0, 644, 595], 'code': [281, 15, 3631, 16, 282, 246, 35], 'label': 0}]
[{'comment': [212, 101, 564, 141, 273, 1153, 709, 76, 3630, 158, 0, 778, 1880, 75, 245, 173, 115, 756, 158], 'code': [1626, 3629, 49, 79, 1626, 16, 155, 9, 3628, 17, 33], 'label': 0}]
[{'comment': [3627, 4, 118, 151, 51, 4, 118, 124, 76, 121, 103, 75, 4, 311, 91, 93, 1625, 530, 255, 180, 427, 871, 220, 292, 4, 558, 708, 856, 89, 135, 82, 567, 147, 34, 1624], 'code': [104, 122, 1058, 3626, 16, 311, 47, 130, 45, 103, 9, 197, 124, 9, 80, 3625, 17, 35], 'label': 0}]
[{'comment': [149, 169, 51, 182, 1057, 51, 100, 34, 454, 758], 'code': [181, 35], 'label': 0}]
[{'comment': [72, 145, 698, 334, 273, 1623, 34, 427, 266, 105, 1056, 179, 3624], 'code': [72, 16, 273, 17, 35], 'label': 0}]
[{'comment': [844, 84, 3623, 695, 34, 216, 3622, 135, 82, 772], 'code': [80, 2007, 49, 3621, 15, 360, 16, 17], 'label': 0}]
[{'comment': 

[{'comment': [149, 208, 51, 536, 86, 172, 443, 474, 34, 353, 409, 293, 81, 64, 350, 278, 845, 0, 1167, 608, 343, 310, 0, 536, 4, 3511, 344, 711, 64, 75, 4, 3510, 3509, 3508], 'code': [1046, 79, 3507, 16, 1578, 15, 3506, 16, 17, 17, 33], 'label': 0}]
[{'comment': [187, 758, 1577, 111], 'code': [3505, 16, 213, 15, 3504, 17, 9], 'label': 1}]
[{'comment': [91, 1595, 3503, 64, 3502, 34, 509, 117, 95, 349, 222, 1576, 1575, 1574, 149, 380, 480, 89, 34, 550, 70, 121, 922, 3501, 205, 51, 34, 1573, 550, 70, 34, 308, 3500, 75, 1258, 51, 95, 349, 222, 1576, 1575, 1574, 120, 473, 34, 922, 3499, 147, 314, 1143, 550], 'code': [67, 81, 3498, 193, 489, 47, 1144, 15, 3497, 9, 1045, 45, 35], 'label': 0}]
[{'comment': [149, 169, 51, 3496, 34, 185, 394, 105, 34, 185, 195, 1944, 1943, 51, 3495, 195, 1572], 'code': [311, 47, 130, 45, 3494, 49, 16, 311, 47, 130, 45, 17, 1571, 15, 561, 16, 1044, 17, 33], 'label': 0}]
[{'comment': [952, 37, 119, 70, 95, 185, 2014, 75, 34, 299, 263, 492, 178, 64, 1220], 'code': 

[{'comment': [344, 144, 51, 493, 89, 34, 1032, 1031, 51, 123, 4, 415, 217, 89, 64, 102, 375, 75, 34, 163, 353, 409, 439], 'code': [81, 3276, 133, 344, 35], 'label': 0}]
[{'comment': [1494], 'code': [], 'label': 0}]
[{'comment': [505, 34, 3275, 696, 70, 34, 482, 697, 441], 'code': [67, 90, 3274, 16, 131, 153, 151, 17, 35], 'label': 1}]
[{'comment': [98, 176, 243, 548, 957, 0, 3273], 'code': [518, 15, 778, 16, 165, 15, 211, 16, 3272, 17, 17, 33], 'label': 0}]
[{'comment': [237, 235, 164, 115, 102, 75, 34, 855, 88, 1493, 566, 115, 1492, 235], 'code': [76, 16, 197, 124, 61, 3271, 17, 35], 'label': 0}]
[{'comment': [347, 278, 34, 393], 'code': [72, 16, 445, 45, 56, 17, 35], 'label': 0}]
[{'comment': [88, 125], 'code': [1265, 3270, 49, 3269, 15, 3268, 16, 17, 33], 'label': 1}]
[{'comment': [1665, 1225, 72, 180], 'code': [188, 3267, 49, 56, 33], 'label': 1}]
[{'comment': [175, 34, 206, 454, 116, 86, 84, 1909, 159], 'code': [67, 90, 3266, 16, 80, 150, 17, 35], 'label': 1}]
[{'comment': [3265, 

[{'comment': [3065, 76, 1029, 70, 121, 1495, 1185, 330, 3064, 75, 143, 142], 'code': [67, 81, 1532, 35], 'label': 0}]
[{'comment': [121, 34, 819, 64, 488, 75, 166, 184, 1464, 3063, 109, 763, 200], 'code': [3062, 16, 17, 15, 1454, 16, 84, 9, 236, 17, 33], 'label': 0}]
[{'comment': [1453, 34, 118, 124], 'code': [67, 90, 1452, 16, 197, 1451, 17, 35], 'label': 1}]
[{'comment': [175, 457, 244, 3061, 1450, 51, 844, 1200], 'code': [67, 90, 1945, 16, 3060, 1200, 17, 35], 'label': 1}]
[{'comment': [903, 504, 72, 34, 137, 693, 4, 382, 189, 149, 208, 51, 778, 34, 95, 1449, 1529, 757, 86, 179, 145, 89, 34, 322, 308, 587, 818], 'code': [104, 90, 3059, 16, 787, 47, 1074, 45, 735, 17, 35], 'label': 0}]
[{'comment': [3058, 103], 'code': [76, 16, 130, 88, 61, 430, 15, 640, 16, 17, 15, 1448, 16, 17, 17, 35], 'label': 0}]
[{'comment': [91, 200, 338, 34, 458, 3057, 0, 34, 167, 266], 'code': [104, 3056, 3055, 16, 17, 35], 'label': 0}]
[{'comment': [903, 504, 72, 34, 137, 158, 34, 817, 155, 149, 208, 51, 10

[{'comment': [91, 81, 630, 34, 1006, 70, 109, 88, 98, 686, 539, 1227, 51, 34, 155, 34, 88, 0, 34, 299, 70, 166, 88, 715, 98, 1415, 51, 82, 1414, 51, 573, 34, 607, 242, 72, 97, 64, 462, 232, 109, 219, 0, 34, 1006, 64, 341, 1022], 'code': [67, 81, 2948, 133, 745, 35], 'label': 0}]
[{'comment': [149, 1413, 89, 976, 195, 64, 102, 869, 51, 564, 195, 239, 72, 97, 141, 204, 401, 1619, 117, 2947, 297, 596, 2946, 243, 754], 'code': [153, 2945, 49, 16, 1253, 15, 1252, 16, 971, 9, 1192, 17, 2944, 39, 17, 33], 'label': 0}]
[{'comment': [98, 186, 936, 82, 102, 681, 75, 1412, 45, 106, 1250, 140, 1104, 86, 613, 1752, 147, 535, 1005, 2943], 'code': [72, 16, 154, 15, 156, 280, 110, 17, 35], 'label': 0}]
[{'comment': [376, 2942, 185, 195], 'code': [67, 130, 2941, 16, 17, 35], 'label': 1}]
[{'comment': [1453, 34, 196, 124, 72, 97, 64, 102, 772], 'code': [67, 90, 500, 16, 17, 35], 'label': 1}]
[{'comment': [2940, 387, 51, 203, 86, 249, 76, 34, 118, 88], 'code': [67, 283, 153, 1540, 16, 130, 88, 17, 33], '

[{'comment': [167, 304, 218, 2713, 76, 361, 110, 2712], 'code': [281, 15, 2711, 16, 17, 15, 768, 16], 'label': 0}]
[{'comment': [1249, 34, 129, 103, 0, 1824, 399, 116, 34, 129, 393, 326, 647, 2710, 341, 97, 64, 1021, 51, 34, 315, 91, 1612, 608, 82, 1039], 'code': [67, 81, 2709, 35], 'label': 0}]
[{'comment': [91, 81, 1066, 4, 125, 70, 411, 138, 89, 162, 82, 417, 51, 105, 143, 142, 98, 81, 338, 2708, 272, 51, 1714, 109, 159, 4, 731, 476, 0, 4, 279, 51, 164, 275, 918, 138, 70, 86, 125], 'code': [67, 81, 1488, 193, 370, 35], 'label': 0}]
[{'comment': [1569, 360, 141, 204, 1358, 525, 72, 173, 1357, 180, 468], 'code': [72, 16, 108, 827, 15, 623, 16, 17, 15, 929, 16, 17, 15, 276, 16, 17, 17, 35], 'label': 0}]
[{'comment': [429, 245, 97, 1356, 102, 318], 'code': [536, 16, 479, 2707, 17, 35], 'label': 1}]
[{'comment': [95, 212, 940, 519, 89, 812, 199, 138, 164, 115, 102, 417, 75, 180, 70, 34, 95, 185, 2706, 70, 34, 261, 95, 185, 1373, 91, 95, 212, 940, 2705, 101, 591, 70, 560, 95, 212, 940, 51

[{'comment': [140, 542, 383, 1921, 196, 383, 123, 1920, 383, 1180, 143, 142, 383, 932, 100, 1919], 'code': [69], 'label': 0}]
[{'comment': [98, 88, 321, 356], 'code': [104, 174, 47, 130, 45, 88, 49, 174, 15, 157, 16, 17, 33], 'label': 0}]
[{'comment': [617, 4, 2458, 151, 1732, 124, 34, 2457, 151, 64, 335, 972], 'code': [80, 132, 136, 2456, 49, 35, 901, 15, 2455, 16, 17, 9, 1325, 132, 56, 136, 9, 1325, 132, 39, 136, 69, 33], 'label': 0}]
[{'comment': [600, 2454, 419, 333, 0, 203, 414, 403, 2453, 473, 70, 414, 51, 380, 480, 336, 2452], 'code': [414, 15, 2451, 16, 1837, 16, 17, 17, 33], 'label': 0}]
[{'comment': [329, 367, 838, 34, 549, 64, 102, 123, 543, 145, 1024, 837, 86], 'code': [1592, 16, 218, 9, 176, 17, 33], 'label': 0}]
[{'comment': [344, 144, 51, 493, 89, 34, 1032, 1031, 51, 123, 4, 415, 217, 89, 64, 102, 375, 75, 34, 163, 353, 409, 439], 'code': [81, 1555, 133, 344, 35], 'label': 0}]
[{'comment': [2450, 4, 172, 2449, 231, 89, 2448, 1557, 232, 34, 118, 1324], 'code': [47, 73, 45

[{'comment': [175, 205, 150, 1107, 76, 196], 'code': [2220, 16, 17, 33], 'label': 1}]
[{'comment': [2219, 34, 821, 449, 117, 516, 117, 97, 158, 149, 208, 51, 1951, 4, 310, 51, 34, 2218, 381, 34, 339, 587, 1340, 1339], 'code': [2217, 49, 882, 15, 881, 16, 1518, 15, 2216, 16, 2215, 15, 2214, 16, 17, 9, 56, 17, 17], 'label': 0}]
[{'comment': [160, 179, 2213, 0, 179, 825, 145, 120, 100, 1336, 757, 2212, 158, 51, 34, 84], 'code': [2211, 16, 17, 33], 'label': 0}]
[{'comment': [72, 34, 155, 64, 102, 157, 0, 148, 84, 64, 743, 34, 155, 1298, 105, 109, 209, 0, 141, 51, 82, 1297, 1047, 45, 698, 117, 273], 'code': [86, 15, 2210, 49, 236, 33], 'label': 0}]
[{'comment': [2209, 89, 34, 384, 986, 115, 75, 1730, 116, 103], 'code': [1294, 2208, 49, 79, 1294, 16, 797, 17, 33], 'label': 0}]
[{'comment': [253, 157, 384, 986, 72, 340, 605, 64, 2207], 'code': [2206, 49, 79, 1000, 16, 17, 33], 'label': 0}]
[{'comment': [1483, 109, 2205, 2204, 34, 527, 51, 493, 34, 155, 141, 273], 'code': [420, 15, 1690, 16, 1

[{'comment': [429, 34, 322, 1048, 86, 124, 64, 681, 75, 143, 142, 684, 2120, 1158, 245, 2119, 86, 124, 72, 713], 'code': [67, 122, 323, 156, 33], 'label': 0}]
[{'comment': [440, 339, 186], 'code': [104, 122, 1053, 1608, 33], 'label': 0}]
[{'comment': [440, 339, 186], 'code': [67, 122, 2118, 1053, 2117, 16, 17, 35], 'label': 1}]
[{'comment': [841, 601], 'code': [67, 122, 90, 2116, 16, 17, 324, 2115, 35], 'label': 1}]
[{'comment': [682, 1588, 76, 1308, 369], 'code': [2114, 15, 1531, 15, 2113, 16, 17, 33], 'label': 0}]
[{'comment': [2112, 2111, 76, 2110, 138, 0, 2109, 2108], 'code': [67, 81, 1275, 35], 'label': 0}]
[{'comment': [1543, 2107, 89, 115, 199, 144, 75, 2106, 544, 91, 64, 51, 372, 34, 22, 2105, 845, 51, 22, 56, 2104, 2103, 147, 2102, 260, 168, 402, 51, 34, 87, 923, 101, 2101, 64, 1320], 'code': [67, 122, 90, 1946, 16, 17, 35], 'label': 0}]
[{'comment': [139, 367, 1379, 679, 51, 2100, 706], 'code': [67, 122, 131, 188, 2099, 49, 1274, 92, 2098, 33], 'label': 0}]
[{'comment': [2097

In [16]:
dataframe = pd.read_csv("csv/oneLineCode.csv", na_filter = False)
for i,data in dataframe.iterrows():
    s =  data["comment"]
    c = data["code"]
    print(i)
    print("comment:")
    print(s)
    print("code:")
    print(c)

0
comment:
@implNote taken from {@link com.sun.javafx.scene.control.behavior.TextAreaBehavior#contextMenuRequested(javafx.scene.input.ContextMenuEvent)}
code:
public static void showContextMenu(TextArea textArea, ContextMenu contextMenu, ContextMenuEvent e) {
1
comment:
icon.setToolTipText(printedViewModel.getLocalization());
code:
TABLE_ICONS.put(SpecialField.PRINTED, icon);
2
comment:
Synchronize changes of the underlying date value with the temporalAccessorValue
code:
BindingsHelper.bindBidirectional(valueProperty(), temporalAccessorValue,
3
comment:
Ask if the user really wants to close the given database
code:
private boolean confirmClose(BasePanel panel) {
4
comment:
css: information *
code:
INTEGRITY_INFO(MaterialDesignIcon.INFORMATION),
5
comment:
Check if there is a default type with the same name. If so, this is a modification of that type, so remove the default one:
code:
ExternalFileType toRemove = null;
6
comment:
each entry type
code:
for (Map.Entry<EntryType, TextField> 

250
comment:
paranoia: ensure that there are never two instances of AllEntriesGroup
code:
if (newGroups.getGroup() instanceof AllEntriesGroup) {
251
comment:
Create button that triggers search
code:
Button search = new Button(Localization.lang("Search"));
252
comment:
Activate context menu if user presses the "context menu" key
code:
tableView.addEventHandler(KeyEvent.KEY_RELEASED, event -> {
253
comment:

code:
public static <V> V runInJavaFXThread(Callable<V> callable) {
254
comment:
Add the edit field for Bibtex-key.
code:
fields.add(InternalField.KEY_FIELD);
255
comment:
Remove all reference marks that don't look like JabRef citations:
code:
List<String> tmp = new ArrayList<>();
256
comment:
Although entries are redefined without use, this also updates linkSourceBase
code:
Map<BibEntry, BibDatabase> entries = findCitedEntries(databases, cited, linkSourceBase);
257
comment:
for the remaining entries, intersection has to be used this approach ensures that one empty keyword list leads

378
comment:
Process arguments
code:
ArgumentProcessor argumentProcessor = new ArgumentProcessor(arguments, ArgumentProcessor.Mode.INITIAL_START);
379
comment:
Nothing set, so we use the default values:
code:
while (preferences.get(JabRefPreferences.CUSTOM_TAB_NAME + "_def" + i) != null) {
380
comment:
Run the actual open in a thread to prevent the program locking until the file is loaded.
code:
BackgroundTask.wrap(() -> openIt(file, dialog.importEntries(), dialog.importStrings(), dialog.importGroups(), dialog.importSelectorWords()))
381
comment:
It could be that somehow the path is null, for example if it got deleted in the meantime
code:
if (directory == null) {
382
comment:
extract url parameter
code:
String[] pairs = query.split("&");
383
comment:
Collect all callables to execute in one collection.
code:
for (BibEntry entry : entries) {
384
comment:
TODO: Add undo Store undo information. AbstractUndoableEdit undoAddPreviousEntries = null; UndoableModifyGroup undo = new UndoableModi

TABLE_ICONS.put(StandardField.PDF, icon);
575
comment:
First get a list of the default file types as a starting point:
code:
List<ExternalFileType> types = new ArrayList<>(getDefaultExternalFileTypes());
576
comment:
The name to change to is already in the database, so we can't comply.
code:
LOGGER.info("Cannot rename string '" + mem + "' to '" + disk + "' because the name "
577
comment:
Look up the external file type registered with this name, if any.
code:
public Optional<ExternalFileType> getExternalFileTypeByName(String name) {
578
comment:
TODO: Add undo/redo ce.addEdit(new UndoableInsertEntry(panel.getDatabase(), entry));
code:

579
comment:
The action concerned with generate a new (sub-)database from latex AUX file.
code:
public class NewSubLibraryAction extends SimpleCommand {
580
comment:
Starts the search of unlinked files according chosen directory and the file type selection. The search will process in a separate thread and a progress indicator will be displayed.
code:
priv

public <T> BackgroundTask<T> thenRun(Function<V, T> nextOperation) {
775
comment:
This action checks whether any new custom entry types were loaded from this BIB file. If so, an offer to remember these entry types is given.
code:
public class CheckForNewEntryTypesAction implements GUIPostOpenAction {
776
comment:

code:
private void openTheFile(Path file, boolean raisePanel) {
777
comment:
Other fields
code:
entryEditorTabs.add(new OtherFieldsTab(databaseContext, panel.getSuggestionProviders(), undoManager,
778
comment:
Register listener The wrapper created by the FXCollections will set a weak listener on the wrapped list. This weak listener gets garbage collected. Hence, we need to maintain a reference to this list.
code:
entriesList = databaseContext.getDatabase().getEntries();
779
comment:
Try to download fulltext PDF for selected entry(ies) by following URL or DOI link.
code:
public class FindFullTextAction extends SimpleCommand {
780
comment:
Editor for external file types.
code:


code:
public static JabRefPreferences prefs;
975
comment:
Key binding preferences
code:
private static KeyBindingRepository keyBindingRepository;
976
comment:
Key binding preferences
code:
public static synchronized KeyBindingRepository getKeyPrefs() {
977
comment:
Background tasks
code:
public static void startBackgroundTasks() throws JabRefException {
978
comment:
FIXME: Workaround for bug https://github.com/Microsoft/ApplicationInsights-Java/issues/662
code:
SDKShutdownActivity.INSTANCE.stopAll();
979
comment:
Static variables for graphics files and keyboard shortcuts.
code:
public class GUIGlobals {
980
comment:
Perform initializations that are only used in graphical mode. This is to prevent the "Xlib: connection to ":0.0" refused by server" error when access to the X server on Un*x is unavailable.
code:
public static void init() {
981
comment:
add some additional space to improve appearance
code:
public static final int WIDTH_ICON_COL = 16 + 12;
982
comment:
Width of Ranking Icon 

In [54]:
delimiters = "#", ".",",","<b>","</b>","-",":","<br>","_","?"," ",";"
example = "stacko#verFlow (c) is a=Wesome... isn't it? D/?DD"
def splitText(delimiters, string, maxsplit=0):
    string.replace()
    import re
    regexPattern = '|'.join(map(re.escape, delimiters))
#     print(regexPattern)
    result =  re.split(regexPattern, string, maxsplit)
    f = []
    for r in result:
        p =  re.sub(r'((?<=[a-z])[A-Z]|(?<!\A)[A-Z](?=[a-z]))', r' \1', r)
#         print(p)
        for q in p.split():
            if q != "":
                f.append(q)
    return f
splitText(delimiters,example)

['stacko',
 'ver',
 'Flow',
 '(c)',
 'is',
 'a=',
 'Wesome',
 "isn't",
 'it',
 'D/',
 'DD']

In [60]:
tokens = javalang.tokenizer.tokenize('System.out.println("Hello " + "world");')
parser = javalang.parser.Parser(tokens)
parser.parse_expression()

MethodInvocation(arguments=[BinaryOperation(operandl=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value="Hello "), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value="world"), operator=+)], member=println, postfix_operators=[], prefix_operators=[], qualifier=System.out, selectors=[], type_arguments=None)

In [73]:
f = "This class monitors a set of files for changes. Upon detecting a change it notifies the registered {@link FileUpdateListener}s. Implementation based on https://stackoverflow.com/questions/16251273/can-i-watch-for-single-file-change-with-watchservice-not-the-whole-directory"
splitComment(f)

https://stackoverflow.com/questions/16251273/can-i-watch-for-single-file-change-with-watchservice-not-the-whole-directory


['This',
 'class',
 'monitors',
 'a',
 'set',
 'of',
 'files',
 'for',
 'changes',
 'Upon',
 'detecting',
 'a',
 'change',
 'it',
 'notifies',
 'the',
 'registered',
 '{@link',
 'File',
 'Update',
 'Listener}s',
 'Implementation',
 'based',
 'on',
 'https']