In [None]:
# default_exp mining.ir

# Information Retrieval and Traceability Interfaces
> Implementing Common Information Retrieval Interfaces
> Author: @danaderp December 2020

We test different similarity and distance measures, based on [blog](https://www.kdnuggets.com/2017/08/comparing-distance-measurements-python-scipy.html) and [blog2](https://www.kdnuggets.com/2019/01/comparison-text-distance-metrics.html).

In [None]:
# ! pip install -e . <----- Install in the console

In [None]:
#export
import numpy as np
import gensim
import pandas as pd
from itertools import product 
from random import sample 
import functools 
import os

In [None]:
#export
from gensim.models import WordEmbeddingSimilarityIndex
from gensim.similarities import SparseTermSimilarityMatrix
from gensim import corpora
from datetime import datetime
from enum import Enum, unique, auto
from ds4se.mgmnt.prep.conv import *

In [None]:
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cosine.html
#export
from scipy.spatial import distance
from scipy.stats import pearsonr

In [None]:
#export
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

## Enums

In [None]:
#export
#@unique
class VectorizationType(Enum):
    '''Kinds of vectorization an experiment can be configured with.'''
    word2vec = 1
    doc2vec = 2
    vsm2vec = 3

In [None]:
VectorizationType.word2vec

<VectorizationType.word2vec: 1>

In [None]:
#export
#@unique
class DistanceMetric(Enum):
    '''Distance measures that can be requested for a pair of artifacts.'''
    WMD = 1 #Word mover's distance
    COS = 2 #Cosine distance
    SCM = 3 #Soft cosine measure (as a distance)
    EUC = 4 #Euclidean distance
    MAN = 5 #Manhattan distance

In [None]:
#export
#@unique
class SimilarityMetric(Enum):
    '''Similarity measures reported next to (or instead of) a distance.'''
    WMD_sim = 1
    COS_sim = 2
    SCM_sim = 3
    EUC_sim = 4
    MAN_sim = 5
    Pearson = 6 #Has no distance counterpart in DistanceMetric

In [None]:
#export
class EntropyMetric(Enum):
    '''Information-theoretic measures that can be requested for a pair of artifacts.'''
    #Minimum shared information Entropy
    MSI_I = 1
    #Minimum shared information Extropy
    MSI_X = 2
    #Mutual information
    MI = 3
    #Joint information
    JI = 4
    #Conditioned Entropy given the output I(x|y)
    Loss = 5
    #Conditioned Entropy given the input I(y|x)
    Noise = 6
    #Self Information of the source artifacts
    Entropy_src = 7
    #Self Information of the target artifacts
    Entropy_tgt = 8

In [None]:
#export
class SoftwareArtifacts(Enum):
    '''Artifact kinds; each value is the tag compared against the `type` column of a corpus.'''
    REQ = 'req'
    TC  = 'tc'
    SRC = 'src'
    PY  = 'py'
    PR  = 'pr'

In [None]:
#export
#@unique
class Preprocessing(Enum):
    '''How the text column of a corpus was preprocessed.'''
    conv = 1 #Conventional preprocessing: whitespace separated tokens
    bpe = 2 #BPE preprocessing: the cell holds a stringified list of sub-word tokens

In [None]:
#export
#@unique
class LinkType(Enum):
    '''Kinds of traceability links, named <source artifact>2<target artifact>.'''
    req2tc = 1
    req2src = 2
    issue2src = 3
    pr2src = 4

In [None]:
#tst
LinkType.req2tc

<LinkType.req2tc: 1>

In [None]:
#tst
Preprocessing.bpe

<Preprocessing.bpe: 2>

## 1. Setting-Up Testing Environment

In [None]:
#hide
path_data = '../dvc-ds4se/' #dataset path

In [None]:
#hide
#experiment 0.0.0
#check it out in https://docs.google.com/spreadsheets/d/1UggaKFK8Qr5YltG_X9dN9BUlgH-GNiAbfPkcqSxjyoo/edit?usp=sharing
path_to_trained_model = path_data+'models/wv/bpe8k/[word2vec-Java-Py-SK-500-20E-8k-1594090297.869643].model'
path_model_prefix = path_data+'models/bpe/sentencepiece/wiki_py_java_bpe_8k'

In [None]:
#hide
#experiment 0.0.0
#Experiment 1 with Libest Conv preprocessing
def libest_params():
    '''Build the parameter dictionary of experiment 0.0.0 (LibEST, conventional preprocessing).

    Every path is derived from the module-level `path_data`, `path_to_trained_model`
    and `path_model_prefix` at call time.'''
    benchmark_root = path_data + 'se-benchmarking/traceability/'

    #Where the corpus lives and how its text column has to be read
    corpus_config = {
        "system_path": benchmark_root + 'cisco/libest_data/[libest-all-corpus-1596063103.098236].csv',
        "sep": '~',
        "names": ['ids','conv'],
        "prep": Preprocessing.conv
    }

    experiment = {
        "vectorizationType": VectorizationType.word2vec,
        "linkType": LinkType.req2tc,
        "system": 'libest',
        "path_to_trained_model": path_to_trained_model,
        "source_type": SoftwareArtifacts.REQ.value,
        "target_type": SoftwareArtifacts.TC.value,
        "system_path_config": corpus_config,
        "saving_path": path_data + 'metrics/traceability/experiments0.0.x/',
        "names": ['Source','Target','Linked?'],
        "model_prefix": path_model_prefix, #For BPE Analysis
        "path_mappings": benchmark_root + 'testbeds/groundtruth/english/[libest-ground-req-to-tc].txt',
    }
    return experiment

In [None]:
#hide
parameters = libest_params()
parameters

{'vectorizationType': <VectorizationType.word2vec: 1>,
 'linkType': <LinkType.req2tc: 1>,
 'system': 'libest',
 'path_to_trained_model': '../dvc-ds4se/models/wv/bpe8k/[word2vec-Java-Py-SK-500-20E-8k-1594090297.869643].model',
 'source_type': 'req',
 'target_type': 'tc',
 'system_path_config': {'system_path': '../dvc-ds4se/se-benchmarking/traceability/cisco/libest_data/[libest-all-corpus-1596063103.098236].csv',
  'sep': '~',
  'names': ['ids', 'conv'],
  'prep': <Preprocessing.conv: 1>},
 'saving_path': '../dvc-ds4se/metrics/traceability/experiments0.0.x/',
 'names': ['Source', 'Target', 'Linked?'],
 'model_prefix': '../dvc-ds4se/models/bpe/sentencepiece/wiki_py_java_bpe_8k',
 'path_mappings': '../dvc-ds4se/se-benchmarking/traceability/testbeds/groundtruth/english/[libest-ground-req-to-tc].txt'}

In [None]:
parameters['source_type']

'req'

In [None]:
#tst
parameters['system_path_config']['system_path']

'../dvc-ds4se/se-benchmarking/traceability/cisco/libest_data/[libest-all-corpus-1596063103.098236].csv'

In [None]:
#tst
parameters['system_path_config']['names'][1]

'conv'

In [None]:
parameters['system_path_config']['sep'] #tst

'~'

In [None]:
#hide
df_all_system = pd.read_csv(
            parameters['system_path_config']['system_path'], 
            #names = params['system_path_config']['names'], #include the names into the files!!!
            header = 0, 
            index_col = 0, 
            sep = parameters['system_path_config']['sep'] 
        )

In [None]:
df_all_system.head(1)

Unnamed: 0,ids,filenames,text,type,conv,bpe128k,bpe32k,bpe8k
0,test_data/LibEST_semeru_format/test/us903.c,us903.c,/*--------------------------------------------...,tc,unit test user stori server simpl enrol august...,"['▁/*', '----------------', '----------------'...","['▁/', '*', '--------', '--------', '--------'...","['▁/', '*', '-', '-', '-', '-', '-', '-', '-',..."


In [None]:
#hide
tag = parameters['system_path_config']['names'][1]
[doc.split() for doc in df_all_system[df_all_system[tag].notnull()][tag].values]

[['unit',
  'test',
  'user',
  'stori',
  'server',
  'simpl',
  'enrol',
  'august',
  'copyright',
  'cisco',
  'system',
  'inc',
  'right',
  'reserv',
  'includ',
  'stdio',
  'ifndef',
  'win',
  'includ',
  'unistd',
  'endif',
  'includ',
  'est',
  'includ',
  'curl',
  'curl',
  'includ',
  'curl',
  'util',
  'includ',
  'test',
  'util',
  'includ',
  'server',
  'includ',
  'openssl',
  'ssl',
  'ifdef',
  'cunit',
  'includ',
  'cunit',
  'basic',
  'includ',
  'cunit',
  'autom',
  'endif',
  'ifndef',
  'win',
  'static',
  'char',
  'test',
  'outfil',
  'filenam',
  'max',
  'test',
  'hdr',
  'defin',
  'cacert',
  'est',
  'cacert',
  'crt',
  'defin',
  'explicit_cert',
  'us903',
  'cert',
  'pem',
  'defin',
  'us903_explicit_key',
  'us903',
  'key',
  'pem',
  'defin',
  'us903_cacert',
  'est',
  'cacert',
  'crt',
  'defin',
  'us903_trusted_cert',
  'trustedcert',
  'crt',
  'defin',
  'est',
  'privat',
  'estservercertandkey',
  'pem',
  'els',
  'static'

In [None]:
len(df_all_system[tag].values) #tst

87

In [None]:
#tst
len(df_all_system[df_all_system[tag].notnull()]) #some files are _init_ files and therefore empty; count only the rows whose text is not null

87

In [None]:
#tst
df_all_system[df_all_system[tag].notnull()][tag].values

array(['unit test user stori server simpl enrol august copyright cisco system inc right reserv includ stdio ifndef win includ unistd endif includ est includ curl curl includ curl util includ test util includ server includ openssl ssl ifdef cunit includ cunit basic includ cunit autom endif ifndef win static char test outfil filenam max test hdr defin cacert est cacert crt defin explicit_cert us903 cert pem defin us903_explicit_key us903 key pem defin us903_cacert est cacert crt defin us903_trusted_cert trustedcert crt defin est privat estservercertandkey pem els static char test5_outfil filename_max us903 test5 hdr defin us903_cacert est cacert crt defin us903_explicit_cert us903 cert pem defin us903_explicit_key us903 key pem defin us903_cacert est cacert crt defin us903_trusted_cert trustedcert crt defin est privat estservercertandkey pem endif static unsign char cacert null static int cacerts_len defin us903_retry_interv 3600 defin us903_tcp_port 29001 follow csr generat use follow o

In [None]:
#tst
df_all_system.loc[df_all_system['type'] == parameters['source_type']][parameters['system_path_config']['names']]

Unnamed: 0,ids,conv
35,test_data/LibEST_semeru_format/requirements/RQ...,requir http uri control est server must suppor...
36,test_data/LibEST_semeru_format/requirements/RQ...,requir server side key generat respons request...
37,test_data/LibEST_semeru_format/requirements/RQ...,requir http base client authent est server may...
38,test_data/LibEST_semeru_format/requirements/RQ...,requir csr attribut request est client request...
39,test_data/LibEST_semeru_format/requirements/RQ...,requir server side key generat est client may ...
40,test_data/LibEST_semeru_format/requirements/RQ...,requir client author decis issu certif client ...
41,test_data/LibEST_semeru_format/requirements/RQ...,requir csr attribut polici may allow inclus cl...
42,test_data/LibEST_semeru_format/requirements/RQ...,requir simpl enrol client https post simpleenr...
43,test_data/LibEST_semeru_format/requirements/RQ...,requir csr attribut follow exampl valid csratt...
44,test_data/LibEST_semeru_format/requirements/RQ...,requir http layer http use transfer est messag...


In [None]:
df_all_system.loc[df_all_system['type'] == parameters['target_type']][parameters['system_path_config']['names']]

Unnamed: 0,ids,conv
0,test_data/LibEST_semeru_format/test/us903.c,unit test user stori server simpl enrol august...
1,test_data/LibEST_semeru_format/test/us3496.c,unit test uri path segment extens support marc...
2,test_data/LibEST_semeru_format/test/us899.c,unit test user stori client simpl enrol septem...
3,test_data/LibEST_semeru_format/test/us4020.c,unit test user stori unit test client proxi mo...
4,test_data/LibEST_semeru_format/test/us897.c,unit test user stori client cacert june copyri...
5,test_data/LibEST_semeru_format/test/us1060.c,unit test user stori tls srp support server pr...
6,test_data/LibEST_semeru_format/test/us900.c,unit test user stori server csr attribut novem...
7,test_data/LibEST_semeru_format/test/us896.c,unit test user stori client csr attribut novem...
8,test_data/LibEST_semeru_format/test/us894.c,unit test user stori proxi cacert novemb copyr...
9,test_data/LibEST_semeru_format/test/us1005.c,unit test user stori client easi provis novemb...


## 1. Defining BasicSequenceVectorization

In [None]:
#tst
print(list(VectorizationType), list(DistanceMetric), list(SimilarityMetric), list(LinkType))

[<VectorizationType.word2vec: 1>, <VectorizationType.doc2vec: 2>, <VectorizationType.vsm2vec: 3>] [<DistanceMetric.WMD: 1>, <DistanceMetric.COS: 2>, <DistanceMetric.SCM: 3>, <DistanceMetric.EUC: 4>, <DistanceMetric.MAN: 5>] [<SimilarityMetric.WMD_sim: 1>, <SimilarityMetric.COS_sim: 2>, <SimilarityMetric.SCM_sim: 3>, <SimilarityMetric.EUC_sim: 4>, <SimilarityMetric.MAN_sim: 5>, <SimilarityMetric.Pearson: 6>] [<LinkType.req2tc: 1>, <LinkType.req2src: 2>, <LinkType.issue2src: 3>, <LinkType.pr2src: 4>]


In [None]:
#export
class BasicSequenceVectorization():
    '''Base class of the sequence (vanilla) vectorization; other classes can inherit from it.

    It loads the corpus of one system, separates source from target artifacts, builds the
    vocabulary of the traceability corpus and provides the shared steps to generate candidate
    links, compute metrics on them, match them with a ground truth and save them.
    Subclasses have to provide `distance(metric_list, link)`, which `computeDistanceMetric` calls.'''

    def __init__(self, params):
        '''@params experiment dictionary. Read here: 'system_path_config' (with 'system_path',
        'sep', 'names' and 'prep'), 'source_type' and 'target_type'. Other methods also read
        'names', 'path_mappings', 'saving_path', 'system', 'vectorizationType' and 'linkType'.'''
        self.params = params
        self.df_nonground_link = None
        self.df_ground_link = None
        self.prep = ConventionalPreprocessing(self.params, bpe = True)

        corpus_config = self.params['system_path_config']
        self.df_all_system = pd.read_csv(
            corpus_config['system_path'],
            header = 0, #the column names are expected inside the file
            index_col = 0,
            sep = corpus_config['sep']
        )

        columns = corpus_config['names']
        artifact_type = self.df_all_system['type']
        #Copies, so that the NA clean-up below never writes into a slice of df_all_system
        self.df_source = self.df_all_system.loc[artifact_type == self.params['source_type']][columns].copy()
        self.df_target = self.df_all_system.loc[artifact_type == self.params['target_type']][columns].copy()

        #NA verification
        tag = columns[1]
        self.df_source[tag] = self.df_source[tag].fillna("")
        self.df_target[tag] = self.df_target[tag].fillna("")

        ## self.document and self.dictionary is the vocabulary of the traceability corpus
        ## Do not confuse it with the dictionary of the general vectorization model
        if corpus_config['prep'] == Preprocessing.conv: #if conventional preprocessing
            non_null_text = self.df_all_system[self.df_all_system[tag].notnull()][tag].values
            self.documents = [doc.split() for doc in non_null_text] #Preparing Corpus
            self.dictionary = corpora.Dictionary( self.documents ) #Preparing Dictionary
            self.vocab = dict.fromkeys( self.dictionary.token2id.keys(),0 )
            logging.info("conventional preprocessing documents, dictionary, and vocab for the test corpus")

        elif corpus_config['prep'] == Preprocessing.bpe:
            #WARNING: eval executes whatever the corpus cell contains; only use it on trusted corpus files
            self.documents = [eval(doc) for doc in self.df_all_system[tag].values] #Preparing Corpus
            self.dictionary = corpora.Dictionary( self.documents ) #Preparing Dictionary
            self.computing_bpe_vocab()
            logging.info("bpe preprocessing documents, dictionary, and vocab for the test corpus")

        #Labels of the values that every requested metric contributes to a row, in order
        #This can be extended for future metrics <---------------------
        #TODO include mutual and join information
        self.dict_labels = {
            DistanceMetric.COS:[DistanceMetric.COS, SimilarityMetric.COS_sim],
            SimilarityMetric.Pearson:[SimilarityMetric.Pearson],
            DistanceMetric.EUC:[DistanceMetric.EUC, SimilarityMetric.EUC_sim],
            DistanceMetric.WMD:[DistanceMetric.WMD, SimilarityMetric.WMD_sim],
            DistanceMetric.SCM:[DistanceMetric.SCM, SimilarityMetric.SCM_sim],
            DistanceMetric.MAN:[DistanceMetric.MAN, SimilarityMetric.MAN_sim],
            EntropyMetric.MSI_I:[EntropyMetric.MSI_I, EntropyMetric.MSI_X],
            EntropyMetric.MI:[EntropyMetric.Entropy_src, EntropyMetric.Entropy_tgt,
                              EntropyMetric.JI, EntropyMetric.MI,
                              EntropyMetric.Loss, EntropyMetric.Noise
                             ]
        }

    def computing_bpe_vocab(self):
        '''Sets self.vocab to every piece of the BPE model plus the corpus elements the model ignores,
        all of them with a count of zero.'''
        ####INFO science params
        #TODO generalize bpe8k parameter
        #NOTE(review): __init__ has to eval() the bpe cells, which suggests they are stringified lists;
        #if so, set(doc) collects single characters instead of tokens -- confirm
        abstracted_vocab = [ set(doc) for doc in self.df_all_system[ 'bpe8k' ].values] #creation of sets
        abstracted_vocab = functools.reduce( lambda a,b : a.union(b), abstracted_vocab ) #union of sets
        self.vocab = {self.prep.sp_bpe.id_to_piece(piece_id): 0 for piece_id in range(self.prep.sp_bpe.get_piece_size())}
        dict_abs_vocab = { elem : 0 for elem in abstracted_vocab - set(self.vocab.keys()) } #Ignored vocab by BPE
        self.vocab.update(dict_abs_vocab) #Updating

    def ground_truth_processing(self, path_to_ground_truth = '', from_mappings = False):
        '''Optional step when the corpus has a ground truth. Returns a list of (source, target) tuples.

        @path_to_ground_truth text file whose lines are "<source> <target> <target> ...";
        only read when from_mappings is False
        @from_mappings when True, the links come from the 'id_pr' and 'doc_id' columns of the
        csv at params['path_mappings']
        Raises AssertionError when the text file repeats a link. An empty file yields [].'''

        if from_mappings:
            df_mapping = pd.read_csv(self.params['path_mappings'], header = 0, sep = ',')
            ground_links = list(zip(df_mapping['id_pr'].astype(str), df_mapping['doc_id']))
        else:
            #Organizing The Ground Truth under the given format
            with open(path_to_ground_truth,'r') as ground_truth: #closed even if parsing fails
                ground_links = [
                    (fields[0], elem)
                    for fields in (line.strip().split() for line in ground_truth)
                    for elem in fields[1:] #a line without targets contributes nothing
                ]
            assert len(ground_links) ==  len(set(ground_links)) #To Verify Redundancies in the file
        return ground_links

    def samplingLinks(self, sampling = False, samples = 10, basename = False):
        '''Returns the candidate links as (source id, target id) tuples.

        @sampling when True, only `samples` random links are returned, otherwise the whole product
        @samples number of links to draw when sampling
        @basename when True, the ids are reduced to their file name'''

        if basename:
            source = [os.path.basename(elem) for elem in self.df_source['ids'].values ]
            target = [os.path.basename(elem) for elem in self.df_target['ids'].values ]
        else:
            source = self.df_source['ids'].values
            target = self.df_target['ids'].values

        links = list( product( source , target ) )
        if sampling:
            links = sample( links, samples )

        return links

    def cos_scipy(self, vector_v, vector_w):
        '''Returns [cosine distance, 1 - cosine distance]'''
        cos =  distance.cosine( vector_v, vector_w )
        return [cos, 1.-cos]

    def euclidean_scipy(self, vector_v, vector_w):
        '''Returns [euclidean distance, 1/(1 + distance)]'''
        dst = distance.euclidean(vector_v,vector_w)
        return [dst, 1./(1.+dst)] #Computing the inverse for similarity

    def manhattan_scipy(self, vector_v, vector_w):
        '''Returns [manhattan (cityblock) distance, 1/(1 + distance)]'''
        dst = distance.cityblock(vector_v,vector_w)
        return [dst, 1./(1.+dst)] #Computing the inverse for similarity

    def pearson_abs_scipy(self, vector_v, vector_w):
        '''Returns [absolute value of the pearson correlation].
        We are not sure that pearson correlation works well on doc2vec inference vectors'''
        corr, _ = pearsonr(vector_v, vector_w)
        return [abs(corr)] #Absolute value of the correlation


    def computeDistanceMetric(self, links, metric_list):
        '''Metric List Iteration.
        Returns (rows, labels): every row is [source, target, value, ...] and labels names the
        values in the same order. The values come from self.distance, defined by the subclasses.'''

        #tracking of the labels
        metric_labels = [ label for metric in metric_list for label in self.dict_labels[metric] ]
        #Return the link with metrics
        distSim = [ [link[0], link[1]] + self.distance( metric_list, link ) for link in links ]

        return distSim, metric_labels

    def ComputeDistanceArtifacts(self, metric_list, sampling = False , samples = 10, basename = False):
        '''Activates Distance and Similarity Computations; the result is kept in self.df_nonground_link
        @metric_list metrics to compute; every one has to be a key of self.dict_labels
        @sampling is False by the default
        @samples is the number of samples (or links) to be generated
        @basename when True, the links use the file names instead of the full ids'''
        links_ = self.samplingLinks( sampling, samples, basename )

        docs, metric_labels = self.computeDistanceMetric( metric_list=metric_list, links=links_) #checkpoints
        link_columns = [self.params['names'][0], self.params['names'][1]]
        self.df_nonground_link = pd.DataFrame(docs, columns = link_columns + metric_labels) #Transforming into a Pandas
        logging.info("Non-groundtruth links computed")


    def SaveLinks(self, grtruth=False, sep=' ', mode='a'):
        '''Writes the links into a csv under params['saving_path']; the file name carries the system,
        the vectorization type, the link type, grtruth and the current timestamp.
        @grtruth when True saves self.df_ground_link, otherwise self.df_nonground_link'''
        timestamp = datetime.timestamp(datetime.now())
        path_to_link = self.params['saving_path'] + '['+ self.params['system'] + '-' + str(self.params['vectorizationType']) + '-' + str(self.params['linkType']) + '-' + str(grtruth) + '-{}].csv'.format(timestamp)

        df_links = self.df_ground_link if grtruth else self.df_nonground_link
        df_links.to_csv(path_to_link, header=True, index=True, sep=sep, mode=mode)

        logging.info('Saving in...' + path_to_link)

    def findDistInDF(self, g_tuple, from_mappings=False, semeru_format=False):
        '''Return the index values of the matched mappings
        .eq is used for Source since it must match the exact code to avoid number substrings
        for the target, the substring might works fine
        @g_tuple one (source, target) tuple of the ground truth'''
        #Same columns the links were built with (params['names']), instead of fixed labels
        sources = self.df_ground_link[self.params['names'][0]]
        targets = self.df_ground_link[self.params['names'][1]]

        if from_mappings: #SACP Format
            matched = sources.eq(g_tuple[0]) & targets.str.contains(g_tuple[1], regex=False)
            logging.info('findDistInDF: from_mappings')
        elif semeru_format: #LibEST Format
            matched = sources.str.contains(g_tuple[0], regex=False) & targets.str.contains(g_tuple[1], regex=False)
            logging.info('findDistInDF: semeru_format')
        else: #By Default use Semeru Format
            #Everything before the first '.' is matched; the source additionally needs a trailing '-'
            source_stem = g_tuple[0][:g_tuple[0].find('.')] + '-'
            target_stem = g_tuple[1][:g_tuple[1].find('.')]
            matched = sources.str.contains( source_stem ) & targets.str.contains( target_stem )
            logging.info('findDistInDF: default')
        return self.df_ground_link[matched].index.values


    def MatchWithGroundTruth(self, path_to_ground_truth='', from_mappings=False, semeru_format=False ):
        '''Copies self.df_nonground_link into self.df_ground_link and adds the column params['names'][2],
        which is 1 for the links found in the ground truth and 0 for the rest.'''
        linked = self.params['names'][2]
        self.df_ground_link = self.df_nonground_link.copy()
        self.df_ground_link[linked] = 0

        ground_links = self.ground_truth_processing(path_to_ground_truth,from_mappings)
        matchGT = [ self.findDistInDF( g , from_mappings=from_mappings, semeru_format=semeru_format ) for g in ground_links ]
        if matchGT:
            #One concatenation; unique because substring matching can report the same row twice
            #and update() cannot align duplicated labels
            matchGT = np.unique( np.concatenate(matchGT) )
        else:
            matchGT = np.array([], dtype=int) #no ground truth: every link stays at 0
        new_column = pd.Series(np.full([len(matchGT)], 1 ), name=linked, index = matchGT)

        self.df_ground_link.update(new_column)
        logging.info("Groundtruth links computed")

### Testing BasicSequenceVectorization

In [None]:
general2vec =  BasicSequenceVectorization(params = parameters)

2020-12-19 00:51:51,902 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:51:51,959 : INFO : built Dictionary(6957 unique tokens: ['");', '"../../', '("\\', '();', ')))']...) from 87 documents (total 88944 corpus positions)
2020-12-19 00:51:51,961 : INFO : conventional preprocessing documents, dictionary, and vocab for the test corpus


In [None]:
m = dict.fromkeys( general2vec.dictionary.token2id.keys(),0 ) #From traceability dataset!

In [None]:
n = general2vec.vocab

In [None]:
len(set(m.keys()) - set(n.keys())) #TODO

0

In [None]:
assert len(set( m.keys()) - set(n.keys())) == 0 

In [None]:
general2vec.documents

[['unit',
  'test',
  'user',
  'stori',
  'server',
  'simpl',
  'enrol',
  'august',
  'copyright',
  'cisco',
  'system',
  'inc',
  'right',
  'reserv',
  'includ',
  'stdio',
  'ifndef',
  'win',
  'includ',
  'unistd',
  'endif',
  'includ',
  'est',
  'includ',
  'curl',
  'curl',
  'includ',
  'curl',
  'util',
  'includ',
  'test',
  'util',
  'includ',
  'server',
  'includ',
  'openssl',
  'ssl',
  'ifdef',
  'cunit',
  'includ',
  'cunit',
  'basic',
  'includ',
  'cunit',
  'autom',
  'endif',
  'ifndef',
  'win',
  'static',
  'char',
  'test',
  'outfil',
  'filenam',
  'max',
  'test',
  'hdr',
  'defin',
  'cacert',
  'est',
  'cacert',
  'crt',
  'defin',
  'explicit_cert',
  'us903',
  'cert',
  'pem',
  'defin',
  'us903_explicit_key',
  'us903',
  'key',
  'pem',
  'defin',
  'us903_cacert',
  'est',
  'cacert',
  'crt',
  'defin',
  'us903_trusted_cert',
  'trustedcert',
  'crt',
  'defin',
  'est',
  'privat',
  'estservercertandkey',
  'pem',
  'els',
  'static'

In [None]:
len(general2vec.dictionary)

6957

In [None]:
general2vec.dictionary

<gensim.corpora.dictionary.Dictionary at 0x7f048075cb70>

In [None]:
general2vec.df_all_system.head(1)

Unnamed: 0,ids,filenames,text,type,conv,bpe128k,bpe32k,bpe8k
0,test_data/LibEST_semeru_format/test/us903.c,us903.c,/*--------------------------------------------...,tc,unit test user stori server simpl enrol august...,"['▁/*', '----------------', '----------------'...","['▁/', '*', '--------', '--------', '--------'...","['▁/', '*', '-', '-', '-', '-', '-', '-', '-',..."


In [None]:
general2vec.df_all_system.shape #data final tensor

(87, 8)

In [None]:
#tst for libest
path_to_ground_truth = parameters['path_mappings']
general2vec.ground_truth_processing(path_to_ground_truth)

[('RQ4.txt', 'us1864.c'),
 ('RQ4.txt', 'us901.c'),
 ('RQ4.txt', 'us1005.c'),
 ('RQ4.txt', 'us3512.c'),
 ('RQ4.txt', 'us895.c'),
 ('RQ4.txt', 'us897.c'),
 ('RQ4.txt', 'us900.c'),
 ('RQ6.txt', 'us1005.c'),
 ('RQ6.txt', 'us1159.c'),
 ('RQ6.txt', 'us3496.c'),
 ('RQ6.txt', 'us3512.c'),
 ('RQ6.txt', 'us3612.c'),
 ('RQ6.txt', 'us4020.c'),
 ('RQ6.txt', 'us748.c'),
 ('RQ6.txt', 'us893.c'),
 ('RQ6.txt', 'us895.c'),
 ('RQ6.txt', 'us896.c'),
 ('RQ6.txt', 'us897.c'),
 ('RQ6.txt', 'us898.c'),
 ('RQ6.txt', 'us899.c'),
 ('RQ6.txt', 'us900.c'),
 ('RQ8.txt', 'us1005.c'),
 ('RQ8.txt', 'us1159.c'),
 ('RQ8.txt', 'us1883.c'),
 ('RQ8.txt', 'us2174.c'),
 ('RQ8.txt', 'us3496.c'),
 ('RQ8.txt', 'us3512.c'),
 ('RQ8.txt', 'us3612.c'),
 ('RQ8.txt', 'us4020.c'),
 ('RQ8.txt', 'us748.c'),
 ('RQ8.txt', 'us893.c'),
 ('RQ8.txt', 'us895.c'),
 ('RQ8.txt', 'us896.c'),
 ('RQ8.txt', 'us897.c'),
 ('RQ8.txt', 'us898.c'),
 ('RQ8.txt', 'us899.c'),
 ('RQ8.txt', 'us900.c'),
 ('RQ11.txt', 'us1159.c'),
 ('RQ11.txt', 'us1883.c'),
 ('R

In [None]:
#tst for sacp <----- Warning!
#general2vec.ground_truth_processing(parameters['path_mappings'], from_mappings = True)

## 2. Artifacts Similarity with Word2Vec

In [None]:
#export
from collections import Counter
import dit
import math

In [None]:
#export
class Word2VecSeqVect(BasicSequenceVectorization):
    '''Sequence vectorization backed by a trained word2vec model.

    Adds the word mover's distance, the soft cosine measure and the information metrics
    (minimum shared information, mutual information) on top of the base class and
    implements `distance`, which the base class calls once per link.'''

    def __init__(self, params):
        '''@params same dictionary as the base class, plus 'path_to_trained_model' '''
        super().__init__(params)
        self.new_model = gensim.models.Word2Vec.load( params['path_to_trained_model'] )
        self.new_model.init_sims(replace=True)  # Normalizes the vectors in the word2vec class.
        #Computes cosine similarities between word embeddings and retrieves the closest
        #word embeddings by cosine similarity for a given word embedding.
        self.similarity_index = WordEmbeddingSimilarityIndex(self.new_model.wv)
        #Build a term similarity matrix and compute the Soft Cosine Measure.
        self.similarity_matrix = SparseTermSimilarityMatrix(self.similarity_index, self.dictionary)

        #Metric -> method; every method receives the two token lists of a link and returns a list
        self.dict_distance_dispatcher = {
            DistanceMetric.COS: self.cos_scipy,
            SimilarityMetric.Pearson: self.pearson_abs_scipy,
            DistanceMetric.WMD: self.wmd_gensim,
            DistanceMetric.SCM: self.scm_gensim,
            EntropyMetric.MSI_I: self.msi,
            EntropyMetric.MI: self.mutual_info
        }

    def wmd_gensim(self, sentence_a, sentence_b ):
        '''Returns [word mover's distance, associated similarity]'''
        wmd = self.new_model.wv.wmdistance(sentence_a, sentence_b)
        return [wmd, self.wmd_similarity(wmd)]

    def wmd_similarity(self, dist):
        '''Maps a distance into the similarity 1/(1 + distance)'''
        return 1./( 1.+float( dist ) ) #Associated Similarity

    def scm_gensim(self, sentence_a, sentence_b ):
        '''Compute SoftCosine Similarity of Gensim. Returns [1 - similarity, similarity]'''
        #Convert the sentences into bag-of-words vectors.
        sentence_1 = self.dictionary.doc2bow(sentence_a)
        sentence_2 = self.dictionary.doc2bow(sentence_b)

        #Return the inner product(s) between real vectors / corpora vec1 and vec2 expressed in a non-orthogonal normalized basis,
        #where the dot product between the basis vectors is given by the sparse term similarity matrix.
        scm_similarity = self.similarity_matrix.inner_product(sentence_1, sentence_2, normalized=True)
        return [1-scm_similarity, scm_similarity]

    def msi(self, sentence_a, sentence_b):
        '''@danaderp
        Minimum Shared Information
        Returns [entropy, extropy] of the distribution made of the minimum count of every
        vocabulary token in both documents, or [nan, nan] when no token is shared.'''
        vocab = self.vocab.copy()
        token_counts_1 = self.__get_cnts(sentence_a, vocab)
        token_counts_2 = self.__get_cnts(sentence_b, vocab)
        logging.info('token count processed')
        #Minimum Shared Tokens
        #TODO create an if down to include Joint Entropy by summing token_counts_1 and token_counts_2
        token_counts = { token: min(token_counts_1[token],token_counts_2[token]) for token in vocab }

        alphabet = list(token_counts) #same order as the frequencies below
        frequencies = self.__get_freqs(token_counts)
        logging.info('frequencies processed')

        if not frequencies:
            #"List is empty"
            entropies = float('nan')
            extropies = float('nan')
        else:
            scalar_distribution = dit.ScalarDistribution(alphabet, frequencies)
            logging.info('scalar_distribution processed')

            entropies = dit.shannon.entropy( scalar_distribution )
            logging.info('entropies processed')

            extropies = dit.other.extropy( scalar_distribution )
            logging.info('extropies processed')
        return [entropies,extropies]

    def mutual_info(self, sentence_a, sentence_b):
        """ Computing the manifold of metric of information
        Mutual information
        Joint Information
        Conditioned Information Loss
        Conditioned Information Noise
        Self-Information

        Returns [entropy_source, entropy_target, joint_entropy, mutual_information, loss, noise].
        An entropy that cannot be computed because its document has no tokens is nan, and so is
        every value derived from it (same convention as msi).
        """
        vocab = self.vocab.copy()
        token_counts_1 = self.__get_cnts(sentence_a, vocab)
        token_counts_2 = self.__get_cnts(sentence_b, vocab)
        logging.info('token count processed')

        #TODO verify redundancies in the alphabet
        logging.info('alphabet_source #'+ str(len(token_counts_1)))
        logging.info('alphabet_target #'+ str(len(token_counts_2)))

        logging.info('vocab #'+ str(len(self.vocab.keys())))
        logging.info('diff #'+ str(set(token_counts_1.keys()) - set(token_counts_2.keys())))
        #Computing Self-Information (or Entropy)
        entropy_source = self._shannon_entropy( token_counts_1 )
        entropy_target = self._shannon_entropy( token_counts_2 )

        #Computing Joint-information: entropy of the summed counts over the vocabulary
        token_counts = { token: (token_counts_1[token] + token_counts_2[token]) for token in vocab }
        logging.info('alphabet #'+ str(len(token_counts)))
        joint_entropy = self._shannon_entropy( token_counts )

        #Computing Mutual-Information
        mutual_information = entropy_source + entropy_target - joint_entropy

        #NOTE(review): the EntropyMetric comments describe Loss as I(x|y) and Noise as I(y|x); with the
        #source as x that would be joint - target and joint - source, the opposite of the two lines
        #below -- confirm which convention is intended
        #Computing Noise
        noise = joint_entropy - entropy_target

        #Computing Loss
        loss = joint_entropy - entropy_source

        return [entropy_source, entropy_target, joint_entropy,
                mutual_information, loss, noise]

    #ToDo Mutual information

    def distance(self, metric_list,link):
        '''Iterate on the metrics
        @metric_list metrics to compute; every one has to be a key of self.dict_distance_dispatcher
        @link (source id, target id); the ids only have to be contained in the ids of the corpus
        Returns one flat list with the values of every metric, in the order of metric_list
        Raises ValueError when the preprocessing of the corpus is not supported'''
        #Computation of sentences can be moved directly to wmd_gensim method if we cannot generalize it for
        #the remaining metrics
        ids = self.params['system_path_config']['names'][0]
        txt = self.params['system_path_config']['names'][1]
        prep = self.params['system_path_config']['prep']

        if prep not in (Preprocessing.conv, Preprocessing.bpe):
            raise ValueError('Unsupported preprocessing: ' + str(prep))

        #regex=False: the ids are paths and file names, so '.', '[' or '(' are taken literally
        text_a = self.df_source[self.df_source[ids].str.contains(link[0], regex=False)][txt].values[0]
        text_b = self.df_target[self.df_target[ids].str.contains(link[1], regex=False)][txt].values[0]

        if prep == Preprocessing.conv: #if conventional preprocessing
            sentence_a = text_a.split()
            sentence_b = text_b.split()
        else: #bpe
            #WARNING: eval executes whatever the corpus cell contains; only use it on trusted corpus files
            sentence_a = eval(text_a)
            sentence_b = eval(text_b)

        dist = [ self.dict_distance_dispatcher[metric](sentence_a,sentence_b) for metric in metric_list]
        logging.info("Computed distances or similarities "+ str(link) + str(dist))
        return [ value for values in dist for value in values ] #Always return a list

    #TODO substitute this block in the future by importing information science module
    def _shannon_entropy(self, token_counts):
        '''Shannon entropy of the relative frequencies of token_counts, nan when all the counts are zero'''
        frequencies = self.__get_freqs(token_counts)
        if not frequencies:
            return float('nan')
        #list(token_counts) and frequencies follow the same dictionary order
        return dit.shannon.entropy( dit.ScalarDistribution(list(token_counts), frequencies) )

    def __get_cnts(self, toks, vocab):
        '''@danaderp
        Counts tokens within ONE document
        The counter starts from vocab, so every vocabulary token is a key even with a count of zero'''
        cnt = Counter(vocab)
        for tok in toks:
            cnt[tok] += 1
        return cnt

    def __get_freqs(self, dict_token_counts):
        '''Relative frequency of every token, in the order of the dictionary; [] when all the counts are zero'''

        num_tokens = sum( dict_token_counts.values() ) #number of subwords inside the document
        if num_tokens == 0.0:
            frequencies = []
            logging.info('---------------> NO SHARED INFORMATION <-------------------------')
        else:
            frequencies = [ (dict_token_counts[token])/num_tokens for token in dict_token_counts ]
        return frequencies
    #################################3


In [None]:
#export
def LoadLinks(timestamp, params, grtruth=False, sep=' ' ):
    '''Returns a pandas DataFrame from a saved link computation at a given timestamp
    @timestamp is the version of the model for a given system (last field of the file name)
    @params must provide 'saving_path', 'system', 'vectorizationType' and 'linkType',
    which make up the rest of the file path
    @grtruth is the flag written into the file name as text
    @sep is the column delimiter of the csv file
    The first csv row is read as the header and the first column as the index.'''
    path_parts = [
        params['saving_path'],
        '[', params['system'],
        '-', str(params['vectorizationType']),
        '-', str(params['linkType']),
        '-', str(grtruth),
        '-{}].csv'.format(timestamp),
    ]
    path = ''.join(path_parts)

    logging.info("Loading computed links from... "+ path)

    return pd.read_csv(path, header=0, index_col=0, sep=sep)

### Testing Word2Vec SequenceVectorization

In [None]:
#hide
#tst
# Scratch checks for the reduce-based list merging idiom.
metric_list = ['a','b']
A = [[1,3,4],[4,5],[1,8,9,7]]
B = ((1,3,4),(4,5),(1,8,9,7))
# Folding with `+` flattens one nesting level of same-typed sequences.
assert functools.reduce(lambda a,b : a+b, B) == (1,3,4,4,5,1,8,9,7)
assert functools.reduce(lambda a,b : a+b, A) == [1,3,4,4,5,1,8,9,7]
dist_sim_T = [([12,13],['metric1','metric2']),([12,13],['metric1','metric2'])]
# Start from an empty list so the accumulator is always the merged label list.
# Indexing the accumulator itself (a[1] + b[1]) only works for exactly two
# tuples: from the third one on, a[1] would be a single label, not a list.
separated_merged_list_a = functools.reduce(lambda merged, pair : merged + pair[1], dist_sim_T, [])
assert separated_merged_list_a == ['metric1','metric2','metric1','metric2']
separated_merged_list_a

In [None]:
#[step 1] Creating the Vectorization Class from the notebook's `parameters` configuration
# (the log below shows it building the corpus dictionary and loading a saved Word2Vec model)
word2vec = Word2VecSeqVect( params = parameters )

2020-12-19 00:52:11,849 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:52:11,905 : INFO : built Dictionary(6957 unique tokens: ['");', '"../../', '("\\', '();', ')))']...) from 87 documents (total 88944 corpus positions)
2020-12-19 00:52:11,906 : INFO : conventional preprocessing documents, dictionary, and vocab for the test corpus
2020-12-19 00:52:11,907 : INFO : loading Word2Vec object from ../dvc-ds4se/models/wv/bpe8k/[word2vec-Java-Py-SK-500-20E-8k-1594090297.869643].model
2020-12-19 00:52:12,357 : INFO : loading wv recursively from ../dvc-ds4se/models/wv/bpe8k/[word2vec-Java-Py-SK-500-20E-8k-1594090297.869643].model.wv.* with mmap=None
2020-12-19 00:52:12,358 : INFO : setting ignored attribute vectors_norm to None
2020-12-19 00:52:12,360 : INFO : loading vocabulary recursively from ../dvc-ds4se/models/wv/bpe8k/[word2vec-Java-Py-SK-500-20E-8k-1594090297.869643].model.vocabulary.* with mmap=None
2020-12-19 00:52:12,361 : INFO : loading trainables recurs

In [None]:
# Number of entries in the vocabulary of the loaded word2vec model
len(word2vec.new_model.wv.vocab)

7888

In [None]:
# Artifact id stored under index 35 of the source dataframe
word2vec.df_source['ids'][35]

'test_data/LibEST_semeru_format/requirements/RQ17.txt'

In [None]:
word2vec.df_source['ids'][35] # In LibEST the requirement (source) rows start at index 35

'test_data/LibEST_semeru_format/requirements/RQ17.txt'

In [None]:
# Column names configured for the artifact id and for the preprocessed text
column_names = parameters['system_path_config']['names']
ids = column_names[0]
txt = column_names[1]
print(ids, txt)

ids conv


In [None]:
# Retrieve the tokens of one source artifact through an exact id match.
# The id and the boolean mask get separate names instead of re-binding `idss`
# to two different kinds of values; `idss` still ends up holding the mask.
selected_id = word2vec.df_source[ids][35] #Selecting an ID
idss = word2vec.df_source[ids] == selected_id #Boolean mask for that specific ID
word2vec.df_source.loc[idss, txt].iloc[0].split() #Retrieving text and splitting

['requir',
 'http',
 'uri',
 'control',
 'est',
 'server',
 'must',
 'support',
 'use',
 'path',
 'prefix',
 'well',
 'known',
 'defin',
 'rfc',
 'regist',
 'name',
 'est',
 'thus',
 'valid',
 'est',
 'server',
 'uri',
 'path',
 'begin',
 'https',
 'www',
 'exampl',
 'com',
 'well',
 'known',
 'est',
 'est',
 'oper',
 'indic',
 'path',
 'suffix',
 'indic',
 'intend',
 'oper',
 'oper',
 'correspond',
 'uri',
 'oper',
 'oper',
 'path',
 'detail',
 'distribut',
 'cacert',
 'section',
 'certif',
 'must',
 'enrol',
 'simpleenrol',
 'section',
 'client',
 'must',
 'enrol',
 'simplereenrol',
 'section',
 'client',
 'must',
 'full',
 'cmc',
 'option',
 'fullcmc',
 'section',
 'server',
 'side',
 'key',
 'serverkeygen',
 'section',
 'generat',
 'option',
 'csr',
 'attribut',
 'csrattr',
 'section',
 'option',
 'figur',
 'oper',
 'path',
 'figur',
 'append',
 'path',
 'prefix',
 'form',
 'uri',
 'use',
 'http',
 'get',
 'post',
 'perform',
 'desir',
 'est',
 'oper',
 'exampl',
 'valid',
 'uri',


In [None]:
# First two rows of the source artifacts dataframe (id column and preprocessed text column)
word2vec.df_source.head(2)

Unnamed: 0,ids,conv
35,test_data/LibEST_semeru_format/requirements/RQ...,requir http uri control est server must suppor...
36,test_data/LibEST_semeru_format/requirements/RQ...,requir server side key generat respons request...


In [None]:
# First two rows of the target artifacts dataframe (id column and preprocessed text column)
word2vec.df_target.head(2)

Unnamed: 0,ids,conv
0,test_data/LibEST_semeru_format/test/us903.c,unit test user stori server simpl enrol august...
1,test_data/LibEST_semeru_format/test/us3496.c,unit test uri path segment extens support marc...


In [None]:
# Draw 2 sampled (source id, target id) candidate links
# NOTE(review): no seed is set in this cell, so the sampled pairs presumably change between runs — confirm
links = word2vec.samplingLinks(sampling=True, samples = 2)
links

[('test_data/LibEST_semeru_format/requirements/RQ35.txt',
  'test_data/LibEST_semeru_format/test/us3496.c'),
 ('test_data/LibEST_semeru_format/requirements/RQ28.txt',
  'test_data/LibEST_semeru_format/test/us3612.c')]

In [None]:
# Number of sampled links, followed by the shapes of the source and target dataframes
print( len(links), word2vec.df_source.shape, word2vec.df_target.shape )

2 (52, 2) (21, 2)


In [None]:
# Source artifact id of the first sampled link
links[0][0]

'test_data/LibEST_semeru_format/requirements/RQ35.txt'

In [None]:
#tst
# Tokens of the source artifact of the first sampled link
word2vec.df_source[word2vec.df_source[ids].str.contains(links[0][0])][txt].values[0].split() #conventional preprocessing
#eval(word2vec.df_source[word2vec.df_source[ids].str.contains(links[0][0])][txt].values[0]) #BPE

['requir',
 'client',
 'certif',
 'request',
 'function',
 'est',
 'client',
 'request',
 'certif',
 'est',
 'server',
 'https',
 'post',
 'use',
 'oper',
 'path',
 'valu',
 'simpleenrol',
 'est',
 'client',
 'request',
 'renew',
 'rekey',
 'exist',
 'certif',
 'http',
 'post',
 'use',
 'oper',
 'path',
 'valu',
 'simplereenrol',
 'est',
 'server',
 'must',
 'support',
 'simpleenrol',
 'simplereenrol',
 'function',
 'recommend',
 'client',
 'obtain',
 'current',
 'certif',
 'describ',
 'section',
 'perform',
 'certif',
 'request',
 'function',
 'ensur',
 'client',
 'abl',
 'valid',
 'est',
 'server',
 'certif',
 'client',
 'must',
 'authent',
 'est',
 'server',
 'specifi',
 'section',
 'certif',
 'base',
 'authent',
 'use',
 'section',
 'option',
 'certif',
 'less',
 'authent',
 'use',
 'client',
 'must',
 'verifi',
 'author',
 'est',
 'server',
 'specifi',
 'section',
 'server',
 'must',
 'authent',
 'client',
 'specifi',
 'section',
 'certif',
 'base',
 'authent',
 'use',
 'section',

In [None]:
#tst
# Tokens of the target artifact of the first sampled link (conventional preprocessing)
word2vec.df_target[word2vec.df_target[ids].str.contains(links[0][1])][txt].values[0].split()

['unit',
 'test',
 'uri',
 'path',
 'segment',
 'extens',
 'support',
 'march',
 'copyright',
 'cisco',
 'system',
 'inc',
 'right',
 'reserv',
 'includ',
 'stdio',
 'ifndef',
 'win',
 'includ',
 'unistd',
 'endif',
 'includ',
 'est',
 'includ',
 'curl',
 'curl',
 'includ',
 'curl',
 'util',
 'ifdef',
 'cunit',
 'includ',
 'cunit',
 'basic',
 'includ',
 'cunit',
 'autom',
 'endif',
 'includ',
 'util',
 'test',
 'util',
 'includ',
 'server',
 'includ',
 'src',
 'est',
 'est',
 'locl',
 'extern',
 'char',
 'tst',
 'srvr',
 'path',
 'seg',
 'enrol',
 'extern',
 'char',
 'tst',
 'srvr',
 'path',
 'seg',
 'auth',
 'static',
 'int',
 'path',
 'segment',
 'support',
 'max',
 'command',
 'line',
 'length',
 'generat',
 'system',
 'command',
 'defin',
 'est',
 'max',
 'cmd_len',
 '256',
 'certif',
 'use',
 'verifi',
 'est',
 'server',
 'grab',
 'server',
 'directori',
 'defin',
 'client_ut_cacert',
 '"../../',
 'exampl',
 'server',
 'est',
 'cacert',
 'crt',
 'defin',
 'client_ut_cacert',
 'est

In [None]:
# Metrics requested for every link: two distance metrics and two entropy metrics
metric_list = [
    DistanceMetric.WMD,
    DistanceMetric.SCM,
    EntropyMetric.MSI_I,
    EntropyMetric.MI,
]
#metric_list = [EntropyMetric.MSI_I,EntropyMetric.MI]

In [None]:
#[optional] computeDistanceMetric Testing [WARNING!] Time Consuming!!
# The result displayed below is a pair: one row per link (source id, target id, metric values)
# and the list of metric labels those values correspond to.
computeDistanceMetric = word2vec.computeDistanceMetric(links, metric_list = metric_list )
computeDistanceMetric

2020-12-19 00:53:03,994 : INFO : Removed 110 and 1487 OOV words from document 1 and 2 (respectively).
2020-12-19 00:53:03,996 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:53:03,997 : INFO : built Dictionary(32 unique tokens: ['base', 'est', 'less', 'may', 'one']...) from 2 documents (total 250 corpus positions)
2020-12-19 00:53:04,012 : INFO : token count processed
2020-12-19 00:53:04,018 : INFO : frequencies processed
2020-12-19 00:53:04,652 : INFO : scalar_distribution processed
2020-12-19 00:53:04,653 : INFO : entropies processed
2020-12-19 00:53:04,654 : INFO : extropies processed
2020-12-19 00:53:04,657 : INFO : token count processed
2020-12-19 00:53:04,658 : INFO : alphabet_source #6957
2020-12-19 00:53:04,659 : INFO : alphabet_target #6957
2020-12-19 00:53:04,660 : INFO : vocab #6957
2020-12-19 00:53:04,662 : INFO : diff #set()
2020-12-19 00:53:05,899 : INFO : alphabet #6957
2020-12-19 00:53:06,515 : INFO : Computed distances or similarities ('tes

([['test_data/LibEST_semeru_format/requirements/RQ35.txt',
   'test_data/LibEST_semeru_format/test/us3496.c',
   1.0365468004777898,
   0.4910272623076436,
   0.8108923137187958,
   0.18910769,
   3.475098311604626,
   1.355589451724954,
   5.0288078800464975,
   7.2878904389256105,
   7.401793526889205,
   4.914904792082903,
   2.3729856468427073,
   0.11390308796359427],
  ['test_data/LibEST_semeru_format/requirements/RQ28.txt',
   'test_data/LibEST_semeru_format/test/us3612.c',
   1.0571186696703938,
   0.4861168267751063,
   0.8713501840829849,
   0.12864982,
   3.00623892865339,
   1.3374329493342525,
   4.6420846845805,
   7.345352443618291,
   7.429494794299984,
   4.557942333898808,
   2.7874101097194837,
   0.0841423506816934]],
 [<DistanceMetric.WMD: 1>,
  <SimilarityMetric.WMD_sim: 1>,
  <DistanceMetric.SCM: 3>,
  <SimilarityMetric.SCM_sim: 3>,
  <EntropyMetric.MSI_I: 1>,
  <EntropyMetric.MSI_X: 2>,
  <EntropyMetric.Entropy_src: 7>,
  <EntropyMetric.Entropy_tgt: 8>,
  <Entro

In [None]:
#[step 2]NonGroundTruth Computation
# Runs the metric computation with sampling disabled; the results are then read from `df_nonground_link`.
# NOTE(review): `samples = 5` is presumably ignored when sampling=False — confirm
word2vec.ComputeDistanceArtifacts( sampling=False, samples = 5, metric_list = metric_list )
word2vec.df_nonground_link.head()

2020-12-19 00:53:22,170 : INFO : Removed 172 and 1761 OOV words from document 1 and 2 (respectively).
2020-12-19 00:53:22,171 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:53:22,173 : INFO : built Dictionary(44 unique tokens: ['://', 'com', 'est', 'form', 'get']...) from 2 documents (total 245 corpus positions)
2020-12-19 00:53:22,200 : INFO : token count processed
2020-12-19 00:53:22,209 : INFO : frequencies processed
2020-12-19 00:53:22,849 : INFO : scalar_distribution processed
2020-12-19 00:53:22,850 : INFO : entropies processed
2020-12-19 00:53:22,851 : INFO : extropies processed
2020-12-19 00:53:22,853 : INFO : token count processed
2020-12-19 00:53:22,855 : INFO : alphabet_source #6957
2020-12-19 00:53:22,856 : INFO : alphabet_target #6957
2020-12-19 00:53:22,857 : INFO : vocab #6957
2020-12-19 00:53:22,859 : INFO : diff #set()
2020-12-19 00:53:24,102 : INFO : alphabet #6957
2020-12-19 00:53:24,718 : INFO : Computed distances or similarities ('test

2020-12-19 00:53:37,522 : INFO : token count processed
2020-12-19 00:53:37,529 : INFO : frequencies processed
2020-12-19 00:53:38,153 : INFO : scalar_distribution processed
2020-12-19 00:53:38,154 : INFO : entropies processed
2020-12-19 00:53:38,155 : INFO : extropies processed
2020-12-19 00:53:38,160 : INFO : token count processed
2020-12-19 00:53:38,162 : INFO : alphabet_source #6957
2020-12-19 00:53:38,164 : INFO : alphabet_target #6957
2020-12-19 00:53:38,165 : INFO : vocab #6957
2020-12-19 00:53:38,168 : INFO : diff #set()
2020-12-19 00:53:39,413 : INFO : alphabet #6957
2020-12-19 00:53:40,030 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ17.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[1.021296649080398, 0.49473193380840785], [0.939129114151001, 0.060870886], [3.7277573751069215, 1.3437088691066412], [6.0246745048551915, 7.127980212598745, 7.403072733067834, 5.7495819843861025, 1.3783982282126424, 0.27509252046908905]]
2020-12

2020-12-19 00:53:53,543 : INFO : extropies processed
2020-12-19 00:53:53,546 : INFO : token count processed
2020-12-19 00:53:53,547 : INFO : alphabet_source #6957
2020-12-19 00:53:53,548 : INFO : alphabet_target #6957
2020-12-19 00:53:53,549 : INFO : vocab #6957
2020-12-19 00:53:53,551 : INFO : diff #set()
2020-12-19 00:53:54,793 : INFO : alphabet #6957
2020-12-19 00:53:55,423 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ17.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[0.9174282265917624, 0.5215319072346738], [0.7270097732543945, 0.27299023], [4.824116405014095, 1.4020498971400284], [6.0246745048551915, 9.006525301292756, 9.060589959535656, 5.970609846612291, 3.035915454680465, 0.05406465824290052]]
2020-12-19 00:53:55,426 : INFO : Removed 172 and 1726 OOV words from document 1 and 2 (respectively).
2020-12-19 00:53:55,427 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:53:55,429 : INFO : built Diction

2020-12-19 00:54:08,861 : INFO : vocab #6957
2020-12-19 00:54:08,862 : INFO : diff #set()
2020-12-19 00:54:10,111 : INFO : alphabet #6957
2020-12-19 00:54:10,727 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ17.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[0.866200398870977, 0.5358481332470966], [0.7431217133998871, 0.2568783], [4.489963579885211, 1.3876211312696747], [6.0246745048551915, 9.228957519717383, 9.276886157150905, 5.97674586742167, 3.2522116522957134, 0.047928637433521715]]
2020-12-19 00:54:10,731 : INFO : Removed 172 and 2212 OOV words from document 1 and 2 (respectively).
2020-12-19 00:54:10,731 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:54:10,732 : INFO : built Dictionary(50 unique tokens: ['://', 'com', 'est', 'form', 'get']...) from 2 documents (total 335 corpus positions)
2020-12-19 00:54:10,753 : INFO : token count processed
2020-12-19 00:54:10,758 : INFO : frequencies processed


2020-12-19 00:54:26,073 : INFO : Removed 322 and 2273 OOV words from document 1 and 2 (respectively).
2020-12-19 00:54:26,074 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:54:26,076 : INFO : built Dictionary(43 unique tokens: ['associ', 'base', 'der', 'est', 'field']...) from 2 documents (total 431 corpus positions)
2020-12-19 00:54:26,106 : INFO : token count processed
2020-12-19 00:54:26,113 : INFO : frequencies processed
2020-12-19 00:54:26,731 : INFO : scalar_distribution processed
2020-12-19 00:54:26,732 : INFO : entropies processed
2020-12-19 00:54:26,733 : INFO : extropies processed
2020-12-19 00:54:26,736 : INFO : token count processed
2020-12-19 00:54:26,737 : INFO : alphabet_source #6957
2020-12-19 00:54:26,739 : INFO : alphabet_target #6957
2020-12-19 00:54:26,739 : INFO : vocab #6957
2020-12-19 00:54:26,741 : INFO : diff #set()
2020-12-19 00:54:27,981 : INFO : alphabet #6957
2020-12-19 00:54:28,609 : INFO : Computed distances or similarities (

2020-12-19 00:54:41,345 : INFO : built Dictionary(43 unique tokens: ['associ', 'base', 'der', 'est', 'field']...) from 2 documents (total 460 corpus positions)
2020-12-19 00:54:41,372 : INFO : token count processed
2020-12-19 00:54:41,377 : INFO : frequencies processed
2020-12-19 00:54:41,995 : INFO : scalar_distribution processed
2020-12-19 00:54:41,996 : INFO : entropies processed
2020-12-19 00:54:41,997 : INFO : extropies processed
2020-12-19 00:54:42,000 : INFO : token count processed
2020-12-19 00:54:42,001 : INFO : alphabet_source #6957
2020-12-19 00:54:42,002 : INFO : alphabet_target #6957
2020-12-19 00:54:42,003 : INFO : vocab #6957
2020-12-19 00:54:42,005 : INFO : diff #set()
2020-12-19 00:54:43,249 : INFO : alphabet #6957
2020-12-19 00:54:43,861 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ46.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[0.8384610962229618, 0.5439331852354431], [0.7748338431119919, 0.22516616], [4.0543932

2020-12-19 00:54:56,543 : INFO : frequencies processed
2020-12-19 00:54:57,166 : INFO : scalar_distribution processed
2020-12-19 00:54:57,166 : INFO : entropies processed
2020-12-19 00:54:57,167 : INFO : extropies processed
2020-12-19 00:54:57,171 : INFO : token count processed
2020-12-19 00:54:57,173 : INFO : alphabet_source #6957
2020-12-19 00:54:57,175 : INFO : alphabet_target #6957
2020-12-19 00:54:57,176 : INFO : vocab #6957
2020-12-19 00:54:57,178 : INFO : diff #set()
2020-12-19 00:54:58,478 : INFO : alphabet #6957
2020-12-19 00:54:59,201 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ46.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[1.0535172819102312, 0.48696936169428084], [0.8597773760557175, 0.14022262], [4.43471486242622, 1.3956964569083607], [6.15058306747227, 8.610843107887472, 8.63158901044406, 6.129837164915681, 2.48100594297179, 0.02074590255658748]]
2020-12-19 00:54:59,204 : INFO : Removed 322 and 1850 OOV words from

2020-12-19 00:55:12,531 : INFO : extropies processed
2020-12-19 00:55:12,536 : INFO : token count processed
2020-12-19 00:55:12,538 : INFO : alphabet_source #6957
2020-12-19 00:55:12,540 : INFO : alphabet_target #6957
2020-12-19 00:55:12,541 : INFO : vocab #6957
2020-12-19 00:55:12,546 : INFO : diff #set()
2020-12-19 00:55:13,774 : INFO : alphabet #6957
2020-12-19 00:55:14,388 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ18.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[0.9987033070056834, 0.500324383561525], [0.8491760194301605, 0.15082398], [3.6060412330903553, 1.3544295803518744], [5.622178999487696, 7.2878904389256105, 7.502004634941104, 5.408064803472202, 1.8798256354534075, 0.21411419601549309]]
2020-12-19 00:55:14,392 : INFO : Removed 172 and 3144 OOV words from document 1 and 2 (respectively).
2020-12-19 00:55:14,393 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:55:14,394 : INFO : built Dictio

2020-12-19 00:55:27,810 : INFO : alphabet_target #6957
2020-12-19 00:55:27,811 : INFO : vocab #6957
2020-12-19 00:55:27,812 : INFO : diff #set()
2020-12-19 00:55:29,052 : INFO : alphabet #6957
2020-12-19 00:55:29,669 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ18.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[0.8317089589500802, 0.5459382589760283], [0.8671447187662125, 0.13285528], [3.4006807444038896, 1.3453235653495432], [5.622178999487696, 7.373064697852536, 7.630916921582181, 5.364326775758051, 2.008737922094485, 0.2578522237296452]]
2020-12-19 00:55:29,672 : INFO : Removed 172 and 2544 OOV words from document 1 and 2 (respectively).
2020-12-19 00:55:29,673 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:55:29,674 : INFO : built Dictionary(45 unique tokens: ['associ', 'base', 'est', 'first', 'know']...) from 2 documents (total 315 corpus positions)
2020-12-19 00:55:29,696 : INFO : token count proce

2020-12-19 00:55:44,988 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ18.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[0.7681381675820014, 0.5655666612115158], [0.7678771018981934, 0.2321229], [4.377807166477031, 1.3958839687524909], [5.622178999487696, 8.67532843392421, 8.72145086992066, 5.576056563491246, 3.0992718704329647, 0.04612243599645005]]
2020-12-19 00:55:44,991 : INFO : Removed 172 and 926 OOV words from document 1 and 2 (respectively).
2020-12-19 00:55:44,992 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:55:44,993 : INFO : built Dictionary(33 unique tokens: ['associ', 'base', 'est', 'first', 'know']...) from 2 documents (total 198 corpus positions)
2020-12-19 00:55:45,008 : INFO : token count processed
2020-12-19 00:55:45,017 : INFO : frequencies processed
2020-12-19 00:55:45,641 : INFO : scalar_distribution processed
2020-12-19 00:55:45,642 : INFO : entropies processed
2020-12-19 00:55:45,

2020-12-19 00:56:00,269 : INFO : Removed 172 and 2291 OOV words from document 1 and 2 (respectively).
2020-12-19 00:56:00,270 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:56:00,271 : INFO : built Dictionary(34 unique tokens: ['associ', 'base', 'est', 'first', 'know']...) from 2 documents (total 181 corpus positions)
2020-12-19 00:56:00,282 : INFO : token count processed
2020-12-19 00:56:00,288 : INFO : frequencies processed
2020-12-19 00:56:00,908 : INFO : scalar_distribution processed
2020-12-19 00:56:00,908 : INFO : entropies processed
2020-12-19 00:56:00,911 : INFO : extropies processed
2020-12-19 00:56:00,914 : INFO : token count processed
2020-12-19 00:56:00,915 : INFO : alphabet_source #6957
2020-12-19 00:56:00,916 : INFO : alphabet_target #6957
2020-12-19 00:56:00,917 : INFO : vocab #6957
2020-12-19 00:56:00,919 : INFO : diff #set()
2020-12-19 00:56:02,191 : INFO : alphabet #6957
2020-12-19 00:56:02,806 : INFO : Computed distances or similarities 

2020-12-19 00:56:15,528 : INFO : built Dictionary(31 unique tokens: ['est', 'get', 'list', 'oper', '200']...) from 2 documents (total 150 corpus positions)
2020-12-19 00:56:15,542 : INFO : token count processed
2020-12-19 00:56:15,548 : INFO : frequencies processed
2020-12-19 00:56:16,187 : INFO : scalar_distribution processed
2020-12-19 00:56:16,188 : INFO : entropies processed
2020-12-19 00:56:16,189 : INFO : extropies processed
2020-12-19 00:56:16,191 : INFO : token count processed
2020-12-19 00:56:16,192 : INFO : alphabet_source #6957
2020-12-19 00:56:16,194 : INFO : alphabet_target #6957
2020-12-19 00:56:16,194 : INFO : vocab #6957
2020-12-19 00:56:16,196 : INFO : diff #set()
2020-12-19 00:56:17,445 : INFO : alphabet #6957
2020-12-19 00:56:18,058 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ48.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[1.0990788033848857, 0.47639945598394984], [0.9128353670239449, 0.08716463], [2.947702779

2020-12-19 00:56:30,663 : INFO : frequencies processed
2020-12-19 00:56:31,382 : INFO : scalar_distribution processed
2020-12-19 00:56:31,383 : INFO : entropies processed
2020-12-19 00:56:31,384 : INFO : extropies processed
2020-12-19 00:56:31,386 : INFO : token count processed
2020-12-19 00:56:31,388 : INFO : alphabet_source #6957
2020-12-19 00:56:31,389 : INFO : alphabet_target #6957
2020-12-19 00:56:31,390 : INFO : vocab #6957
2020-12-19 00:56:31,392 : INFO : diff #set()
2020-12-19 00:56:32,633 : INFO : alphabet #6957
2020-12-19 00:56:33,250 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ48.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[0.9282576433145365, 0.5186028970076175], [0.7249983847141266, 0.27500162], [3.6168746059562222, 1.3781755222681253], [3.9219280948873627, 8.081846397171304, 8.087982770619583, 3.915791721439085, 4.166054675732219, 0.006136373448278221]]
2020-12-19 00:56:33,254 : INFO : Removed 15 and 2642 OOV words

2020-12-19 00:56:46,509 : INFO : token count processed
2020-12-19 00:56:46,512 : INFO : alphabet_source #6957
2020-12-19 00:56:46,514 : INFO : alphabet_target #6957
2020-12-19 00:56:46,515 : INFO : vocab #6957
2020-12-19 00:56:46,518 : INFO : diff #set()
2020-12-19 00:56:47,767 : INFO : alphabet #6957
2020-12-19 00:56:48,381 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ48.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[1.0688578791682155, 0.4833584800914648], [0.7693780660629272, 0.23062193], [2.918295834054489, 1.3370612537534623], [3.9219280948873627, 7.3308860974230985, 7.347790056161219, 3.905024136149243, 3.425861961273856, 0.016903958738120117]]
2020-12-19 00:56:48,384 : INFO : Removed 15 and 2276 OOV words from document 1 and 2 (respectively).
2020-12-19 00:56:48,385 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:56:48,386 : INFO : built Dictionary(43 unique tokens: ['est', 'get', 'list', 'oper',

2020-12-19 00:57:01,840 : INFO : diff #set()
2020-12-19 00:57:03,071 : INFO : alphabet #6957
2020-12-19 00:57:03,687 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ42.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[0.6355386524804606, 0.6114193623509896], [0.6297758221626282, 0.37022418], [4.331251093058335, 1.394390649454181], [5.454631998657437, 8.158196888137685, 8.200778087944533, 5.412050798850588, 2.7461460892870964, 0.0425811998068486]]
2020-12-19 00:57:03,690 : INFO : Removed 124 and 1854 OOV words from document 1 and 2 (respectively).
2020-12-19 00:57:03,691 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:57:03,692 : INFO : built Dictionary(30 unique tokens: ['associ', 'base', 'est', 'key', 'less']...) from 2 documents (total 279 corpus positions)
2020-12-19 00:57:03,718 : INFO : token count processed
2020-12-19 00:57:03,727 : INFO : frequencies processed
2020-12-19 00:57:04,351 : INFO : scalar_dis

2020-12-19 00:57:19,026 : INFO : Removed 124 and 1285 OOV words from document 1 and 2 (respectively).
2020-12-19 00:57:19,027 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:57:19,028 : INFO : built Dictionary(28 unique tokens: ['associ', 'base', 'est', 'key', 'less']...) from 2 documents (total 205 corpus positions)
2020-12-19 00:57:19,040 : INFO : token count processed
2020-12-19 00:57:19,045 : INFO : frequencies processed
2020-12-19 00:57:19,657 : INFO : scalar_distribution processed
2020-12-19 00:57:19,658 : INFO : entropies processed
2020-12-19 00:57:19,659 : INFO : extropies processed
2020-12-19 00:57:19,661 : INFO : token count processed
2020-12-19 00:57:19,663 : INFO : alphabet_source #6957
2020-12-19 00:57:19,664 : INFO : alphabet_target #6957
2020-12-19 00:57:19,665 : INFO : vocab #6957
2020-12-19 00:57:19,667 : INFO : diff #set()
2020-12-19 00:57:20,907 : INFO : alphabet #6957
2020-12-19 00:57:21,519 : INFO : Computed distances or similarities ('

2020-12-19 00:57:34,257 : INFO : built Dictionary(41 unique tokens: ['associ', 'base', 'est', 'key', 'less']...) from 2 documents (total 284 corpus positions)
2020-12-19 00:57:34,276 : INFO : token count processed
2020-12-19 00:57:34,284 : INFO : frequencies processed
2020-12-19 00:57:34,895 : INFO : scalar_distribution processed
2020-12-19 00:57:34,896 : INFO : entropies processed
2020-12-19 00:57:34,897 : INFO : extropies processed
2020-12-19 00:57:34,899 : INFO : token count processed
2020-12-19 00:57:34,901 : INFO : alphabet_source #6957
2020-12-19 00:57:34,904 : INFO : alphabet_target #6957
2020-12-19 00:57:34,905 : INFO : vocab #6957
2020-12-19 00:57:34,908 : INFO : diff #set()
2020-12-19 00:57:36,161 : INFO : alphabet #6957
2020-12-19 00:57:36,776 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ42.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[0.7832380788045246, 0.5607776167893386], [0.6669662892818451, 0.3330337], [4.149034564

2020-12-19 00:57:49,550 : INFO : frequencies processed
2020-12-19 00:57:50,177 : INFO : scalar_distribution processed
2020-12-19 00:57:50,178 : INFO : entropies processed
2020-12-19 00:57:50,179 : INFO : extropies processed
2020-12-19 00:57:50,181 : INFO : token count processed
2020-12-19 00:57:50,183 : INFO : alphabet_source #6957
2020-12-19 00:57:50,184 : INFO : alphabet_target #6957
2020-12-19 00:57:50,185 : INFO : vocab #6957
2020-12-19 00:57:50,187 : INFO : diff #set()
2020-12-19 00:57:51,422 : INFO : alphabet #6957
2020-12-19 00:57:52,046 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ29.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[0.9913573407071082, 0.5021700422913105], [0.7676021307706833, 0.23239787], [3.8912257558324166, 1.3656546340631452], [5.499429425916341, 8.626714101844048, 8.6842997817538, 5.4418437460065885, 3.184870355837459, 0.05758567990975294]]
2020-12-19 00:57:52,049 : INFO : Removed 84 and 1487 OOV words fro

2020-12-19 00:58:05,496 : INFO : token count processed
2020-12-19 00:58:05,498 : INFO : alphabet_source #6957
2020-12-19 00:58:05,500 : INFO : alphabet_target #6957
2020-12-19 00:58:05,502 : INFO : vocab #6957
2020-12-19 00:58:05,505 : INFO : diff #set()
2020-12-19 00:58:06,755 : INFO : alphabet #6957
2020-12-19 00:58:07,368 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ29.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[1.0593010715403908, 0.4856016508805017], [0.949021466076374, 0.050978534], [3.1941850767035573, 1.3102667959509182], [5.499429425916341, 7.127980212598745, 7.262151279056631, 5.365258359458455, 1.76272185314029, 0.13417106645788657]]
2020-12-19 00:58:07,371 : INFO : Removed 84 and 774 OOV words from document 1 and 2 (respectively).
2020-12-19 00:58:07,372 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:58:07,373 : INFO : built Dictionary(20 unique tokens: ['act', 'est', 'ident', 'key', 'pop

2020-12-19 00:58:20,622 : INFO : diff #set()
2020-12-19 00:58:21,979 : INFO : alphabet #6957
2020-12-19 00:58:22,597 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ29.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[0.9776834328203248, 0.5056420979235918], [0.7151612341403961, 0.28483877], [4.075461313450679, 1.3726243570551642], [5.499429425916341, 9.006525301292756, 9.042831404143573, 5.463123323065524, 3.543401978227232, 0.03630610285081737]]
2020-12-19 00:58:22,600 : INFO : Removed 84 and 1726 OOV words from document 1 and 2 (respectively).
2020-12-19 00:58:22,600 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:58:22,602 : INFO : built Dictionary(37 unique tokens: ['act', 'est', 'ident', 'key', 'pop']...) from 2 documents (total 164 corpus positions)
2020-12-19 00:58:22,618 : INFO : token count processed
2020-12-19 00:58:22,628 : INFO : frequencies processed
2020-12-19 00:58:23,242 : INFO : scalar_distr

2020-12-19 00:58:37,779 : INFO : Removed 84 and 2212 OOV words from document 1 and 2 (respectively).
2020-12-19 00:58:37,780 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:58:37,781 : INFO : built Dictionary(41 unique tokens: ['act', 'est', 'ident', 'key', 'pop']...) from 2 documents (total 266 corpus positions)
2020-12-19 00:58:37,804 : INFO : token count processed
2020-12-19 00:58:37,813 : INFO : frequencies processed
2020-12-19 00:58:38,427 : INFO : scalar_distribution processed
2020-12-19 00:58:38,428 : INFO : entropies processed
2020-12-19 00:58:38,429 : INFO : extropies processed
2020-12-19 00:58:38,432 : INFO : token count processed
2020-12-19 00:58:38,433 : INFO : alphabet_source #6957
2020-12-19 00:58:38,434 : INFO : alphabet_target #6957
2020-12-19 00:58:38,435 : INFO : vocab #6957
2020-12-19 00:58:38,437 : INFO : diff #set()
2020-12-19 00:58:39,682 : INFO : alphabet #6957
2020-12-19 00:58:40,404 : INFO : Computed distances or similarities ('test

2020-12-19 00:58:52,986 : INFO : built Dictionary(32 unique tokens: ['est', 'key', 'list', 'may', 'type']...) from 2 documents (total 341 corpus positions)
2020-12-19 00:58:53,004 : INFO : token count processed
2020-12-19 00:58:53,011 : INFO : frequencies processed
2020-12-19 00:58:53,623 : INFO : scalar_distribution processed
2020-12-19 00:58:53,624 : INFO : entropies processed
2020-12-19 00:58:53,625 : INFO : extropies processed
2020-12-19 00:58:53,627 : INFO : token count processed
2020-12-19 00:58:53,628 : INFO : alphabet_source #6957
2020-12-19 00:58:53,630 : INFO : alphabet_target #6957
2020-12-19 00:58:53,630 : INFO : vocab #6957
2020-12-19 00:58:53,632 : INFO : diff #set()
2020-12-19 00:58:54,871 : INFO : alphabet #6957
2020-12-19 00:58:55,486 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ47.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[1.009730971778702, 0.4975790362204326], [0.8983174338936806, 0.101682566], [3.02586009508

2020-12-19 00:59:08,221 : INFO : frequencies processed
2020-12-19 00:59:08,836 : INFO : scalar_distribution processed
2020-12-19 00:59:08,837 : INFO : entropies processed
2020-12-19 00:59:08,838 : INFO : extropies processed
2020-12-19 00:59:08,841 : INFO : token count processed
2020-12-19 00:59:08,842 : INFO : alphabet_source #6957
2020-12-19 00:59:08,843 : INFO : alphabet_target #6957
2020-12-19 00:59:08,844 : INFO : vocab #6957
2020-12-19 00:59:08,846 : INFO : diff #set()
2020-12-19 00:59:10,082 : INFO : alphabet #6957
2020-12-19 00:59:10,695 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ47.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[1.029796827108741, 0.4926601454118973], [0.8901183903217316, 0.10988161], [3.7554234623331895, 1.3716679346457679], [5.0693164760734675, 7.183148764136712, 7.257421624812781, 4.995043615397399, 2.1881051487393135, 0.07427286067606875]]
2020-12-19 00:59:10,698 : INFO : Removed 56 and 2059 OOV words f

2020-12-19 00:59:24,060 : INFO : token count processed
2020-12-19 00:59:24,061 : INFO : alphabet_source #6957
2020-12-19 00:59:24,063 : INFO : alphabet_target #6957
2020-12-19 00:59:24,063 : INFO : vocab #6957
2020-12-19 00:59:24,065 : INFO : diff #set()
2020-12-19 00:59:25,325 : INFO : alphabet #6957
2020-12-19 00:59:25,941 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ47.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[1.1052970727495897, 0.4749923480841428], [0.9194434359669685, 0.080556564], [3.75, 1.3846096858033596], [5.0693164760734675, 8.610843107887472, 8.68281965979615, 4.997339924164789, 3.613503183722683, 0.07197655190867813]]
2020-12-19 00:59:25,944 : INFO : Removed 56 and 1850 OOV words from document 1 and 2 (respectively).
2020-12-19 00:59:25,945 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:59:25,946 : INFO : built Dictionary(34 unique tokens: ['est', 'key', 'list', 'may', 'type']...) fro

2020-12-19 00:59:39,309 : INFO : diff #set()
2020-12-19 00:59:40,565 : INFO : alphabet #6957
2020-12-19 00:59:41,185 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ36.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[0.9496688973781892, 0.5129076025907511], [0.8331856578588486, 0.16681434], [4.049207821672111, 1.3799891263369026], [5.815712795089782, 7.2878904389256105, 7.502447937221839, 5.601155296793553, 1.6867351421320569, 0.2145574982962284]]
2020-12-19 00:59:41,189 : INFO : Removed 148 and 3144 OOV words from document 1 and 2 (respectively).
2020-12-19 00:59:41,190 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:59:41,191 : INFO : built Dictionary(38 unique tokens: ['base', 'est', 'key', 'may', 'name']...) from 2 documents (total 425 corpus positions)
2020-12-19 00:59:41,212 : INFO : token count processed
2020-12-19 00:59:41,219 : INFO : frequencies processed
2020-12-19 00:59:41,840 : INFO : scalar_dis

2020-12-19 00:59:56,506 : INFO : Removed 148 and 2544 OOV words from document 1 and 2 (respectively).
2020-12-19 00:59:56,507 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 00:59:56,508 : INFO : built Dictionary(43 unique tokens: ['base', 'est', 'key', 'may', 'name']...) from 2 documents (total 322 corpus positions)
2020-12-19 00:59:56,529 : INFO : token count processed
2020-12-19 00:59:56,534 : INFO : frequencies processed
2020-12-19 00:59:57,153 : INFO : scalar_distribution processed
2020-12-19 00:59:57,154 : INFO : entropies processed
2020-12-19 00:59:57,155 : INFO : extropies processed
2020-12-19 00:59:57,157 : INFO : token count processed
2020-12-19 00:59:57,159 : INFO : alphabet_source #6957
2020-12-19 00:59:57,160 : INFO : alphabet_target #6957
2020-12-19 00:59:57,161 : INFO : vocab #6957
2020-12-19 00:59:57,162 : INFO : diff #set()
2020-12-19 00:59:58,402 : INFO : alphabet #6957
2020-12-19 00:59:59,021 : INFO : Computed distances or similarities ('tes

2020-12-19 01:00:11,746 : INFO : built Dictionary(32 unique tokens: ['base', 'est', 'key', 'may', 'name']...) from 2 documents (total 205 corpus positions)
2020-12-19 01:00:11,758 : INFO : token count processed
2020-12-19 01:00:11,764 : INFO : frequencies processed
2020-12-19 01:00:12,482 : INFO : scalar_distribution processed
2020-12-19 01:00:12,483 : INFO : entropies processed
2020-12-19 01:00:12,484 : INFO : extropies processed
2020-12-19 01:00:12,486 : INFO : token count processed
2020-12-19 01:00:12,487 : INFO : alphabet_source #6957
2020-12-19 01:00:12,488 : INFO : alphabet_target #6957
2020-12-19 01:00:12,489 : INFO : vocab #6957
2020-12-19 01:00:12,490 : INFO : diff #set()
2020-12-19 01:00:13,728 : INFO : alphabet #6957
2020-12-19 01:00:14,342 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ36.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[0.7963153525494502, 0.5566951251520138], [0.7262177169322968, 0.27378228], [3.9154024206

2020-12-19 01:00:27,037 : INFO : frequencies processed
2020-12-19 01:00:27,656 : INFO : scalar_distribution processed
2020-12-19 01:00:27,657 : INFO : entropies processed
2020-12-19 01:00:27,658 : INFO : extropies processed
2020-12-19 01:00:27,662 : INFO : token count processed
2020-12-19 01:00:27,664 : INFO : alphabet_source #6957
2020-12-19 01:00:27,666 : INFO : alphabet_target #6957
2020-12-19 01:00:27,667 : INFO : vocab #6957
2020-12-19 01:00:27,670 : INFO : diff #set()
2020-12-19 01:00:28,921 : INFO : alphabet #6957
2020-12-19 01:00:29,536 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ36.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[1.093936772097751, 0.47756933892429726], [0.9577655643224716, 0.042234436], [3.5936576653909618, 1.3670810167470073], [5.815712795089782, 7.102818334145153, 7.340483574880272, 5.578047554354663, 1.5247707797904901, 0.23766524073511874]]
2020-12-19 01:00:29,539 : INFO : Removed 92 and 1761 OOV words 

2020-12-19 01:00:42,910 : INFO : token count processed
2020-12-19 01:00:42,911 : INFO : alphabet_source #6957
2020-12-19 01:00:42,912 : INFO : alphabet_target #6957
2020-12-19 01:00:42,913 : INFO : vocab #6957
2020-12-19 01:00:42,915 : INFO : diff #set()
2020-12-19 01:00:44,159 : INFO : alphabet #6957
2020-12-19 01:00:44,774 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ56.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[0.9893597185401645, 0.5026742980067083], [0.8631537854671478, 0.13684621], [4.418722908066561, 1.4017043738086556], [6.078780557638893, 7.587625711074752, 7.759165015239809, 5.9072412534738366, 1.6803844576009155, 0.1715393041650568]]
2020-12-19 01:00:44,777 : INFO : Removed 92 and 2503 OOV words from document 1 and 2 (respectively).
2020-12-19 01:00:44,778 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:00:44,779 : INFO : built Dictionary(29 unique tokens: ['also', 'base', 'est', 'get', '

2020-12-19 01:00:58,144 : INFO : diff #set()
2020-12-19 01:00:59,401 : INFO : alphabet #6957
2020-12-19 01:01:00,017 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ56.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[0.808790625526531, 0.5528555853217695], [0.7813359051942825, 0.2186641], [4.5970828184076655, 1.4074108482235035], [6.078780557638893, 8.081846397171304, 8.187244551430911, 5.973382403379286, 2.108463993792018, 0.10539815425960697]]
2020-12-19 01:01:00,020 : INFO : Removed 92 and 2642 OOV words from document 1 and 2 (respectively).
2020-12-19 01:01:00,021 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:01:00,023 : INFO : built Dictionary(49 unique tokens: ['also', 'base', 'est', 'get', 'ident']...) from 2 documents (total 312 corpus positions)
2020-12-19 01:01:00,045 : INFO : token count processed
2020-12-19 01:01:00,051 : INFO : frequencies processed
2020-12-19 01:01:00,675 : INFO : scalar_dist

2020-12-19 01:01:15,305 : INFO : Removed 92 and 2276 OOV words from document 1 and 2 (respectively).
2020-12-19 01:01:15,306 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:01:15,307 : INFO : built Dictionary(46 unique tokens: ['also', 'base', 'est', 'get', 'ident']...) from 2 documents (total 196 corpus positions)
2020-12-19 01:01:15,329 : INFO : token count processed
2020-12-19 01:01:15,336 : INFO : frequencies processed
2020-12-19 01:01:15,951 : INFO : scalar_distribution processed
2020-12-19 01:01:15,952 : INFO : entropies processed
2020-12-19 01:01:15,953 : INFO : extropies processed
2020-12-19 01:01:15,955 : INFO : token count processed
2020-12-19 01:01:15,957 : INFO : alphabet_source #6957
2020-12-19 01:01:15,958 : INFO : alphabet_target #6957
2020-12-19 01:01:15,959 : INFO : vocab #6957
2020-12-19 01:01:15,961 : INFO : diff #set()
2020-12-19 01:01:17,244 : INFO : alphabet #6957
2020-12-19 01:01:17,858 : INFO : Computed distances or similarities ('te

2020-12-19 01:01:30,606 : INFO : built Dictionary(32 unique tokens: ['also', 'associ', 'est', 'load', 'may']...) from 2 documents (total 266 corpus positions)
2020-12-19 01:01:30,621 : INFO : token count processed
2020-12-19 01:01:30,627 : INFO : frequencies processed
2020-12-19 01:01:31,240 : INFO : scalar_distribution processed
2020-12-19 01:01:31,241 : INFO : entropies processed
2020-12-19 01:01:31,242 : INFO : extropies processed
2020-12-19 01:01:31,247 : INFO : token count processed
2020-12-19 01:01:31,249 : INFO : alphabet_source #6957
2020-12-19 01:01:31,251 : INFO : alphabet_target #6957
2020-12-19 01:01:31,252 : INFO : vocab #6957
2020-12-19 01:01:31,254 : INFO : diff #set()
2020-12-19 01:01:32,511 : INFO : alphabet #6957
2020-12-19 01:01:33,128 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ15.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[0.9745010274704029, 0.5064570674248432], [0.8771436586976051, 0.12285634], [3.3553377

2020-12-19 01:01:45,919 : INFO : frequencies processed
2020-12-19 01:01:46,550 : INFO : scalar_distribution processed
2020-12-19 01:01:46,551 : INFO : entropies processed
2020-12-19 01:01:46,552 : INFO : extropies processed
2020-12-19 01:01:46,554 : INFO : token count processed
2020-12-19 01:01:46,556 : INFO : alphabet_source #6957
2020-12-19 01:01:46,557 : INFO : alphabet_target #6957
2020-12-19 01:01:46,558 : INFO : vocab #6957
2020-12-19 01:01:46,559 : INFO : diff #set()
2020-12-19 01:01:47,798 : INFO : alphabet #6957
2020-12-19 01:01:48,411 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ15.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[1.0078132111936406, 0.4980542982907768], [0.8013169765472412, 0.19868302], [3.1910226206090546, 1.3423405423922619], [5.2182844447684475, 7.278774046648788, 7.405915227691283, 5.091143263725952, 2.1876307829228354, 0.12714118104249472]]
2020-12-19 01:01:48,414 : INFO : Removed 66 and 2288 OOV words

2020-12-19 01:02:01,696 : INFO : extropies processed
2020-12-19 01:02:01,698 : INFO : token count processed
2020-12-19 01:02:01,699 : INFO : alphabet_source #6957
2020-12-19 01:02:01,701 : INFO : alphabet_target #6957
2020-12-19 01:02:01,701 : INFO : vocab #6957
2020-12-19 01:02:01,703 : INFO : diff #set()
2020-12-19 01:02:03,047 : INFO : alphabet #6957
2020-12-19 01:02:03,660 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ15.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[0.8654748057059796, 0.5360565561868068], [0.7531549781560898, 0.24684502], [3.896721616561627, 1.3823399492367783], [5.2182844447684475, 7.643402829602536, 7.710479274553965, 5.151207999817018, 2.492194829785517, 0.06707644495142873]]
2020-12-19 01:02:03,663 : INFO : Removed 66 and 954 OOV words from document 1 and 2 (respectively).
2020-12-19 01:02:03,664 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:02:03,665 : INFO : built Dictionary

2020-12-19 01:02:16,975 : INFO : vocab #6957
2020-12-19 01:02:16,976 : INFO : diff #set()
2020-12-19 01:02:18,221 : INFO : alphabet #6957
2020-12-19 01:02:18,837 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ35.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[0.8585805142338172, 0.5380450254059834], [0.7361418306827545, 0.26385817], [3.9720385328209344, 1.3801749794364646], [5.0288078800464975, 8.626714101844048, 8.640686236541264, 5.014835745349281, 3.611878356494766, 0.013972134697215921]]
2020-12-19 01:02:18,840 : INFO : Removed 110 and 1487 OOV words from document 1 and 2 (respectively).
2020-12-19 01:02:18,841 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:02:18,842 : INFO : built Dictionary(32 unique tokens: ['base', 'est', 'less', 'may', 'one']...) from 2 documents (total 250 corpus positions)
2020-12-19 01:02:18,850 : INFO : token count processed
2020-12-19 01:02:18,856 : INFO : frequencies process

2020-12-19 01:02:34,107 : INFO : Removed 110 and 774 OOV words from document 1 and 2 (respectively).
2020-12-19 01:02:34,108 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:02:34,109 : INFO : built Dictionary(22 unique tokens: ['base', 'est', 'less', 'may', 'one']...) from 2 documents (total 86 corpus positions)
2020-12-19 01:02:34,121 : INFO : token count processed
2020-12-19 01:02:34,127 : INFO : frequencies processed
2020-12-19 01:02:34,744 : INFO : scalar_distribution processed
2020-12-19 01:02:34,744 : INFO : entropies processed
2020-12-19 01:02:34,745 : INFO : extropies processed
2020-12-19 01:02:34,747 : INFO : token count processed
2020-12-19 01:02:34,748 : INFO : alphabet_source #6957
2020-12-19 01:02:34,749 : INFO : alphabet_target #6957
2020-12-19 01:02:34,750 : INFO : vocab #6957
2020-12-19 01:02:34,751 : INFO : diff #set()
2020-12-19 01:02:35,988 : INFO : alphabet #6957
2020-12-19 01:02:36,601 : INFO : Computed distances or similarities ('test_

2020-12-19 01:02:49,342 : INFO : built Dictionary(40 unique tokens: ['base', 'est', 'less', 'may', 'one']...) from 2 documents (total 174 corpus positions)
2020-12-19 01:02:49,356 : INFO : token count processed
2020-12-19 01:02:49,362 : INFO : frequencies processed
2020-12-19 01:02:49,978 : INFO : scalar_distribution processed
2020-12-19 01:02:49,979 : INFO : entropies processed
2020-12-19 01:02:49,980 : INFO : extropies processed
2020-12-19 01:02:49,982 : INFO : token count processed
2020-12-19 01:02:49,983 : INFO : alphabet_source #6957
2020-12-19 01:02:49,985 : INFO : alphabet_target #6957
2020-12-19 01:02:49,986 : INFO : vocab #6957
2020-12-19 01:02:49,987 : INFO : diff #set()
2020-12-19 01:02:51,229 : INFO : alphabet #6957
2020-12-19 01:02:51,844 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ35.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[0.8317452458547172, 0.5459274439297842], [0.757096067070961, 0.24290393], [4.019959714517

2020-12-19 01:03:04,586 : INFO : frequencies processed
2020-12-19 01:03:05,200 : INFO : scalar_distribution processed
2020-12-19 01:03:05,200 : INFO : entropies processed
2020-12-19 01:03:05,201 : INFO : extropies processed
2020-12-19 01:03:05,204 : INFO : token count processed
2020-12-19 01:03:05,205 : INFO : alphabet_source #6957
2020-12-19 01:03:05,207 : INFO : alphabet_target #6957
2020-12-19 01:03:05,207 : INFO : vocab #6957
2020-12-19 01:03:05,209 : INFO : diff #set()
2020-12-19 01:03:06,451 : INFO : alphabet #6957
2020-12-19 01:03:07,066 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ35.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[0.9037391853556875, 0.5252820384706026], [0.7933505475521088, 0.20664945], [3.9071440517667324, 1.377071305126568], [5.0288078800464975, 7.9055766874171, 7.976283771797398, 4.9581007956661995, 2.9474758917509005, 0.07070708438029794]]
2020-12-19 01:03:07,069 : INFO : Removed 110 and 2291 OOV words f

2020-12-19 01:03:20,534 : INFO : token count processed
2020-12-19 01:03:20,535 : INFO : alphabet_source #6957
2020-12-19 01:03:20,537 : INFO : alphabet_target #6957
2020-12-19 01:03:20,537 : INFO : vocab #6957
2020-12-19 01:03:20,539 : INFO : diff #set()
2020-12-19 01:03:21,779 : INFO : alphabet #6957
2020-12-19 01:03:22,394 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ51.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[0.9086076673619189, 0.523942147514372], [0.781807616353035, 0.21819238], [4.513517627916675, 1.394329208632712], [7.343933716120418, 6.792406615896576, 7.378424511118455, 6.757915820898539, 0.03449079499803709, 0.5860178952218789]]
2020-12-19 01:03:22,397 : INFO : Removed 420 and 1444 OOV words from document 1 and 2 (respectively).
2020-12-19 01:03:22,398 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:03:22,399 : INFO : built Dictionary(51 unique tokens: ['act', 'base', 'ber', 'bit', 'der'

2020-12-19 01:03:35,820 : INFO : diff #set()
2020-12-19 01:03:37,058 : INFO : alphabet #6957
2020-12-19 01:03:37,673 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ51.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[0.7972794192228275, 0.5563965120306201], [0.7692511081695557, 0.23074889], [4.952004031079701, 1.4047405919021283], [7.343933716120418, 7.183148764136712, 7.690798905683815, 6.836283574573315, 0.3468651895633972, 0.5076501415471029]]
2020-12-19 01:03:37,676 : INFO : Removed 420 and 2059 OOV words from document 1 and 2 (respectively).
2020-12-19 01:03:37,677 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:03:37,679 : INFO : built Dictionary(62 unique tokens: ['act', 'base', 'ber', 'bit', 'der']...) from 2 documents (total 412 corpus positions)
2020-12-19 01:03:37,714 : INFO : token count processed
2020-12-19 01:03:37,722 : INFO : frequencies processed
2020-12-19 01:03:38,345 : INFO : scalar_distri

2020-12-19 01:03:52,927 : INFO : Removed 420 and 1850 OOV words from document 1 and 2 (respectively).
2020-12-19 01:03:52,928 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:03:52,929 : INFO : built Dictionary(54 unique tokens: ['act', 'base', 'ber', 'bit', 'der']...) from 2 documents (total 387 corpus positions)
2020-12-19 01:03:52,958 : INFO : token count processed
2020-12-19 01:03:52,964 : INFO : frequencies processed
2020-12-19 01:03:53,706 : INFO : scalar_distribution processed
2020-12-19 01:03:53,707 : INFO : entropies processed
2020-12-19 01:03:53,708 : INFO : extropies processed
2020-12-19 01:03:53,711 : INFO : token count processed
2020-12-19 01:03:53,712 : INFO : alphabet_source #6957
2020-12-19 01:03:53,714 : INFO : alphabet_target #6957
2020-12-19 01:03:53,715 : INFO : vocab #6957
2020-12-19 01:03:53,716 : INFO : diff #set()
2020-12-19 01:03:54,959 : INFO : alphabet #6957
2020-12-19 01:03:55,601 : INFO : Computed distances or similarities ('test

2020-12-19 01:04:08,281 : INFO : built Dictionary(35 unique tokens: ['est', 'less', 'may', 'respond', 'use']...) from 2 documents (total 396 corpus positions)
2020-12-19 01:04:08,296 : INFO : token count processed
2020-12-19 01:04:08,303 : INFO : frequencies processed
2020-12-19 01:04:08,916 : INFO : scalar_distribution processed
2020-12-19 01:04:08,917 : INFO : entropies processed
2020-12-19 01:04:08,917 : INFO : extropies processed
2020-12-19 01:04:08,920 : INFO : token count processed
2020-12-19 01:04:08,921 : INFO : alphabet_source #6957
2020-12-19 01:04:08,923 : INFO : alphabet_target #6957
2020-12-19 01:04:08,924 : INFO : vocab #6957
2020-12-19 01:04:08,925 : INFO : diff #set()
2020-12-19 01:04:10,162 : INFO : alphabet #6957
2020-12-19 01:04:10,775 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ26.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[0.8485832709678882, 0.5409548034460012], [0.6840291917324066, 0.3159708], [3.802655596

2020-12-19 01:04:23,501 : INFO : frequencies processed
2020-12-19 01:04:24,121 : INFO : scalar_distribution processed
2020-12-19 01:04:24,122 : INFO : entropies processed
2020-12-19 01:04:24,123 : INFO : extropies processed
2020-12-19 01:04:24,128 : INFO : token count processed
2020-12-19 01:04:24,131 : INFO : alphabet_source #6957
2020-12-19 01:04:24,133 : INFO : alphabet_target #6957
2020-12-19 01:04:24,134 : INFO : vocab #6957
2020-12-19 01:04:24,137 : INFO : diff #set()
2020-12-19 01:04:25,395 : INFO : alphabet #6957
2020-12-19 01:04:26,011 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ26.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[0.9014799655453716, 0.5259061458021651], [0.7221253514289856, 0.27787465], [3.9715277841122743, 1.3796661550149485], [4.694042263389069, 7.749661971001125, 7.773336015304817, 4.670368219085377, 3.079293751915748, 0.02367404430369202]]
2020-12-19 01:04:26,014 : INFO : Removed 49 and 1285 OOV words fr

2020-12-19 01:04:39,380 : INFO : token count processed
2020-12-19 01:04:39,383 : INFO : alphabet_source #6957
2020-12-19 01:04:39,385 : INFO : alphabet_target #6957
2020-12-19 01:04:39,386 : INFO : vocab #6957
2020-12-19 01:04:39,389 : INFO : diff #set()
2020-12-19 01:04:40,641 : INFO : alphabet #6957
2020-12-19 01:04:41,255 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ26.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[1.0576491042667437, 0.4859915123168468], [0.8308941572904587, 0.16910584], [3.2430190302453976, 1.345817513024151], [4.694042263389069, 7.345352443618291, 7.427362124353715, 4.6120325826536455, 2.733319860964646, 0.0820096807354247]]
2020-12-19 01:04:41,258 : INFO : Removed 49 and 1986 OOV words from document 1 and 2 (respectively).
2020-12-19 01:04:41,259 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:04:41,260 : INFO : built Dictionary(38 unique tokens: ['est', 'less', 'may', 'respond',

2020-12-19 01:04:54,678 : INFO : diff #set()
2020-12-19 01:04:55,910 : INFO : alphabet #6957
2020-12-19 01:04:56,524 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ26.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[1.0354391121509685, 0.49129447991359515], [0.9527373686432838, 0.04726263], [2.838909597982605, 1.2989154707116395], [4.694042263389069, 7.102818334145153, 7.183984191306162, 4.612876406228062, 2.4899419279170925, 0.08116585716100833]]
2020-12-19 01:04:56,528 : INFO : Removed 307 and 1761 OOV words from document 1 and 2 (respectively).
2020-12-19 01:04:56,529 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:04:56,530 : INFO : built Dictionary(43 unique tokens: ['base', 'der', 'est', 'form', 'ident']...) from 2 documents (total 209 corpus positions)
2020-12-19 01:04:56,557 : INFO : token count processed
2020-12-19 01:04:56,571 : INFO : frequencies processed
2020-12-19 01:04:57,198 : INFO : scalar_d

2020-12-19 01:05:11,778 : INFO : Removed 307 and 2503 OOV words from document 1 and 2 (respectively).
2020-12-19 01:05:11,779 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:05:11,780 : INFO : built Dictionary(32 unique tokens: ['base', 'der', 'est', 'form', 'ident']...) from 2 documents (total 216 corpus positions)
2020-12-19 01:05:11,798 : INFO : token count processed
2020-12-19 01:05:11,803 : INFO : frequencies processed
2020-12-19 01:05:12,417 : INFO : scalar_distribution processed
2020-12-19 01:05:12,418 : INFO : entropies processed
2020-12-19 01:05:12,419 : INFO : extropies processed
2020-12-19 01:05:12,421 : INFO : token count processed
2020-12-19 01:05:12,422 : INFO : alphabet_source #6957
2020-12-19 01:05:12,424 : INFO : alphabet_target #6957
2020-12-19 01:05:12,424 : INFO : vocab #6957
2020-12-19 01:05:12,426 : INFO : diff #set()
2020-12-19 01:05:13,666 : INFO : alphabet #6957
2020-12-19 01:05:14,279 : INFO : Computed distances or similarities ('t

2020-12-19 01:05:27,031 : INFO : built Dictionary(54 unique tokens: ['base', 'der', 'est', 'form', 'ident']...) from 2 documents (total 343 corpus positions)
2020-12-19 01:05:27,064 : INFO : token count processed
2020-12-19 01:05:27,070 : INFO : frequencies processed
2020-12-19 01:05:27,694 : INFO : scalar_distribution processed
2020-12-19 01:05:27,695 : INFO : entropies processed
2020-12-19 01:05:27,696 : INFO : extropies processed
2020-12-19 01:05:27,698 : INFO : token count processed
2020-12-19 01:05:27,700 : INFO : alphabet_source #6957
2020-12-19 01:05:27,701 : INFO : alphabet_target #6957
2020-12-19 01:05:27,702 : INFO : vocab #6957
2020-12-19 01:05:27,704 : INFO : diff #set()
2020-12-19 01:05:28,946 : INFO : alphabet #6957
2020-12-19 01:05:29,560 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ49.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[0.8953831156723686, 0.5275978200561631], [0.8146521002054214, 0.1853479], [5.146523364

2020-12-19 01:05:42,158 : INFO : frequencies processed
2020-12-19 01:05:42,775 : INFO : scalar_distribution processed
2020-12-19 01:05:42,776 : INFO : entropies processed
2020-12-19 01:05:42,777 : INFO : extropies processed
2020-12-19 01:05:42,783 : INFO : token count processed
2020-12-19 01:05:42,785 : INFO : alphabet_source #6957
2020-12-19 01:05:42,788 : INFO : alphabet_target #6957
2020-12-19 01:05:42,789 : INFO : vocab #6957
2020-12-19 01:05:42,792 : INFO : diff #set()
2020-12-19 01:05:44,151 : INFO : alphabet #6957
2020-12-19 01:05:44,767 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ49.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[0.7971983107709676, 0.5564216224813927], [0.8205428421497345, 0.17945716], [4.9204708762900875, 1.4137842285258813], [6.480651340305256, 9.228957519717383, 9.322035104569505, 6.387573755453134, 2.8413837642642488, 0.09307758485212148]]
2020-12-19 01:05:44,770 : INFO : Removed 307 and 2212 OOV words

2020-12-19 01:05:58,130 : INFO : token count processed
2020-12-19 01:05:58,131 : INFO : alphabet_source #6957
2020-12-19 01:05:58,132 : INFO : alphabet_target #6957
2020-12-19 01:05:58,134 : INFO : vocab #6957
2020-12-19 01:05:58,135 : INFO : diff #set()
2020-12-19 01:05:59,376 : INFO : alphabet #6957
2020-12-19 01:05:59,992 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ10.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[0.843851194418892, 0.5423431147952045], [0.7945909947156906, 0.205409], [3.0849625007211556, 1.3480058660457088], [3.378783493486176, 6.985394788721831, 6.994082080732252, 3.3700962014757554, 3.615298587246076, 0.0086872920104204]]
2020-12-19 01:05:59,995 : INFO : Removed 10 and 2273 OOV words from document 1 and 2 (respectively).
2020-12-19 01:05:59,996 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:05:59,998 : INFO : built Dictionary(30 unique tokens: ['est', 'key', 'see', 'back', 'char

2020-12-19 01:06:13,321 : INFO : diff #set()
2020-12-19 01:06:14,561 : INFO : alphabet #6957
2020-12-19 01:06:15,173 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ10.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[1.0377944965861001, 0.49072661726945066], [0.733261913061142, 0.2667381], [3.2516291673878226, 1.3589504783379556], [3.378783493486176, 7.278774046648788, 7.284250277251373, 3.37330726288359, 3.9054667837651973, 0.005476230602584842]]
2020-12-19 01:06:15,177 : INFO : Removed 10 and 2288 OOV words from document 1 and 2 (respectively).
2020-12-19 01:06:15,177 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:06:15,178 : INFO : built Dictionary(36 unique tokens: ['est', 'key', 'see', '200', 'back']...) from 2 documents (total 365 corpus positions)
2020-12-19 01:06:15,192 : INFO : token count processed
2020-12-19 01:06:15,197 : INFO : frequencies processed
2020-12-19 01:06:15,813 : INFO : scalar_distr

2020-12-19 01:06:30,345 : INFO : Removed 10 and 954 OOV words from document 1 and 2 (respectively).
2020-12-19 01:06:30,346 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:06:30,347 : INFO : built Dictionary(32 unique tokens: ['est', 'key', 'see', '200', '://']...) from 2 documents (total 87 corpus positions)
2020-12-19 01:06:30,364 : INFO : token count processed
2020-12-19 01:06:30,373 : INFO : frequencies processed
2020-12-19 01:06:31,012 : INFO : scalar_distribution processed
2020-12-19 01:06:31,013 : INFO : entropies processed
2020-12-19 01:06:31,013 : INFO : extropies processed
2020-12-19 01:06:31,016 : INFO : token count processed
2020-12-19 01:06:31,017 : INFO : alphabet_source #6957
2020-12-19 01:06:31,018 : INFO : alphabet_target #6957
2020-12-19 01:06:31,019 : INFO : vocab #6957
2020-12-19 01:06:31,021 : INFO : diff #set()
2020-12-19 01:06:32,262 : INFO : alphabet #6957
2020-12-19 01:06:32,875 : INFO : Computed distances or similarities ('test_dat

2020-12-19 01:06:45,559 : INFO : built Dictionary(35 unique tokens: ['also', 'band', 'base', 'build', 'end']...) from 2 documents (total 275 corpus positions)
2020-12-19 01:06:45,575 : INFO : token count processed
2020-12-19 01:06:45,586 : INFO : frequencies processed
2020-12-19 01:06:46,222 : INFO : scalar_distribution processed
2020-12-19 01:06:46,223 : INFO : entropies processed
2020-12-19 01:06:46,224 : INFO : extropies processed
2020-12-19 01:06:46,226 : INFO : token count processed
2020-12-19 01:06:46,227 : INFO : alphabet_source #6957
2020-12-19 01:06:46,229 : INFO : alphabet_target #6957
2020-12-19 01:06:46,230 : INFO : vocab #6957
2020-12-19 01:06:46,231 : INFO : diff #set()
2020-12-19 01:06:47,482 : INFO : alphabet #6957
2020-12-19 01:06:48,099 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ34.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[0.9948075756249883, 0.5013014850250368], [0.7874308973550797, 0.2125691], [3.82886624

2020-12-19 01:07:00,798 : INFO : frequencies processed
2020-12-19 01:07:01,429 : INFO : scalar_distribution processed
2020-12-19 01:07:01,430 : INFO : entropies processed
2020-12-19 01:07:01,431 : INFO : extropies processed
2020-12-19 01:07:01,433 : INFO : token count processed
2020-12-19 01:07:01,435 : INFO : alphabet_source #6957
2020-12-19 01:07:01,436 : INFO : alphabet_target #6957
2020-12-19 01:07:01,437 : INFO : vocab #6957
2020-12-19 01:07:01,439 : INFO : diff #set()
2020-12-19 01:07:02,679 : INFO : alphabet #6957
2020-12-19 01:07:03,293 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ34.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[0.8363182690467865, 0.5445679089818616], [0.8237166255712509, 0.17628337], [3.866275708187523, 1.363521128456867], [5.884648105522896, 7.373064697852536, 7.661560971671392, 5.59615183170404, 1.7769128661484963, 0.2884962738188559]]
2020-12-19 01:07:03,296 : INFO : Removed 204 and 2544 OOV words from

2020-12-19 01:07:16,664 : INFO : token count processed
2020-12-19 01:07:16,665 : INFO : alphabet_source #6957
2020-12-19 01:07:16,667 : INFO : alphabet_target #6957
2020-12-19 01:07:16,667 : INFO : vocab #6957
2020-12-19 01:07:16,670 : INFO : diff #set()
2020-12-19 01:07:17,935 : INFO : alphabet #6957
2020-12-19 01:07:18,552 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ34.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[0.7704068838434427, 0.5648419067536963], [0.7168496251106262, 0.28315037], [4.303174031399899, 1.3786351494276186], [5.884648105522896, 8.67532843392421, 8.732727175157581, 5.827249364289525, 2.848079069634686, 0.05739874123337074]]
2020-12-19 01:07:18,555 : INFO : Removed 204 and 926 OOV words from document 1 and 2 (respectively).
2020-12-19 01:07:18,556 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:07:18,557 : INFO : built Dictionary(32 unique tokens: ['also', 'band', 'base', 'build', '

2020-12-19 01:07:31,827 : INFO : vocab #6957
2020-12-19 01:07:31,828 : INFO : diff #set()
2020-12-19 01:07:33,071 : INFO : alphabet #6957
2020-12-19 01:07:33,694 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ34.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[0.7953179323297211, 0.557004406847502], [0.7633237689733505, 0.23667623], [4.569348001512555, 1.3875598085734802], [5.884648105522896, 7.9055766874171, 8.040284942760744, 5.749939850179251, 2.1556368372378483, 0.13470825534364383]]
2020-12-19 01:07:33,697 : INFO : Removed 204 and 2291 OOV words from document 1 and 2 (respectively).
2020-12-19 01:07:33,698 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:07:33,700 : INFO : built Dictionary(31 unique tokens: ['also', 'band', 'base', 'build', 'end']...) from 2 documents (total 197 corpus positions)
2020-12-19 01:07:33,722 : INFO : token count processed
2020-12-19 01:07:33,728 : INFO : frequencies processed

2020-12-19 01:07:48,958 : INFO : Removed 266 and 1444 OOV words from document 1 and 2 (respectively).
2020-12-19 01:07:48,959 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:07:48,960 : INFO : built Dictionary(40 unique tokens: ['associ', 'back', 'base', 'end', 'est']...) from 2 documents (total 188 corpus positions)
2020-12-19 01:07:48,982 : INFO : token count processed
2020-12-19 01:07:48,995 : INFO : frequencies processed
2020-12-19 01:07:49,609 : INFO : scalar_distribution processed
2020-12-19 01:07:49,609 : INFO : entropies processed
2020-12-19 01:07:49,610 : INFO : extropies processed
2020-12-19 01:07:49,613 : INFO : token count processed
2020-12-19 01:07:49,614 : INFO : alphabet_source #6957
2020-12-19 01:07:49,615 : INFO : alphabet_target #6957
2020-12-19 01:07:49,616 : INFO : vocab #6957
2020-12-19 01:07:49,618 : INFO : diff #set()
2020-12-19 01:07:50,856 : INFO : alphabet #6957
2020-12-19 01:07:51,467 : INFO : Computed distances or similarities ('

2020-12-19 01:08:04,207 : INFO : built Dictionary(54 unique tokens: ['associ', 'back', 'base', 'end', 'est']...) from 2 documents (total 366 corpus positions)
2020-12-19 01:08:04,236 : INFO : token count processed
2020-12-19 01:08:04,243 : INFO : frequencies processed
2020-12-19 01:08:04,856 : INFO : scalar_distribution processed
2020-12-19 01:08:04,857 : INFO : entropies processed
2020-12-19 01:08:04,858 : INFO : extropies processed
2020-12-19 01:08:04,861 : INFO : token count processed
2020-12-19 01:08:04,862 : INFO : alphabet_source #6957
2020-12-19 01:08:04,864 : INFO : alphabet_target #6957
2020-12-19 01:08:04,864 : INFO : vocab #6957
2020-12-19 01:08:04,867 : INFO : diff #set()
2020-12-19 01:08:06,122 : INFO : alphabet #6957
2020-12-19 01:08:06,739 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ25.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[0.8681981227358824, 0.5352751337398575], [0.80203777551651, 0.19796222], [4.930181631

2020-12-19 01:08:19,540 : INFO : frequencies processed
2020-12-19 01:08:20,165 : INFO : scalar_distribution processed
2020-12-19 01:08:20,166 : INFO : entropies processed
2020-12-19 01:08:20,167 : INFO : extropies processed
2020-12-19 01:08:20,172 : INFO : token count processed
2020-12-19 01:08:20,174 : INFO : alphabet_source #6957
2020-12-19 01:08:20,176 : INFO : alphabet_target #6957
2020-12-19 01:08:20,177 : INFO : vocab #6957
2020-12-19 01:08:20,180 : INFO : diff #set()
2020-12-19 01:08:21,432 : INFO : alphabet #6957
2020-12-19 01:08:22,046 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ25.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[0.886912774247735, 0.5299662038690024], [0.8036547005176544, 0.1963453], [4.688398662661256, 1.4060776704416835], [6.447723771921334, 7.3308860974230985, 7.630920751911566, 6.147689117432867, 1.1831969799902318, 0.3000346544884671]]
2020-12-19 01:08:22,049 : INFO : Removed 266 and 2276 OOV words fr

2020-12-19 01:08:35,571 : INFO : token count processed
2020-12-19 01:08:35,572 : INFO : alphabet_source #6957
2020-12-19 01:08:35,574 : INFO : alphabet_target #6957
2020-12-19 01:08:35,575 : INFO : vocab #6957
2020-12-19 01:08:35,578 : INFO : diff #set()
2020-12-19 01:08:36,827 : INFO : alphabet #6957
2020-12-19 01:08:37,444 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ16.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[0.7621896717994864, 0.5674758035432332], [0.7149451971054077, 0.2850548], [4.804574504151558, 1.411205921569873], [5.956780025038978, 8.158196888137685, 8.232164311751916, 5.882812601424746, 2.2753842867129377, 0.07396742361423136]]
2020-12-19 01:08:37,447 : INFO : Removed 95 and 1854 OOV words from document 1 and 2 (respectively).
2020-12-19 01:08:37,447 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:08:37,448 : INFO : built Dictionary(32 unique tokens: ['est', 'get', 'head', 'ident', 'ke

2020-12-19 01:08:50,850 : INFO : diff #set()
2020-12-19 01:08:52,104 : INFO : alphabet #6957
2020-12-19 01:08:52,720 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ16.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[0.7661906965459278, 0.5661902771629712], [0.778507724404335, 0.22149228], [4.784897702756408, 1.4089037146596168], [5.956780025038978, 7.749661971001125, 7.849739488553308, 5.856702507486796, 1.8929594635143294, 0.1000775175521822]]
2020-12-19 01:08:52,723 : INFO : Removed 95 and 1285 OOV words from document 1 and 2 (respectively).
2020-12-19 01:08:52,724 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:08:52,724 : INFO : built Dictionary(32 unique tokens: ['est', 'get', 'head', 'ident', 'key']...) from 2 documents (total 196 corpus positions)
2020-12-19 01:08:52,741 : INFO : token count processed
2020-12-19 01:08:52,748 : INFO : frequencies processed
2020-12-19 01:08:53,365 : INFO : scalar_distri

2020-12-19 01:09:07,976 : INFO : Removed 95 and 1986 OOV words from document 1 and 2 (respectively).
2020-12-19 01:09:07,977 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:09:07,978 : INFO : built Dictionary(42 unique tokens: ['est', 'get', 'head', 'ident', 'key']...) from 2 documents (total 275 corpus positions)
2020-12-19 01:09:08,005 : INFO : token count processed
2020-12-19 01:09:08,012 : INFO : frequencies processed
2020-12-19 01:09:08,626 : INFO : scalar_distribution processed
2020-12-19 01:09:08,627 : INFO : entropies processed
2020-12-19 01:09:08,627 : INFO : extropies processed
2020-12-19 01:09:08,630 : INFO : token count processed
2020-12-19 01:09:08,631 : INFO : alphabet_source #6957
2020-12-19 01:09:08,633 : INFO : alphabet_target #6957
2020-12-19 01:09:08,633 : INFO : vocab #6957
2020-12-19 01:09:08,635 : INFO : diff #set()
2020-12-19 01:09:09,886 : INFO : alphabet #6957
2020-12-19 01:09:10,502 : INFO : Computed distances or similarities ('tes

2020-12-19 01:09:23,117 : INFO : token count processed
2020-12-19 01:09:23,124 : INFO : frequencies processed
2020-12-19 01:09:23,751 : INFO : scalar_distribution processed
2020-12-19 01:09:23,752 : INFO : entropies processed
2020-12-19 01:09:23,753 : INFO : extropies processed
2020-12-19 01:09:23,755 : INFO : token count processed
2020-12-19 01:09:23,756 : INFO : alphabet_source #6957
2020-12-19 01:09:23,758 : INFO : alphabet_target #6957
2020-12-19 01:09:23,759 : INFO : vocab #6957
2020-12-19 01:09:23,760 : INFO : diff #set()
2020-12-19 01:09:25,098 : INFO : alphabet #6957
2020-12-19 01:09:25,712 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ4.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[0.8921278884574335, 0.5285055022444888], [0.7355755269527435, 0.26442447], [3.625478737026909, 1.3495812557244635], [5.492328872067779, 8.626714101844048, 8.681986008459138, 5.43705696545269, 3.189657136391358, 0.05527190661508996]]
2020-12-19 01

2020-12-19 01:09:39,010 : INFO : extropies processed
2020-12-19 01:09:39,016 : INFO : token count processed
2020-12-19 01:09:39,018 : INFO : alphabet_source #6957
2020-12-19 01:09:39,020 : INFO : alphabet_target #6957
2020-12-19 01:09:39,021 : INFO : vocab #6957
2020-12-19 01:09:39,023 : INFO : diff #set()
2020-12-19 01:09:40,261 : INFO : alphabet #6957
2020-12-19 01:09:40,875 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ4.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[1.081077830774292, 0.48052023101314617], [0.9405980445444584, 0.059401955], [2.9356482715464565, 1.2684538846961821], [5.492328872067779, 7.127980212598745, 7.296395631738742, 5.323913452927782, 1.8040667596709623, 0.1684154191399969]]
2020-12-19 01:09:40,878 : INFO : Removed 115 and 774 OOV words from document 1 and 2 (respectively).
2020-12-19 01:09:40,879 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:09:40,880 : INFO : built Dictionar

2020-12-19 01:09:54,201 : INFO : vocab #6957
2020-12-19 01:09:54,202 : INFO : diff #set()
2020-12-19 01:09:55,468 : INFO : alphabet #6957
2020-12-19 01:09:56,090 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ4.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[0.9765250024159748, 0.5059384519688167], [0.6985000967979431, 0.3014999], [4.229241606297446, 1.375258904435136], [5.492328872067779, 9.006525301292756, 9.034408992055177, 5.464445181305358, 3.5420801199873972, 0.0278836907624207]]
2020-12-19 01:09:56,093 : INFO : Removed 115 and 1726 OOV words from document 1 and 2 (respectively).
2020-12-19 01:09:56,094 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:09:56,095 : INFO : built Dictionary(41 unique tokens: ['end', 'est', 'known', 'less', 'list']...) from 2 documents (total 180 corpus positions)
2020-12-19 01:09:56,115 : INFO : token count processed
2020-12-19 01:09:56,124 : INFO : frequencies processed


2020-12-19 01:10:11,268 : INFO : Removed 115 and 2212 OOV words from document 1 and 2 (respectively).
2020-12-19 01:10:11,269 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:10:11,270 : INFO : built Dictionary(46 unique tokens: ['end', 'est', 'known', 'less', 'list']...) from 2 documents (total 282 corpus positions)
2020-12-19 01:10:11,289 : INFO : token count processed
2020-12-19 01:10:11,299 : INFO : frequencies processed
2020-12-19 01:10:11,930 : INFO : scalar_distribution processed
2020-12-19 01:10:11,931 : INFO : entropies processed
2020-12-19 01:10:11,932 : INFO : extropies processed
2020-12-19 01:10:11,937 : INFO : token count processed
2020-12-19 01:10:11,940 : INFO : alphabet_source #6957
2020-12-19 01:10:11,942 : INFO : alphabet_target #6957
2020-12-19 01:10:11,943 : INFO : vocab #6957
2020-12-19 01:10:11,946 : INFO : diff #set()
2020-12-19 01:10:13,185 : INFO : alphabet #6957
2020-12-19 01:10:13,800 : INFO : Computed distances or similarities ('t

2020-12-19 01:10:26,559 : INFO : token count processed
2020-12-19 01:10:26,565 : INFO : frequencies processed
2020-12-19 01:10:27,180 : INFO : scalar_distribution processed
2020-12-19 01:10:27,181 : INFO : entropies processed
2020-12-19 01:10:27,182 : INFO : extropies processed
2020-12-19 01:10:27,185 : INFO : token count processed
2020-12-19 01:10:27,187 : INFO : alphabet_source #6957
2020-12-19 01:10:27,189 : INFO : alphabet_target #6957
2020-12-19 01:10:27,190 : INFO : vocab #6957
2020-12-19 01:10:27,193 : INFO : diff #set()
2020-12-19 01:10:28,447 : INFO : alphabet #6957
2020-12-19 01:10:29,062 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ27.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[0.8352295569616673, 0.5448909626627635], [0.812064066529274, 0.18793593], [3.3086949695628425, 1.359797510933984], [4.719349267862323, 6.792406615896576, 6.842348666689961, 4.669407217068937, 2.1229993988276386, 0.049942050793385206]]
2020-12-19

2020-12-19 01:10:42,460 : INFO : extropies processed
2020-12-19 01:10:42,465 : INFO : token count processed
2020-12-19 01:10:42,468 : INFO : alphabet_source #6957
2020-12-19 01:10:42,470 : INFO : alphabet_target #6957
2020-12-19 01:10:42,471 : INFO : vocab #6957
2020-12-19 01:10:42,472 : INFO : diff #set()
2020-12-19 01:10:43,713 : INFO : alphabet #6957
2020-12-19 01:10:44,325 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ27.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[0.9116147441781177, 0.523117957237739], [0.8392074406147003, 0.16079256], [3.823934896284056, 1.383097355087041], [4.719349267862323, 7.183148764136712, 7.2235501348471205, 4.678947897151914, 2.5042008669847977, 0.04040137071040828]]
2020-12-19 01:10:44,328 : INFO : Removed 33 and 2059 OOV words from document 1 and 2 (respectively).
2020-12-19 01:10:44,329 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:10:44,330 : INFO : built Dictionary

2020-12-19 01:10:57,730 : INFO : vocab #6957
2020-12-19 01:10:57,734 : INFO : diff #set()
2020-12-19 01:10:59,010 : INFO : alphabet #6957
2020-12-19 01:10:59,626 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ27.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[0.8621974353574848, 0.5369999877633977], [0.7769183814525604, 0.22308162], [3.29277019393699, 1.356236901569955], [4.719349267862323, 8.610843107887472, 8.646654705376424, 4.683537670373372, 3.927305437514101, 0.03581159748895146]]
2020-12-19 01:10:59,629 : INFO : Removed 33 and 1850 OOV words from document 1 and 2 (respectively).
2020-12-19 01:10:59,630 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:10:59,631 : INFO : built Dictionary(34 unique tokens: ['est', 'ident', 'key', 'uri', 'use']...) from 2 documents (total 307 corpus positions)
2020-12-19 01:10:59,646 : INFO : token count processed
2020-12-19 01:10:59,655 : INFO : frequencies processed
20

2020-12-19 01:11:14,845 : INFO : Removed 88 and 3144 OOV words from document 1 and 2 (respectively).
2020-12-19 01:11:14,846 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:11:14,847 : INFO : built Dictionary(34 unique tokens: ['base', 'type', 'cat', 'char', 'com']...) from 2 documents (total 390 corpus positions)
2020-12-19 01:11:14,864 : INFO : token count processed
2020-12-19 01:11:14,869 : INFO : frequencies processed
2020-12-19 01:11:15,590 : INFO : scalar_distribution processed
2020-12-19 01:11:15,591 : INFO : entropies processed
2020-12-19 01:11:15,592 : INFO : extropies processed
2020-12-19 01:11:15,598 : INFO : token count processed
2020-12-19 01:11:15,600 : INFO : alphabet_source #6957
2020-12-19 01:11:15,603 : INFO : alphabet_target #6957
2020-12-19 01:11:15,603 : INFO : vocab #6957
2020-12-19 01:11:15,605 : INFO : diff #set()
2020-12-19 01:11:16,856 : INFO : alphabet #6957
2020-12-19 01:11:17,473 : INFO : Computed distances or similarities ('tes

2020-12-19 01:11:30,048 : INFO : built Dictionary(37 unique tokens: ['base', 'type', '200', '://', 'act']...) from 2 documents (total 287 corpus positions)
2020-12-19 01:11:30,061 : INFO : token count processed
2020-12-19 01:11:30,067 : INFO : frequencies processed
2020-12-19 01:11:30,685 : INFO : scalar_distribution processed
2020-12-19 01:11:30,686 : INFO : entropies processed
2020-12-19 01:11:30,687 : INFO : extropies processed
2020-12-19 01:11:30,689 : INFO : token count processed
2020-12-19 01:11:30,691 : INFO : alphabet_source #6957
2020-12-19 01:11:30,692 : INFO : alphabet_target #6957
2020-12-19 01:11:30,693 : INFO : vocab #6957
2020-12-19 01:11:30,695 : INFO : diff #set()
2020-12-19 01:11:31,935 : INFO : alphabet #6957
2020-12-19 01:11:32,551 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ41.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[1.2636845993580017, 0.44175765487983953], [0.8837738335132599, 0.11622617], [4.2445729918

2020-12-19 01:11:45,325 : INFO : frequencies processed
2020-12-19 01:11:45,941 : INFO : scalar_distribution processed
2020-12-19 01:11:45,942 : INFO : entropies processed
2020-12-19 01:11:45,942 : INFO : extropies processed
2020-12-19 01:11:45,945 : INFO : token count processed
2020-12-19 01:11:45,946 : INFO : alphabet_source #6957
2020-12-19 01:11:45,947 : INFO : alphabet_target #6957
2020-12-19 01:11:45,948 : INFO : vocab #6957
2020-12-19 01:11:45,950 : INFO : diff #set()
2020-12-19 01:11:47,202 : INFO : alphabet #6957
2020-12-19 01:11:47,813 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ41.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[1.2463825984242158, 0.44516014355768085], [0.9275961890816689, 0.07240381], [3.6818808028034025, 1.3784731924832672], [5.093958510098267, 7.345352443618291, 7.510164197179838, 4.929146756536719, 2.416205687081571, 0.16481175356154765]]
2020-12-19 01:11:47,816 : INFO : Removed 88 and 1986 OOV words 

2020-12-19 01:12:01,152 : INFO : token count processed
2020-12-19 01:12:01,154 : INFO : alphabet_source #6957
2020-12-19 01:12:01,156 : INFO : alphabet_target #6957
2020-12-19 01:12:01,157 : INFO : vocab #6957
2020-12-19 01:12:01,159 : INFO : diff #set()
2020-12-19 01:12:02,414 : INFO : alphabet #6957
2020-12-19 01:12:03,032 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ41.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[1.2484899200277835, 0.4447429321754057], [0.9797842968255281, 0.020215703], [3.5368867237421666, 1.3697834857353341], [5.093958510098267, 7.102818334145153, 7.233414008834886, 4.9633628354085335, 2.139455498736619, 0.130595674689733]]
2020-12-19 01:12:03,035 : INFO : Removed 34 and 1761 OOV words from document 1 and 2 (respectively).
2020-12-19 01:12:03,036 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:12:03,038 : INFO : built Dictionary(36 unique tokens: ['est', 'see', 'use', 'via', '200

2020-12-19 01:12:16,411 : INFO : diff #set()
2020-12-19 01:12:17,652 : INFO : alphabet #6957
2020-12-19 01:12:18,267 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ11.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[0.953515756638833, 0.5118975859813761], [0.932334192097187, 0.06766581], [2.9139770731827523, 1.3356231683419404], [4.169766962341044, 7.587625711074752, 7.642070411915824, 4.115322261499974, 3.4723034495747793, 0.05444470084107156]]
2020-12-19 01:12:18,270 : INFO : Removed 34 and 2503 OOV words from document 1 and 2 (respectively).
2020-12-19 01:12:18,271 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:12:18,272 : INFO : built Dictionary(25 unique tokens: ['est', 'see', 'use', 'via', 'char']...) from 2 documents (total 174 corpus positions)
2020-12-19 01:12:18,285 : INFO : token count processed
2020-12-19 01:12:18,291 : INFO : frequencies processed
2020-12-19 01:12:18,909 : INFO : scalar_distri

2020-12-19 01:12:33,477 : INFO : Removed 34 and 2642 OOV words from document 1 and 2 (respectively).
2020-12-19 01:12:33,478 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:12:33,479 : INFO : built Dictionary(48 unique tokens: ['est', 'see', 'use', 'via', '://']...) from 2 documents (total 301 corpus positions)
2020-12-19 01:12:33,504 : INFO : token count processed
2020-12-19 01:12:33,511 : INFO : frequencies processed
2020-12-19 01:12:34,142 : INFO : scalar_distribution processed
2020-12-19 01:12:34,143 : INFO : entropies processed
2020-12-19 01:12:34,144 : INFO : extropies processed
2020-12-19 01:12:34,147 : INFO : token count processed
2020-12-19 01:12:34,148 : INFO : alphabet_source #6957
2020-12-19 01:12:34,149 : INFO : alphabet_target #6957
2020-12-19 01:12:34,150 : INFO : vocab #6957
2020-12-19 01:12:34,152 : INFO : diff #set()
2020-12-19 01:12:35,390 : INFO : alphabet #6957
2020-12-19 01:12:36,005 : INFO : Computed distances or similarities ('test_d

2020-12-19 01:12:48,678 : INFO : token count processed
2020-12-19 01:12:48,685 : INFO : frequencies processed
2020-12-19 01:12:49,318 : INFO : scalar_distribution processed
2020-12-19 01:12:49,319 : INFO : entropies processed
2020-12-19 01:12:49,320 : INFO : extropies processed
2020-12-19 01:12:49,323 : INFO : token count processed
2020-12-19 01:12:49,324 : INFO : alphabet_source #6957
2020-12-19 01:12:49,325 : INFO : alphabet_target #6957
2020-12-19 01:12:49,326 : INFO : vocab #6957
2020-12-19 01:12:49,330 : INFO : diff #set()
2020-12-19 01:12:50,587 : INFO : alphabet #6957
2020-12-19 01:12:51,203 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ11.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[0.9088609072034188, 0.5238726385072511], [0.8626407533884048, 0.13735925], [3.2195282822995472, 1.3445081666934866], [4.169766962341044, 9.228957519717383, 9.240750786093232, 4.157973695965195, 5.070983823752187, 0.011793266375848432]]
2020-12-

2020-12-19 01:13:04,503 : INFO : extropies processed
2020-12-19 01:13:04,505 : INFO : token count processed
2020-12-19 01:13:04,506 : INFO : alphabet_source #6957
2020-12-19 01:13:04,508 : INFO : alphabet_target #6957
2020-12-19 01:13:04,509 : INFO : vocab #6957
2020-12-19 01:13:04,510 : INFO : diff #set()
2020-12-19 01:13:05,856 : INFO : alphabet #6957
2020-12-19 01:13:06,469 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ24.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[0.8898515953290651, 0.5291420778602871], [0.8899484872817993, 0.11005151], [3.5841837197791886, 1.375413252695311], [5.1942057381487485, 6.985394788721831, 7.086862532780849, 5.092737994089731, 1.8926567946321002, 0.10146774405901748]]
2020-12-19 01:13:06,472 : INFO : Removed 51 and 2273 OOV words from document 1 and 2 (respectively).
2020-12-19 01:13:06,473 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:13:06,474 : INFO : built Diction

2020-12-19 01:13:19,766 : INFO : vocab #6957
2020-12-19 01:13:19,770 : INFO : diff #set()
2020-12-19 01:13:21,030 : INFO : alphabet #6957
2020-12-19 01:13:21,643 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ24.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[0.886969563150286, 0.5299502543806298], [0.8596499562263489, 0.14035004], [3.6897037321995465, 1.380114163501017], [5.1942057381487485, 7.278774046648788, 7.38894112758845, 5.084038657209088, 2.1947353894397015, 0.11016708093966177]]
2020-12-19 01:13:21,646 : INFO : Removed 51 and 2288 OOV words from document 1 and 2 (respectively).
2020-12-19 01:13:21,647 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:13:21,648 : INFO : built Dictionary(39 unique tokens: ['base', 'cover', 'end', 'key', 'point']...) from 2 documents (total 372 corpus positions)
2020-12-19 01:13:21,664 : INFO : token count processed
2020-12-19 01:13:21,670 : INFO : frequencies process

2020-12-19 01:13:36,891 : INFO : Removed 51 and 954 OOV words from document 1 and 2 (respectively).
2020-12-19 01:13:36,892 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:13:36,893 : INFO : built Dictionary(37 unique tokens: ['base', 'cover', 'end', 'key', 'point']...) from 2 documents (total 94 corpus positions)
2020-12-19 01:13:36,909 : INFO : token count processed
2020-12-19 01:13:36,914 : INFO : frequencies processed
2020-12-19 01:13:37,532 : INFO : scalar_distribution processed
2020-12-19 01:13:37,533 : INFO : entropies processed
2020-12-19 01:13:37,534 : INFO : extropies processed
2020-12-19 01:13:37,537 : INFO : token count processed
2020-12-19 01:13:37,539 : INFO : alphabet_source #6957
2020-12-19 01:13:37,541 : INFO : alphabet_target #6957
2020-12-19 01:13:37,542 : INFO : vocab #6957
2020-12-19 01:13:37,545 : INFO : diff #set()
2020-12-19 01:13:38,798 : INFO : alphabet #6957
2020-12-19 01:13:39,416 : INFO : Computed distances or similarities ('tes

2020-12-19 01:13:52,105 : INFO : built Dictionary(34 unique tokens: ['band', 'base', 'est', 'less', 'may']...) from 2 documents (total 242 corpus positions)
2020-12-19 01:13:52,117 : INFO : token count processed
2020-12-19 01:13:52,126 : INFO : frequencies processed
2020-12-19 01:13:52,742 : INFO : scalar_distribution processed
2020-12-19 01:13:52,743 : INFO : entropies processed
2020-12-19 01:13:52,743 : INFO : extropies processed
2020-12-19 01:13:52,746 : INFO : token count processed
2020-12-19 01:13:52,747 : INFO : alphabet_source #6957
2020-12-19 01:13:52,749 : INFO : alphabet_target #6957
2020-12-19 01:13:52,749 : INFO : vocab #6957
2020-12-19 01:13:52,751 : INFO : diff #set()
2020-12-19 01:13:53,988 : INFO : alphabet #6957
2020-12-19 01:13:54,633 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ32.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[1.049549722365399, 0.4879120467718604], [0.8265887051820755, 0.1734113], [3.50459807718

2020-12-19 01:14:07,402 : INFO : frequencies processed
2020-12-19 01:14:08,022 : INFO : scalar_distribution processed
2020-12-19 01:14:08,023 : INFO : entropies processed
2020-12-19 01:14:08,024 : INFO : extropies processed
2020-12-19 01:14:08,028 : INFO : token count processed
2020-12-19 01:14:08,030 : INFO : alphabet_source #6957
2020-12-19 01:14:08,033 : INFO : alphabet_target #6957
2020-12-19 01:14:08,034 : INFO : vocab #6957
2020-12-19 01:14:08,036 : INFO : diff #set()
2020-12-19 01:14:09,291 : INFO : alphabet #6957
2020-12-19 01:14:09,905 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ32.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[0.8765048804474623, 0.5329056217330727], [0.8729391545057297, 0.12706085], [3.6284451594312284, 1.3667702644700945], [6.025823465023754, 7.373064697852536, 7.696326051349094, 5.702562111527196, 1.6705025863253402, 0.3232613534965578]]
2020-12-19 01:14:09,908 : INFO : Removed 143 and 2544 OOV words f

2020-12-19 01:14:23,281 : INFO : token count processed
2020-12-19 01:14:23,283 : INFO : alphabet_source #6957
2020-12-19 01:14:23,286 : INFO : alphabet_target #6957
2020-12-19 01:14:23,287 : INFO : vocab #6957
2020-12-19 01:14:23,290 : INFO : diff #set()
2020-12-19 01:14:24,546 : INFO : alphabet #6957
2020-12-19 01:14:25,161 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ32.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[0.7598907639150437, 0.5682170851191953], [0.8053331077098846, 0.19466689], [4.360291307063722, 1.3977079124953073], [6.025823465023754, 8.67532843392421, 8.779658658843605, 5.921493240104359, 2.7538351938198513, 0.10433022491939425]]
2020-12-19 01:14:25,164 : INFO : Removed 143 and 926 OOV words from document 1 and 2 (respectively).
2020-12-19 01:14:25,165 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:14:25,165 : INFO : built Dictionary(28 unique tokens: ['band', 'base', 'est', 'less', 'm

2020-12-19 01:14:38,507 : INFO : diff #set()
2020-12-19 01:14:39,756 : INFO : alphabet #6957
2020-12-19 01:14:40,372 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ32.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[0.8482933720832134, 0.5410396504711257], [0.8481515645980835, 0.15184844], [4.369564431856576, 1.3988361830455718], [6.025823465023754, 7.9055766874171, 8.052589664721186, 5.878810487719669, 2.026766199697432, 0.1470129773040858]]
2020-12-19 01:14:40,375 : INFO : Removed 143 and 2291 OOV words from document 1 and 2 (respectively).
2020-12-19 01:14:40,376 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:14:40,377 : INFO : built Dictionary(29 unique tokens: ['band', 'base', 'est', 'less', 'may']...) from 2 documents (total 164 corpus positions)
2020-12-19 01:14:40,393 : INFO : token count processed
2020-12-19 01:14:40,399 : INFO : frequencies processed
2020-12-19 01:14:41,013 : INFO : scalar_distrib

2020-12-19 01:14:55,452 : INFO : Removed 41 and 1444 OOV words from document 1 and 2 (respectively).
2020-12-19 01:14:55,453 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:14:55,454 : INFO : built Dictionary(32 unique tokens: ['also', 'base', 'est', 'see', 'use']...) from 2 documents (total 157 corpus positions)
2020-12-19 01:14:55,469 : INFO : token count processed
2020-12-19 01:14:55,477 : INFO : frequencies processed
2020-12-19 01:14:56,194 : INFO : scalar_distribution processed
2020-12-19 01:14:56,195 : INFO : entropies processed
2020-12-19 01:14:56,196 : INFO : extropies processed
2020-12-19 01:14:56,201 : INFO : token count processed
2020-12-19 01:14:56,203 : INFO : alphabet_source #6957
2020-12-19 01:14:56,205 : INFO : alphabet_target #6957
2020-12-19 01:14:56,206 : INFO : vocab #6957
2020-12-19 01:14:56,210 : INFO : diff #set()
2020-12-19 01:14:57,468 : INFO : alphabet #6957
2020-12-19 01:14:58,081 : INFO : Computed distances or similarities ('test

2020-12-19 01:15:10,667 : INFO : token count processed
2020-12-19 01:15:10,672 : INFO : frequencies processed
2020-12-19 01:15:11,288 : INFO : scalar_distribution processed
2020-12-19 01:15:11,289 : INFO : entropies processed
2020-12-19 01:15:11,290 : INFO : extropies processed
2020-12-19 01:15:11,295 : INFO : token count processed
2020-12-19 01:15:11,297 : INFO : alphabet_source #6957
2020-12-19 01:15:11,300 : INFO : alphabet_target #6957
2020-12-19 01:15:11,301 : INFO : vocab #6957
2020-12-19 01:15:11,306 : INFO : diff #set()
2020-12-19 01:15:12,542 : INFO : alphabet #6957
2020-12-19 01:15:13,160 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ8.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[0.9737392151708933, 0.506652546756749], [0.820156991481781, 0.17984301], [3.1841837197791887, 1.3432870681129108], [4.664438794627344, 8.081846397171304, 8.126201726181472, 4.620083465617176, 3.4617629315541283, 0.04435532901016792]]
2020-12-19 

2020-12-19 01:15:26,519 : INFO : extropies processed
2020-12-19 01:15:26,521 : INFO : token count processed
2020-12-19 01:15:26,523 : INFO : alphabet_source #6957
2020-12-19 01:15:26,524 : INFO : alphabet_target #6957
2020-12-19 01:15:26,525 : INFO : vocab #6957
2020-12-19 01:15:26,526 : INFO : diff #set()
2020-12-19 01:15:27,762 : INFO : alphabet #6957
2020-12-19 01:15:28,374 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ8.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[1.0513408778914453, 0.4874860198895324], [0.8631306886672974, 0.13686931], [3.3794705707972517, 1.3537725264351061], [4.664438794627344, 7.3308860974230985, 7.389038989378379, 4.606285902672063, 2.7246001947510354, 0.05815289195528095]]
2020-12-19 01:15:28,378 : INFO : Removed 41 and 2276 OOV words from document 1 and 2 (respectively).
2020-12-19 01:15:28,378 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:15:28,380 : INFO : built Diction

2020-12-19 01:15:41,772 : INFO : vocab #6957
2020-12-19 01:15:41,774 : INFO : diff #set()
2020-12-19 01:15:43,017 : INFO : alphabet #6957
2020-12-19 01:15:43,634 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ14.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[0.9838491078159681, 0.5040705949158131], [0.7840054035186768, 0.2159946], [4.101681489107106, 1.3817417127226523], [6.066809241973625, 8.158196888137685, 8.271135450796873, 5.953870679314438, 2.2043262088232476, 0.11293856265918834]]
2020-12-19 01:15:43,637 : INFO : Removed 128 and 1854 OOV words from document 1 and 2 (respectively).
2020-12-19 01:15:43,638 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:15:43,639 : INFO : built Dictionary(34 unique tokens: ['abil', 'also', 'com', 'est', 'form']...) from 2 documents (total 282 corpus positions)
2020-12-19 01:15:43,652 : INFO : token count processed
2020-12-19 01:15:43,657 : INFO : frequencies processed

2020-12-19 01:15:58,866 : INFO : Removed 128 and 1285 OOV words from document 1 and 2 (respectively).
2020-12-19 01:15:58,867 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:15:58,869 : INFO : built Dictionary(34 unique tokens: ['abil', 'also', 'com', 'est', 'form']...) from 2 documents (total 208 corpus positions)
2020-12-19 01:15:58,889 : INFO : token count processed
2020-12-19 01:15:58,897 : INFO : frequencies processed
2020-12-19 01:15:59,517 : INFO : scalar_distribution processed
2020-12-19 01:15:59,518 : INFO : entropies processed
2020-12-19 01:15:59,519 : INFO : extropies processed
2020-12-19 01:15:59,521 : INFO : token count processed
2020-12-19 01:15:59,522 : INFO : alphabet_source #6957
2020-12-19 01:15:59,524 : INFO : alphabet_target #6957
2020-12-19 01:15:59,525 : INFO : vocab #6957
2020-12-19 01:15:59,527 : INFO : diff #set()
2020-12-19 01:16:00,798 : INFO : alphabet #6957
2020-12-19 01:16:01,415 : INFO : Computed distances or similarities ('te

2020-12-19 01:16:14,176 : INFO : built Dictionary(43 unique tokens: ['abil', 'also', 'com', 'est', 'form']...) from 2 documents (total 287 corpus positions)
2020-12-19 01:16:14,193 : INFO : token count processed
2020-12-19 01:16:14,201 : INFO : frequencies processed
2020-12-19 01:16:14,820 : INFO : scalar_distribution processed
2020-12-19 01:16:14,821 : INFO : entropies processed
2020-12-19 01:16:14,821 : INFO : extropies processed
2020-12-19 01:16:14,824 : INFO : token count processed
2020-12-19 01:16:14,825 : INFO : alphabet_source #6957
2020-12-19 01:16:14,827 : INFO : alphabet_target #6957
2020-12-19 01:16:14,827 : INFO : vocab #6957
2020-12-19 01:16:14,831 : INFO : diff #set()
2020-12-19 01:16:16,083 : INFO : alphabet #6957
2020-12-19 01:16:16,700 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ14.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[0.9378589589707252, 0.5160334271856091], [0.8091970384120941, 0.19080296], [4.3471938760

2020-12-19 01:16:29,461 : INFO : frequencies processed
2020-12-19 01:16:30,078 : INFO : scalar_distribution processed
2020-12-19 01:16:30,079 : INFO : entropies processed
2020-12-19 01:16:30,080 : INFO : extropies processed
2020-12-19 01:16:30,082 : INFO : token count processed
2020-12-19 01:16:30,083 : INFO : alphabet_source #6957
2020-12-19 01:16:30,084 : INFO : alphabet_target #6957
2020-12-19 01:16:30,085 : INFO : vocab #6957
2020-12-19 01:16:30,086 : INFO : diff #set()
2020-12-19 01:16:31,323 : INFO : alphabet #6957
2020-12-19 01:16:31,938 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ45.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[1.1128535074954076, 0.4732935797264087], [0.8800009712576866, 0.11999903], [4.0121987351738495, 1.387214080391114], [4.683084591928053, 8.626714101844048, 8.65732852980782, 4.652470163964281, 3.974243937879767, 0.030614427963772783]]
2020-12-19 01:16:31,941 : INFO : Removed 76 and 1487 OOV words fro

2020-12-19 01:16:45,193 : INFO : token count processed
2020-12-19 01:16:45,196 : INFO : alphabet_source #6957
2020-12-19 01:16:45,199 : INFO : alphabet_target #6957
2020-12-19 01:16:45,200 : INFO : vocab #6957
2020-12-19 01:16:45,203 : INFO : diff #set()
2020-12-19 01:16:46,558 : INFO : alphabet #6957
2020-12-19 01:16:47,174 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ45.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[1.1821608098693968, 0.458261368950096], [0.9665709882974625, 0.03342901], [4.060262039120378, 1.395003835128278], [4.683084591928053, 7.127980212598745, 7.226687673727431, 4.584377130799366, 2.543603081799378, 0.09870746112868645]]
2020-12-19 01:16:47,177 : INFO : Removed 76 and 774 OOV words from document 1 and 2 (respectively).
2020-12-19 01:16:47,177 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:16:47,178 : INFO : built Dictionary(19 unique tokens: ['bound', 'key', 'type', 'use', 'char

2020-12-19 01:17:00,447 : INFO : diff #set()
2020-12-19 01:17:01,683 : INFO : alphabet #6957
2020-12-19 01:17:02,298 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ45.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[1.1335912270793094, 0.46869334074311336], [0.8437434434890747, 0.15625656], [3.6275370974071204, 1.3264024830804215], [4.683084591928053, 9.006525301292756, 9.022939196216697, 4.666670697004111, 4.339854604288644, 0.016413894923941186]]
2020-12-19 01:17:02,302 : INFO : Removed 76 and 1726 OOV words from document 1 and 2 (respectively).
2020-12-19 01:17:02,303 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:17:02,304 : INFO : built Dictionary(36 unique tokens: ['bound', 'key', 'type', 'use', '200']...) from 2 documents (total 171 corpus positions)
2020-12-19 01:17:02,323 : INFO : token count processed
2020-12-19 01:17:02,331 : INFO : frequencies processed
2020-12-19 01:17:02,945 : INFO : scalar_d

2020-12-19 01:17:17,545 : INFO : Removed 76 and 2212 OOV words from document 1 and 2 (respectively).
2020-12-19 01:17:17,546 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:17:17,547 : INFO : built Dictionary(41 unique tokens: ['bound', 'key', 'type', 'use', '200']...) from 2 documents (total 273 corpus positions)
2020-12-19 01:17:17,555 : INFO : token count processed
2020-12-19 01:17:17,561 : INFO : frequencies processed
2020-12-19 01:17:18,178 : INFO : scalar_distribution processed
2020-12-19 01:17:18,179 : INFO : entropies processed
2020-12-19 01:17:18,180 : INFO : extropies processed
2020-12-19 01:17:18,183 : INFO : token count processed
2020-12-19 01:17:18,184 : INFO : alphabet_source #6957
2020-12-19 01:17:18,185 : INFO : alphabet_target #6957
2020-12-19 01:17:18,186 : INFO : vocab #6957
2020-12-19 01:17:18,188 : INFO : diff #set()
2020-12-19 01:17:19,431 : INFO : alphabet #6957
2020-12-19 01:17:20,048 : INFO : Computed distances or similarities ('tes

2020-12-19 01:17:32,763 : INFO : built Dictionary(38 unique tokens: ['base', 'est', 'gal', 'get', 'isl']...) from 2 documents (total 344 corpus positions)
2020-12-19 01:17:32,786 : INFO : token count processed
2020-12-19 01:17:32,796 : INFO : frequencies processed
2020-12-19 01:17:33,413 : INFO : scalar_distribution processed
2020-12-19 01:17:33,414 : INFO : entropies processed
2020-12-19 01:17:33,415 : INFO : extropies processed
2020-12-19 01:17:33,418 : INFO : token count processed
2020-12-19 01:17:33,419 : INFO : alphabet_source #6957
2020-12-19 01:17:33,420 : INFO : alphabet_target #6957
2020-12-19 01:17:33,421 : INFO : vocab #6957
2020-12-19 01:17:33,423 : INFO : diff #set()
2020-12-19 01:17:34,666 : INFO : alphabet #6957
2020-12-19 01:17:35,284 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ55.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[1.1077727326548044, 0.4744344513558962], [0.9403850473463535, 0.059614953], [4.20986712190

2020-12-19 01:17:48,024 : INFO : frequencies processed
2020-12-19 01:17:48,653 : INFO : scalar_distribution processed
2020-12-19 01:17:48,654 : INFO : entropies processed
2020-12-19 01:17:48,654 : INFO : extropies processed
2020-12-19 01:17:48,657 : INFO : token count processed
2020-12-19 01:17:48,659 : INFO : alphabet_source #6957
2020-12-19 01:17:48,660 : INFO : alphabet_target #6957
2020-12-19 01:17:48,661 : INFO : vocab #6957
2020-12-19 01:17:48,662 : INFO : diff #set()
2020-12-19 01:17:49,910 : INFO : alphabet #6957
2020-12-19 01:17:50,526 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ55.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[1.1138128334847135, 0.47307878169679574], [0.9633768275380135, 0.036623172], [4.180451390892101, 1.3969771950782797], [9.099470562192366, 7.183148764136712, 8.237653322340881, 8.044966003988195, -0.8618172398514847, 1.0545045582041688]]
2020-12-19 01:17:50,529 : INFO : Removed 629 and 2059 OOV words

2020-12-19 01:18:03,908 : INFO : token count processed
2020-12-19 01:18:03,910 : INFO : alphabet_source #6957
2020-12-19 01:18:03,911 : INFO : alphabet_target #6957
2020-12-19 01:18:03,912 : INFO : vocab #6957
2020-12-19 01:18:03,914 : INFO : diff #set()
2020-12-19 01:18:05,153 : INFO : alphabet #6957
2020-12-19 01:18:05,769 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ55.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[1.112735942380179, 0.4733199165786018], [0.9256853386759758, 0.07431466], [4.5358485029514135, 1.4083674081821018], [9.099470562192366, 8.610843107887472, 9.702513261976033, 8.007800408103805, 0.6030426997836678, 1.0916701540885612]]
2020-12-19 01:18:05,772 : INFO : Removed 629 and 1850 OOV words from document 1 and 2 (respectively).
2020-12-19 01:18:05,773 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:18:05,774 : INFO : built Dictionary(42 unique tokens: ['base', 'est', 'gal', 'get', 'i

2020-12-19 01:18:19,178 : INFO : diff #set()
2020-12-19 01:18:20,417 : INFO : alphabet #6957
2020-12-19 01:18:21,033 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ39.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[1.0424353563090394, 0.48961157909405617], [0.8067083656787872, 0.19329163], [3.378783493486176, 1.3660934553878117], [4.095795255000934, 7.2878904389256105, 7.310841284229886, 4.072844409696659, 3.2150460292289518, 0.02295084530427527]]
2020-12-19 01:18:21,036 : INFO : Removed 18 and 3144 OOV words from document 1 and 2 (respectively).
2020-12-19 01:18:21,037 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:18:21,038 : INFO : built Dictionary(34 unique tokens: ['est', 'oper', 'use', 'cat', 'char']...) from 2 documents (total 389 corpus positions)
2020-12-19 01:18:21,054 : INFO : token count processed
2020-12-19 01:18:21,061 : INFO : frequencies processed
2020-12-19 01:18:21,674 : INFO : scalar_di

2020-12-19 01:18:36,103 : INFO : Removed 18 and 2544 OOV words from document 1 and 2 (respectively).
2020-12-19 01:18:36,104 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:18:36,105 : INFO : built Dictionary(36 unique tokens: ['est', 'oper', 'use', '200', '://']...) from 2 documents (total 286 corpus positions)
2020-12-19 01:18:36,122 : INFO : token count processed
2020-12-19 01:18:36,127 : INFO : frequencies processed
2020-12-19 01:18:36,865 : INFO : scalar_distribution processed
2020-12-19 01:18:36,866 : INFO : entropies processed
2020-12-19 01:18:36,867 : INFO : extropies processed
2020-12-19 01:18:36,870 : INFO : token count processed
2020-12-19 01:18:36,871 : INFO : alphabet_source #6957
2020-12-19 01:18:36,873 : INFO : alphabet_target #6957
2020-12-19 01:18:36,874 : INFO : vocab #6957
2020-12-19 01:18:36,875 : INFO : diff #set()
2020-12-19 01:18:38,119 : INFO : alphabet #6957
2020-12-19 01:18:38,737 : INFO : Computed distances or similarities ('test_

2020-12-19 01:18:51,297 : INFO : token count processed
2020-12-19 01:18:51,309 : INFO : frequencies processed
2020-12-19 01:18:51,923 : INFO : scalar_distribution processed
2020-12-19 01:18:51,924 : INFO : entropies processed
2020-12-19 01:18:51,925 : INFO : extropies processed
2020-12-19 01:18:51,929 : INFO : token count processed
2020-12-19 01:18:51,932 : INFO : alphabet_source #6957
2020-12-19 01:18:51,934 : INFO : alphabet_target #6957
2020-12-19 01:18:51,935 : INFO : vocab #6957
2020-12-19 01:18:51,938 : INFO : diff #set()
2020-12-19 01:18:53,186 : INFO : alphabet #6957
2020-12-19 01:18:53,798 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ39.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[1.0743059559833381, 0.4820889594977533], [0.862654447555542, 0.13734555], [3.238901256602631, 1.3579502728384498], [4.095795255000934, 7.345352443618291, 7.382857524956949, 4.058290173662277, 3.2870622699560146, 0.03750508133865793]]
2020-12-19

2020-12-19 01:19:07,161 : INFO : extropies processed
2020-12-19 01:19:07,164 : INFO : token count processed
2020-12-19 01:19:07,165 : INFO : alphabet_source #6957
2020-12-19 01:19:07,166 : INFO : alphabet_target #6957
2020-12-19 01:19:07,167 : INFO : vocab #6957
2020-12-19 01:19:07,168 : INFO : diff #set()
2020-12-19 01:19:08,406 : INFO : alphabet #6957
2020-12-19 01:19:09,019 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ39.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[1.0343553507711207, 0.4915562070416807], [0.9548215009272099, 0.0451785], [2.9219280948873623, 1.3359016564230495], [4.095795255000934, 7.102818334145153, 7.136116076330508, 4.062497512815581, 3.0403208213295736, 0.033297742185354195]]
2020-12-19 01:19:09,022 : INFO : Removed 65 and 1761 OOV words from document 1 and 2 (respectively).
2020-12-19 01:19:09,023 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:19:09,024 : INFO : built Dictiona

2020-12-19 01:19:22,388 : INFO : vocab #6957
2020-12-19 01:19:22,391 : INFO : diff #set()
2020-12-19 01:19:23,640 : INFO : alphabet #6957
2020-12-19 01:19:24,255 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ37.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[0.8506824765249652, 0.5403412053037345], [0.8780169412493706, 0.12198306], [4.185867302411998, 1.3907284515103375], [5.07391321234758, 7.587625711074752, 7.674675174980269, 4.986863748442062, 2.6007619626326894, 0.08704946390551704]]
2020-12-19 01:19:24,259 : INFO : Removed 65 and 2503 OOV words from document 1 and 2 (respectively).
2020-12-19 01:19:24,260 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:19:24,260 : INFO : built Dictionary(28 unique tokens: ['est', 'field', 'ident', 'key', 'may']...) from 2 documents (total 186 corpus positions)
2020-12-19 01:19:24,275 : INFO : token count processed
2020-12-19 01:19:24,283 : INFO : frequencies processe

2020-12-19 01:19:39,533 : INFO : Removed 65 and 2642 OOV words from document 1 and 2 (respectively).
2020-12-19 01:19:39,534 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:19:39,536 : INFO : built Dictionary(51 unique tokens: ['est', 'field', 'ident', 'key', 'may']...) from 2 documents (total 313 corpus positions)
2020-12-19 01:19:39,564 : INFO : token count processed
2020-12-19 01:19:39,569 : INFO : frequencies processed
2020-12-19 01:19:40,189 : INFO : scalar_distribution processed
2020-12-19 01:19:40,189 : INFO : entropies processed
2020-12-19 01:19:40,190 : INFO : extropies processed
2020-12-19 01:19:40,193 : INFO : token count processed
2020-12-19 01:19:40,194 : INFO : alphabet_source #6957
2020-12-19 01:19:40,195 : INFO : alphabet_target #6957
2020-12-19 01:19:40,196 : INFO : vocab #6957
2020-12-19 01:19:40,198 : INFO : diff #set()
2020-12-19 01:19:41,441 : INFO : alphabet #6957
2020-12-19 01:19:42,055 : INFO : Computed distances or similarities ('te

2020-12-19 01:19:54,815 : INFO : built Dictionary(46 unique tokens: ['est', 'field', 'ident', 'key', 'may']...) from 2 documents (total 197 corpus positions)
2020-12-19 01:19:54,832 : INFO : token count processed
2020-12-19 01:19:54,838 : INFO : frequencies processed
2020-12-19 01:19:55,497 : INFO : scalar_distribution processed
2020-12-19 01:19:55,498 : INFO : entropies processed
2020-12-19 01:19:55,499 : INFO : extropies processed
2020-12-19 01:19:55,502 : INFO : token count processed
2020-12-19 01:19:55,503 : INFO : alphabet_source #6957
2020-12-19 01:19:55,504 : INFO : alphabet_target #6957
2020-12-19 01:19:55,505 : INFO : vocab #6957
2020-12-19 01:19:55,507 : INFO : diff #set()
2020-12-19 01:19:56,742 : INFO : alphabet #6957
2020-12-19 01:19:57,356 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ37.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[0.7251575896865401, 0.5796571895682291], [0.7971751838922501, 0.20282482], [4.21812882

2020-12-19 01:20:10,091 : INFO : frequencies processed
2020-12-19 01:20:10,709 : INFO : scalar_distribution processed
2020-12-19 01:20:10,710 : INFO : entropies processed
2020-12-19 01:20:10,710 : INFO : extropies processed
2020-12-19 01:20:10,713 : INFO : token count processed
2020-12-19 01:20:10,715 : INFO : alphabet_source #6957
2020-12-19 01:20:10,718 : INFO : alphabet_target #6957
2020-12-19 01:20:10,719 : INFO : vocab #6957
2020-12-19 01:20:10,722 : INFO : diff #set()
2020-12-19 01:20:11,958 : INFO : alphabet #6957
2020-12-19 01:20:12,572 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ20.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[0.9167148667527749, 0.5217260101363755], [0.8686621636152267, 0.13133784], [3.4473384506127904, 1.3552826691071784], [5.450611875554945, 6.985394788721831, 7.13744395566391, 5.298562708612867, 1.6868320801089647, 0.1520491669420787]]
2020-12-19 01:20:12,576 : INFO : Removed 98 and 2273 OOV words fr

2020-12-19 01:20:25,763 : INFO : token count processed
2020-12-19 01:20:25,765 : INFO : alphabet_source #6957
2020-12-19 01:20:25,766 : INFO : alphabet_target #6957
2020-12-19 01:20:25,767 : INFO : vocab #6957
2020-12-19 01:20:25,769 : INFO : diff #set()
2020-12-19 01:20:27,113 : INFO : alphabet #6957
2020-12-19 01:20:27,728 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ20.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[0.9366646485145858, 0.5163516568379539], [0.7881727069616318, 0.2118273], [3.706353711597133, 1.3728099894952972], [5.450611875554945, 7.278774046648788, 7.426103295031825, 5.303282627171908, 1.9754914194768798, 0.14732924838303685]]
2020-12-19 01:20:27,731 : INFO : Removed 98 and 2288 OOV words from document 1 and 2 (respectively).
2020-12-19 01:20:27,732 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:20:27,733 : INFO : built Dictionary(39 unique tokens: ['also', 'est', 'key', 'less', 'o

2020-12-19 01:20:41,053 : INFO : diff #set()
2020-12-19 01:20:42,301 : INFO : alphabet #6957
2020-12-19 01:20:42,918 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ20.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[0.8015538249054068, 0.5550763935973473], [0.7165583372116089, 0.28344166], [4.323617976393434, 1.39246149740346], [5.450611875554945, 7.643402829602536, 7.71748405829763, 5.376530646859851, 2.2668721827426843, 0.07408122869509359]]
2020-12-19 01:20:42,921 : INFO : Removed 98 and 954 OOV words from document 1 and 2 (respectively).
2020-12-19 01:20:42,922 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:20:42,922 : INFO : built Dictionary(35 unique tokens: ['also', 'est', 'key', 'less', 'order']...) from 2 documents (total 98 corpus positions)
2020-12-19 01:20:42,934 : INFO : token count processed
2020-12-19 01:20:42,947 : INFO : frequencies processed
2020-12-19 01:20:43,572 : INFO : scalar_distribu

2020-12-19 01:20:58,127 : INFO : Removed 56 and 1487 OOV words from document 1 and 2 (respectively).
2020-12-19 01:20:58,128 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:20:58,129 : INFO : built Dictionary(32 unique tokens: ['band', 'base', 'est', 'less', 'text']...) from 2 documents (total 236 corpus positions)
2020-12-19 01:20:58,146 : INFO : token count processed
2020-12-19 01:20:58,154 : INFO : frequencies processed
2020-12-19 01:20:58,773 : INFO : scalar_distribution processed
2020-12-19 01:20:58,774 : INFO : entropies processed
2020-12-19 01:20:58,775 : INFO : extropies processed
2020-12-19 01:20:58,777 : INFO : token count processed
2020-12-19 01:20:58,779 : INFO : alphabet_source #6957
2020-12-19 01:20:58,780 : INFO : alphabet_target #6957
2020-12-19 01:20:58,781 : INFO : vocab #6957
2020-12-19 01:20:58,783 : INFO : diff #set()
2020-12-19 01:21:00,019 : INFO : alphabet #6957
2020-12-19 01:21:00,631 : INFO : Computed distances or similarities ('te

2020-12-19 01:21:13,325 : INFO : built Dictionary(21 unique tokens: ['band', 'base', 'est', 'less', 'text']...) from 2 documents (total 72 corpus positions)
2020-12-19 01:21:13,331 : INFO : token count processed
2020-12-19 01:21:13,336 : INFO : frequencies processed
2020-12-19 01:21:13,953 : INFO : scalar_distribution processed
2020-12-19 01:21:13,954 : INFO : entropies processed
2020-12-19 01:21:13,955 : INFO : extropies processed
2020-12-19 01:21:13,957 : INFO : token count processed
2020-12-19 01:21:13,958 : INFO : alphabet_source #6957
2020-12-19 01:21:13,959 : INFO : alphabet_target #6957
2020-12-19 01:21:13,960 : INFO : vocab #6957
2020-12-19 01:21:13,962 : INFO : diff #set()
2020-12-19 01:21:15,202 : INFO : alphabet #6957
2020-12-19 01:21:15,815 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ5.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[0.9839572099533341, 0.5040431290468819], [0.8754306063055992, 0.12456939], [2.65425531321

2020-12-19 01:21:28,590 : INFO : frequencies processed
2020-12-19 01:21:29,222 : INFO : scalar_distribution processed
2020-12-19 01:21:29,223 : INFO : entropies processed
2020-12-19 01:21:29,224 : INFO : extropies processed
2020-12-19 01:21:29,226 : INFO : token count processed
2020-12-19 01:21:29,227 : INFO : alphabet_source #6957
2020-12-19 01:21:29,229 : INFO : alphabet_target #6957
2020-12-19 01:21:29,230 : INFO : vocab #6957
2020-12-19 01:21:29,231 : INFO : diff #set()
2020-12-19 01:21:30,475 : INFO : alphabet #6957
2020-12-19 01:21:31,091 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ5.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[1.057624130876405, 0.4859974107973109], [0.7803869247436523, 0.21961308], [3.5325732575026194, 1.3643077018128904], [5.021276127914053, 8.67532843392421, 8.714809478825028, 4.981795083013235, 3.6935333509109753, 0.03948104490081761]]
2020-12-19 01:21:31,094 : INFO : Removed 56 and 926 OOV words from 

2020-12-19 01:21:44,446 : INFO : token count processed
2020-12-19 01:21:44,447 : INFO : alphabet_source #6957
2020-12-19 01:21:44,449 : INFO : alphabet_target #6957
2020-12-19 01:21:44,450 : INFO : vocab #6957
2020-12-19 01:21:44,451 : INFO : diff #set()
2020-12-19 01:21:45,690 : INFO : alphabet #6957
2020-12-19 01:21:46,305 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ5.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[1.1351111884761012, 0.46835968327894567], [0.8441605865955353, 0.15583941], [3.8109283062792865, 1.3774151943746957], [5.021276127914053, 7.9055766874171, 7.961902540867959, 4.9649502744631935, 2.9406264129539057, 0.05632585345085861]]
2020-12-19 01:21:46,308 : INFO : Removed 56 and 2291 OOV words from document 1 and 2 (respectively).
2020-12-19 01:21:46,309 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:21:46,310 : INFO : built Dictionary(27 unique tokens: ['band', 'base', 'est', 'less', '

2020-12-19 01:21:59,659 : INFO : diff #set()
2020-12-19 01:22:00,903 : INFO : alphabet #6957
2020-12-19 01:22:01,516 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ9.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[0.9522127663513854, 0.5122392483217716], [0.8095400631427765, 0.19045994], [3.0581674217758743, 1.325947148064168], [4.461157253504293, 6.792406615896576, 6.851241053302209, 4.4023228160986605, 2.3900837997979156, 0.058834437405632656]]
2020-12-19 01:22:01,519 : INFO : Removed 58 and 1444 OOV words from document 1 and 2 (respectively).
2020-12-19 01:22:01,520 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:22:01,522 : INFO : built Dictionary(31 unique tokens: ['est', 'name', 'present', 'use', '200']...) from 2 documents (total 160 corpus positions)
2020-12-19 01:22:01,536 : INFO : token count processed
2020-12-19 01:22:01,544 : INFO : frequencies processed
2020-12-19 01:22:02,160 : INFO : scalar_d

2020-12-19 01:22:16,655 : INFO : Removed 58 and 2059 OOV words from document 1 and 2 (respectively).
2020-12-19 01:22:16,656 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:22:16,658 : INFO : built Dictionary(46 unique tokens: ['est', 'name', 'present', 'use', '200']...) from 2 documents (total 338 corpus positions)
2020-12-19 01:22:16,677 : INFO : token count processed
2020-12-19 01:22:16,687 : INFO : frequencies processed
2020-12-19 01:22:17,412 : INFO : scalar_distribution processed
2020-12-19 01:22:17,413 : INFO : entropies processed
2020-12-19 01:22:17,414 : INFO : extropies processed
2020-12-19 01:22:17,417 : INFO : token count processed
2020-12-19 01:22:17,418 : INFO : alphabet_source #6957
2020-12-19 01:22:17,419 : INFO : alphabet_target #6957
2020-12-19 01:22:17,420 : INFO : vocab #6957
2020-12-19 01:22:17,422 : INFO : diff #set()
2020-12-19 01:22:18,664 : INFO : alphabet #6957
2020-12-19 01:22:19,280 : INFO : Computed distances or similarities ('t

2020-12-19 01:22:31,923 : INFO : built Dictionary(33 unique tokens: ['est', 'name', 'present', 'use', 'act']...) from 2 documents (total 313 corpus positions)
2020-12-19 01:22:31,934 : INFO : token count processed
2020-12-19 01:22:31,945 : INFO : frequencies processed
2020-12-19 01:22:32,565 : INFO : scalar_distribution processed
2020-12-19 01:22:32,566 : INFO : entropies processed
2020-12-19 01:22:32,567 : INFO : extropies processed
2020-12-19 01:22:32,572 : INFO : token count processed
2020-12-19 01:22:32,574 : INFO : alphabet_source #6957
2020-12-19 01:22:32,577 : INFO : alphabet_target #6957
2020-12-19 01:22:32,578 : INFO : vocab #6957
2020-12-19 01:22:32,581 : INFO : diff #set()
2020-12-19 01:22:33,837 : INFO : alphabet #6957
2020-12-19 01:22:34,452 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ9.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[0.995797672331663, 0.5010527940097824], [0.8267272263765335, 0.17327277], [3.432416014

2020-12-19 01:22:47,182 : INFO : frequencies processed
2020-12-19 01:22:47,803 : INFO : scalar_distribution processed
2020-12-19 01:22:47,804 : INFO : entropies processed
2020-12-19 01:22:47,805 : INFO : extropies processed
2020-12-19 01:22:47,808 : INFO : token count processed
2020-12-19 01:22:47,809 : INFO : alphabet_source #6957
2020-12-19 01:22:47,811 : INFO : alphabet_target #6957
2020-12-19 01:22:47,812 : INFO : vocab #6957
2020-12-19 01:22:47,813 : INFO : diff #set()
2020-12-19 01:22:49,044 : INFO : alphabet #6957
2020-12-19 01:22:49,658 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ57.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[0.9070224646102009, 0.5243776717671765], [0.7988414615392685, 0.20115854], [5.094828784190263, 1.4167278864593638], [8.149185990038742, 8.158196888137685, 8.583213660841128, 7.724169217335298, 0.43402767080238647, 0.42501677270344373]]
2020-12-19 01:22:49,662 : INFO : Removed 406 and 1854 OOV words 

2020-12-19 01:23:03,035 : INFO : token count processed
2020-12-19 01:23:03,036 : INFO : alphabet_source #6957
2020-12-19 01:23:03,038 : INFO : alphabet_target #6957
2020-12-19 01:23:03,038 : INFO : vocab #6957
2020-12-19 01:23:03,040 : INFO : diff #set()
2020-12-19 01:23:04,278 : INFO : alphabet #6957
2020-12-19 01:23:04,908 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ57.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[0.7830820140722417, 0.5608266990008934], [0.7947631478309631, 0.20523685], [5.294363898971057, 1.4189113269999412], [8.149185990038742, 7.749661971001125, 8.262823106279141, 7.636024854760725, 0.1136371162403993, 0.5131611352780157]]
2020-12-19 01:23:04,911 : INFO : Removed 406 and 1285 OOV words from document 1 and 2 (respectively).
2020-12-19 01:23:04,912 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:23:04,913 : INFO : built Dictionary(37 unique tokens: ['200', 'band', 'base', 'dan', 'e

2020-12-19 01:23:18,318 : INFO : diff #set()
2020-12-19 01:23:19,573 : INFO : alphabet #6957
2020-12-19 01:23:20,186 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ57.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[1.0587708650372347, 0.48572671052536687], [0.8782370164990425, 0.12176298], [4.003103797225432, 1.3871060048624124], [8.149185990038742, 7.345352443618291, 8.352713831612732, 7.1418246020443, 0.2035278415739903, 1.0073613879944414]]
2020-12-19 01:23:20,190 : INFO : Removed 406 and 1986 OOV words from document 1 and 2 (respectively).
2020-12-19 01:23:20,191 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:23:20,192 : INFO : built Dictionary(43 unique tokens: ['200', 'band', 'base', 'dan', 'est']...) from 2 documents (total 282 corpus positions)
2020-12-19 01:23:20,213 : INFO : token count processed
2020-12-19 01:23:20,223 : INFO : frequencies processed
2020-12-19 01:23:20,841 : INFO : scalar_distr

2020-12-19 01:23:35,490 : INFO : Removed 85 and 1761 OOV words from document 1 and 2 (respectively).
2020-12-19 01:23:35,491 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:23:35,492 : INFO : built Dictionary(40 unique tokens: ['base', 'des', 'est', 'may', 'order']...) from 2 documents (total 176 corpus positions)
2020-12-19 01:23:35,502 : INFO : token count processed
2020-12-19 01:23:35,508 : INFO : frequencies processed
2020-12-19 01:23:36,125 : INFO : scalar_distribution processed
2020-12-19 01:23:36,126 : INFO : entropies processed
2020-12-19 01:23:36,127 : INFO : extropies processed
2020-12-19 01:23:36,129 : INFO : token count processed
2020-12-19 01:23:36,131 : INFO : alphabet_source #6957
2020-12-19 01:23:36,132 : INFO : alphabet_target #6957
2020-12-19 01:23:36,133 : INFO : vocab #6957
2020-12-19 01:23:36,134 : INFO : diff #set()
2020-12-19 01:23:37,376 : INFO : alphabet #6957
2020-12-19 01:23:37,995 : INFO : Computed distances or similarities ('tes

2020-12-19 01:23:50,695 : INFO : built Dictionary(29 unique tokens: ['base', 'des', 'est', 'may', 'order']...) from 2 documents (total 183 corpus positions)
2020-12-19 01:23:50,707 : INFO : token count processed
2020-12-19 01:23:50,717 : INFO : frequencies processed
2020-12-19 01:23:51,340 : INFO : scalar_distribution processed
2020-12-19 01:23:51,340 : INFO : entropies processed
2020-12-19 01:23:51,341 : INFO : extropies processed
2020-12-19 01:23:51,344 : INFO : token count processed
2020-12-19 01:23:51,345 : INFO : alphabet_source #6957
2020-12-19 01:23:51,347 : INFO : alphabet_target #6957
2020-12-19 01:23:51,348 : INFO : vocab #6957
2020-12-19 01:23:51,349 : INFO : diff #set()
2020-12-19 01:23:52,594 : INFO : alphabet #6957
2020-12-19 01:23:53,209 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ21.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[1.0608608798797992, 0.48523411248328685], [0.9492822773754597, 0.050717723], [3.49658496

2020-12-19 01:24:05,817 : INFO : frequencies processed
2020-12-19 01:24:06,443 : INFO : scalar_distribution processed
2020-12-19 01:24:06,444 : INFO : entropies processed
2020-12-19 01:24:06,445 : INFO : extropies processed
2020-12-19 01:24:06,448 : INFO : token count processed
2020-12-19 01:24:06,449 : INFO : alphabet_source #6957
2020-12-19 01:24:06,450 : INFO : alphabet_target #6957
2020-12-19 01:24:06,451 : INFO : vocab #6957
2020-12-19 01:24:06,453 : INFO : diff #set()
2020-12-19 01:24:07,803 : INFO : alphabet #6957
2020-12-19 01:24:08,420 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ21.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[0.9749707751520859, 0.5063366063849696], [0.7418237328529358, 0.25817627], [4.319607332957947, 1.398527116786536], [5.286246651284378, 9.006525301292756, 9.03034506642592, 5.262426886151214, 3.7440984151415417, 0.02381976513316353]]
2020-12-19 01:24:08,423 : INFO : Removed 85 and 1726 OOV words fro

2020-12-19 01:24:21,693 : INFO : token count processed
2020-12-19 01:24:21,694 : INFO : alphabet_source #6957
2020-12-19 01:24:21,696 : INFO : alphabet_target #6957
2020-12-19 01:24:21,696 : INFO : vocab #6957
2020-12-19 01:24:21,698 : INFO : diff #set()
2020-12-19 01:24:22,936 : INFO : alphabet #6957
2020-12-19 01:24:23,552 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ21.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[0.8627614405501386, 0.5368373954018852], [0.7648104578256607, 0.23518954], [4.071271150031998, 1.3876286341510182], [5.286246651284378, 9.228957519717383, 9.255161895912131, 5.26004227508963, 3.9689152446277536, 0.026204376194748136]]
2020-12-19 01:24:23,555 : INFO : Removed 85 and 2212 OOV words from document 1 and 2 (respectively).
2020-12-19 01:24:23,556 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:24:23,558 : INFO : built Dictionary(44 unique tokens: ['base', 'des', 'est', 'may', 'o

2020-12-19 01:24:36,903 : INFO : diff #set()
2020-12-19 01:24:38,137 : INFO : alphabet #6957
2020-12-19 01:24:38,748 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ22.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[0.9720142691131071, 0.5070957222078011], [0.8513474017381668, 0.1486526], [3.6435525361765446, 1.3588926826227992], [5.496902511499114, 6.985394788721831, 7.164122595713904, 5.318174704507042, 1.6672200842147902, 0.17872780699207258]]
2020-12-19 01:24:38,752 : INFO : Removed 132 and 2273 OOV words from document 1 and 2 (respectively).
2020-12-19 01:24:38,753 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:24:38,754 : INFO : built Dictionary(37 unique tokens: ['base', 'des', 'est', 'general', 'ident']...) from 2 documents (total 361 corpus positions)
2020-12-19 01:24:38,774 : INFO : token count processed
2020-12-19 01:24:38,783 : INFO : frequencies processed
2020-12-19 01:24:39,409 : INFO : scala

2020-12-19 01:24:54,013 : INFO : Removed 132 and 2288 OOV words from document 1 and 2 (respectively).
2020-12-19 01:24:54,014 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:24:54,016 : INFO : built Dictionary(41 unique tokens: ['base', 'des', 'est', 'general', 'ident']...) from 2 documents (total 390 corpus positions)
2020-12-19 01:24:54,036 : INFO : token count processed
2020-12-19 01:24:54,042 : INFO : frequencies processed
2020-12-19 01:24:54,661 : INFO : scalar_distribution processed
2020-12-19 01:24:54,662 : INFO : entropies processed
2020-12-19 01:24:54,663 : INFO : extropies processed
2020-12-19 01:24:54,666 : INFO : token count processed
2020-12-19 01:24:54,667 : INFO : alphabet_source #6957
2020-12-19 01:24:54,668 : INFO : alphabet_target #6957
2020-12-19 01:24:54,669 : INFO : vocab #6957
2020-12-19 01:24:54,671 : INFO : diff #set()
2020-12-19 01:24:55,918 : INFO : alphabet #6957
2020-12-19 01:24:56,536 : INFO : Computed distances or similarities 

2020-12-19 01:25:09,276 : INFO : built Dictionary(41 unique tokens: ['base', 'des', 'est', 'general', 'ident']...) from 2 documents (total 112 corpus positions)
2020-12-19 01:25:09,294 : INFO : token count processed
2020-12-19 01:25:09,301 : INFO : frequencies processed
2020-12-19 01:25:09,915 : INFO : scalar_distribution processed
2020-12-19 01:25:09,916 : INFO : entropies processed
2020-12-19 01:25:09,917 : INFO : extropies processed
2020-12-19 01:25:09,921 : INFO : token count processed
2020-12-19 01:25:09,923 : INFO : alphabet_source #6957
2020-12-19 01:25:09,925 : INFO : alphabet_target #6957
2020-12-19 01:25:09,926 : INFO : vocab #6957
2020-12-19 01:25:09,929 : INFO : diff #set()
2020-12-19 01:25:11,178 : INFO : alphabet #6957
2020-12-19 01:25:11,794 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ22.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[0.8604571686308722, 0.5375022961350459], [0.7977630347013474, 0.20223697], [3.97450

2020-12-19 01:25:24,556 : INFO : frequencies processed
2020-12-19 01:25:25,186 : INFO : scalar_distribution processed
2020-12-19 01:25:25,187 : INFO : entropies processed
2020-12-19 01:25:25,188 : INFO : extropies processed
2020-12-19 01:25:25,193 : INFO : token count processed
2020-12-19 01:25:25,195 : INFO : alphabet_source #6957
2020-12-19 01:25:25,197 : INFO : alphabet_target #6957
2020-12-19 01:25:25,198 : INFO : vocab #6957
2020-12-19 01:25:25,201 : INFO : diff #set()
2020-12-19 01:25:26,447 : INFO : alphabet #6957
2020-12-19 01:25:27,062 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ2.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[0.9212589961393286, 0.5204920325731454], [0.8298606872558594, 0.17013931], [3.567900861710723, 1.3616082912853056], [6.096497730583769, 7.2878904389256105, 7.554176621959542, 5.830211547549837, 1.4576788913757728, 0.26628618303393115]]
2020-12-19 01:25:27,066 : INFO : Removed 165 and 3144 OOV words 

2020-12-19 01:25:40,441 : INFO : token count processed
2020-12-19 01:25:40,443 : INFO : alphabet_source #6957
2020-12-19 01:25:40,444 : INFO : alphabet_target #6957
2020-12-19 01:25:40,445 : INFO : vocab #6957
2020-12-19 01:25:40,447 : INFO : diff #set()
2020-12-19 01:25:41,693 : INFO : alphabet #6957
2020-12-19 01:25:42,307 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ2.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[0.8423455366993046, 0.5427863449500209], [0.8537268340587616, 0.14627317], [3.3790296758677485, 1.347648369689904], [6.096497730583769, 7.373064697852536, 7.7274295530745665, 5.742132875361738, 1.6309318224907976, 0.35436485522203043]]
2020-12-19 01:25:42,310 : INFO : Removed 165 and 2544 OOV words from document 1 and 2 (respectively).
2020-12-19 01:25:42,311 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:25:42,313 : INFO : built Dictionary(42 unique tokens: ['est', 'key', 'less', 'may', 'o

2020-12-19 01:25:55,578 : INFO : diff #set()
2020-12-19 01:25:56,852 : INFO : alphabet #6957
2020-12-19 01:25:57,468 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ2.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[0.7608668206954333, 0.5679021197100311], [0.7550807595252991, 0.24491924], [3.5980469811573, 1.3609003862234579], [6.096497730583769, 8.67532843392421, 8.800431337198564, 5.971394827309416, 2.703933606614795, 0.12510290327435314]]
2020-12-19 01:25:57,470 : INFO : Removed 165 and 926 OOV words from document 1 and 2 (respectively).
2020-12-19 01:25:57,471 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:25:57,473 : INFO : built Dictionary(29 unique tokens: ['est', 'key', 'less', 'may', 'order']...) from 2 documents (total 195 corpus positions)
2020-12-19 01:25:57,488 : INFO : token count processed
2020-12-19 01:25:57,494 : INFO : frequencies processed
2020-12-19 01:25:58,217 : INFO : scalar_distribut

2020-12-19 01:26:12,750 : INFO : Removed 165 and 2291 OOV words from document 1 and 2 (respectively).
2020-12-19 01:26:12,751 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:26:12,752 : INFO : built Dictionary(30 unique tokens: ['est', 'key', 'less', 'may', 'order']...) from 2 documents (total 178 corpus positions)
2020-12-19 01:26:12,773 : INFO : token count processed
2020-12-19 01:26:12,783 : INFO : frequencies processed
2020-12-19 01:26:13,406 : INFO : scalar_distribution processed
2020-12-19 01:26:13,407 : INFO : entropies processed
2020-12-19 01:26:13,408 : INFO : extropies processed
2020-12-19 01:26:13,410 : INFO : token count processed
2020-12-19 01:26:13,412 : INFO : alphabet_source #6957
2020-12-19 01:26:13,413 : INFO : alphabet_target #6957
2020-12-19 01:26:13,414 : INFO : vocab #6957
2020-12-19 01:26:13,416 : INFO : diff #set()
2020-12-19 01:26:14,654 : INFO : alphabet #6957
2020-12-19 01:26:15,270 : INFO : Computed distances or similarities ('te

2020-12-19 01:26:28,040 : INFO : built Dictionary(42 unique tokens: ['also', 'base', 'end', 'est', 'ident']...) from 2 documents (total 240 corpus positions)
2020-12-19 01:26:28,067 : INFO : token count processed
2020-12-19 01:26:28,073 : INFO : frequencies processed
2020-12-19 01:26:28,709 : INFO : scalar_distribution processed
2020-12-19 01:26:28,709 : INFO : entropies processed
2020-12-19 01:26:28,710 : INFO : extropies processed
2020-12-19 01:26:28,715 : INFO : token count processed
2020-12-19 01:26:28,717 : INFO : alphabet_source #6957
2020-12-19 01:26:28,719 : INFO : alphabet_target #6957
2020-12-19 01:26:28,720 : INFO : vocab #6957
2020-12-19 01:26:28,723 : INFO : diff #set()
2020-12-19 01:26:29,960 : INFO : alphabet #6957
2020-12-19 01:26:30,572 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ13.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[0.8671429725901207, 0.5355776256452334], [0.8058711290359497, 0.19412887], [4.29829095

2020-12-19 01:26:43,365 : INFO : frequencies processed
2020-12-19 01:26:43,979 : INFO : scalar_distribution processed
2020-12-19 01:26:43,980 : INFO : entropies processed
2020-12-19 01:26:43,981 : INFO : extropies processed
2020-12-19 01:26:43,983 : INFO : token count processed
2020-12-19 01:26:43,985 : INFO : alphabet_source #6957
2020-12-19 01:26:43,986 : INFO : alphabet_target #6957
2020-12-19 01:26:43,987 : INFO : vocab #6957
2020-12-19 01:26:43,988 : INFO : diff #set()
2020-12-19 01:26:45,227 : INFO : alphabet #6957
2020-12-19 01:26:45,845 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ13.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[0.8566672340086229, 0.538599476353637], [0.6937166750431061, 0.30628332], [4.264943926387714, 1.3741255278402023], [6.226193694373053, 8.081846397171304, 8.261746259156586, 6.04629383238777, 2.0355525647835337, 0.17989986198528207]]
2020-12-19 01:26:45,848 : INFO : Removed 355 and 2642 OOV words fr

2020-12-19 01:26:59,256 : INFO : extropies processed
2020-12-19 01:26:59,258 : INFO : token count processed
2020-12-19 01:26:59,260 : INFO : alphabet_source #6957
2020-12-19 01:26:59,262 : INFO : alphabet_target #6957
2020-12-19 01:26:59,263 : INFO : vocab #6957
2020-12-19 01:26:59,265 : INFO : diff #set()
2020-12-19 01:27:00,518 : INFO : alphabet #6957
2020-12-19 01:27:01,133 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ13.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[0.9441780705827416, 0.514356177106896], [0.7902387082576752, 0.20976129], [4.10335564848309, 1.3735076070827388], [6.226193694373053, 7.3308860974230985, 7.65133652909485, 5.905743262701302, 1.4251428347217976, 0.3204504316717518]]
2020-12-19 01:27:01,136 : INFO : Removed 355 and 2276 OOV words from document 1 and 2 (respectively).
2020-12-19 01:27:01,137 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:27:01,138 : INFO : built Dictionary

2020-12-19 01:27:14,526 : INFO : alphabet_target #6957
2020-12-19 01:27:14,527 : INFO : vocab #6957
2020-12-19 01:27:14,529 : INFO : diff #set()
2020-12-19 01:27:15,774 : INFO : alphabet #6957
2020-12-19 01:27:16,391 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ53.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[0.9047778452482623, 0.5249956064402164], [0.937778927385807, 0.062221073], [4.00182282562223, 1.3937544716796082], [6.152846632892834, 8.158196888137685, 8.297259747755744, 6.013783773274776, 2.1444131148629095, 0.13906285961805942]]
2020-12-19 01:27:16,394 : INFO : Removed 133 and 1854 OOV words from document 1 and 2 (respectively).
2020-12-19 01:27:16,395 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:27:16,396 : INFO : built Dictionary(33 unique tokens: ['://', 'associ', 'class', 'general', 'key']...) from 2 documents (total 260 corpus positions)
2020-12-19 01:27:16,410 : INFO : token count pro

2020-12-19 01:27:31,032 : INFO : alphabet #6957
2020-12-19 01:27:31,647 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ53.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[0.9914917527390008, 0.5021361492582878], [0.9340643882751465, 0.06593561], [3.918295834054489, 1.3855815028066254], [6.152846632892834, 7.749661971001125, 7.922347703693184, 5.980160900200776, 1.7695010708003496, 0.17268573269205856]]
2020-12-19 01:27:31,650 : INFO : Removed 133 and 1285 OOV words from document 1 and 2 (respectively).
2020-12-19 01:27:31,651 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:27:31,653 : INFO : built Dictionary(31 unique tokens: ['://', 'associ', 'class', 'general', 'key']...) from 2 documents (total 186 corpus positions)
2020-12-19 01:27:31,675 : INFO : token count processed
2020-12-19 01:27:31,682 : INFO : frequencies processed
2020-12-19 01:27:32,310 : INFO : scalar_distribution processed
2020-12-19 01:27:32

2020-12-19 01:27:46,763 : INFO : Removed 133 and 1986 OOV words from document 1 and 2 (respectively).
2020-12-19 01:27:46,764 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:27:46,765 : INFO : built Dictionary(42 unique tokens: ['://', 'associ', 'class', 'general', 'key']...) from 2 documents (total 265 corpus positions)
2020-12-19 01:27:46,783 : INFO : token count processed
2020-12-19 01:27:46,793 : INFO : frequencies processed
2020-12-19 01:27:47,418 : INFO : scalar_distribution processed
2020-12-19 01:27:47,419 : INFO : entropies processed
2020-12-19 01:27:47,419 : INFO : extropies processed
2020-12-19 01:27:47,422 : INFO : token count processed
2020-12-19 01:27:47,423 : INFO : alphabet_source #6957
2020-12-19 01:27:47,425 : INFO : alphabet_target #6957
2020-12-19 01:27:47,425 : INFO : vocab #6957
2020-12-19 01:27:47,427 : INFO : diff #set()
2020-12-19 01:27:48,773 : INFO : alphabet #6957
2020-12-19 01:27:49,390 : INFO : Computed distances or similaritie

2020-12-19 01:28:02,019 : INFO : built Dictionary(36 unique tokens: ['est', 'less', 'see', 'use', '200']...) from 2 documents (total 168 corpus positions)
2020-12-19 01:28:02,035 : INFO : token count processed
2020-12-19 01:28:02,045 : INFO : frequencies processed
2020-12-19 01:28:02,664 : INFO : scalar_distribution processed
2020-12-19 01:28:02,665 : INFO : entropies processed
2020-12-19 01:28:02,666 : INFO : extropies processed
2020-12-19 01:28:02,668 : INFO : token count processed
2020-12-19 01:28:02,670 : INFO : alphabet_source #6957
2020-12-19 01:28:02,671 : INFO : alphabet_target #6957
2020-12-19 01:28:02,671 : INFO : vocab #6957
2020-12-19 01:28:02,673 : INFO : diff #set()
2020-12-19 01:28:03,908 : INFO : alphabet #6957
2020-12-19 01:28:04,524 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ7.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[0.9344941274616877, 0.5169310083727844], [0.7939793765544891, 0.20602062], [3.0849625007211

2020-12-19 01:28:17,291 : INFO : frequencies processed
2020-12-19 01:28:17,907 : INFO : scalar_distribution processed
2020-12-19 01:28:17,908 : INFO : entropies processed
2020-12-19 01:28:17,909 : INFO : extropies processed
2020-12-19 01:28:17,914 : INFO : token count processed
2020-12-19 01:28:17,916 : INFO : alphabet_source #6957
2020-12-19 01:28:17,918 : INFO : alphabet_target #6957
2020-12-19 01:28:17,919 : INFO : vocab #6957
2020-12-19 01:28:17,921 : INFO : diff #set()
2020-12-19 01:28:19,161 : INFO : alphabet #6957
2020-12-19 01:28:19,778 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ7.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[1.1025243988568432, 0.4756187374299707], [0.9585761576890945, 0.041423842], [2.75, 1.3226647836567116], [3.7216117239699003, 7.127980212598745, 7.152875774563704, 3.6967161620049414, 3.4312640505938035, 0.0248955619649589]]
2020-12-19 01:28:19,781 : INFO : Removed 13 and 774 OOV words from document 1

2020-12-19 01:28:33,126 : INFO : token count processed
2020-12-19 01:28:33,129 : INFO : alphabet_source #6957
2020-12-19 01:28:33,131 : INFO : alphabet_target #6957
2020-12-19 01:28:33,132 : INFO : vocab #6957
2020-12-19 01:28:33,133 : INFO : diff #set()
2020-12-19 01:28:34,378 : INFO : alphabet #6957
2020-12-19 01:28:34,991 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ7.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[1.003364356465116, 0.49916032336947125], [0.796873539686203, 0.20312646], [3.238901256602631, 1.3579502728384498], [3.7216117239699003, 9.006525301292756, 9.009761848186042, 3.7183751770766147, 5.288150124216141, 0.0032365468932855634]]
2020-12-19 01:28:34,995 : INFO : Removed 13 and 1726 OOV words from document 1 and 2 (respectively).
2020-12-19 01:28:34,995 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:28:34,996 : INFO : built Dictionary(37 unique tokens: ['est', 'less', 'see', 'use', '

2020-12-19 01:28:48,352 : INFO : diff #set()
2020-12-19 01:28:49,591 : INFO : alphabet #6957
2020-12-19 01:28:50,206 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ7.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[0.9157659067360481, 0.521984443132581], [0.8090004026889801, 0.1909996], [3.0957952550009336, 1.3487605247277434], [3.7216117239699003, 9.228957519717383, 9.232880673677819, 3.7176885700094644, 5.511268949707919, 0.0039231539604358545]]
2020-12-19 01:28:50,209 : INFO : Removed 13 and 2212 OOV words from document 1 and 2 (respectively).
2020-12-19 01:28:50,210 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:28:50,212 : INFO : built Dictionary(42 unique tokens: ['est', 'less', 'see', 'use', '200']...) from 2 documents (total 258 corpus positions)
2020-12-19 01:28:50,228 : INFO : token count processed
2020-12-19 01:28:50,238 : INFO : frequencies processed
2020-12-19 01:28:50,860 : INFO : scalar_dist

2020-12-19 01:29:05,491 : INFO : Removed 130 and 2273 OOV words from document 1 and 2 (respectively).
2020-12-19 01:29:05,491 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:29:05,492 : INFO : built Dictionary(41 unique tokens: ['com', 'dan', 'est', 'iana', 'key']...) from 2 documents (total 363 corpus positions)
2020-12-19 01:29:05,509 : INFO : token count processed
2020-12-19 01:29:05,515 : INFO : frequencies processed
2020-12-19 01:29:06,129 : INFO : scalar_distribution processed
2020-12-19 01:29:06,130 : INFO : entropies processed
2020-12-19 01:29:06,131 : INFO : extropies processed
2020-12-19 01:29:06,133 : INFO : token count processed
2020-12-19 01:29:06,135 : INFO : alphabet_source #6957
2020-12-19 01:29:06,136 : INFO : alphabet_target #6957
2020-12-19 01:29:06,137 : INFO : vocab #6957
2020-12-19 01:29:06,139 : INFO : diff #set()
2020-12-19 01:29:07,383 : INFO : alphabet #6957
2020-12-19 01:29:07,998 : INFO : Computed distances or similarities ('test

2020-12-19 01:29:20,728 : INFO : token count processed
2020-12-19 01:29:20,734 : INFO : frequencies processed
2020-12-19 01:29:21,348 : INFO : scalar_distribution processed
2020-12-19 01:29:21,349 : INFO : entropies processed
2020-12-19 01:29:21,350 : INFO : extropies processed
2020-12-19 01:29:21,352 : INFO : token count processed
2020-12-19 01:29:21,354 : INFO : alphabet_source #6957
2020-12-19 01:29:21,355 : INFO : alphabet_target #6957
2020-12-19 01:29:21,356 : INFO : vocab #6957
2020-12-19 01:29:21,357 : INFO : diff #set()
2020-12-19 01:29:22,600 : INFO : alphabet #6957
2020-12-19 01:29:23,214 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ50.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[0.9938357430612501, 0.5015458286772624], [0.8836574256420135, 0.116342574], [4.73418371977919, 1.4124366982189034], [6.329472471796489, 7.183148764136712, 7.394684920436502, 6.117936315496699, 1.065212448640013, 0.21153615629978972]]
2020-12-19 

2020-12-19 01:29:36,515 : INFO : extropies processed
2020-12-19 01:29:36,517 : INFO : token count processed
2020-12-19 01:29:36,518 : INFO : alphabet_source #6957
2020-12-19 01:29:36,519 : INFO : alphabet_target #6957
2020-12-19 01:29:36,520 : INFO : vocab #6957
2020-12-19 01:29:36,522 : INFO : diff #set()
2020-12-19 01:29:37,763 : INFO : alphabet #6957
2020-12-19 01:29:38,378 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ50.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[1.0204712798374012, 0.4949340334500948], [0.8539486229419708, 0.14605138], [4.594672032363179, 1.4096534295013257], [6.329472471796489, 8.610843107887472, 8.788632771278113, 6.151682808405848, 2.459160299481624, 0.17778966339064084]]
2020-12-19 01:29:38,381 : INFO : Removed 130 and 1850 OOV words from document 1 and 2 (respectively).
2020-12-19 01:29:38,382 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:29:38,383 : INFO : built Dictiona

2020-12-19 01:29:51,771 : INFO : vocab #6957
2020-12-19 01:29:51,772 : INFO : diff #set()
2020-12-19 01:29:53,011 : INFO : alphabet #6957
2020-12-19 01:29:53,636 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ28.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[1.0359170315504735, 0.49117915146003743], [0.8622331768274307, 0.13776682], [3.155221528859512, 1.349079551388468], [4.6420846845805, 7.2878904389256105, 7.349094326077514, 4.5808807974285966, 2.707009641497014, 0.061203887151903835]]
2020-12-19 01:29:53,640 : INFO : Removed 33 and 3144 OOV words from document 1 and 2 (respectively).
2020-12-19 01:29:53,640 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:29:53,641 : INFO : built Dictionary(34 unique tokens: ['est', 'ident', 'uri', 'use', 'cat']...) from 2 documents (total 394 corpus positions)
2020-12-19 01:29:53,655 : INFO : token count processed
2020-12-19 01:29:53,662 : INFO : frequencies processed

2020-12-19 01:30:08,936 : INFO : Removed 33 and 2544 OOV words from document 1 and 2 (respectively).
2020-12-19 01:30:08,937 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:30:08,937 : INFO : built Dictionary(37 unique tokens: ['est', 'ident', 'uri', 'use', '200']...) from 2 documents (total 291 corpus positions)
2020-12-19 01:30:08,949 : INFO : token count processed
2020-12-19 01:30:08,955 : INFO : frequencies processed
2020-12-19 01:30:09,586 : INFO : scalar_distribution processed
2020-12-19 01:30:09,587 : INFO : entropies processed
2020-12-19 01:30:09,588 : INFO : extropies processed
2020-12-19 01:30:09,591 : INFO : token count processed
2020-12-19 01:30:09,592 : INFO : alphabet_source #6957
2020-12-19 01:30:09,594 : INFO : alphabet_target #6957
2020-12-19 01:30:09,594 : INFO : vocab #6957
2020-12-19 01:30:09,596 : INFO : diff #set()
2020-12-19 01:30:10,837 : INFO : alphabet #6957
2020-12-19 01:30:11,451 : INFO : Computed distances or similarities ('test

2020-12-19 01:30:24,200 : INFO : token count processed
2020-12-19 01:30:24,206 : INFO : frequencies processed
2020-12-19 01:30:24,821 : INFO : scalar_distribution processed
2020-12-19 01:30:24,822 : INFO : entropies processed
2020-12-19 01:30:24,823 : INFO : extropies processed
2020-12-19 01:30:24,827 : INFO : token count processed
2020-12-19 01:30:24,830 : INFO : alphabet_source #6957
2020-12-19 01:30:24,832 : INFO : alphabet_target #6957
2020-12-19 01:30:24,833 : INFO : vocab #6957
2020-12-19 01:30:24,836 : INFO : diff #set()
2020-12-19 01:30:26,090 : INFO : alphabet #6957
2020-12-19 01:30:26,707 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ28.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[1.0571186696703938, 0.4861168267751063], [0.8713501840829849, 0.12864982], [3.00623892865339, 1.3374329493342525], [4.6420846845805, 7.345352443618291, 7.429494794299984, 4.557942333898808, 2.7874101097194837, 0.0841423506816934]]
2020-12-19 01

2020-12-19 01:30:40,057 : INFO : extropies processed
2020-12-19 01:30:40,059 : INFO : token count processed
2020-12-19 01:30:40,060 : INFO : alphabet_source #6957
2020-12-19 01:30:40,061 : INFO : alphabet_target #6957
2020-12-19 01:30:40,062 : INFO : vocab #6957
2020-12-19 01:30:40,064 : INFO : diff #set()
2020-12-19 01:30:41,301 : INFO : alphabet #6957
2020-12-19 01:30:41,916 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ28.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[1.0220783202444985, 0.4945406861783106], [0.96791185811162, 0.032088142], [2.446439344671016, 1.2856945251022454], [4.6420846845805, 7.102818334145153, 7.171763627633715, 4.57313939109194, 2.5296789430532147, 0.06894529348856171]]
2020-12-19 01:30:41,919 : INFO : Removed 56 and 1761 OOV words from document 1 and 2 (respectively).
2020-12-19 01:30:41,920 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:30:41,921 : INFO : built Dictionary(39

2020-12-19 01:30:55,287 : INFO : vocab #6957
2020-12-19 01:30:55,289 : INFO : diff #set()
2020-12-19 01:30:56,533 : INFO : alphabet #6957
2020-12-19 01:30:57,146 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ33.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[0.9048358715495772, 0.5249796136958003], [0.8483816087245941, 0.15161839], [3.7820896423727435, 1.3781677709498696], [4.940249894421201, 7.587625711074752, 7.662122769560144, 4.86575283593581, 2.721872875138943, 0.07449705848539168]]
2020-12-19 01:30:57,149 : INFO : Removed 56 and 2503 OOV words from document 1 and 2 (respectively).
2020-12-19 01:30:57,150 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:30:57,151 : INFO : built Dictionary(28 unique tokens: ['base', 'est', 'form', 'get', 'less']...) from 2 documents (total 184 corpus positions)
2020-12-19 01:30:57,166 : INFO : token count processed
2020-12-19 01:30:57,171 : INFO : frequencies processed

2020-12-19 01:31:12,360 : INFO : Removed 56 and 2642 OOV words from document 1 and 2 (respectively).
2020-12-19 01:31:12,361 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:31:12,362 : INFO : built Dictionary(49 unique tokens: ['base', 'est', 'form', 'get', 'less']...) from 2 documents (total 311 corpus positions)
2020-12-19 01:31:12,382 : INFO : token count processed
2020-12-19 01:31:12,388 : INFO : frequencies processed
2020-12-19 01:31:13,000 : INFO : scalar_distribution processed
2020-12-19 01:31:13,001 : INFO : entropies processed
2020-12-19 01:31:13,002 : INFO : extropies processed
2020-12-19 01:31:13,004 : INFO : token count processed
2020-12-19 01:31:13,006 : INFO : alphabet_source #6957
2020-12-19 01:31:13,007 : INFO : alphabet_target #6957
2020-12-19 01:31:13,008 : INFO : vocab #6957
2020-12-19 01:31:13,010 : INFO : diff #set()
2020-12-19 01:31:14,247 : INFO : alphabet #6957
2020-12-19 01:31:14,859 : INFO : Computed distances or similarities ('tes

2020-12-19 01:31:27,436 : INFO : token count processed
2020-12-19 01:31:27,442 : INFO : frequencies processed
2020-12-19 01:31:28,059 : INFO : scalar_distribution processed
2020-12-19 01:31:28,059 : INFO : entropies processed
2020-12-19 01:31:28,060 : INFO : extropies processed
2020-12-19 01:31:28,063 : INFO : token count processed
2020-12-19 01:31:28,064 : INFO : alphabet_source #6957
2020-12-19 01:31:28,066 : INFO : alphabet_target #6957
2020-12-19 01:31:28,066 : INFO : vocab #6957
2020-12-19 01:31:28,068 : INFO : diff #set()
2020-12-19 01:31:29,418 : INFO : alphabet #6957
2020-12-19 01:31:30,036 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ33.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[0.847303103110365, 0.5413296812614384], [0.7391529083251953, 0.2608471], [3.9040098347359096, 1.3811148630167893], [4.940249894421201, 9.228957519717383, 9.241977353413215, 4.927230060725368, 4.301727458992015, 0.013019833695832261]]
2020-12-19

2020-12-19 01:31:43,296 : INFO : extropies processed
2020-12-19 01:31:43,298 : INFO : token count processed
2020-12-19 01:31:43,300 : INFO : alphabet_source #6957
2020-12-19 01:31:43,301 : INFO : alphabet_target #6957
2020-12-19 01:31:43,302 : INFO : vocab #6957
2020-12-19 01:31:43,304 : INFO : diff #set()
2020-12-19 01:31:44,545 : INFO : alphabet #6957
2020-12-19 01:31:45,161 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ23.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[1.1107258557469892, 0.4737706686433224], [0.9019251316785812, 0.09807487], [3.4681390622295662, 1.3580341801816789], [5.333342934441811, 6.985394788721831, 7.1330392902233335, 5.18569843294031, 1.7996963557815224, 0.1476445015015022]]
2020-12-19 01:31:45,165 : INFO : Removed 86 and 2273 OOV words from document 1 and 2 (respectively).
2020-12-19 01:31:45,166 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:31:45,167 : INFO : built Dictiona

2020-12-19 01:31:58,477 : INFO : vocab #6957
2020-12-19 01:31:58,480 : INFO : diff #set()
2020-12-19 01:31:59,730 : INFO : alphabet #6957
2020-12-19 01:32:00,339 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ23.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[1.0844802576339214, 0.4797358940377266], [0.8141169250011444, 0.18588307], [3.523896386581805, 1.3583133123257851], [5.333342934441811, 7.278774046648788, 7.425150486545169, 5.18696649454543, 2.091807552103358, 0.14637643989638072]]
2020-12-19 01:32:00,343 : INFO : Removed 86 and 2288 OOV words from document 1 and 2 (respectively).
2020-12-19 01:32:00,344 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:32:00,344 : INFO : built Dictionary(40 unique tokens: ['base', 'less', 'list', 'may', 'name']...) from 2 documents (total 379 corpus positions)
2020-12-19 01:32:00,360 : INFO : token count processed
2020-12-19 01:32:00,368 : INFO : frequencies processed

2020-12-19 01:32:15,617 : INFO : Removed 86 and 954 OOV words from document 1 and 2 (respectively).
2020-12-19 01:32:15,618 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:32:15,619 : INFO : built Dictionary(39 unique tokens: ['base', 'less', 'list', 'may', 'name']...) from 2 documents (total 101 corpus positions)
2020-12-19 01:32:15,642 : INFO : token count processed
2020-12-19 01:32:15,649 : INFO : frequencies processed
2020-12-19 01:32:16,261 : INFO : scalar_distribution processed
2020-12-19 01:32:16,262 : INFO : entropies processed
2020-12-19 01:32:16,263 : INFO : extropies processed
2020-12-19 01:32:16,265 : INFO : token count processed
2020-12-19 01:32:16,266 : INFO : alphabet_source #6957
2020-12-19 01:32:16,268 : INFO : alphabet_target #6957
2020-12-19 01:32:16,268 : INFO : vocab #6957
2020-12-19 01:32:16,270 : INFO : diff #set()
2020-12-19 01:32:17,509 : INFO : alphabet #6957
2020-12-19 01:32:18,125 : INFO : Computed distances or similarities ('tes

2020-12-19 01:32:30,871 : INFO : built Dictionary(29 unique tokens: ['base', 'type', 'use', '://', 'build']...) from 2 documents (total 230 corpus positions)
2020-12-19 01:32:30,881 : INFO : token count processed
2020-12-19 01:32:30,889 : INFO : frequencies processed
2020-12-19 01:32:31,505 : INFO : scalar_distribution processed
2020-12-19 01:32:31,506 : INFO : entropies processed
2020-12-19 01:32:31,507 : INFO : extropies processed
2020-12-19 01:32:31,509 : INFO : token count processed
2020-12-19 01:32:31,511 : INFO : alphabet_source #6957
2020-12-19 01:32:31,512 : INFO : alphabet_target #6957
2020-12-19 01:32:31,513 : INFO : vocab #6957
2020-12-19 01:32:31,515 : INFO : diff #set()
2020-12-19 01:32:32,751 : INFO : alphabet #6957
2020-12-19 01:32:33,375 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ40.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[1.1995637954074454, 0.4546355973343164], [0.9556447342038155, 0.044355266], [3.1699250

2020-12-19 01:32:46,126 : INFO : frequencies processed
2020-12-19 01:32:46,748 : INFO : scalar_distribution processed
2020-12-19 01:32:46,748 : INFO : entropies processed
2020-12-19 01:32:46,749 : INFO : extropies processed
2020-12-19 01:32:46,751 : INFO : token count processed
2020-12-19 01:32:46,752 : INFO : alphabet_source #6957
2020-12-19 01:32:46,754 : INFO : alphabet_target #6957
2020-12-19 01:32:46,755 : INFO : vocab #6957
2020-12-19 01:32:46,758 : INFO : diff #set()
2020-12-19 01:32:48,011 : INFO : alphabet #6957
2020-12-19 01:32:48,627 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ40.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[1.1464813829504765, 0.4658787203760584], [0.9649099372327328, 0.035090063], [2.584962500721156, 1.315172029168969], [4.723405663154669, 7.373064697852536, 7.500191145860503, 4.596279215146701, 2.7767854827058347, 0.12712644800796724]]
2020-12-19 01:32:48,630 : INFO : Removed 37 and 2544 OOV words fr

2020-12-19 01:33:01,932 : INFO : token count processed
2020-12-19 01:33:01,933 : INFO : alphabet_source #6957
2020-12-19 01:33:01,934 : INFO : alphabet_target #6957
2020-12-19 01:33:01,935 : INFO : vocab #6957
2020-12-19 01:33:01,937 : INFO : diff #set()
2020-12-19 01:33:03,177 : INFO : alphabet #6957
2020-12-19 01:33:03,792 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ40.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[1.0282273955934704, 0.4930413631985257], [0.8978729099035263, 0.10212709], [3.375, 1.3527432338288685], [4.723405663154669, 8.67532843392421, 8.713055225877492, 4.685678871201388, 3.989649562722823, 0.03772679195328088]]
2020-12-19 01:33:03,794 : INFO : Removed 37 and 926 OOV words from document 1 and 2 (respectively).
2020-12-19 01:33:03,795 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:33:03,797 : INFO : built Dictionary(25 unique tokens: ['base', 'type', 'use', '"),', 'associ']...) fro

2020-12-19 01:33:17,062 : INFO : diff #set()
2020-12-19 01:33:18,302 : INFO : alphabet #6957
2020-12-19 01:33:18,916 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ40.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[0.9912853664554716, 0.5021881930363503], [0.8975667208433151, 0.10243328], [3.521928094887362, 1.3671580312847744], [4.723405663154669, 7.9055766874171, 7.949737807923171, 4.679244542648597, 3.2263321447685023, 0.044161120506070795]]
2020-12-19 01:33:18,919 : INFO : Removed 37 and 2291 OOV words from document 1 and 2 (respectively).
2020-12-19 01:33:18,920 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:33:18,921 : INFO : built Dictionary(24 unique tokens: ['base', 'type', 'use', 'act', 'char']...) from 2 documents (total 152 corpus positions)
2020-12-19 01:33:18,936 : INFO : token count processed
2020-12-19 01:33:18,942 : INFO : frequencies processed
2020-12-19 01:33:19,668 : INFO : scalar_distr

2020-12-19 01:33:34,168 : INFO : Removed 11 and 1444 OOV words from document 1 and 2 (respectively).
2020-12-19 01:33:34,169 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:33:34,170 : INFO : built Dictionary(30 unique tokens: ['est', 'general', '200', '://', 'break']...) from 2 documents (total 148 corpus positions)
2020-12-19 01:33:34,184 : INFO : token count processed
2020-12-19 01:33:34,195 : INFO : frequencies processed
2020-12-19 01:33:34,814 : INFO : scalar_distribution processed
2020-12-19 01:33:34,815 : INFO : entropies processed
2020-12-19 01:33:34,816 : INFO : extropies processed
2020-12-19 01:33:34,820 : INFO : token count processed
2020-12-19 01:33:34,823 : INFO : alphabet_source #6957
2020-12-19 01:33:34,825 : INFO : alphabet_target #6957
2020-12-19 01:33:34,826 : INFO : vocab #6957
2020-12-19 01:33:34,829 : INFO : diff #set()
2020-12-19 01:33:36,074 : INFO : alphabet #6957
2020-12-19 01:33:36,703 : INFO : Computed distances or similarities ('

2020-12-19 01:33:49,414 : INFO : built Dictionary(46 unique tokens: ['est', 'general', '200', '://', 'act']...) from 2 documents (total 326 corpus positions)
2020-12-19 01:33:49,428 : INFO : token count processed
2020-12-19 01:33:49,435 : INFO : frequencies processed
2020-12-19 01:33:50,056 : INFO : scalar_distribution processed
2020-12-19 01:33:50,057 : INFO : entropies processed
2020-12-19 01:33:50,058 : INFO : extropies processed
2020-12-19 01:33:50,064 : INFO : token count processed
2020-12-19 01:33:50,066 : INFO : alphabet_source #6957
2020-12-19 01:33:50,068 : INFO : alphabet_target #6957
2020-12-19 01:33:50,070 : INFO : vocab #6957
2020-12-19 01:33:50,071 : INFO : diff #set()
2020-12-19 01:33:51,327 : INFO : alphabet #6957
2020-12-19 01:33:51,943 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ31.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[1.0566251635087378, 0.4862334749876999], [0.8193776309490204, 0.18062237], [2.50325833

2020-12-19 01:34:04,711 : INFO : frequencies processed
2020-12-19 01:34:05,334 : INFO : scalar_distribution processed
2020-12-19 01:34:05,335 : INFO : entropies processed
2020-12-19 01:34:05,335 : INFO : extropies processed
2020-12-19 01:34:05,338 : INFO : token count processed
2020-12-19 01:34:05,339 : INFO : alphabet_source #6957
2020-12-19 01:34:05,341 : INFO : alphabet_target #6957
2020-12-19 01:34:05,341 : INFO : vocab #6957
2020-12-19 01:34:05,343 : INFO : diff #set()
2020-12-19 01:34:06,601 : INFO : alphabet #6957
2020-12-19 01:34:07,214 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ31.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[1.173777551048725, 0.4600286719851147], [0.908493660390377, 0.09150634], [2.7219280948873625, 1.3198385641318495], [3.378783493486176, 7.3308860974230985, 7.344953740565078, 3.364715850344197, 3.966170247078902, 0.014067643141979502]]
2020-12-19 01:34:07,217 : INFO : Removed 11 and 2276 OOV words f

2020-12-19 01:34:20,610 : INFO : token count processed
2020-12-19 01:34:20,611 : INFO : alphabet_source #6957
2020-12-19 01:34:20,613 : INFO : alphabet_target #6957
2020-12-19 01:34:20,613 : INFO : vocab #6957
2020-12-19 01:34:20,615 : INFO : diff #set()
2020-12-19 01:34:21,852 : INFO : alphabet #6957
2020-12-19 01:34:22,467 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ58.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[0.9252488826245935, 0.5194133646953484], [0.8062765896320343, 0.19372341], [4.5737025916981695, 1.3978654822794003], [9.126024319448563, 8.158196888137685, 8.887915536998777, 8.396305670587472, -0.2381087824497854, 0.7297186488610929]]
2020-12-19 01:34:22,470 : INFO : Removed 639 and 1854 OOV words from document 1 and 2 (respectively).
2020-12-19 01:34:22,471 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:34:22,472 : INFO : built Dictionary(41 unique tokens: ['also', 'aph', 'app', 'base', 

2020-12-19 01:34:35,913 : INFO : diff #set()
2020-12-19 01:34:37,156 : INFO : alphabet #6957
2020-12-19 01:34:37,770 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ58.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[0.9143762684837168, 0.5223633496000505], [0.8444755673408508, 0.15552443], [4.912593481276515, 1.4053394377327564], [9.126024319448563, 7.749661971001125, 8.648012005255914, 8.227674285193775, -0.47801231419264845, 0.898350034254789]]
2020-12-19 01:34:37,773 : INFO : Removed 639 and 1285 OOV words from document 1 and 2 (respectively).
2020-12-19 01:34:37,774 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:34:37,775 : INFO : built Dictionary(40 unique tokens: ['also', 'aph', 'app', 'base', 'est']...) from 2 documents (total 204 corpus positions)
2020-12-19 01:34:37,793 : INFO : token count processed
2020-12-19 01:34:37,802 : INFO : frequencies processed
2020-12-19 01:34:38,414 : INFO : scalar_dist

2020-12-19 01:34:53,091 : INFO : Removed 639 and 1986 OOV words from document 1 and 2 (respectively).
2020-12-19 01:34:53,092 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:34:53,093 : INFO : built Dictionary(46 unique tokens: ['also', 'aph', 'app', 'base', 'est']...) from 2 documents (total 283 corpus positions)
2020-12-19 01:34:53,114 : INFO : token count processed
2020-12-19 01:34:53,127 : INFO : frequencies processed
2020-12-19 01:34:53,760 : INFO : scalar_distribution processed
2020-12-19 01:34:53,761 : INFO : entropies processed
2020-12-19 01:34:53,762 : INFO : extropies processed
2020-12-19 01:34:53,765 : INFO : token count processed
2020-12-19 01:34:53,766 : INFO : alphabet_source #6957
2020-12-19 01:34:53,768 : INFO : alphabet_target #6957
2020-12-19 01:34:53,769 : INFO : vocab #6957
2020-12-19 01:34:53,771 : INFO : diff #set()
2020-12-19 01:34:55,009 : INFO : alphabet #6957
2020-12-19 01:34:55,624 : INFO : Computed distances or similarities ('tes

2020-12-19 01:35:08,342 : INFO : token count processed
2020-12-19 01:35:08,352 : INFO : frequencies processed
2020-12-19 01:35:08,981 : INFO : scalar_distribution processed
2020-12-19 01:35:08,982 : INFO : entropies processed
2020-12-19 01:35:08,983 : INFO : extropies processed
2020-12-19 01:35:08,988 : INFO : token count processed
2020-12-19 01:35:08,990 : INFO : alphabet_source #6957
2020-12-19 01:35:08,992 : INFO : alphabet_target #6957
2020-12-19 01:35:08,993 : INFO : vocab #6957
2020-12-19 01:35:08,997 : INFO : diff #set()
2020-12-19 01:35:10,351 : INFO : alphabet #6957
2020-12-19 01:35:10,966 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ19.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[0.9578465096616913, 0.5107652694249236], [0.8957700654864311, 0.104229935], [4.432294243948856, 1.4044493550643158], [5.209707042510355, 8.626714101844048, 8.687025579982027, 5.1493955643723766, 3.477318537471672, 0.060311478137979435]]
2020-12-

2020-12-19 01:35:24,199 : INFO : extropies processed
2020-12-19 01:35:24,202 : INFO : token count processed
2020-12-19 01:35:24,203 : INFO : alphabet_source #6957
2020-12-19 01:35:24,204 : INFO : alphabet_target #6957
2020-12-19 01:35:24,205 : INFO : vocab #6957
2020-12-19 01:35:24,207 : INFO : diff #set()
2020-12-19 01:35:25,450 : INFO : alphabet #6957
2020-12-19 01:35:26,064 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ19.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[1.035488404507451, 0.4912825824925201], [0.9752770885825157, 0.024722911], [3.9705730958116847, 1.3904984042298727], [5.209707042510355, 7.127980212598745, 7.272145619750927, 5.065541635358172, 2.062438577240572, 0.1441654071521823]]
2020-12-19 01:35:26,067 : INFO : Removed 99 and 774 OOV words from document 1 and 2 (respectively).
2020-12-19 01:35:26,068 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:35:26,070 : INFO : built Dictionary(

2020-12-19 01:35:39,399 : INFO : vocab #6957
2020-12-19 01:35:39,400 : INFO : diff #set()
2020-12-19 01:35:40,641 : INFO : alphabet #6957
2020-12-19 01:35:41,255 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ19.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[0.9497021302613021, 0.5128988600253407], [0.8737755417823792, 0.12622446], [4.361440501696145, 1.3879908422689988], [5.209707042510355, 9.006525301292756, 9.044334696263792, 5.171897647539318, 3.8346276537534374, 0.037809394971036525]]
2020-12-19 01:35:41,258 : INFO : Removed 99 and 1726 OOV words from document 1 and 2 (respectively).
2020-12-19 01:35:41,259 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:35:41,260 : INFO : built Dictionary(38 unique tokens: ['est', 'key', 'oper', 'per', 'side']...) from 2 documents (total 169 corpus positions)
2020-12-19 01:35:41,270 : INFO : token count processed
2020-12-19 01:35:41,276 : INFO : frequencies processe

2020-12-19 01:35:56,559 : INFO : Removed 99 and 2212 OOV words from document 1 and 2 (respectively).
2020-12-19 01:35:56,560 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:35:56,561 : INFO : built Dictionary(43 unique tokens: ['est', 'key', 'oper', 'per', 'side']...) from 2 documents (total 271 corpus positions)
2020-12-19 01:35:56,572 : INFO : token count processed
2020-12-19 01:35:56,578 : INFO : frequencies processed
2020-12-19 01:35:57,202 : INFO : scalar_distribution processed
2020-12-19 01:35:57,203 : INFO : entropies processed
2020-12-19 01:35:57,204 : INFO : extropies processed
2020-12-19 01:35:57,209 : INFO : token count processed
2020-12-19 01:35:57,212 : INFO : alphabet_source #6957
2020-12-19 01:35:57,214 : INFO : alphabet_target #6957
2020-12-19 01:35:57,215 : INFO : vocab #6957
2020-12-19 01:35:57,217 : INFO : diff #set()
2020-12-19 01:35:58,475 : INFO : alphabet #6957
2020-12-19 01:35:59,119 : INFO : Computed distances or similarities ('test

2020-12-19 01:36:11,861 : INFO : token count processed
2020-12-19 01:36:11,868 : INFO : frequencies processed
2020-12-19 01:36:12,481 : INFO : scalar_distribution processed
2020-12-19 01:36:12,482 : INFO : entropies processed
2020-12-19 01:36:12,483 : INFO : extropies processed
2020-12-19 01:36:12,485 : INFO : token count processed
2020-12-19 01:36:12,487 : INFO : alphabet_source #6957
2020-12-19 01:36:12,488 : INFO : alphabet_target #6957
2020-12-19 01:36:12,489 : INFO : vocab #6957
2020-12-19 01:36:12,491 : INFO : diff #set()
2020-12-19 01:36:13,732 : INFO : alphabet #6957
2020-12-19 01:36:14,346 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ1.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[0.8324011003072685, 0.5457320451468369], [0.8529299199581146, 0.14707008], [3.8121795021836387, 1.3708906301233523], [6.091365711073073, 6.792406615896576, 7.02581696052188, 5.857955366447769, 0.9344512494488066, 0.23341034462530352]]
2020-12-19 

2020-12-19 01:36:27,737 : INFO : extropies processed
2020-12-19 01:36:27,743 : INFO : token count processed
2020-12-19 01:36:27,745 : INFO : alphabet_source #6957
2020-12-19 01:36:27,748 : INFO : alphabet_target #6957
2020-12-19 01:36:27,749 : INFO : vocab #6957
2020-12-19 01:36:27,752 : INFO : diff #set()
2020-12-19 01:36:29,013 : INFO : alphabet #6957
2020-12-19 01:36:29,627 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ1.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[0.8949228117119609, 0.5277259811425004], [0.8600528389215469, 0.13994716], [4.465608600009803, 1.3953801415112417], [6.091365711073073, 7.183148764136712, 7.377995083249946, 5.896519391959838, 1.286629372176873, 0.1948463191132337]]
2020-12-19 01:36:29,630 : INFO : Removed 154 and 2059 OOV words from document 1 and 2 (respectively).
2020-12-19 01:36:29,631 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:36:29,632 : INFO : built Dictionary(

2020-12-19 01:36:43,075 : INFO : vocab #6957
2020-12-19 01:36:43,076 : INFO : diff #set()
2020-12-19 01:36:44,318 : INFO : alphabet #6957
2020-12-19 01:36:44,931 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ1.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[0.8128127036659784, 0.5516289675032285], [0.7684215754270554, 0.23157842], [4.212180011265247, 1.384580705730539], [6.091365711073073, 8.610843107887472, 8.731494031148056, 5.97071478781249, 2.640128320074983, 0.12065092326058391]]
2020-12-19 01:36:44,934 : INFO : Removed 154 and 1850 OOV words from document 1 and 2 (respectively).
2020-12-19 01:36:44,935 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:36:44,936 : INFO : built Dictionary(39 unique tokens: ['build', 'est', 'key', 'may', 'new']...) from 2 documents (total 323 corpus positions)
2020-12-19 01:36:44,948 : INFO : token count processed
2020-12-19 01:36:44,953 : INFO : frequencies processed
20

2020-12-19 01:37:00,059 : INFO : Removed 209 and 3144 OOV words from document 1 and 2 (respectively).
2020-12-19 01:37:00,060 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:37:00,060 : INFO : built Dictionary(41 unique tokens: ['also', 'associ', 'end', 'est', 'ident']...) from 2 documents (total 417 corpus positions)
2020-12-19 01:37:00,084 : INFO : token count processed
2020-12-19 01:37:00,089 : INFO : frequencies processed
2020-12-19 01:37:00,829 : INFO : scalar_distribution processed
2020-12-19 01:37:00,830 : INFO : entropies processed
2020-12-19 01:37:00,831 : INFO : extropies processed
2020-12-19 01:37:00,834 : INFO : token count processed
2020-12-19 01:37:00,836 : INFO : alphabet_source #6957
2020-12-19 01:37:00,838 : INFO : alphabet_target #6957
2020-12-19 01:37:00,839 : INFO : vocab #6957
2020-12-19 01:37:00,841 : INFO : diff #set()
2020-12-19 01:37:02,082 : INFO : alphabet #6957
2020-12-19 01:37:02,696 : INFO : Computed distances or similarities (

2020-12-19 01:37:15,319 : INFO : built Dictionary(44 unique tokens: ['also', 'associ', 'end', 'est', 'ident']...) from 2 documents (total 314 corpus positions)
2020-12-19 01:37:15,342 : INFO : token count processed
2020-12-19 01:37:15,352 : INFO : frequencies processed
2020-12-19 01:37:15,970 : INFO : scalar_distribution processed
2020-12-19 01:37:15,971 : INFO : entropies processed
2020-12-19 01:37:15,972 : INFO : extropies processed
2020-12-19 01:37:15,977 : INFO : token count processed
2020-12-19 01:37:15,979 : INFO : alphabet_source #6957
2020-12-19 01:37:15,981 : INFO : alphabet_target #6957
2020-12-19 01:37:15,982 : INFO : vocab #6957
2020-12-19 01:37:15,985 : INFO : diff #set()
2020-12-19 01:37:17,240 : INFO : alphabet #6957
2020-12-19 01:37:17,853 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ38.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[0.9048156927573038, 0.524985175102404], [0.780708909034729, 0.21929109], [5.175670232

2020-12-19 01:37:30,676 : INFO : frequencies processed
2020-12-19 01:37:31,296 : INFO : scalar_distribution processed
2020-12-19 01:37:31,297 : INFO : entropies processed
2020-12-19 01:37:31,298 : INFO : extropies processed
2020-12-19 01:37:31,300 : INFO : token count processed
2020-12-19 01:37:31,301 : INFO : alphabet_source #6957
2020-12-19 01:37:31,303 : INFO : alphabet_target #6957
2020-12-19 01:37:31,303 : INFO : vocab #6957
2020-12-19 01:37:31,305 : INFO : diff #set()
2020-12-19 01:37:32,568 : INFO : alphabet #6957
2020-12-19 01:37:33,184 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ38.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[0.961460149948916, 0.5098242755663651], [0.8250070363283157, 0.17499296], [4.5279892815744125, 1.4022581630628275], [6.503130455594833, 7.345352443618291, 7.734731896748121, 6.113751002465003, 1.2316014411532885, 0.38937945312983047]]
2020-12-19 01:37:33,188 : INFO : Removed 209 and 1986 OOV words 

2020-12-19 01:37:46,624 : INFO : extropies processed
2020-12-19 01:37:46,627 : INFO : token count processed
2020-12-19 01:37:46,628 : INFO : alphabet_source #6957
2020-12-19 01:37:46,630 : INFO : alphabet_target #6957
2020-12-19 01:37:46,631 : INFO : vocab #6957
2020-12-19 01:37:46,633 : INFO : diff #set()
2020-12-19 01:37:47,873 : INFO : alphabet #6957
2020-12-19 01:37:48,487 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ38.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[1.067728482458465, 0.4836224912910379], [0.952585157006979, 0.047414843], [4.423498224896145, 1.3992424607246354], [6.503130455594833, 7.102818334145153, 7.4309008663038645, 6.175047923436122, 0.9277704107090319, 0.3280825321587111]]
2020-12-19 01:37:48,491 : INFO : Removed 455 and 1761 OOV words from document 1 and 2 (respectively).
2020-12-19 01:37:48,492 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:37:48,493 : INFO : built Dictionar

2020-12-19 01:38:01,913 : INFO : vocab #6957
2020-12-19 01:38:01,914 : INFO : diff #set()
2020-12-19 01:38:03,165 : INFO : alphabet #6957
2020-12-19 01:38:03,782 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ52.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[0.9248917321165544, 0.5195097382959972], [0.953954666852951, 0.046045333], [4.318242691024847, 1.3937520438848123], [7.366092813760617, 7.587625711074752, 8.257454207095364, 6.696264317740006, 0.8913613933347468, 0.669828496020612]]
2020-12-19 01:38:03,786 : INFO : Removed 455 and 2503 OOV words from document 1 and 2 (respectively).
2020-12-19 01:38:03,786 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:38:03,787 : INFO : built Dictionary(46 unique tokens: ['://', 'base', 'ber', 'cer', 'der']...) from 2 documents (total 215 corpus positions)
2020-12-19 01:38:03,809 : INFO : token count processed
2020-12-19 01:38:03,818 : INFO : frequencies processed
2

2020-12-19 01:38:19,178 : INFO : Removed 455 and 2642 OOV words from document 1 and 2 (respectively).
2020-12-19 01:38:19,179 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:38:19,180 : INFO : built Dictionary(63 unique tokens: ['://', 'base', 'ber', 'cer', 'der']...) from 2 documents (total 342 corpus positions)
2020-12-19 01:38:19,221 : INFO : token count processed
2020-12-19 01:38:19,231 : INFO : frequencies processed
2020-12-19 01:38:19,847 : INFO : scalar_distribution processed
2020-12-19 01:38:19,848 : INFO : entropies processed
2020-12-19 01:38:19,849 : INFO : extropies processed
2020-12-19 01:38:19,852 : INFO : token count processed
2020-12-19 01:38:19,853 : INFO : alphabet_source #6957
2020-12-19 01:38:19,855 : INFO : alphabet_target #6957
2020-12-19 01:38:19,855 : INFO : vocab #6957
2020-12-19 01:38:19,857 : INFO : diff #set()
2020-12-19 01:38:21,119 : INFO : alphabet #6957
2020-12-19 01:38:21,738 : INFO : Computed distances or similarities ('test

2020-12-19 01:38:34,553 : INFO : token count processed
2020-12-19 01:38:34,559 : INFO : frequencies processed
2020-12-19 01:38:35,173 : INFO : scalar_distribution processed
2020-12-19 01:38:35,174 : INFO : entropies processed
2020-12-19 01:38:35,175 : INFO : extropies processed
2020-12-19 01:38:35,177 : INFO : token count processed
2020-12-19 01:38:35,179 : INFO : alphabet_source #6957
2020-12-19 01:38:35,180 : INFO : alphabet_target #6957
2020-12-19 01:38:35,181 : INFO : vocab #6957
2020-12-19 01:38:35,182 : INFO : diff #set()
2020-12-19 01:38:36,420 : INFO : alphabet #6957
2020-12-19 01:38:37,034 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ52.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[0.8640723963013043, 0.5364598510144787], [0.942912258207798, 0.05708774], [4.663532754804254, 1.4059345172710873], [7.366092813760617, 9.228957519717383, 9.521139417422694, 7.073910916055306, 2.1550466036620763, 0.29218189770531033]]
2020-12-19

2020-12-19 01:38:50,328 : INFO : extropies processed
2020-12-19 01:38:50,332 : INFO : token count processed
2020-12-19 01:38:50,334 : INFO : alphabet_source #6957
2020-12-19 01:38:50,336 : INFO : alphabet_target #6957
2020-12-19 01:38:50,337 : INFO : vocab #6957
2020-12-19 01:38:50,340 : INFO : diff #set()
2020-12-19 01:38:51,702 : INFO : alphabet #6957
2020-12-19 01:38:52,319 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ6.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[1.0099217847917674, 0.49753179828517674], [0.8688268959522247, 0.1311731], [2.7401032600285835, 1.298908549047567], [4.138589126445727, 6.985394788721831, 7.036839862104712, 4.087144053062847, 2.8982507356589844, 0.0514450733828804]]
2020-12-19 01:38:52,323 : INFO : Removed 40 and 2273 OOV words from document 1 and 2 (respectively).
2020-12-19 01:38:52,324 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:38:52,325 : INFO : built Dictionary

2020-12-19 01:39:05,557 : INFO : vocab #6957
2020-12-19 01:39:05,560 : INFO : diff #set()
2020-12-19 01:39:06,809 : INFO : alphabet #6957
2020-12-19 01:39:07,424 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ6.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[1.058379187828052, 0.48581913668451626], [0.7665635943412781, 0.2334364], [2.817856113506615, 1.3155321013041552], [4.138589126445727, 7.278774046648788, 7.321368016846124, 4.0959951562483905, 3.182778890400397, 0.042593970197335906]]
2020-12-19 01:39:07,427 : INFO : Removed 40 and 2288 OOV words from document 1 and 2 (respectively).
2020-12-19 01:39:07,428 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:39:07,429 : INFO : built Dictionary(36 unique tokens: ['est', 'respond', 'use', '200', 'back']...) from 2 documents (total 369 corpus positions)
2020-12-19 01:39:07,441 : INFO : token count processed
2020-12-19 01:39:07,449 : INFO : frequencies process

2020-12-19 01:39:22,650 : INFO : Removed 40 and 954 OOV words from document 1 and 2 (respectively).
2020-12-19 01:39:22,651 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 01:39:22,652 : INFO : built Dictionary(32 unique tokens: ['est', 'respond', 'use', '200', '://']...) from 2 documents (total 91 corpus positions)
2020-12-19 01:39:22,665 : INFO : token count processed
2020-12-19 01:39:22,671 : INFO : frequencies processed
2020-12-19 01:39:23,285 : INFO : scalar_distribution processed
2020-12-19 01:39:23,286 : INFO : entropies processed
2020-12-19 01:39:23,287 : INFO : extropies processed
2020-12-19 01:39:23,291 : INFO : token count processed
2020-12-19 01:39:23,293 : INFO : alphabet_source #6957
2020-12-19 01:39:23,295 : INFO : alphabet_target #6957
2020-12-19 01:39:23,296 : INFO : vocab #6957
2020-12-19 01:39:23,300 : INFO : diff #set()
2020-12-19 01:39:24,560 : INFO : alphabet #6957
2020-12-19 01:39:25,173 : INFO : Computed distances or similarities ('test

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.51824,0.739114,0.260886,4.464656,1.39074,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.28789,7.522528,5.790037,1.497854,0.234638
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.51355,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.38191,6.024675,6.792407,7.049971,5.76711,1.025296,0.257564


In [None]:
word2vec.df_nonground_link.head()

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.51824,0.739114,0.260886,4.464656,1.39074,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.28789,7.522528,5.790037,1.497854,0.234638
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.51355,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.38191,6.024675,6.792407,7.049971,5.76711,1.025296,0.257564


In [None]:
#tst
# SACP variant, kept for reference and disabled while the notebook runs on LibEST:
#df_mapping = pd.read_csv(parameters['path_mappings'], header = 0, sep = ',')
#ground_links = word2vec.ground_truth_processing(from_mappings='True') #<---- SACP
# NOTE(review): the SACP line above passes the string 'True', while later cells pass the
# boolean True for from_mappings -- confirm which one the method expects before re-enabling.
# LibEST variant: build the ground-truth links from the file at path_to_ground_truth.
ground_links = word2vec.ground_truth_processing(path_to_ground_truth) #<---- LIBEST
# Display the links; the recorded output is a list of (source file name, target file name) tuples.
ground_links

[('RQ4.txt', 'us1864.c'),
 ('RQ4.txt', 'us901.c'),
 ('RQ4.txt', 'us1005.c'),
 ('RQ4.txt', 'us3512.c'),
 ('RQ4.txt', 'us895.c'),
 ('RQ4.txt', 'us897.c'),
 ('RQ4.txt', 'us900.c'),
 ('RQ6.txt', 'us1005.c'),
 ('RQ6.txt', 'us1159.c'),
 ('RQ6.txt', 'us3496.c'),
 ('RQ6.txt', 'us3512.c'),
 ('RQ6.txt', 'us3612.c'),
 ('RQ6.txt', 'us4020.c'),
 ('RQ6.txt', 'us748.c'),
 ('RQ6.txt', 'us893.c'),
 ('RQ6.txt', 'us895.c'),
 ('RQ6.txt', 'us896.c'),
 ('RQ6.txt', 'us897.c'),
 ('RQ6.txt', 'us898.c'),
 ('RQ6.txt', 'us899.c'),
 ('RQ6.txt', 'us900.c'),
 ('RQ8.txt', 'us1005.c'),
 ('RQ8.txt', 'us1159.c'),
 ('RQ8.txt', 'us1883.c'),
 ('RQ8.txt', 'us2174.c'),
 ('RQ8.txt', 'us3496.c'),
 ('RQ8.txt', 'us3512.c'),
 ('RQ8.txt', 'us3612.c'),
 ('RQ8.txt', 'us4020.c'),
 ('RQ8.txt', 'us748.c'),
 ('RQ8.txt', 'us893.c'),
 ('RQ8.txt', 'us895.c'),
 ('RQ8.txt', 'us896.c'),
 ('RQ8.txt', 'us897.c'),
 ('RQ8.txt', 'us898.c'),
 ('RQ8.txt', 'us899.c'),
 ('RQ8.txt', 'us900.c'),
 ('RQ11.txt', 'us1159.c'),
 ('RQ11.txt', 'us1883.c'),
 ('R

In [None]:
len(ground_links)

352

In [None]:
#tst
# Short alias for the non-ground-truth link frame used by the lookups below.
# Plain assignment: df_x is bound to whatever object the attribute returns, not to a copy.
df_x = word2vec.df_nonground_link
df_x.head()

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.51824,0.739114,0.260886,4.464656,1.39074,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.28789,7.522528,5.790037,1.497854,0.234638
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.51355,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.38191,6.024675,6.792407,7.049971,5.76711,1.025296,0.257564


In [None]:
df_x['Source'].values

array(['test_data/LibEST_semeru_format/requirements/RQ17.txt',
       'test_data/LibEST_semeru_format/requirements/RQ17.txt',
       'test_data/LibEST_semeru_format/requirements/RQ17.txt', ...,
       'test_data/LibEST_semeru_format/requirements/RQ6.txt',
       'test_data/LibEST_semeru_format/requirements/RQ6.txt',
       'test_data/LibEST_semeru_format/requirements/RQ6.txt'],
      dtype=object)

In [None]:
#tst
# Look up the row of one known (requirement, test-case) pair by file name.
test_source = 'RQ50.txt'
test_target = 'us893.c'
# regex=False on BOTH filters: the file names contain '.', which is a wildcard under the
# default regex=True, so the Source filter could otherwise match unintended names.
df_x[
    df_x["Source"].str.contains(test_source, regex=False)
    & df_x["Target"].str.contains(test_target, regex=False)
]

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise
859,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us893.c,0.983646,0.504122,0.849104,0.150896,4.892534,1.415386,6.329472,7.905577,8.075343,6.159706,1.745871,0.169766


In [None]:
ground_links[0][0]

'RQ4.txt'

In [None]:
ground_links[0][1]

'us1864.c'

In [None]:
#tst
# Match both file names literally (regex=False) so the '.' in them is not treated as a regex wildcard
df_x[( df_x["Source"].str.contains(ground_links[0][0], regex=False) ) & (df_x["Target"].str.contains(ground_links[0][1], regex=False))]

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise
394,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1864.c,0.859013,0.53792,0.720722,0.279278,3.347434,1.307874,5.492329,8.610843,8.658847,5.444325,3.166518,0.048003


In [None]:
def find_index_gt( tuple_g ):
    """Return the index labels of the rows of `df_x` that match a ground-truth link.

    Reads the notebook-level DataFrame `df_x`, which must have string columns
    'Source' and 'Target'.

    Parameters
    ----------
    tuple_g : sequence of (str, str)
        Ground-truth link as (source name, target name),
        e.g. ('RQ4.txt', 'us1864.c').

    Returns
    -------
    numpy.ndarray
        Index labels of the matching rows; empty when the link is not present.
    """
    source_name, target_name = tuple_g[0], tuple_g[1]
    # `Series.str` has no `eq` method (the original call raised AttributeError).
    # Both columns hold full paths, so the names are matched as literal
    # substrings, the same way the lookup cells above do it.
    is_source = df_x["Source"].str.contains(source_name, regex=False)
    is_target = df_x["Target"].str.contains(target_name, regex=False)
    return df_x.loc[is_source & is_target].index.values
#dist

In [None]:
#Formatted for SACP
# NOTE(review): in the recorded run this cell raised KeyError: 'id_pr'. Presumably the
# `from_mappings=True` path expects SACP mapping columns that the loaded corpus does not
# have — confirm before running it; the cells below that use `matchGT` depend on it.
matchGT = [ word2vec.findDistInDF( g , from_mappings=True ) for g in word2vec.ground_truth_processing(from_mappings=True)]
matchGT

KeyError: 'id_pr'

In [None]:
# Flatten the per-link index arrays into one array with a single concatenation
# (pairwise reduce re-copied the accumulated array at every step)
matchGT = np.concatenate(matchGT) #Concatenate indexes
matchGT

In [None]:
new_column = pd.Series(np.full([len(matchGT)], 1 ), name=word2vec.params['names'][2], index = matchGT)

In [None]:
new_column

In [None]:
#Some of the mappings are not found in the non-links list because the mappings contain all the ground truth of the issues;
#they might include files that were not taken into account in the non-links part
matchGT_ = [ (g,word2vec.findDistInDF( g , from_mappings=True )) for g in word2vec.ground_truth_processing(from_mappings=True)]

In [None]:
matchGT_

In [None]:
len(matchGT)

In [None]:
#[step 3]Saving Non-GroundTruth Links
word2vec.SaveLinks()

2020-12-19 01:48:50,778 : INFO : Saving in...../dvc-ds4se/metrics/traceability/experiments0.0.x/[libest-VectorizationType.word2vec-LinkType.req2tc-False-1608342530.750395].csv


In [None]:
#Loading Non-GroundTruth Links (change the timestamp with the assigned in the previous step)
df_nonglinks = LoadLinks(timestamp=1608342530.750395, params=parameters)
df_nonglinks.head()

2020-12-19 01:49:02,982 : INFO : Loading computed links from... ../dvc-ds4se/metrics/traceability/experiments0.0.x/[libest-VectorizationType.word2vec-LinkType.req2tc-False-1608342530.750395].csv


Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.51824,0.739114,0.260886,4.464656,1.39074,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.28789,7.522528,5.790037,1.497854,0.234638
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.51355,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.38191,6.024675,6.792407,7.049971,5.76711,1.025296,0.257564


In [None]:
#[step 4]GroundTruthMatching Testing
word2vec.MatchWithGroundTruth(path_to_ground_truth, semeru_format=True)
word2vec.df_ground_link

2020-12-19 01:49:09,020 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,024 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,028 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,030 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,033 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,036 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,038 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,041 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,043 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,046 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,049 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,051 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,054 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,057 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,062 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,066 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,069 

2020-12-19 01:49:09,453 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,459 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,464 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,466 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,469 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,472 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,474 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,485 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,487 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,490 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,493 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,498 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,502 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,504 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,507 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,510 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,513 

2020-12-19 01:49:09,895 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,899 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,902 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,905 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,907 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,910 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,913 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,916 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,918 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,921 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,924 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,926 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,930 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,935 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,938 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,941 : INFO : findDistInDF: semeru_format
2020-12-19 01:49:09,944 

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise,Linked?
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.518240,0.739114,0.260886,4.464656,1.390740,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428,0.0
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.287890,7.522528,5.790037,1.497854,0.234638,0.0
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552,0.0
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.513550,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756,0.0
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.381910,6.024675,6.792407,7.049971,5.767110,1.025296,0.257564,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1864.c,0.845571,0.541838,0.771328,0.228672,3.082328,1.324060,4.138589,8.610843,8.614900,4.134532,4.476311,0.004057,0.0
1088,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1159.c,1.051327,0.487489,0.842482,0.157518,2.757004,1.314602,4.138589,7.330886,7.369423,4.100052,3.230834,0.038537,1.0
1089,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us2174.c,0.926151,0.519170,0.772877,0.227123,2.939884,1.321355,4.138589,9.228958,9.233328,4.134218,5.094739,0.004371,0.0
1090,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us893.c,0.997642,0.500590,0.826657,0.173343,3.345074,1.351139,4.138589,7.905577,7.929560,4.114606,3.790971,0.023983,1.0


In [None]:
#[step 4.1]GroundTruthMatching Testing For CISCO Mappings <----- Warning SACP
word2vec.MatchWithGroundTruth(from_mappings=True)
word2vec.df_ground_link

In [None]:
df_z = word2vec.df_ground_link
# Keep only the rows that contain no NaN/inf value.
# `axis` is passed by keyword: the positional form `.any(1)` is rejected by pandas >= 2.0.
df_z[~df_z.isin([np.nan, np.inf, -np.inf]).any(axis=1)]

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise,Linked?
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.518240,0.739114,0.260886,4.464656,1.390740,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428,0.0
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.287890,7.522528,5.790037,1.497854,0.234638,0.0
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552,0.0
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.513550,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756,0.0
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.381910,6.024675,6.792407,7.049971,5.767110,1.025296,0.257564,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1864.c,0.845571,0.541838,0.771328,0.228672,3.082328,1.324060,4.138589,8.610843,8.614900,4.134532,4.476311,0.004057,0.0
1088,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1159.c,1.051327,0.487489,0.842482,0.157518,2.757004,1.314602,4.138589,7.330886,7.369423,4.100052,3.230834,0.038537,1.0
1089,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us2174.c,0.926151,0.519170,0.772877,0.227123,2.939884,1.321355,4.138589,9.228958,9.233328,4.134218,5.094739,0.004371,0.0
1090,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us893.c,0.997642,0.500590,0.826657,0.173343,3.345074,1.351139,4.138589,7.905577,7.929560,4.114606,3.790971,0.023983,1.0


In [None]:
#debug
df_y = word2vec.df_ground_link.copy()
df_y

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise,Linked?
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.518240,0.739114,0.260886,4.464656,1.390740,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428,0.0
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.287890,7.522528,5.790037,1.497854,0.234638,0.0
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552,0.0
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.513550,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756,0.0
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.381910,6.024675,6.792407,7.049971,5.767110,1.025296,0.257564,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1864.c,0.845571,0.541838,0.771328,0.228672,3.082328,1.324060,4.138589,8.610843,8.614900,4.134532,4.476311,0.004057,0.0
1088,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1159.c,1.051327,0.487489,0.842482,0.157518,2.757004,1.314602,4.138589,7.330886,7.369423,4.100052,3.230834,0.038537,1.0
1089,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us2174.c,0.926151,0.519170,0.772877,0.227123,2.939884,1.321355,4.138589,9.228958,9.233328,4.134218,5.094739,0.004371,0.0
1090,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us893.c,0.997642,0.500590,0.826657,0.173343,3.345074,1.351139,4.138589,7.905577,7.929560,4.114606,3.790971,0.023983,1.0


In [None]:
#debug
df_y.update(new_column)

In [None]:
new_column

In [None]:
word2vec.df_ground_link[word2vec.df_ground_link['Linked?'] == 1]

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise,Linked?
44,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.819607,0.549569,0.749672,0.250328,4.280310,1.382068,5.622179,8.158197,8.235031,5.545345,2.612852,0.076834,1.0
45,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.942675,0.514754,0.846516,0.153484,3.868299,1.366060,5.622179,6.985395,7.193951,5.413622,1.571772,0.208557,1.0
47,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1060.c,0.855173,0.539033,0.831881,0.168119,4.250213,1.388677,5.622179,7.587626,7.746308,5.463497,2.124129,0.158682,1.0
58,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1864.c,0.718124,0.582030,0.784647,0.215353,4.401750,1.392002,5.622179,8.610843,8.618406,5.614617,2.996227,0.007562,1.0
59,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1159.c,0.983724,0.504102,0.824537,0.175463,3.956365,1.368030,5.622179,7.330886,7.501689,5.451376,1.879510,0.170803,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1084,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us748.c,0.871661,0.534285,0.785632,0.214368,3.210587,1.341541,4.138589,8.675328,8.681645,4.132272,4.543056,0.006317,1.0
1085,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3612.c,1.083496,0.479962,0.859709,0.140291,2.812879,1.312220,4.138589,7.345352,7.401173,4.082769,3.262584,0.055821,1.0
1088,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1159.c,1.051327,0.487489,0.842482,0.157518,2.757004,1.314602,4.138589,7.330886,7.369423,4.100052,3.230834,0.038537,1.0
1090,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us893.c,0.997642,0.500590,0.826657,0.173343,3.345074,1.351139,4.138589,7.905577,7.929560,4.114606,3.790971,0.023983,1.0


In [None]:
word2vec.df_ground_link[word2vec.df_ground_link['Linked?'] == 1].shape #Positive Links

In [None]:
#[optional]GroundTruth Direct Processing
ground_links = word2vec.ground_truth_processing(path_to_ground_truth)
ground_links[141] # A tuple

('RQ33.txt', 'us894.c')

In [None]:
#Inspecting Source
ground_links[141][0][:ground_links[141][0].find('.')] + '-'

'RQ33-'

In [None]:
#Inspecting Target
ground_links[141][1][:ground_links[141][1].find('.')]

'us894'

In [None]:
#[step 5]Saving GroundTruth Links
word2vec.SaveLinks(grtruth = True)

2020-12-19 01:49:53,525 : INFO : Saving in...../dvc-ds4se/metrics/traceability/experiments0.0.x/[libest-VectorizationType.word2vec-LinkType.req2tc-True-1608342593.495168].csv


In [None]:
#Loading GroundTruth Links (change the timestamp to the one assigned in the previous step)
df_glinks = LoadLinks(timestamp=1608342593.495168, params=parameters,grtruth = True)
df_glinks.head()

2020-12-19 01:50:04,684 : INFO : Loading computed links from... ../dvc-ds4se/metrics/traceability/experiments0.0.x/[libest-VectorizationType.word2vec-LinkType.req2tc-True-1608342593.495168].csv


Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise,Linked?
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.51824,0.739114,0.260886,4.464656,1.39074,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428,0.0
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.28789,7.522528,5.790037,1.497854,0.234638,0.0
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552,0.0
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.51355,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756,0.0
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.38191,6.024675,6.792407,7.049971,5.76711,1.025296,0.257564,0.0


In [None]:
df_glinks[df_glinks["Linked?"] == 0]

Unnamed: 0,Source,Target,DistanceMetric.WMD,SimilarityMetric.WMD_sim,DistanceMetric.SCM,SimilarityMetric.SCM_sim,EntropyMetric.MSI_I,EntropyMetric.MSI_X,EntropyMetric.Entropy_src,EntropyMetric.Entropy_tgt,EntropyMetric.JI,EntropyMetric.MI,EntropyMetric.Loss,EntropyMetric.Noise,Linked?
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,0.929608,0.518240,0.739114,0.260886,4.464656,1.390740,6.024675,8.626714,8.716142,5.935246,2.691468,0.089428,0.0
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,0.959776,0.510262,0.697518,0.302482,4.228282,1.385062,6.024675,7.287890,7.522528,5.790037,1.497854,0.234638,0.0
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,0.837307,0.544275,0.710308,0.289692,4.634286,1.398938,6.024675,8.158197,8.269749,5.913122,2.245075,0.111552,0.0
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,0.947232,0.513550,0.847159,0.152841,4.011289,1.368943,6.024675,6.985395,7.273151,5.736918,1.248476,0.287756,0.0
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,0.901434,0.525919,0.773193,0.226807,4.105223,1.381910,6.024675,6.792407,7.049971,5.767110,1.025296,0.257564,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1079,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us894.c,0.908948,0.523849,0.745993,0.254007,3.202828,1.343610,4.138589,7.749662,7.762332,4.125919,3.623743,0.012670,0.0
1083,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1883.c,1.047163,0.488481,0.740980,0.259020,3.115590,1.337921,4.138589,9.006525,9.010840,4.134274,4.872251,0.004315,0.0
1086,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us901.c,0.857670,0.538309,0.705430,0.294570,3.185374,1.340750,4.138589,7.643403,7.657125,4.124867,3.518536,0.013722,0.0
1087,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1864.c,0.845571,0.541838,0.771328,0.228672,3.082328,1.324060,4.138589,8.610843,8.614900,4.134532,4.476311,0.004057,0.0


## 3. Artifacts Similarity with Doc2Vec

Try to reproduce the empirical evaluation described in [this paper](https://arxiv.org/pdf/1507.07998.pdf). Pay attention to:
- Accuracy vs. dimensionality (accuracy can be replaced with the false positive rate or the true positive rate)
- Visualizing paragraph vectors using t-SNE
- Computing cosine distance and similarity. More about similarity measures: [link](https://www.kdnuggets.com/2017/08/comparing-distance-measurements-python-scipy.html)

In [None]:
#experiment 0.0.1

In [None]:
# `path_data` already ends with '/', so no leading slash here (it produced '../dvc-ds4se//models/...')
path_to_trained_model = path_data+'models/pv/bpe8k/[doc2vec-Py-Java-PVDBOW-500-20E-8k-1594572857.17191].model'

In [None]:
def doc2vec_params():
    """Assemble the configuration dictionary of the doc2vec experiment on libest.

    Paths are built from the notebook-level `path_data`, `path_to_trained_model`
    and `path_model_prefix` variables. Keys are inserted in a fixed order so the
    displayed dictionary always reads the same way.
    """
    settings = {}
    settings["vectorizationType"] = VectorizationType.doc2vec
    settings["linkType"] = LinkType.req2tc
    settings["system"] = 'libest'

    # Where the corpus lives and how its file is parsed
    corpus_config = {}
    corpus_config["system_path"] = path_data + 'se-benchmarking/traceability/cisco/libest_data/[libest-all-corpus-1596063103.098236].csv'
    corpus_config["sep"] = '~'
    corpus_config["names"] = ['ids','conv']
    corpus_config["prep"] = Preprocessing.conv
    settings["system_path_config"] = corpus_config

    settings["source_type"] = SoftwareArtifacts.REQ.value
    settings["target_type"] = SoftwareArtifacts.TC.value
    settings["path_to_trained_model"] = path_to_trained_model
    settings["saving_path"] = path_data + 'metrics/traceability/experiments0.0.x/'
    settings["names"] = ['Source','Target','Linked?']
    settings["model_prefix"] = path_model_prefix #For BPE Analysis
    return settings

In [None]:
# NOTE: this rebinds `doc2vec_params` from the factory function to the dictionary it returns,
# so running this cell a second time (without re-running the definition) raises TypeError.
doc2vec_params = doc2vec_params()
doc2vec_params

{'vectorizationType': <VectorizationType.doc2vec: 2>,
 'linkType': <LinkType.req2tc: 1>,
 'system': 'libest',
 'system_path_config': {'system_path': '../dvc-ds4se/se-benchmarking/traceability/cisco/libest_data/[libest-all-corpus-1596063103.098236].csv',
  'sep': '~',
  'names': ['ids', 'conv'],
  'prep': <Preprocessing.conv: 1>},
 'source_type': 'req',
 'target_type': 'tc',
 'path_to_trained_model': '../dvc-ds4se//models/pv/bpe8k/[doc2vec-Py-Java-PVDBOW-500-20E-8k-1594572857.17191].model',
 'saving_path': '../dvc-ds4se/metrics/traceability/experiments0.0.x/',
 'names': ['Source', 'Target', 'Linked?'],
 'model_prefix': '../dvc-ds4se/models/bpe/sentencepiece/wiki_py_java_bpe_8k'}

In [None]:
#Export
class Doc2VecSeqVect(BasicSequenceVectorization):
    """Sequence vectorization backed by a pre-trained gensim Doc2Vec model.

    Every source and target artifact is embedded with `infer_vector`
    (see `InferDoc2Vec`); a link is then scored by applying the metrics
    registered in `dict_distance_dispatcher` to the two inferred vectors.
    """

    def __init__(self, params):
        """Load the trained Doc2Vec model and register the supported metrics.

        params: experiment configuration forwarded to the base class;
            `params['path_to_trained_model']` is the file handed to
            `gensim.models.Doc2Vec.load`.
        """
        super().__init__(params)
        self.new_model = gensim.models.Doc2Vec.load( params['path_to_trained_model'] )
        # NOTE(review): `init_sims` / `infer_vector(steps=...)` are the gensim 3.x API;
        # gensim 4 deprecates the former and renames `steps` to `epochs` — confirm the pinned version.
        self.new_model.init_sims(replace=True)  # Keeps only the L2-normalized model vectors.
        # Filled in by InferDoc2Vec(); `distance` needs both of them.
        self.df_inferred_src = None
        self.df_inferred_trg = None

        # Metric -> callable applied to (source vector, target vector)
        self.dict_distance_dispatcher = {
            DistanceMetric.COS: self.cos_scipy,
            SimilarityMetric.Pearson: self.pearson_abs_scipy,
            DistanceMetric.EUC: self.euclidean_scipy,
            DistanceMetric.MAN: self.manhattan_scipy
        }

    @staticmethod
    def _inferred_vector(df_inferred, artifact_id):
        '''Return the inferred vector of the first artifact whose id contains `artifact_id`.'''
        # regex=False: ids are file paths, so characters such as '.', '[' or '+'
        # must be matched literally instead of being read as a regular expression.
        is_artifact = df_inferred['ids'].str.contains(artifact_id, regex=False)
        vectors = df_inferred.loc[is_artifact, 'inf-doc2vec']
        if vectors.empty:
            # Same exception type as the bare `.values[0]` lookup, but with a usable message
            raise IndexError("No inferred doc2vec vector found for artifact: " + str(artifact_id))
        return vectors.values[0]

    def _infer_frame(self, df_artifacts, text_column, steps):
        '''Return a copy of `df_artifacts` with an added 'inf-doc2vec' column of inferred vectors.'''
        inferred = df_artifacts.copy()
        inferred['inf-doc2vec'] = [self.new_model.infer_vector(artifact.split(), steps=steps)
                                   for artifact in inferred[text_column].values]
        return inferred

    def distance(self, metric_list, link):
        '''Compute every metric of `metric_list` for one link.

        metric_list: keys of `dict_distance_dispatcher`.
        link: (source id, target id); each id is looked up as a literal substring
            of the 'ids' column of the inferred frames, so `InferDoc2Vec` must
            have been called first.

        Returns one flat list with the concatenated results of all metrics, in
        `metric_list` order (empty when `metric_list` is empty).
        Raises IndexError when an artifact of the link has no inferred vector.
        '''
        source_vector = self._inferred_vector(self.df_inferred_src, link[0])
        target_vector = self._inferred_vector(self.df_inferred_trg, link[1])

        dist = [ self.dict_distance_dispatcher[metric](source_vector, target_vector) for metric in metric_list]
        logging.info("Computed distances or similarities "+ str(link) + str(dist))
        # reduce() without an initial value raises on an empty sequence
        return functools.reduce(lambda a,b : a+b, dist) if dist else [] #Always return a list

    def computeDistanceMetric(self, links, metric_list):
        '''Compute the requested metrics for every link.

        links: iterable of (source id, target id) pairs.
        metric_list: metrics to compute; each needs an entry in `dict_labels`
            and in `dict_distance_dispatcher`.

        Returns a pair: the rows `[source, target, value_1, ..., value_n]`
        (one per link) and the concatenated labels of the computed metrics.
        '''
        metric_labels = [ self.dict_labels[metric] for metric in metric_list] #tracking of the labels
        distSim = [[link[0], link[1]] + self.distance( metric_list, link ) for link in links] #Return the link with metrics

        labels = functools.reduce(lambda a,b : a+b, metric_labels) if metric_labels else []
        return distSim, labels

    def InferDoc2Vec(self, steps=200):
        '''Infer a doc2vec vector for every source and target artifact.

        steps: number of inference steps handed to `infer_vector`.

        The results are stored in `df_inferred_src` / `df_inferred_trg`: copies of
        the source and target frames with an extra 'inf-doc2vec' column.
        '''
        text = self.params['system_path_config']['names'][1]  # name of the column holding the text
        self.df_inferred_src = self._infer_frame(self.df_source, text, steps)
        self.df_inferred_trg = self._infer_frame(self.df_target, text, steps)

        logging.info("Infer Doc2Vec on Source and Target Complete")

### Testing Doc2Vec SequenceVectorization

In [None]:
doc2vec = Doc2VecSeqVect(params = doc2vec_params)

2020-12-19 03:33:52,739 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-12-19 03:33:52,794 : INFO : built Dictionary(6957 unique tokens: ['");', '"../../', '("\\', '();', ')))']...) from 87 documents (total 88944 corpus positions)
2020-12-19 03:33:52,795 : INFO : conventional preprocessing documents, dictionary, and vocab for the test corpus
2020-12-19 03:33:52,796 : INFO : loading Doc2Vec object from ../dvc-ds4se//models/pv/bpe8k/[doc2vec-Py-Java-PVDBOW-500-20E-8k-1594572857.17191].model
2020-12-19 03:33:52,968 : INFO : loading vocabulary recursively from ../dvc-ds4se//models/pv/bpe8k/[doc2vec-Py-Java-PVDBOW-500-20E-8k-1594572857.17191].model.vocabulary.* with mmap=None
2020-12-19 03:33:52,970 : INFO : loading trainables recursively from ../dvc-ds4se//models/pv/bpe8k/[doc2vec-Py-Java-PVDBOW-500-20E-8k-1594572857.17191].model.trainables.* with mmap=None
2020-12-19 03:33:52,971 : INFO : loading syn1neg from ../dvc-ds4se//models/pv/bpe8k/[doc2vec-Py-Java-PVDBOW-500-20

In [None]:
doc2vec.df_source.head(2)

Unnamed: 0,ids,conv
35,test_data/LibEST_semeru_format/requirements/RQ...,requir http uri control est server must suppor...
36,test_data/LibEST_semeru_format/requirements/RQ...,requir server side key generat respons request...


In [None]:
#[step1]Apply Doc2Vec Inference
doc2vec.InferDoc2Vec( steps = 200 )

2020-12-19 03:34:15,393 : INFO : Infer Doc2Vec on Source and Target Complete


In [None]:
doc2vec.df_inferred_src.head(2)

Unnamed: 0,ids,conv,inf-doc2vec
35,test_data/LibEST_semeru_format/requirements/RQ...,requir http uri control est server must suppor...,"[-1.627681, -1.1846577, 0.0467408, -0.09786891..."
36,test_data/LibEST_semeru_format/requirements/RQ...,requir server side key generat respons request...,"[-1.2679539, 0.8652082, -0.66573286, -2.190202..."


In [None]:
len(doc2vec.df_inferred_src['inf-doc2vec'].values[35])

500

In [None]:
len(doc2vec.df_inferred_src['inf-doc2vec'].values[36])

500

In [None]:
#test_inferDoc2Vec_trg = inferDoc2Vec(df_target)
#test_inferDoc2Vec_trg.head()
doc2vec.df_inferred_trg.head(2)

Unnamed: 0,ids,conv,inf-doc2vec
0,test_data/LibEST_semeru_format/test/us903.c,unit test user stori server simpl enrol august...,"[-1.0151138, -1.3879219, -2.6165612, -4.045337..."
1,test_data/LibEST_semeru_format/test/us3496.c,unit test uri path segment extens support marc...,"[-2.4390655, -0.9076792, -1.7120461, -2.499704..."


In [None]:
# Sanity check: the Pearson correlation of a vector with itself should be ~1.0
# (pearsonr returns a (coefficient, p-value) pair).
pearsonr(doc2vec.df_inferred_trg['inf-doc2vec'][0], doc2vec.df_inferred_trg['inf-doc2vec'][0])

(0.9999999999999999, 0.0)

In [None]:
# Number of source artifacts that received an inferred vector.
len(doc2vec.df_inferred_src['inf-doc2vec'])

52

In [None]:
# Pearson correlation between the target vector labelled 0 and the source
# vector labelled 35 (integer keys on these Series are index labels, not
# positions). Returns a (coefficient, p-value) pair.
pearsonr(doc2vec.df_inferred_trg['inf-doc2vec'][0], doc2vec.df_inferred_src['inf-doc2vec'][35])

(0.532670673449515, 5.46237708290494e-38)

In [None]:
# [step 2] Non-ground-truth computation: score source/target artifact pairs
# with Euclidean, cosine and Manhattan distances plus Pearson correlation.
metric_l = [DistanceMetric.EUC,DistanceMetric.COS,DistanceMetric.MAN, SimilarityMetric.Pearson]
# NOTE(review): `samples = 50` is presumably ignored while `sampling=False` --
# verify against ComputeDistanceArtifacts. The call logs one INFO line per
# scored pair, so the cell output is very long.
doc2vec.ComputeDistanceArtifacts( sampling=False, samples = 50, metric_list = metric_l )
# Preview the first rows of the resulting link dataframe.
doc2vec.df_nonground_link.head()

2020-12-19 03:34:43,609 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ17.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[44.1058349609375, 0.022170080675061637], [0.46729451417922974, 0.5327054858207703], [795.84357, 0.0012549514629291826], [0.532670673449515]]
2020-12-19 03:34:43,615 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ17.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[43.145660400390625, 0.02265228316736545], [0.38369375467300415, 0.6163062453269958], [746.00366, 0.001338681522894036], [0.6146666837799339]]
2020-12-19 03:34:43,620 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ17.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[47.40620803833008, 0.020658507256097355], [0.389950692653656, 0.610049307346344], [850.88806, 0.0011738631460707324], [0.6097129258371485]]
2020-12-19 03:34:43,623 : INFO : Computed distances or si

2020-12-19 03:34:43,698 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ46.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[50.58488464355469, 0.01938552362595902], [0.6353503465652466, 0.3646496534347534], [904.527, 0.001104329329555355], [0.3662752322148147]]
2020-12-19 03:34:43,701 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ46.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[45.83034133911133, 0.021353677368241373], [0.6907251179218292, 0.3092748820781708], [820.71185, 0.0012169716139736923], [0.3099119313930767]]
2020-12-19 03:34:43,704 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ46.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[54.74112319946289, 0.017940076241765357], [0.6133263409137726, 0.3866736590862274], [970.46674, 0.001029371323903838], [0.38779708281187525]]
2020-12-19 03:34:43,707 : INFO : Computed distances or simi

2020-12-19 03:34:43,776 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ18.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[53.88861846923828, 0.018218713239438498], [0.4194219708442688, 0.5805780291557312], [960.6216, 0.0010399101046460319], [0.5803437211183244]]
2020-12-19 03:34:43,779 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ18.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[43.54933166503906, 0.02244702586155224], [0.43223053216934204, 0.567769467830658], [771.17303, 0.0012950465182068886], [0.5675710510263299]]
2020-12-19 03:34:43,781 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ18.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[42.812747955322266, 0.022824407202664914], [0.5435371696949005, 0.4564628303050995], [769.4116, 0.0012980074191771722], [0.4566375218074007]]
2020-12-19 03:34:43,784 : INFO : Computed distances or 

2020-12-19 03:34:43,851 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ48.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[48.700164794921875, 0.020120657630136776], [0.47075098752975464, 0.5292490124702454], [877.14, 0.0011387705642822222], [0.5282152014538007]]
2020-12-19 03:34:43,854 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ48.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[50.801265716552734, 0.019304547604528068], [0.4562833905220032, 0.5437166094779968], [909.79285, 0.00109794450367668], [0.5438230430654303]]
2020-12-19 03:34:43,857 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ48.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[47.12606430053711, 0.02077876124993748], [0.5910617709159851, 0.4089382290840149], [852.8504, 0.0011711653431130595], [0.4081211586903623]]
2020-12-19 03:34:43,859 : INFO : Computed distances or si

2020-12-19 03:34:43,930 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ29.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[54.751319885253906, 0.017936795076030077], [0.589832216501236, 0.41016778349876404], [966.4589, 0.0010336356158128332], [0.4095022400851886]]
2020-12-19 03:34:43,933 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ29.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[46.95840835571289, 0.020851400917705355], [0.5977496802806854, 0.4022503197193146], [837.5758, 0.0011924980344598741], [0.4012898968696515]]
2020-12-19 03:34:43,935 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ29.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[50.391754150390625, 0.019458374529766835], [0.585319995880127, 0.41468000411987305], [870.6389, 0.0011472640581160949], [0.41426352687099677]]
2020-12-19 03:34:43,938 : INFO : Computed distances or

2020-12-19 03:34:44,008 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ47.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[38.74907684326172, 0.025157816971276916], [0.6213461756706238, 0.3786538243293762], [678.126, 0.001472480857029874], [0.3782795522267616]]
2020-12-19 03:34:44,012 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ47.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[53.026512145996094, 0.018509431023377845], [0.6037310361862183, 0.39626896381378174], [949.74817, 0.0010518032352450637], [0.39551959413460713]]
2020-12-19 03:34:44,014 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ47.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[46.137474060058594, 0.021214543628830945], [0.5788751840591431, 0.42112481594085693], [834.52625, 0.0011968504949353735], [0.42053440876770176]]
2020-12-19 03:34:44,018 : INFO : Computed distances

2020-12-19 03:34:44,094 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ36.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[45.15864944458008, 0.021664412023160252], [0.48417818546295166, 0.5158218145370483], [810.00867, 0.0012330324455208726], [0.5155688191979837]]
2020-12-19 03:34:44,098 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ36.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[39.79519271850586, 0.02451269214243402], [0.49947410821914673, 0.5005258917808533], [720.6073, 0.0013857952937430971], [0.5014208567574873]]
2020-12-19 03:34:44,102 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ36.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[50.10858154296875, 0.019566185752176776], [0.4664608836174011, 0.5335391163825989], [875.75366, 0.0011405712268074337], [0.5330327937078874]]
2020-12-19 03:34:44,106 : INFO : Computed distances or

2020-12-19 03:34:44,192 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ56.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[43.32960891723633, 0.02255828608519887], [0.47851723432540894, 0.5214827656745911], [786.5718, 0.001269725539648854], [0.5213565923808424]]
2020-12-19 03:34:44,195 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ56.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[41.90338897705078, 0.02330818203044297], [0.46826982498168945, 0.5317301750183105], [771.938, 0.001293764849394527], [0.5316654524025899]]
2020-12-19 03:34:44,198 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ15.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[45.413116455078125, 0.021545633570369493], [0.5211955606937408, 0.47880443930625916], [813.36053, 0.0012279573449709472], [0.47834368546638967]]
2020-12-19 03:34:44,201 : INFO : Computed distances or si

2020-12-19 03:34:44,274 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ35.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[39.276241302490234, 0.02482853334027898], [0.34519344568252563, 0.6548065543174744], [725.55676, 0.0013763549544158027], [0.6545699605357329]]
2020-12-19 03:34:44,279 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ35.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[41.564369201660156, 0.023493828729429324], [0.31360650062561035, 0.6863934993743896], [729.27277, 0.0013693513525395225], [0.6862208016107652]]
2020-12-19 03:34:44,283 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ35.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[39.2136116027832, 0.02486720192848308], [0.4556097388267517, 0.5443902611732483], [710.9227, 0.0014046468227152927], [0.5439887750175953]]
2020-12-19 03:34:44,286 : INFO : Computed distances o

2020-12-19 03:34:44,364 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ51.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[48.2038688659668, 0.0203236050954456], [0.5032305121421814, 0.4967694878578186], [854.51794, 0.001168882554270923], [0.4965133430064853]]
2020-12-19 03:34:44,367 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ51.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[49.597469329833984, 0.019763834303277418], [0.5290161073207855, 0.4709838926792145], [887.1375, 0.0011259517656392974], [0.47114026809934956]]
2020-12-19 03:34:44,370 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ51.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[57.84049606323242, 0.016995098051609876], [0.43848198652267456, 0.5615180134773254], [1033.1694, 0.0009669595402031843], [0.5612820901112828]]
2020-12-19 03:34:44,372 : INFO : Computed distances or 

2020-12-19 03:34:44,448 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ26.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[50.155216217041016, 0.019548348613310645], [0.4162748456001282, 0.5837251543998718], [892.3734, 0.0011193527648710156], [0.5832990112293299]]
2020-12-19 03:34:44,453 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ26.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[42.716468811035156, 0.02287467462942877], [0.41253262758255005, 0.58746737241745], [761.20245, 0.0013119873798088954], [0.5867945995600699]]
2020-12-19 03:34:44,455 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ26.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[45.04352951049805, 0.021718578280841772], [0.4050264358520508, 0.5949735641479492], [802.672, 0.0012442887193349738], [0.5942640083820547]]
2020-12-19 03:34:44,458 : INFO : Computed distances or s

2020-12-19 03:34:44,536 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ10.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[48.72034454345703, 0.020112491359064715], [0.530127078294754, 0.46987292170524597], [886.04297, 0.0011273411043539146], [0.4693980006590045]]
2020-12-19 03:34:44,538 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ10.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[51.05840301513672, 0.019209194713661035], [0.44156455993652344, 0.5584354400634766], [895.45105, 0.0011155098766607201], [0.5570796749302883]]
2020-12-19 03:34:44,541 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ10.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[56.01335906982422, 0.01753974886438985], [0.4537159204483032, 0.5462840795516968], [1001.3124, 0.0009976929568260308], [0.5455979638755647]]
2020-12-19 03:34:44,589 : INFO : Computed distances or

2020-12-19 03:34:44,662 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ34.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[41.40262222290039, 0.023583447144925152], [0.39924198389053345, 0.6007580161094666], [748.4782, 0.0013342616050180094], [0.5997245097649824]]
2020-12-19 03:34:44,666 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ34.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[34.167442321777344, 0.028435391770892382], [0.36781156063079834, 0.6321884393692017], [604.1018, 0.001652614467558363], [0.6327423091546336]]
2020-12-19 03:34:44,669 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ34.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[48.77573013305664, 0.020090112135510162], [0.45901936292648315, 0.5409806370735168], [862.93945, 0.0011574885211953782], [0.5404527041211401]]
2020-12-19 03:34:44,672 : INFO : Computed distances or

2020-12-19 03:34:44,738 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ25.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[56.47686767578125, 0.017398303707864812], [0.4813719391822815, 0.5186280608177185], [998.06287, 0.0010009380128325825], [0.5176494282634643]]
2020-12-19 03:34:44,741 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ25.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[47.432716369628906, 0.02064720038348041], [0.5438140332698822, 0.4561859667301178], [866.0998, 0.001153269795094], [0.45581749516274367]]
2020-12-19 03:34:44,744 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ25.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[41.57746124267578, 0.023486604668614925], [0.5286154448986053, 0.47138455510139465], [735.64215, 0.0013575112404399816], [0.47189181273303443]]
2020-12-19 03:34:44,747 : INFO : Computed distances or 

2020-12-19 03:34:44,818 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ16.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[46.61759948730469, 0.021000638645520334], [0.4295058250427246, 0.5704941749572754], [842.749, 0.0011851865569288854], [0.5717980286580447]]
2020-12-19 03:34:44,820 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ16.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[40.3902473449707, 0.024160280842620025], [0.3836331367492676, 0.6163668632507324], [732.59344, 0.0013631528567429017], [0.61686501381403]]
2020-12-19 03:34:44,823 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ16.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[41.1828727722168, 0.023706303868868718], [0.4474904537200928, 0.5525095462799072], [741.12787, 0.0013474766845987544], [0.5542689348951401]]
2020-12-19 03:34:44,826 : INFO : Computed distances or simil

2020-12-19 03:34:44,895 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ27.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[41.8160400390625, 0.023355733017057782], [0.29586178064346313, 0.7041382193565369], [765.2317, 0.0013050882830410214], [0.703286066816631]]
2020-12-19 03:34:44,898 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ27.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[46.15293884277344, 0.021207585879946863], [0.32468706369400024, 0.6753129363059998], [806.9219, 0.0012377434389927863], [0.6754227507358839]]
2020-12-19 03:34:44,900 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ27.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[40.06373977661133, 0.024352384985879195], [0.4352034330368042, 0.5647965669631958], [712.13654, 0.0014022560197342886], [0.563790708562385]]
2020-12-19 03:34:44,903 : INFO : Computed distances or s

2020-12-19 03:34:44,968 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ41.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[55.40009307861328, 0.017730467192778383], [0.6689041554927826, 0.3310958445072174], [1001.89923, 0.0009971091502839575], [0.33109966198523294]]
2020-12-19 03:34:44,971 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ41.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[48.66328430175781, 0.020135599448556918], [0.6663270890712738, 0.3336729109287262], [868.39636, 0.001150223354223722], [0.3336585116742791]]
2020-12-19 03:34:44,974 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ41.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[48.41179656982422, 0.02023808218725445], [0.598414957523346, 0.40158504247665405], [875.1994, 0.001141292721590961], [0.401607955643939]]
2020-12-19 03:34:44,977 : INFO : Computed distances or si

2020-12-19 03:34:45,045 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ11.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[48.57405471801758, 0.0201718420187355], [0.5659232139587402, 0.43407678604125977], [878.0425, 0.001137601449553097], [0.4342031412267965]]
2020-12-19 03:34:45,048 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ11.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[40.476966857910156, 0.02410976683578992], [0.45870476961135864, 0.5412952303886414], [725.06024, 0.001377296183660563], [0.5441186722213575]]
2020-12-19 03:34:45,051 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ11.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[53.6974983215332, 0.01828237178456701], [0.4999915361404419, 0.5000084638595581], [955.6619, 0.0010453013468399062], [0.4998810363331946]]
2020-12-19 03:34:45,054 : INFO : Computed distances or simil

2020-12-19 03:34:45,122 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ24.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[46.49126434326172, 0.021056504050347202], [0.586226224899292, 0.413773775100708], [842.25806, 0.0011858766034015783], [0.41346338073265965]]
2020-12-19 03:34:45,125 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ24.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[45.81470489501953, 0.021360809648217752], [0.6216133236885071, 0.3783866763114929], [822.5738, 0.0012142202803393325], [0.37827329348327]]
2020-12-19 03:34:45,128 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ32.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[42.64375305175781, 0.022912786597753962], [0.42085474729537964, 0.5791452527046204], [773.28406, 0.0012915156784674602], [0.5790062475065986]]
2020-12-19 03:34:45,131 : INFO : Computed distances or sim

2020-12-19 03:34:45,199 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ8.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[47.528076171875, 0.02060662772738478], [0.470378577709198, 0.529621422290802], [829.70966, 0.0012037900282777507], [0.5294147105319273]]
2020-12-19 03:34:45,202 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ8.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[41.21109390258789, 0.02369045451197586], [0.5286145806312561, 0.4713854193687439], [744.4546, 0.001341463334754708], [0.47010073109851747]]
2020-12-19 03:34:45,205 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ8.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[43.68613052368164, 0.022378308174838386], [0.46537643671035767, 0.5346235632896423], [797.22125, 0.0012527854864067345], [0.5334110970909172]]
2020-12-19 03:34:45,207 : INFO : Computed distances or similari

2020-12-19 03:34:45,277 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ14.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[47.70231246948242, 0.020532905919541593], [0.6090386807918549, 0.39096131920814514], [854.5974, 0.0011687739886152967], [0.39369650897284664]]
2020-12-19 03:34:45,280 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ14.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[60.16085433959961, 0.016350327522363164], [0.4865170121192932, 0.5134829878807068], [1079.6365, 0.0009253805729271509], [0.5134595542316439]]
2020-12-19 03:34:45,282 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ14.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[58.35478591918945, 0.016847841071509943], [0.5319195687770844, 0.46808043122291565], [1043.9972, 0.00095694037006912], [0.46590146581970937]]
2020-12-19 03:34:45,285 : INFO : Computed distances 

2020-12-19 03:34:45,351 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ45.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[53.63154983520508, 0.01830444135332933], [0.5754174292087555, 0.4245825707912445], [949.6291, 0.0010519349883117979], [0.424258638033888]]
2020-12-19 03:34:45,354 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ45.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[46.988582611083984, 0.02083828997627091], [0.6086217164993286, 0.3913782835006714], [836.193, 0.001194467713138197], [0.3909521527906794]]
2020-12-19 03:34:45,356 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ45.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[47.7219352722168, 0.02052463627343309], [0.5245847105979919, 0.47541528940200806], [857.33875, 0.0011650411981151704], [0.47506053734448567]]
2020-12-19 03:34:45,359 : INFO : Computed distances or simi

2020-12-19 03:34:45,427 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ39.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[50.28635787963867, 0.019498362553777925], [0.7080725133419037, 0.2919274866580963], [914.5032, 0.001092295503267953], [0.29120365238029106]]
2020-12-19 03:34:45,430 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ39.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[53.06378173828125, 0.018496671299113438], [0.6671901643276215, 0.33280983567237854], [934.2771, 0.001069201844477596], [0.3309970735206006]]
2020-12-19 03:34:45,433 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ39.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[57.91987991333008, 0.0169722002399017], [0.6656763553619385, 0.3343236446380615], [1028.9243, 0.0009709451306959468], [0.3333165074012357]]
2020-12-19 03:34:45,435 : INFO : Computed distances or sim

2020-12-19 03:34:45,505 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ37.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[44.597023010253906, 0.021931256340465888], [0.4944889545440674, 0.5055110454559326], [799.4813, 0.0012492483846465055], [0.5049928479316562]]
2020-12-19 03:34:45,507 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ37.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[36.06361389160156, 0.026980639365731016], [0.4924585819244385, 0.5075414180755615], [642.36786, 0.0015543207298704604], [0.5073139966251116]]
2020-12-19 03:34:45,510 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ37.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[50.592926025390625, 0.01938250215752187], [0.49470651149749756, 0.5052934885025024], [897.06836, 0.0011135009819251823], [0.5048814190097588]]
2020-12-19 03:34:45,513 : INFO : Computed distances or

2020-12-19 03:34:45,579 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ20.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[60.782230377197266, 0.01618588377102492], [0.49214673042297363, 0.5078532695770264], [1088.2468, 0.0009180655623431741], [0.5071599287121652]]
2020-12-19 03:34:45,582 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ20.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[56.61284255981445, 0.017357241121400774], [0.463604211807251, 0.536395788192749], [1020.62134, 0.000978836250684361], [0.5349702545155455]]
2020-12-19 03:34:45,585 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ20.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[45.286949157714844, 0.021604361881632585], [0.4878261089324951, 0.5121738910675049], [814.3391, 0.0012264835405369878], [0.5119761910683595]]
2020-12-19 03:34:45,588 : INFO : Computed distances or

2020-12-19 03:34:45,655 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ5.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[51.663536071777344, 0.018988470478644996], [0.7260843217372894, 0.27391567826271057], [921.15845, 0.0010844123403794543], [0.2744849370423181]]
2020-12-19 03:34:45,658 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ5.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[53.64399337768555, 0.018300273061821293], [0.7086714506149292, 0.2913285493850708], [957.4208, 0.00104338305748172], [0.2915207313411104]]
2020-12-19 03:34:45,661 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ5.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[48.26335906982422, 0.02029906240422286], [0.6992686092853546, 0.3007313907146454], [866.5552, 0.0011526644390075604], [0.3011101770728825]]
2020-12-19 03:34:45,664 : INFO : Computed distances or simi

2020-12-19 03:34:45,740 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ57.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[54.35374450683594, 0.01806562516970292], [0.5769197344779968, 0.4230802655220032], [942.3839, 0.001060013837631811], [0.42277850148254753]]
2020-12-19 03:34:45,743 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ57.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[46.38945770263672, 0.0211017396796326], [0.5493620336055756, 0.45063796639442444], [818.24426, 0.0012206371720077738], [0.45035997432791663]]
2020-12-19 03:34:45,799 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ57.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[50.16498565673828, 0.019544616052644254], [0.5657684206962585, 0.43423157930374146], [889.1494, 0.0011234069069777392], [0.43400490008601955]]
2020-12-19 03:34:45,803 : INFO : Computed distances or 

2020-12-19 03:34:45,863 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ21.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[50.19674301147461, 0.01953249252156279], [0.4974029064178467, 0.5025970935821533], [898.3583, 0.0011119039278087689], [0.5019743050289622]]
2020-12-19 03:34:45,866 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ21.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[43.06371307373047, 0.022694410666816264], [0.461189866065979, 0.538810133934021], [780.08777, 0.0012802658552064953], [0.5383549149447503]]
2020-12-19 03:34:45,869 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ21.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[44.007720947265625, 0.022218410062835085], [0.48658162355422974, 0.5134183764457703], [796.5672, 0.0012538128453217835], [0.5140135790866233]]
2020-12-19 03:34:45,872 : INFO : Computed distances or si

2020-12-19 03:34:45,937 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ22.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[41.178104400634766, 0.023708983943454564], [0.5258830785751343, 0.4741169214248657], [741.0399, 0.0013476363967769248], [0.4743562336930783]]
2020-12-19 03:34:45,939 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ22.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[50.92304611206055, 0.019259270687659494], [0.49864012002944946, 0.5013598799705505], [911.6072, 0.001095761708211177], [0.5009972776155881]]
2020-12-19 03:34:45,942 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ22.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[45.529945373535156, 0.021491536084389435], [0.5329070687294006, 0.46709293127059937], [823.7751, 0.001212451755202261], [0.46659363496930967]]
2020-12-19 03:34:45,945 : INFO : Computed distances o

2020-12-19 03:34:46,010 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ2.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[39.28301239013672, 0.024824359963825586], [0.393848180770874, 0.606151819229126], [689.6841, 0.00144783994016349], [0.6069753737840551]]
2020-12-19 03:34:46,012 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ13.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[50.98912048339844, 0.01923479356261331], [0.5143010318279266, 0.48569896817207336], [905.06995, 0.0011036675524838245], [0.48525449350046546]]
2020-12-19 03:34:46,015 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ13.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[48.171485900878906, 0.020336989653227576], [0.4280453324317932, 0.5719546675682068], [856.53516, 0.001166132948266516], [0.5705984437597339]]
2020-12-19 03:34:46,018 : INFO : Computed distances or simi

2020-12-19 03:34:46,082 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ53.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[40.62934494018555, 0.024021516587321608], [0.49445903301239014, 0.5055409669876099], [727.25037, 0.001373154132696104], [0.5049414683853055]]
2020-12-19 03:34:46,085 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ53.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[47.77665710449219, 0.020501609978267717], [0.643894225358963, 0.356105774641037], [855.3613, 0.0011677313852897775], [0.35526570661606494]]
2020-12-19 03:34:46,088 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ53.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[40.82758712768555, 0.023907666415166062], [0.727226972579956, 0.27277302742004395], [718.19037, 0.0013904524359438404], [0.27238151197308985]]
2020-12-19 03:34:46,090 : INFO : Computed distances or 

2020-12-19 03:34:46,155 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ7.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[66.4220199584961, 0.01483194957100935], [0.6423521041870117, 0.3576478958129883], [1174.765, 0.0008505100828323314], [0.3568461813968025]]
2020-12-19 03:34:46,158 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ7.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[63.009178161621094, 0.015622759559184348], [0.6537658274173737, 0.34623417258262634], [1122.148, 0.0008903546506901335], [0.3448623201721036]]
2020-12-19 03:34:46,161 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ7.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[50.12013244628906, 0.01956176465408576], [0.6681573688983917, 0.3318426311016083], [904.11163, 0.0011048360922654124], [0.33133871952589883]]
2020-12-19 03:34:46,164 : INFO : Computed distances or sim

2020-12-19 03:34:46,228 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ50.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[49.31775665283203, 0.019873699992221674], [0.581707775592804, 0.41829222440719604], [887.57086, 0.0011254026470728644], [0.41791161671400345]]
2020-12-19 03:34:46,231 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ50.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[49.67119598388672, 0.019735077899444033], [0.5328329205513, 0.46716707944869995], [879.4232, 0.0011358173885156826], [0.46699461153209]]
2020-12-19 03:34:46,234 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ50.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[46.14427947998047, 0.021211481245028762], [0.5633699893951416, 0.4366300106048584], [823.4416, 0.0012129422058654018], [0.43632490885224834]]
2020-12-19 03:34:46,236 : INFO : Computed distances or si

2020-12-19 03:34:46,302 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ33.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[55.17570114135742, 0.017801290943991164], [0.6104077696800232, 0.3895922303199768], [978.48706, 0.0010209425323512412], [0.3885917317178521]]
2020-12-19 03:34:46,305 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ33.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[48.14622497558594, 0.020347442768936245], [0.6576038300991058, 0.34239616990089417], [858.20447, 0.00116386731855739], [0.34065825596341265]]
2020-12-19 03:34:46,308 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ33.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[50.00704574584961, 0.01960513465105689], [0.5655998885631561, 0.43440011143684387], [889.05725, 0.0011235232328064395], [0.4339375851733665]]
2020-12-19 03:34:46,311 : INFO : Computed distances or

2020-12-19 03:34:46,375 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ23.txt', 'test_data/LibEST_semeru_format/test/us894.c')[[54.706600189208984, 0.01795119423198459], [0.6865675151348114, 0.3134324848651886], [986.03186, 0.0010131385218343596], [0.3135043799050539]]
2020-12-19 03:34:46,378 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ23.txt', 'test_data/LibEST_semeru_format/test/us1005.c')[[48.32079315185547, 0.02027542413847778], [0.680717408657074, 0.319282591342926], [863.40656, 0.0011568630455337594], [0.31932843448833287]]
2020-12-19 03:34:46,381 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ23.txt', 'test_data/LibEST_semeru_format/test/us898.c')[[47.31252670288086, 0.020698565532495123], [0.6225681602954865, 0.37743183970451355], [845.01196, 0.001182016382585459], [0.37744272264891915]]
2020-12-19 03:34:46,383 : INFO : Computed distances or 

2020-12-19 03:34:46,448 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ40.txt', 'test_data/LibEST_semeru_format/test/us3612.c')[[43.44173049926758, 0.022501374018648542], [0.6137309968471527, 0.3862690031528473], [778.98193, 0.001282080977686908], [0.38631519816510046]]
2020-12-19 03:34:46,451 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ40.txt', 'test_data/LibEST_semeru_format/test/us901.c')[[56.18059158325195, 0.017488451453742872], [0.6000524759292603, 0.39994752407073975], [998.4048, 0.001000595569335459], [0.3998501156437286]]
2020-12-19 03:34:46,453 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ40.txt', 'test_data/LibEST_semeru_format/test/us1864.c')[[48.86736297607422, 0.020053195924552666], [0.6508325040340424, 0.34916749596595764], [865.32605, 0.0011542998161321008], [0.34906440778027453]]
2020-12-19 03:34:46,456 : INFO : Computed distances 

2020-12-19 03:34:46,521 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ31.txt', 'test_data/LibEST_semeru_format/test/us895.c')[[46.821372985839844, 0.020911152013475338], [0.5433358252048492, 0.45666417479515076], [842.7163, 0.001185232512177859], [0.45693400197259404]]
2020-12-19 03:34:46,523 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ58.txt', 'test_data/LibEST_semeru_format/test/us903.c')[[48.34011459350586, 0.02026748434288436], [0.5860859751701355, 0.4139140248298645], [873.1681, 0.0011439447521604316], [0.413393240410978]]
2020-12-19 03:34:46,526 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ58.txt', 'test_data/LibEST_semeru_format/test/us3496.c')[[48.474586486816406, 0.020212397333860932], [0.521081805229187, 0.478918194770813], [856.1614, 0.0011666414596917685], [0.4777203163537993]]
2020-12-19 03:34:46,529 : INFO : Computed distances or sim

2020-12-19 03:34:46,594 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ19.txt', 'test_data/LibEST_semeru_format/test/us1060.c')[[39.837642669677734, 0.024487211666174544], [0.47759974002838135, 0.5224002599716187], [713.5393, 0.0013995031353858697], [0.5226751766284409]]
2020-12-19 03:34:46,596 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ19.txt', 'test_data/LibEST_semeru_format/test/us900.c')[[43.42311477661133, 0.022510803329047466], [0.4674469232559204, 0.5325530767440796], [771.498, 0.0012945017570664872], [0.5328871036724985]]
2020-12-19 03:34:46,599 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ19.txt', 'test_data/LibEST_semeru_format/test/us896.c')[[35.59723663330078, 0.027324467418670454], [0.47442901134490967, 0.5255709886550903], [622.0724, 0.001604949954047728], [0.5255861914449763]]
2020-12-19 03:34:46,602 : INFO : Computed distances or s

2020-12-19 03:34:46,670 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ1.txt', 'test_data/LibEST_semeru_format/test/us3512.c')[[57.67479705810547, 0.017043092607712015], [0.41819626092910767, 0.5818037390708923], [1038.9489, 0.0009615857525670361], [0.5814884070907771]]
2020-12-19 03:34:46,673 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ1.txt', 'test_data/LibEST_semeru_format/test/us1883.c')[[55.14423370361328, 0.01781126812201273], [0.4386489987373352, 0.5613510012626648], [973.61865, 0.0010260423372723405], [0.5597885845116074]]
2020-12-19 03:34:46,675 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ1.txt', 'test_data/LibEST_semeru_format/test/us748.c')[[43.254886627197266, 0.022596374687929726], [0.42891693115234375, 0.5710830688476562], [773.80817, 0.0012906420495181479], [0.5713928597943922]]
2020-12-19 03:34:46,681 : INFO : Computed distances or

2020-12-19 03:34:46,750 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ38.txt', 'test_data/LibEST_semeru_format/test/us1159.c')[[47.866050720214844, 0.02046410514583126], [0.5297326147556305, 0.4702673852443695], [867.5697, 0.0011513180779003287], [0.4693657404469805]]
2020-12-19 03:34:46,753 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ38.txt', 'test_data/LibEST_semeru_format/test/us2174.c')[[46.33984375, 0.0211238551035564], [0.43760108947753906, 0.5623989105224609], [832.69446, 0.0011994802057213975], [0.562286149166968]]
2020-12-19 03:34:46,756 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ38.txt', 'test_data/LibEST_semeru_format/test/us893.c')[[43.79230880737305, 0.022325261336727405], [0.4858347177505493, 0.5141652822494507], [790.09656, 0.0012640681979606098], [0.5136244102000073]]
2020-12-19 03:34:46,759 : INFO : Computed distances or similar

2020-12-19 03:34:46,825 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ6.txt', 'test_data/LibEST_semeru_format/test/us899.c')[[57.211307525634766, 0.017178792961481334], [0.6967165768146515, 0.3032834231853485], [1014.9628, 0.0009842879787282295], [0.30293800421038164]]
2020-12-19 03:34:46,828 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ6.txt', 'test_data/LibEST_semeru_format/test/us4020.c')[[48.801513671875, 0.020079710961973068], [0.6830730438232422, 0.3169269561767578], [883.09143, 0.001131104731157586], [0.31653041582066527]]
2020-12-19 03:34:46,831 : INFO : Computed distances or similarities ('test_data/LibEST_semeru_format/requirements/RQ6.txt', 'test_data/LibEST_semeru_format/test/us897.c')[[52.403560638427734, 0.018725343180215355], [0.6645509600639343, 0.3354490399360657], [924.5156, 0.0010804787871625614], [0.33520607133415975]]
2020-12-19 03:34:46,834 : INFO : Computed distances or s

Unnamed: 0,Source,Target,DistanceMetric.EUC,SimilarityMetric.EUC_sim,DistanceMetric.COS,SimilarityMetric.COS_sim,DistanceMetric.MAN,SimilarityMetric.MAN_sim,SimilarityMetric.Pearson
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,44.105835,0.02217,0.467295,0.532705,795.843567,0.001255,0.532671
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,43.14566,0.022652,0.383694,0.616306,746.003662,0.001339,0.614667
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,47.406208,0.020659,0.389951,0.610049,850.888062,0.001174,0.609713
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,39.96262,0.024413,0.373937,0.626063,716.214722,0.001394,0.624889
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,43.581291,0.022431,0.386722,0.613278,761.269653,0.001312,0.613646


In [None]:
# [step 3] Saving Non-GroundTruth Links
# The log line emitted by this call shows the destination CSV path; the file
# name ends with a timestamp that is needed to load the file in the next cell.
doc2vec.SaveLinks()

2020-12-19 03:34:46,963 : INFO : Saving in...../dvc-ds4se/metrics/traceability/experiments0.0.x/[libest-VectorizationType.doc2vec-LinkType.req2tc-False-1608348886.946251].csv


In [None]:
# Loading Non-GroundTruth Links.
# NOTE: replace the timestamp below with the one assigned in the previous step
# (it appears in the "Saving in..." log line); the hard-coded value only
# matches the file written by the run recorded in this notebook.
df_nonglinks_doc2vec = LoadLinks(timestamp=1608348886.946251, params=doc2vec_params)
df_nonglinks_doc2vec.head()

2020-12-19 03:36:41,779 : INFO : Loading computed links from... ../dvc-ds4se/metrics/traceability/experiments0.0.x/[libest-VectorizationType.doc2vec-LinkType.req2tc-False-1608348886.946251].csv


Unnamed: 0,Source,Target,DistanceMetric.EUC,SimilarityMetric.EUC_sim,DistanceMetric.COS,SimilarityMetric.COS_sim,DistanceMetric.MAN,SimilarityMetric.MAN_sim,SimilarityMetric.Pearson
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,44.105835,0.02217,0.467295,0.532705,795.843567,0.001255,0.532671
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,43.14566,0.022652,0.383694,0.616306,746.003662,0.001339,0.614667
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,47.406208,0.020659,0.389951,0.610049,850.888062,0.001174,0.609713
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,39.96262,0.024413,0.373937,0.626063,716.214722,0.001394,0.624889
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,43.581291,0.022431,0.386722,0.613278,761.269653,0.001312,0.613646


In [None]:
# [step 4] GroundTruth matching test
# After matching, the displayed frame carries an additional `Linked?` column
# next to the distance/similarity columns.
# NOTE(review): presumably `Linked?` is 1 when the (Source, Target) pair is in
# the ground truth and 0 otherwise -- confirm against MatchWithGroundTruth.
doc2vec.MatchWithGroundTruth(path_to_ground_truth)
doc2vec.df_ground_link

2020-12-19 03:36:50,514 : INFO : findDistInDF: default
2020-12-19 03:36:50,518 : INFO : findDistInDF: default
2020-12-19 03:36:50,524 : INFO : findDistInDF: default
2020-12-19 03:36:50,530 : INFO : findDistInDF: default
2020-12-19 03:36:50,534 : INFO : findDistInDF: default
2020-12-19 03:36:50,537 : INFO : findDistInDF: default
2020-12-19 03:36:50,540 : INFO : findDistInDF: default
2020-12-19 03:36:50,544 : INFO : findDistInDF: default
2020-12-19 03:36:50,548 : INFO : findDistInDF: default
2020-12-19 03:36:50,552 : INFO : findDistInDF: default
2020-12-19 03:36:50,556 : INFO : findDistInDF: default
2020-12-19 03:36:50,559 : INFO : findDistInDF: default
2020-12-19 03:36:50,563 : INFO : findDistInDF: default
2020-12-19 03:36:50,566 : INFO : findDistInDF: default
2020-12-19 03:36:50,570 : INFO : findDistInDF: default
2020-12-19 03:36:50,573 : INFO : findDistInDF: default
2020-12-19 03:36:50,577 : INFO : findDistInDF: default
2020-12-19 03:36:50,581 : INFO : findDistInDF: default
2020-12-19

2020-12-19 03:36:51,055 : INFO : findDistInDF: default
2020-12-19 03:36:51,058 : INFO : findDistInDF: default
2020-12-19 03:36:51,066 : INFO : findDistInDF: default
2020-12-19 03:36:51,070 : INFO : findDistInDF: default
2020-12-19 03:36:51,073 : INFO : findDistInDF: default
2020-12-19 03:36:51,077 : INFO : findDistInDF: default
2020-12-19 03:36:51,080 : INFO : findDistInDF: default
2020-12-19 03:36:51,084 : INFO : findDistInDF: default
2020-12-19 03:36:51,087 : INFO : findDistInDF: default
2020-12-19 03:36:51,090 : INFO : findDistInDF: default
2020-12-19 03:36:51,094 : INFO : findDistInDF: default
2020-12-19 03:36:51,097 : INFO : findDistInDF: default
2020-12-19 03:36:51,101 : INFO : findDistInDF: default
2020-12-19 03:36:51,107 : INFO : findDistInDF: default
2020-12-19 03:36:51,112 : INFO : findDistInDF: default
2020-12-19 03:36:51,115 : INFO : findDistInDF: default
2020-12-19 03:36:51,119 : INFO : findDistInDF: default
2020-12-19 03:36:51,122 : INFO : findDistInDF: default
2020-12-19

2020-12-19 03:36:51,637 : INFO : findDistInDF: default
2020-12-19 03:36:51,641 : INFO : findDistInDF: default
2020-12-19 03:36:51,644 : INFO : findDistInDF: default
2020-12-19 03:36:51,648 : INFO : findDistInDF: default
2020-12-19 03:36:51,651 : INFO : findDistInDF: default
2020-12-19 03:36:51,655 : INFO : findDistInDF: default
2020-12-19 03:36:51,658 : INFO : findDistInDF: default
2020-12-19 03:36:51,662 : INFO : findDistInDF: default
2020-12-19 03:36:51,665 : INFO : findDistInDF: default
2020-12-19 03:36:51,669 : INFO : findDistInDF: default
2020-12-19 03:36:51,672 : INFO : findDistInDF: default
2020-12-19 03:36:51,676 : INFO : findDistInDF: default
2020-12-19 03:36:51,679 : INFO : findDistInDF: default
2020-12-19 03:36:51,683 : INFO : findDistInDF: default
2020-12-19 03:36:51,686 : INFO : findDistInDF: default
2020-12-19 03:36:51,690 : INFO : findDistInDF: default
2020-12-19 03:36:51,693 : INFO : findDistInDF: default
2020-12-19 03:36:51,697 : INFO : findDistInDF: default
2020-12-19

Unnamed: 0,Source,Target,DistanceMetric.EUC,SimilarityMetric.EUC_sim,DistanceMetric.COS,SimilarityMetric.COS_sim,DistanceMetric.MAN,SimilarityMetric.MAN_sim,SimilarityMetric.Pearson,Linked?
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,44.105835,0.022170,0.467295,0.532705,795.843567,0.001255,0.532671,0
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,43.145660,0.022652,0.383694,0.616306,746.003662,0.001339,0.614667,0
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,47.406208,0.020659,0.389951,0.610049,850.888062,0.001174,0.609713,0
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,39.962620,0.024413,0.373937,0.626063,716.214722,0.001394,0.624889,0
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,43.581291,0.022431,0.386722,0.613278,761.269653,0.001312,0.613646,0
...,...,...,...,...,...,...,...,...,...,...
1087,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1864.c,48.111172,0.020362,0.642251,0.357749,846.068726,0.001181,0.357422,0
1088,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us1159.c,51.451206,0.019065,0.701924,0.298076,931.417725,0.001072,0.297687,0
1089,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us2174.c,53.151047,0.018467,0.655677,0.344323,952.509399,0.001049,0.344142,0
1090,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us893.c,46.669491,0.020978,0.562637,0.437363,841.761230,0.001187,0.437124,0


In [None]:
# [step 5] Saving GroundTruth Links
# Same call as step 3 but with grtruth enabled; the log line shows the CSV path,
# whose file name ends with the timestamp needed to load the file afterwards.
doc2vec.SaveLinks(grtruth = True)

2020-12-19 03:36:55,474 : INFO : Saving in...../dvc-ds4se/metrics/traceability/experiments0.0.x/[libest-VectorizationType.doc2vec-LinkType.req2tc-True-1608349015.451968].csv


In [None]:
# Loading GroundTruth Links (grtruth = True).
# NOTE: replace the timestamp below with the one assigned in the previous step
# (it appears in the "Saving in..." log line); the hard-coded value only
# matches the file written by the run recorded in this notebook.
df_glinks_doc2vec = LoadLinks(timestamp=1608349015.451968, params=doc2vec_params, grtruth = True)
df_glinks_doc2vec.head()

2020-12-19 03:37:11,312 : INFO : Loading computed links from... ../dvc-ds4se/metrics/traceability/experiments0.0.x/[libest-VectorizationType.doc2vec-LinkType.req2tc-True-1608349015.451968].csv


Unnamed: 0,Source,Target,DistanceMetric.EUC,SimilarityMetric.EUC_sim,DistanceMetric.COS,SimilarityMetric.COS_sim,DistanceMetric.MAN,SimilarityMetric.MAN_sim,SimilarityMetric.Pearson,Linked?
0,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us903.c,44.105835,0.02217,0.467295,0.532705,795.843567,0.001255,0.532671,0
1,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us3496.c,43.14566,0.022652,0.383694,0.616306,746.003662,0.001339,0.614667,0
2,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us899.c,47.406208,0.020659,0.389951,0.610049,850.888062,0.001174,0.609713,0
3,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us4020.c,39.96262,0.024413,0.373937,0.626063,716.214722,0.001394,0.624889,0
4,test_data/LibEST_semeru_format/requirements/RQ...,test_data/LibEST_semeru_format/test/us897.c,43.581291,0.022431,0.386722,0.613278,761.269653,0.001312,0.613646,0
