<a href="https://colab.research.google.com/github/asyrofist/Extraction-Requirement/blob/main/modul_wordEmbed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive

In [None]:
!pip install tracereq==0.0.2
!pip install py-automl
!pip install fasttext

# Sent Embed

In [None]:
#@title Modul Sentence Modeling { vertical-output: true }
#@markdown - One of the methods to represent sentences as vectors (Mu et al 2017)
#@markdown - Computing vector representations of each embedded word, and weight average them using PCA
#@markdown     - If there are **n** words in a sentence, select **N** words with high explained variance (n>N)
#@markdown     - Most of "energy" (around 80%) can be containted using only 4 words (N=4) in the original paper (Mu et al 2017)

#@markdown ## Parameter
dataset_param1 = '/content/drive/MyDrive/dataset/dataset_2.xlsx' #@param {type:"string"}
dataset_param2 = '/content/drive/MyDrive/dataset/dataset_2_split.xlsx' #@param {type:"string"}
dataset_index = '2005 - Grid 3D' #@param {type:"string"}

#@markdown ## Load Model
th_param =  3#@param {type:"number"}
sg_param =  1#@param {type:"number"}
window_param =  2#@param {type:"number"}
min_count_param = 1 #@param {type:"number"}
iter_param = 100 #@param {type:"number"}
n_param =  3#@param {type:"number"}

import re
import numpy as np
import pandas as pd
from gensim.models import Word2Vec
from multiprocessing import Pool
from scipy import spatial
from sklearn.decomposition import PCA
from traceability import prosesData
from tabulate import tabulate
from sklearn.metrics.pairwise import cosine_similarity


class sentenceMod:
  def __init__(self):
      pass

  def sent_model(self, data, th_param):
      # sentences = data # import the corpus and convert into a list
      sentences = [num.split() for num in data] # import the corpus and convert into a list
      threshold = th_param # set threshold to consider only sentences longer than certain integer
      for i in range(len(sentences)):
          if len(sentences[i]) < threshold:
              sentences[i] = None

      sentences = [sentence for sentence in sentences if sentence is not None] 
      model = Word2Vec(sentences = sentences, size = len(sentences), sg = sg_param, window = window_param, min_count = min_count_param, iter = iter_param, workers = Pool()._processes)
      model.init_sims(replace = True)
      for i in range(len(sentences)): # converting each word into its vector representation
          sentences[i] = [model[word] for word in sentences[i]]
      return sentences

  def sent_PCA(self, sentence, n = n_param): # define function to compute weighted vector representation of sentence
      pca = PCA(n_components = n) # dataset_param 'n' means number of words to be accounted when computing weighted average
      pca.fit(np.array(sentence).transpose())
      variance = np.array(pca.explained_variance_ratio_)
      words = []
      for _ in range(n):
          idx = np.argmax(variance)
          words.append(np.amax(variance) * sentence[idx])
          variance[idx] = 0
      return np.sum(words, axis = 0)    

  # define a function that computes cosine similarity between two words
  def cosine_similarity(self, v1, v2):
      return 1 - spatial.distance.cosine(v1, v2)    

if __name__ == "__main__":
    myProses1 = prosesData(dataset_param1)
    list_data1 = list(myProses1.fulldataset(dataset_index)['Requirement Statement'])
    id_data1 = list(myProses1.fulldataset(dataset_index)['ID'])
    cleaned_text1 = myProses1.apply_cleaning_function_to_list(list_data1)

    myDoc = sentenceMod()
    data_sent1 = myDoc.sent_model(cleaned_text1, th_param)
    sent_v1 = [myDoc.sent_PCA(num) for num in data_sent1] # computing vector representation of each sentence

    a = [[myDoc.cosine_similarity(num, angka) for angka in sent_v1] for num in sent_v1]
    df = pd.DataFrame(a, index= id_data1, columns= id_data1)
    print(tabulate(df, headers = 'keys', tablefmt = 'psql'))   

    # myUkur= pengukuranEvaluasi(df.values, df.values)
    # myUkur.ukur_evaluasi()

In [None]:
#@title Modul doc2vec
#@markdown Berikut ini penjelasan singkat bagaimana modul ini telah dibuat.

#@markdown ### Preprocess data
#@markdown - Use re module to preprocess data
#@markdown - Convert all letters into lowercase
#@markdown - Remove punctuations, numbers, etc.
#@markdown - For the doc2vec model, input data should be in format of **iterable TaggedDocuments"**
#@markdown - Each TaggedDocument instance comprises **words** and **tags**
#@markdown - Hence, each document (i.e., a sentence or paragraph) should have a unique tag which is identifiable
dataset_var = '/content/drive/MyDrive/dataset/dataset_2.xlsx' #@param {type:"string"}
dataset_index = '2005 - Grid 3D' #@param {type:"string"}

#@markdown  ### Create and train model
#@markdown - Create a doc2vec model and train it with Hamlet corpus
#@markdown - Key parameter description (https://radimrehurek.com/gensim/models/doc2vec.html)
#@markdown - **documents**: training data (has to be iterable TaggedDocument instances)
#@markdown - **size**: dimension of embedding space
#@markdown - **dm**: DBOW if 0, distributed-memory if 1
#@markdown - **window**: number of words accounted for each context (if the window size is 3, 3 word in the left neighorhood and 3 word in the right neighborhood are considered)
#@markdown - **min_count**: minimum count of words to be included in the vocabulary
#@markdown - **iter**: number of training iterations
#@markdown - **workers**: number of worker threads to train
dm_param = 1 #@param {type:"number"}
size_param = 100 #@param {type:"number"}
window_param = 3 #@param {type:"number"}
min_count_param = 1 #@param {type:"number"}
iter_param = 10 #@param {type:"number"}

#@markdown ### Save and load model
#@markdown - doc2vec model can be saved and loaded locally
#@markdown - Doing so can reduce time to train model again  
model_data = '/content/sample_data/doc2vec_model' #@param {type:"string"}

#@markdown ### Similarity calculation
#@markdown - Similarity between embedded words (i.e., vectors) can be computed using metrics such as cosine similarity
#@markdown - For other metrics and comparisons between them, refer to: https://github.com/taki0112/Vector_Similarity

import pandas as pd
import re
import numpy as np
from traceability import prosesData
from gensim.models import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
from nltk.corpus import gutenberg
from multiprocessing import Pool
from scipy import spatial
from tabulate import tabulate

class docMod:
  def __init__(self):
    pass

  def doc2vec_func(self, data):
      sentences = list(data)   # import the corpus and convert into a list
      for i in range(len(sentences)):
          sentences[i] = TaggedDocument(words = sentences[i], tags = ['sent{}'.format(i)])    # converting each sentence into a TaggedDocument    
      model = Doc2Vec(documents = sentences, dm = dm_param, size = size_param, window = window_param, min_count = min_count_param, iter = iter_param, workers = Pool()._processes)
      model.init_sims(replace = True)
      model.save(model_data)
      model = Doc2Vec.load(model_data)
      v1 = [[model.infer_vector(angka) for angka in num] for num in sentences]
      v1_df = pd.DataFrame(v1) # in doc2vec, infer_vector() function is used to infer the vector embedding of a document
      a = [[docMod.cosine_similarity(self, v1[num][0], v1[angka][0]) for angka in v1_df.index] for num in v1_df.index]
      return a

  def cosine_similarity(self, v1, v2): # define a function that computes cosine similarity between two words
      return 1 - spatial.distance.cosine(v1, v2)

if __name__ == "__main__":
    myProses = prosesData(dataset_var)
    list_data = list(myProses.fulldataset(dataset_index)['Requirement Statement'])
    id_data = list(myProses.fulldataset(dataset_index)['ID'])
    cleaned_text = myProses.apply_cleaning_function_to_list(list_data)
    myDoc = docMod()
    data_doc = myDoc.doc2vec_func(cleaned_text)
    data_df = pd.DataFrame(data_doc, index= id_data, columns= id_data)
    print(tabulate(data_df, headers = 'keys', tablefmt = 'psql'))   

# Word Embed

In [3]:
#@title Modul Word2vec
#@markdown Berikut ini penjelasan lengkap modul word2vec

#@markdown ### Preprocess data
#@markdown - Use re module to preprocess data
#@markdown - Convert all letters into lowercase
#@markdown - Remove punctuations, numbers, etc.
dataset_var1 = '/content/drive/MyDrive/dataset/dataset_2.xlsx'#@param {type:"string"}
dataset_var2 = '/content/drive/MyDrive/dataset/dataset_2_split.xlsx'#@param {type:"string"}
dataset_index = '2005 - Grid 3D'#@param {type:"string"}

sg_param = 1 #@param {type:"number"}
window_param = 3 #@param {type:"number"}
min_count_param = 1 #@param {type:"number"}
iter_param = 10 #@param {type:"number"}


#@markdown ### Create and train model
#@markdown - Create a word2vec model and train it with Hamlet corpus
#@markdown - Key parameter description (https://radimrehurek.com/gensim/models/word2vec.html)
#@markdown     - **sentences**: training data (has to be a list with tokenized sentences)
#@markdown     - **size**: dimension of embedding space
#@markdown     - **sg**: CBOW if 0, skip-gram if 1
#@markdown     - **window**: number of words accounted for each context (if the window size is 3, 3 word in the left neighorhood and 3 word in the right neighborhood are considered)
#@markdown     - **min_count**: minimum count of words to be included in the vocabulary
#@markdown     - **iter**: number of training iterations
#@markdown     - **workers**: number of worker threads to train

#@markdown ### Save and load model
#@markdown - word2vec model can be saved and loaded locally
#@markdown - Doing so can reduce time to train model again
model_data = '/content/sample_data/word2vec_model'#@param {type:"string"}

#@markdown ### Similarity calculation
#@markdown - Similarity between embedded words (i.e., vectors) can be computed using metrics such as cosine similarity
#@markdown - For other metrics and comparisons between them, refer to: https://github.com/taki0112/Vector_Similarity

import numpy as np
import pandas as pd
from gensim.models import Word2Vec
from multiprocessing import Pool
from traceability import prosesData
from gensim.utils import simple_preprocess  
from tabulate import tabulate


class wordMod:
  def __init__(self):
      pass

  def model_word(self, data):
      sentences = [num.split() for num in data]
      model = Word2Vec(sentences = sentences, 
                        size = len(sentences), 
                        sg = sg_param, window = window_param, 
                        min_count = min_count_param, iter = iter_param, 
                        workers = Pool()._processes)
      model.init_sims(replace = True)    
      model.save(model_data)
      model = Word2Vec.load(model_data)
      return model

  def tidy_sentence(self, sentence, vocabulary):
      return [word for word in simple_preprocess(sentence) if word in vocabulary]    

  def compute_sentence_similarity(self, sentence_1, sentence_2, model_wv):
      vocabulary = set(model_wv.wv.index2word)
      tokens_1 = wordMod.tidy_sentence(self, sentence_1, vocabulary)    
      tokens_2 = wordMod.tidy_sentence(self, sentence_2, vocabulary)    
      return model_wv.wv.n_similarity(tokens_1, tokens_2)

  def wordMeasure(self, data1, data2):
      sim = [[wordMod.compute_sentence_similarity(self, num, angka, wordMod.model_word(self, data)) for angka in data2] for num in data1]
      return sim

if __name__ == "__main__":
    myProses1 = prosesData(dataset_var1)
    list_data1 = list(myProses1.fulldataset(dataset_index)['Requirement Statement'])
    id_data1 = list(myProses1.fulldataset(dataset_index)['ID'])
    cleaned_text1 = myProses1.apply_cleaning_function_to_list(list_data1)

    myProses2 = prosesData(dataset_var1)
    list_data2 = list(myProses2.fulldataset(dataset_index)['Requirement Statement'])
    id_data2 = list(myProses2.fulldataset(dataset_index)['ID'])
    cleaned_text2 = myProses2.apply_cleaning_function_to_list(list_data2)

    dt_word = wordMod().wordMeasure(cleaned_text1, cleaned_text2)
    df_word = pd.DataFrame(dt_word, index= id_data1, columns= id_data2)
    print(tabulate(df_word, headers = 'keys', tablefmt = 'psql'))   

+------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+
|      |        F01 |        F02 |        F03 |        F04 |        F05 |        F06 |        F07 |        F08 |        F09 |       NF01 |       NF02 |       NF03 |       NF04 |       NF05 |       NF06 |       NF07 |       NF08 |
|------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------+------------|
| F01  |  1         |  0.422852  |  0.441888  |  0.521807  |  0.470901  |  0.620763  |  0.0441903 | -0.0243936 |  0.460406  | -0.134974  |  0.0604617 |  0.40881   |  0.125351  | -0.171688  |  0.0765274 |  0.169999  |  0.128459  |
| F02  |  0.422852  |  1         |  0.367773  |  0.244047  |  0.246719  |  0.064

In [2]:
#@title Modul Word2vec Pretrained { vertical-output: true }
dataset_var1 = "/content/drive/MyDrive/dataset/dataset_2.xlsx" #@param {type:"string"}
dataset_var2 = "/content/drive/MyDrive/dataset/dataset_2_split.xlsx" #@param {type:"string"}
dataset_index = "2005 - Grid 3D" #@param {type:"string"}
load_bin = "/content/drive/MyDrive/dataset/GoogleNews-vectors-negative300.bin" #@param {type:"string"}
#@markdown Word2Vec is a more recent model that embeds words in a lower-dimensional vector space using a shallow neural network. The result is a set of word-vectors where vectors close together in vector space have similar meanings based on context, and word-vectors distant to each other have differing meanings. For example, strong and powerful would be close together and strong and Paris would be relatively far.
#@markdown The are two versions of this model and Word2Vec class implements them both:
#@markdown - Skip-grams (SG)
#@markdown - Continuous-bag-of-words (CBOW)

import pandas as pd
import gensim.downloader as api
from traceability import prosesData
from gensim.utils import simple_preprocess  
from tabulate import tabulate
from gensim.models import KeyedVectors # Load Google Pre trained word2vec model https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit
  

class pretrainedMeasure:
  def __init__(self):
      pass

  def tidy_sentence(self, sentence, vocabulary):
      return [word for word in simple_preprocess(sentence) if word in vocabulary]    

  def compute_sentence_similarity(self, sentence_1, sentence_2, model_wv):
      vocabulary = set(model_wv.index2word)    
      tokens_1 = pretrainedMeasure.tidy_sentence(self, sentence_1, vocabulary)    
      tokens_2 = pretrainedMeasure.tidy_sentence(self, sentence_2, vocabulary)    
      return model_wv.n_similarity(tokens_1, tokens_2)

  def wordMeasure(self, data1, data2):
      # model_word2vec = api.load('word2vec-google-news-300') # didownload langsung dari source
      model_word2vec = KeyedVectors.load_word2vec_format(load_bin, binary=True)
      sim = [[pretrainedMeasure.compute_sentence_similarity(self, num, angka, model_word2vec) for angka in data2] for num in data1]
      return sim

if __name__ == "__main__":
    myProses1 = prosesData(dataset_var1)
    list_data1 = list(myProses1.fulldataset(dataset_index)['Requirement Statement'])
    id_data1 = list(myProses1.fulldataset(dataset_index)['ID'])
    cleaned_text1 = myProses1.apply_cleaning_function_to_list(list_data1)  

    myProses2 = prosesData(dataset_var2)
    list_data2 = list(myProses2.fulldataset(dataset_index)['Requirement Statement'])
    id_data2 = list(myProses2.fulldataset(dataset_index)['ID'])
    cleaned_text2 = myProses2.apply_cleaning_function_to_list(list_data2)  

    dt_word = pretrainedMeasure().wordMeasure(cleaned_text1, cleaned_text2)
    df_word = pd.DataFrame(dt_word, index= id_data1, columns= id_data2)
    print(tabulate(df_word, headers = 'keys', tablefmt = 'psql'))   

+------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+
|      |      F01 |      F02 |      F03 |      F04 |      F05 |      F06 |      F07 |      F08 |      F09 |     NF01 |     NF02 |     NF03 |     NF04 |     NF05 |     NF06 |     NF07 |     NF08 |
|------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------|
| F01  | 1        | 0.535725 | 0.577641 | 0.590245 | 0.529952 | 0.554168 | 0.428587 | 0.446485 | 0.639938 | 0.314172 | 0.427552 | 0.446093 | 0.345466 | 0.330312 | 0.540891 | 0.478917 | 0.412512 |
| F02  | 0.535725 | 1        | 0.686973 | 0.491833 | 0.586066 | 0.592669 | 0.41267  | 0.579903 | 0.678056 | 0.304399 | 0.329199 | 0.62628  | 0.361038 | 0.360136 | 0.513972 | 0.44782  | 0.454106 |
| F03  | 0.577641 | 

In [18]:
#@title Modul Glove Pretrained { vertical-output: true }
dataset_var1 = "/content/drive/MyDrive/dataset/dataset_2.xlsx" #@param {type:"string"}
dataset_var2 = "/content/drive/MyDrive/dataset/dataset_2_split.xlsx" #@param {type:"string"}
dataset_index = "2005 - Grid 3D" #@param {type:"string"}
data_input = "/content/drive/MyDrive/dataset/glove.6B.300d.txt" #@param {type:"string"}
data_output = "/content/drive/MyDrive/dataset/glove_vectors.txt" #@param {type:"string"}
#@markdown This script allows to convert GloVe vectors into the word2vec. Both files are presented in text format and almost identical except that word2vec includes number of vectors and its dimension which is only difference regard to GloVe.

import pandas as pd
from traceability import prosesData
from gensim.utils import simple_preprocess  
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
from tabulate import tabulate

class pretrainedMeasure:
  def __init__(self):
      pass

  def tidy_sentence(self, sentence, vocabulary):
      return [word for word in simple_preprocess(sentence) if word in vocabulary]    

  def compute_sentence_similarity(self, sentence_1, sentence_2, model_wv):
      vocabulary = set(model_wv.index2word)    
      tokens_1 = pretrainedMeasure.tidy_sentence(self, sentence_1, vocabulary)    
      tokens_2 = pretrainedMeasure.tidy_sentence(self, sentence_2, vocabulary)    
      return model_wv.n_similarity(tokens_1, tokens_2)

  def wordMeasure(self, data1, data2):
      # Convert and save glove word embedding to gensim format # https://nlp.stanford.edu/projects/glove/
      glove2word2vec(glove_input_file= data_input, word2vec_output_file= data_output) 
      glove_model = KeyedVectors.load_word2vec_format(data_output, binary=False) # Read saved gensim glove word embedding        
      sim = [[pretrainedMeasure.compute_sentence_similarity(self, num, angka, glove_model) for angka in data2] for num in data1]
      return sim

if __name__ == "__main__":
    myProses1 = prosesData(dataset_var1)
    list_data1 = list(myProses1.fulldataset(dataset_index)['Requirement Statement'])
    id_data1 = list(myProses1.fulldataset(dataset_index)['ID'])
    cleaned_text1 = myProses1.apply_cleaning_function_to_list(list_data1)    

    myProses2 = prosesData(dataset_var2)
    list_data2 = list(myProses2.fulldataset(dataset_index)['Requirement Statement'])
    id_data2 = list(myProses2.fulldataset(dataset_index)['ID'])
    cleaned_text2 = myProses2.apply_cleaning_function_to_list(list_data2)    

    dt_word = pretrainedMeasure().wordMeasure(cleaned_text1, cleaned_text2)
    df_word = pd.DataFrame(dt_word, index= id_data1, columns= id_data2)
    print(tabulate(df_word, headers = 'keys', tablefmt = 'psql'))   

+------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+
|      |      F01 |      F02 |      F03 |      F04 |      F05 |      F06 |      F07 |      F08 |      F09 |     NF01 |     NF02 |     NF03 |     NF04 |     NF05 |     NF06 |     NF07 |     NF08 |
|------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------|
| F01  | 1        | 0.658532 | 0.724895 | 0.772626 | 0.711642 | 0.736143 | 0.656843 | 0.613114 | 0.768676 | 0.551979 | 0.658281 | 0.623811 | 0.502694 | 0.493835 | 0.712047 | 0.675505 | 0.630335 |
| F02  | 0.658532 | 1        | 0.74617  | 0.658928 | 0.702454 | 0.719665 | 0.607963 | 0.690776 | 0.791737 | 0.423885 | 0.429631 | 0.720458 | 0.412376 | 0.414175 | 0.60749  | 0.561243 | 0.592377 |
| F03  | 0.724895 | 

In [16]:
#@title Modul Fasttext Pretrained { vertical-output: true }
#@markdown We distribute pre-trained word vectors for 157 languages, trained on Common Crawl and Wikipedia using fastText. These models were trained using CBOW with position-weights, in dimension 300, with character n-grams of length 5, a window of size 5 and 10 negatives. We also distribute three new word analogy datasets, for French, Hindi and Polish.

dataset_var1 = "/content/drive/MyDrive/dataset/dataset_2.xlsx" #@param {type:"string"}
dataset_var2 = "/content/drive/MyDrive/dataset/dataset_2_split.xlsx" #@param {type:"string"}
dataset_index = "2005 - Grid 3D" #@param {type:"string"}
pretrained_data = "/content/drive/MyDrive/dataset/cc.en.300.bin" #@param {type:"string"}

import pandas as pd
import fasttext
import fasttext.util
from traceability import prosesData
from tabulate import tabulate
from scipy.spatial import distance

class pretrainedMeasure:
  def __init__(self, dataWord1, dataWord2):
      self.data1 = dataWord1
      self.data2 = dataWord2

  # define a function that computes cosine similarity between two words
  def cosine_similarity(self, v1, v2):
      return 1 - distance.cosine(v1, v2)

  def sim_calculation(self): # pencarian kesmaaan dokumen
      fasttext.util.download_model('en', if_exists='ignore')  # English
      ft = fasttext.load_model(pretrained_data)
      raw_text1 = [ft.get_word_vector(num) for num in self.data1]
      raw_text2 = [ft.get_word_vector(num) for num in self.data2]
      b = [[pretrainedMeasure.cosine_similarity(self, num, angka) for angka in raw_text2] for num in raw_text1]
      return b  #kesamaan berdasarkan kata pertama

if __name__ == "__main__":
    myProses1 = prosesData(dataset_var1)
    list_data1 = list(myProses1.fulldataset(dataset_index)['Requirement Statement'])
    id_data1 = list(myProses1.fulldataset(dataset_index)['ID'])
    cleaned_text1 = myProses1.apply_cleaning_function_to_list(list_data1)    

    myProses2 = prosesData(dataset_var2)
    list_data2 = list(myProses2.fulldataset(dataset_index)['Requirement Statement'])
    id_data2 = list(myProses2.fulldataset(dataset_index)['ID'])
    cleaned_text2 = myProses2.apply_cleaning_function_to_list(list_data2)    

    data_word = pretrainedMeasure(cleaned_text1, cleaned_text2).sim_calculation()
    word_df = pd.DataFrame(data_word, index= id_data1, columns= id_data2)
    print(tabulate(word_df, headers = 'keys', tablefmt = 'psql'))       

+------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+
|      |      F01 |      F02 |      F03 |      F04 |      F05 |      F06 |      F07 |      F08 |      F09 |     NF01 |     NF02 |     NF03 |     NF04 |     NF05 |     NF06 |     NF07 |     NF08 |
|------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------|
| F01  | 1        | 0.620941 | 0.663894 | 0.663342 | 0.701809 | 0.734858 | 0.558847 | 0.563464 | 0.764665 | 0.50245  | 0.671332 | 0.545056 | 0.637175 | 0.365165 | 0.67028  | 0.628573 | 0.551983 |
| F02  | 0.620941 | 1        | 0.694993 | 0.547442 | 0.617525 | 0.608526 | 0.619202 | 0.596892 | 0.744773 | 0.459781 | 0.62146  | 0.669359 | 0.737103 | 0.344321 | 0.608425 | 0.496231 | 0.492421 |
| F03  | 0.663894 | 