## Part 1: Loading and Preprocessing

In [None]:
# Install JAX.
!pip install --upgrade jax
!pip install --upgrade jaxlib
!pip install --upgrade trax

# Make sure the Colab Runtime is set to Accelerator: TPU.
# Both os.environ['COLAB_TPU_ADDR'] lookups below raise KeyError when that
# variable is not set, i.e. when no TPU runtime is attached.
import requests
import os
# TPU_DRIVER_MODE is created after the first request, so re-running this cell
# in the same kernel does not post the driver-version request again.
if 'TPU_DRIVER_MODE' not in globals():
  url = 'http://' + os.environ['COLAB_TPU_ADDR'].split(':')[0] + ':8475/requestversion/tpu_driver0.1-dev20191206'
  # NOTE(review): the response is stored but never checked for an error status.
  resp = requests.post(url)
  TPU_DRIVER_MODE = 1

# The following is required to use TPU Driver as JAX's backend.
from jax.config import config
config.FLAGS.jax_xla_backend = "tpu_driver"
config.FLAGS.jax_backend_target = "grpc://" + os.environ['COLAB_TPU_ADDR']
# Echo the gRPC target so the TPU address in use is visible in the cell output.
print(config.FLAGS.jax_backend_target)

Collecting jax
[?25l  Downloading https://files.pythonhosted.org/packages/5f/3e/b66a9f1af6fe8d4ced9bff28a5b8b95c5dd4121c0266fd273dd56fa2fceb/jax-0.2.5.tar.gz (487kB)
[K     |████████████████████████████████| 491kB 3.3MB/s 
Building wheels for collected packages: jax
  Building wheel for jax (setup.py) ... [?25l[?25hdone
  Created wheel for jax: filename=jax-0.2.5-cp36-none-any.whl size=576151 sha256=b8ed1ff5eb05cf3724fa495ecd799f4261e68023d89848d3273a61f3bf7256e8
  Stored in directory: /root/.cache/pip/wheels/d1/0d/3e/c44cadf26545452c29274cb512526d63f1840278edf59b80db
Successfully built jax
Installing collected packages: jax
  Found existing installation: jax 0.2.4
    Uninstalling jax-0.2.4:
      Successfully uninstalled jax-0.2.4
Successfully installed jax-0.2.5
Requirement already up-to-date: jaxlib in /usr/local/lib/python3.6/dist-packages (0.1.56+cuda101)
Collecting trax
[?25l  Downloading https://files.pythonhosted.org/packages/85/1d/c0a3aeed127c26a0c3f0925fc9cc7278c272e523

In [None]:
import json
import random
import numpy as np
import pandas as pd
from termcolor import colored
import pickle
import trax   
from trax import layers as tl
from trax.supervised import training
!pip list | grep trax #to check the trax installation status

trax                          1.3.6                


In [None]:
'''
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)
os.chdir("/content/drive/My Drive/Colab Notebooks")
os.listdir()
'''

#https://drive.google.com/drive/folders/1Qf6JSsgn0K_jDSM2glyepiDRMrwC_p0q?usp=sharing
#download the source amino acid data file from google drive
#https://drive.google.com/file/d/1rbx8uc14DPkC1hIPt-YGMI-u2YKKjN0q/view?usp=sharing
!gdown --id 1mVLPzj6UrpBSZzOXsYRIWkwo37Ee7O3g # Download the global_area_surface glycoprotein_sample_out.pkl

# Load the sequences from the pkl file
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only run this on a download whose origin you trust.
with open("./global_area_surface glycoprotein_sample_out.pkl", 'rb') as f:
  mynewlist = pickle.load(f)    # Get all the collected data
  # The last 100 entries are used later as the "future" records to compare against.
  to_test_list = mynewlist[-100:]  # Get the future data


Downloading...
From: https://drive.google.com/uc?id=1mVLPzj6UrpBSZzOXsYRIWkwo37Ee7O3g
To: /content/global_area_surface glycoprotein_sample_out.pkl
28.7MB [00:00, 108MB/s] 


In [None]:
#download the vocab file from google drive
#https://drive.google.com/file/d/1ATh8qkDTLf_55Jf-uJrB-fWBnVL-6U24/view?usp=sharing
!gdown --id 1ATh8qkDTLf_55Jf-uJrB-fWBnVL-6U24   # Download the vocabulary file = en_8k.subword

VOCAB_FILE = 'en_8k.subword'

# vocabulary file directory
VOCAB_DIR = './'

Downloading...
From: https://drive.google.com/uc?id=1ATh8qkDTLf_55Jf-uJrB-fWBnVL-6U24
To: /content/en_8k.subword
  0% 0.00/59.3k [00:00<?, ?B/s]100% 59.3k/59.3k [00:00<00:00, 22.2MB/s]


In [None]:
def process_inputdata(data):
  """Return a new list in which every string of `data` has its characters
  separated by single spaces (e.g. "MFV" -> "M F V")."""
  return [" ".join(sequence) for sequence in data]

In [None]:
# Space-separate the characters of every loaded sequence
untokenized_data = process_inputdata(mynewlist) 

# Show the first processed sequence and its length; the length counts the
# inserted separator spaces as well as the sequence characters.
print(untokenized_data[0])
print(len(untokenized_data[0]))

M F V F L V L L P L V S S Q C V N L T T R T Q L P P A Y T N S F T R G V Y Y P D K V F R S S V L H S T Q D L F L P F F S N V T W F H A I H V S G T N G T K R F D N P V L P F N D G V Y F A S T E K S N I I R G W I F G T T L D S K T Q S L L I V N N A T N V V I K V C E F Q F C N D P F L G V Y Y H K N N K S W M E S E F R V Y S S A N N C T F E Y V S Q P F L M D L E G K Q G N F K N L R E F V F K N I D G Y F K I Y S K H T P I N L V R D L P Q G F S A L E P L V D L P I G I N I T R F Q T L L A L H R S Y L T P G D S S S G W T A G A A A Y Y V G Y L Q P R T F L L K Y N E N G T I T D A V D C A L D P L S E T K C T L K S F T V E K G I Y Q T S N F R V Q P T E S I V R F P N I T N L C P F G E V F N A T R F A S V Y A W N R K R I S N C V A D Y S V L Y N S A S F S T F K C Y G V S P T K L N D L C F T N V Y A D S F V I R G D E V R Q I A P G Q T G K I A D Y N Y K L P D D F T G C V I A W N S N N L D S K V G G N Y N Y L Y R L F R K S N L K P F E R D I S T E I Y Q A G S T P C N G V E G F N C Y F P L Q S Y G F Q P T 

## Part 2: ReformerLM Training

In [None]:
def ReformerLM(vocab_size=33000, n_layers=6, mode='train', attention_type=tl.SelfAttention):
    """
    Thin factory around Trax's ReformerLM class.

    Args:
        vocab_size (int): size of the vocabulary
        n_layers (int): number of decoder layers
        mode (string): setting of the model which can be 'train', 'eval', or 'predict'
        attention_type(class): attention class (or factory callable) to use
    Returns:
        model (ReformerLM): a reformer language model implemented in Trax
    """
    # Collect the constructor arguments first, then build the model in one call.
    reformer_kwargs = dict(
        vocab_size=vocab_size,
        n_layers=n_layers,
        mode=mode,
        attention_type=attention_type,
    )
    return trax.models.reformer.ReformerLM(**reformer_kwargs)

## Part 3: Decode from a pretrained model

In [None]:
#Download the model
#https://drive.google.com/file/d/1-I09p-wCVpQun6iU-H6kKlOTUI3iWdPK/view?usp=sharing
!gdown --id 1-I09p-wCVpQun6iU-H6kKlOTUI3iWdPK

Downloading...
From: https://drive.google.com/uc?id=1-I09p-wCVpQun6iU-H6kKlOTUI3iWdPK
To: /content/model.pkl.gz
656MB [00:07, 87.8MB/s]


In [None]:
# define the `predict_mem_len` and `predict_drop_len` of tl.SelfAttention
def attention(*args, **kwargs):
    """Build a `tl.SelfAttention` layer with fixed fast-inference cache settings.

    Any `predict_mem_len` / `predict_drop_len` passed by the caller is
    overridden; every other argument is forwarded unchanged.
    """
    kwargs.update(
        # number of input positions to remember in a cache when doing fast inference.
        predict_mem_len=1200,
        # number of input elements to drop once the fast inference input cache fills up.
        predict_drop_len=1200,
    )
    return tl.SelfAttention(*args, **kwargs)

# define the model using the ReformerLM function you implemented earlier.
# NOTE: `attention`, `model` and `shape11` are all defined again by a later
# cell (with a cache length of 1400), so on a top-to-bottom run this instance
# is replaced before any sequence is generated.
model = ReformerLM(
    vocab_size=33000,
    n_layers=6,
    mode='predict',
    attention_type=attention,
)

# define an input signature so we can initialize our model. shape will be (1, 1) and the data type is int32.
shape11 = trax.shapes.ShapeDtype((1, 1), dtype=np.int32)

In [None]:
# initialize from file
model.init_from_file('./model.pkl.gz', weights_only=True, input_signature=shape11)

# save the starting state
STARTING_STATE = model.state

In [None]:
def tokenize(sentence, vocab_file, vocab_dir):
    """Run `trax.data.tokenize` on a single sentence and return the first
    (and only) item it produces for that sentence."""
    token_stream = trax.data.tokenize(
        iter([sentence]), vocab_file=vocab_file, vocab_dir=vocab_dir
    )
    encoded = list(token_stream)
    return encoded[0]

def detokenize(tokens, vocab_file, vocab_dir):
    """Forward `tokens` to `trax.data.detokenize` with the given vocabulary
    file and directory, and return whatever it produces."""
    decoded = trax.data.detokenize(
        tokens,
        vocab_file=vocab_file,
        vocab_dir=vocab_dir,
    )
    return decoded

In [None]:
def ReformerLM_output_gen(ReformerLM, start_sentence, vocab_file, vocab_dir, temperature):
    """
    Start an autoregressive sampling stream seeded with `start_sentence`.

    Args:
        ReformerLM:  the Reformer language model to sample from
        start_sentence (string): text the generation is conditioned on
        vocab_file (string): vocabulary filename
        vocab_dir (string): directory of the vocabulary file
        temperature (float): parameter for sampling ranging from 0.0 to 1.0.
            0.0: same as argmax, always pick the most probable token
            1.0: sampling from the distribution (can sometimes say random things)

    Returns:
        generator: yields the next symbol generated by the model
    """
    # Encode the prompt, then give it a leading batch axis: (n,) -> (1, n).
    prompt_tokens = tokenize(start_sentence, vocab_file=vocab_file, vocab_dir=vocab_dir)
    batched_prompt = np.array(prompt_tokens)[np.newaxis, :]

    # Hand the model and the batched prompt to Trax's streaming sampler.
    return trax.supervised.decoding.autoregressive_sample_stream(
        ReformerLM,
        inputs=batched_prompt,
        temperature=temperature,
    )

In [None]:
shape11 = trax.shapes.ShapeDtype((1, 1), dtype=np.int32)

def attention(*args, **kwargs):
    """Build a `tl.SelfAttention` layer whose fast-inference cache settings are
    both forced to 1400; all other arguments are forwarded unchanged."""
    cache_len = 1400  # max length for predictions
    kwargs['predict_mem_len'] = cache_len
    kwargs['predict_drop_len'] = cache_len  # never drop old stuff
    return tl.SelfAttention(*args, **kwargs)

# Rebuild the predict-mode model with the `attention` factory defined just
# above (cache length 1400); this replaces the `model` created in the earlier cell.
model = ReformerLM(
    vocab_size=33000,
    n_layers=6,
    mode='predict',
    attention_type=attention,
)

In [None]:
# Initialise the predict-mode model from the downloaded checkpoint (weights only).
model.init_from_file('./model.pkl.gz', weights_only=True, input_signature=shape11)

# Keep the freshly initialised state; generate_dialogue() assigns it back to
# the model's `state` before each generation so runs start from the same point.
STARTING_STATE = model.state

In [None]:
def generate_dialogue(ReformerLM, model_state, start_sentence, vocab_file, vocab_dir, max_len, temperature):
    """
    Generate a continuation of `start_sentence` and return the full text.

    Args:
        ReformerLM:  the Reformer language model (in 'predict' mode) to sample from
        model_state (np.array): initial state of the model before decoding
        start_sentence (string): starting text the generation is conditioned on
        vocab_file (string): vocabulary filename
        vocab_dir (string): directory of the vocabulary file
        max_len (int): maximum number of tokens to generate
        temperature (float): parameter for sampling ranging from 0.0 to 1.0.
            0.0: same as argmax, always pick the most probable token
            1.0: sampling from the distribution (can sometimes say random things)

    Returns:
        str: `start_sentence`, one space, then the detokenized generated tokens.
        The same string is printed before returning.
    """
    # delimiter used during training; here it only trims the echoed prompt
    delimiter_2 = 'Person 2: '

    # reset the model state when starting a new dialogue
    ReformerLM.state = model_state

    # Use the vocabulary passed by the caller rather than module-level globals.
    output = ReformerLM_output_gen(ReformerLM, start_sentence, vocab_file=vocab_file, vocab_dir=vocab_dir, temperature=temperature)

    # print the starting sentence
    print('The given starting sequence is', start_sentence.split(delimiter_2)[0].strip())

    # Take at most `max_len` tokens. zip() consults range() first, so the
    # sampling stream is never advanced beyond the requested number of tokens.
    result = [token for _, token in zip(range(max_len), output)]

    # Detokenize once, after generation, instead of re-decoding the whole
    # buffer on every step.
    sentence = ''
    if result:
        sentence = detokenize(np.concatenate(result, axis=0), vocab_file=vocab_file, vocab_dir=vocab_dir)

    # Print exactly what is returned so the displayed and returned text agree.
    generated = start_sentence + ' ' + sentence
    print(generated)
    return generated
    

## Part 4: Compare the 100 predicted sequences with the 100 future sequences

In [None]:
# Get the predict sequences that we predicted before
#https://drive.google.com/file/d/1lWtjU_yS8SfloH3fQleZ7igGL-h1by3Q/view?usp=sharing
!gdown --id 1lWtjU_yS8SfloH3fQleZ7igGL-h1by3Q  # Download the pre_predicted_sequences.txt file
# Read the whole file inside a context manager so the handle is always closed.
with open('pre_predicted_sequences.txt') as get_file:
  process_file = get_file.read()
# The text is split on double quotes to separate the stored sequences.
result_get = process_file.split("\"")
# Drop the first empty fragment produced by the split.
result_get.remove("")

Downloading...
From: https://drive.google.com/uc?id=1lWtjU_yS8SfloH3fQleZ7igGL-h1by3Q
To: /content/pre_predicted_sequences.txt
  0% 0.00/255k [00:00<?, ?B/s]100% 255k/255k [00:00<00:00, 45.5MB/s]


In [None]:
# Compute the comparison of 1 predicted sequence to  100 future data
def compute_comp_1_to_100(seq_predct,seq_future,expect_percentage):
  """
  Compare one predicted sequence with every future sequence and report the best match.

  The similarity score of a pair is the fraction of positions that hold the
  same character. The highest score is printed; when it reaches
  `expect_percentage` but is below 100%, the differing positions (0-based) of
  every future sequence that achieves that highest score are printed too.

  Args:
      seq_predct        : The predicted sequence (string or sequence of characters)
      seq_future        : A list of multiple future sequences
      expect_percentage : The expected similarity, as a fraction (e.g. 0.95), defined by end user

  Returns:
      None. All results are printed.

  Future sequences whose length differs from the prediction are skipped (a
  notice is printed once) and the remaining ones are still compared.
  """
  np_seq_predct = np.array(list(seq_predct))   # one array element per character

  scores = []         # similarity score of each comparable future sequence
  compared_idx = []   # position in seq_future that each score belongs to
  length_warning_shown = False

  for idx, want_comp_seq in enumerate(seq_future):
    # Sequences of a different length cannot be compared position by position.
    if len(seq_predct) != len(want_comp_seq):
      if not length_warning_shown:
        print("The length of sequences to compare is different")
        length_warning_shown = True
      continue
    if len(want_comp_seq) == 0:
      continue   # nothing to compare; avoids dividing by zero below

    np_comp = np_seq_predct == np.array(list(want_comp_seq))
    scores.append(np.count_nonzero(np_comp) / len(want_comp_seq))
    compared_idx.append(idx)

  # Highest similarity found; 0 when nothing could be compared.
  final_simli = max(scores, default=0)

  # Print the message if the highest similarity score fulfill the expected percentage
  if final_simli >= expect_percentage:
    print("The similarity of currrent predict sequences and future sequences is", "{:.3%}".format(final_simli))
  else:
    print('The similarity of current predict sequences and future data is', "{:.3%}".format(final_simli), 'less than', "{:.3%}".format(expect_percentage))

  # If the highest similarity score is fulfill the expected percentage and not equal to 100%
  if final_simli >= expect_percentage and final_simli < 1:
    best_idx = [idx for idx, score in zip(compared_idx, scores) if score == final_simli]

    for case_no, idx in enumerate(best_idx, start=1):
      best_future = seq_future[idx]
      print('\nFor the case No.', case_no, ', locations unmatch are listed below:')
      # Each best-matching case reports its OWN differing positions.
      unmatched = np.flatnonzero(np_seq_predct != np.array(list(best_future)))
      for loc in unmatched:
        print('In the location', loc, ': the sequence \"',np_seq_predct[loc], '\" <-> \"', best_future[loc],'\"')


In [None]:
def process_outputdata(data):
  """Return `data` with every space character removed (e.g. "M F V" -> "MFV")."""
  pieces = data.split(" ")
  return "".join(pieces)

In [None]:
# Start validation of 100 predicted sequences compare to 100 future data

for loop_number in range(len(result_get)):
  print('Here is the No.', loop_number+1, 'predicted sequence compare to 100 future data')
  compute_comp_1_to_100(process_outputdata(result_get[loop_number]), to_test_list, 0.95)
  print("-----------------------------------------------------------------------------------------------------------------------------")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
In the location 1094 : the sequence " V " <-> " F "
In the location 1095 : the sequence " G " <-> " V "
In the location 1096 : the sequence " T " <-> " S "
In the location 1097 : the sequence " G " <-> " N "
In the location 1253 : the sequence " K " <-> " C "
In the location 1254 : the sequence " F " <-> " K "
In the location 1255 : the sequence " D " <-> " F "

For the case No. 46 , locations unmatch are listed below:
In the location 258 : the sequence " S " <-> " T "
In the location 476 : the sequence " P " <-> " N "
In the location 477 : the sequence " S " <-> " T "
In the location 900 : the sequence " F " <-> " Q "
In the location 901 : the sequence " A " <-> " M "
In the location 902 : the sequence " Y " <-> " A "
In the location 903 : the sequence " R " <-> " Y "
In the location 904 : the sequence " Q " <-> " R "
In the location 905 : the sequence " V " <-> " F "
In the location 941 : the sequence " G " <-> " A "
In

## Example Application

In [None]:
# Predict a sequence from a single starting character with temperature = 0.0
# (always pick the most probable token) and time the generation.
import time
start = time.time()

sample_sentence = 'M'
predict_sequence_1 = generate_dialogue(ReformerLM=model, model_state=STARTING_STATE, start_sentence=sample_sentence, vocab_file=VOCAB_FILE, vocab_dir=VOCAB_DIR, max_len=1334, temperature=0.0)

# Elapsed wall-clock time in seconds
print('Time taken to predict this sequences is', time.time()-start)

# The reported length is the character count of the returned string, spaces included.
print('The predicted sequences =', predict_sequence_1)
print('The Length of the predict sequences =',len(predict_sequence_1))

The given starting sequence is M
MF V F L V L L P L V S S Q C V N L T T R T Q L P P A Y T N S F T R G V Y Y P D K V F R S S V L H S T Q D L F L P F F S N V T W F H A I H V S G T N G T K R F D N P V L P F N D G V Y F A S T E K S N I I R G W I F G T T L D S K T Q S L L I V N N A T N V V I K V C E F Q F C N D P F L G V Y Y H K N N K S W M E S E F R V Y S S A N N C T F E Y V S Q P F L M D L E G K Q G N F K N L R E F V F K N I D G Y F K I Y S K H T P I N L V R D L P Q G F S A L E P L V D L P I G I N I T R F Q T L L A L H R S Y L T P G D S S S G W T A G A A A Y Y V G Y L Q P R T F L L K Y N E N G T I T D A V D C A L D P L S E T K C T L K S F T V E K G I Y Q T S N F R V Q P T E S I V R F P N I T N L C P F G E V F N A T R F A S V Y A W N R K R I S N C V A D Y S V L Y N S A S F S T F K C Y G V S P T K L N D L C F T N V Y A D S F V I R G D E V R Q I A P G Q T G K I A D Y N Y K L P D D F T G C V I A W N S N N L D S K V G G N Y N Y L Y R L F R K S N L K P F E R D I S T E I Y Q A G S T P C N G V E 

In [None]:
# Predict a sequence from a single starting character with temperature = 1.0
# (sample from the model's distribution) and time the generation.
import time
start = time.time()

sample_sentence = 'M'
predict_sequence_2 = generate_dialogue(ReformerLM=model, model_state=STARTING_STATE, start_sentence=sample_sentence, vocab_file=VOCAB_FILE, vocab_dir=VOCAB_DIR, max_len=1334, temperature=1.0)

# Elapsed wall-clock time in seconds
print('Time taken to predict this sequences is', time.time()-start)

# The reported length is the character count of the returned string, spaces included.
print('The predicted sequences =', predict_sequence_2)
print('The Length of the predict sequences =',len(predict_sequence_2))

In [None]:
# Predict a sequence from several starting characters with temperature = 0.0
# (always pick the most probable token) and time the generation.
import time
start = time.time()

sample_sentence = 'M F V F L V L L'
predict_sequence_3 = generate_dialogue(ReformerLM=model, model_state=STARTING_STATE, start_sentence=sample_sentence, vocab_file=VOCAB_FILE, vocab_dir=VOCAB_DIR, max_len=1334, temperature=0.0)

# Elapsed wall-clock time in seconds
print('Time taken to predict this sequences is', time.time()-start)

# The reported length is the character count of the returned string, spaces included.
print('The predicted sequences =', predict_sequence_3)
print('The Length of the predict sequences =',len(predict_sequence_3))

In [None]:
# Predict a sequence from a single starting character with temperature = 1.0.
# Same prompt and settings as the predict_sequence_2 cell; with temperature 1.0
# tokens are sampled, so the result is presumably not identical between runs.
import time
start = time.time()

sample_sentence = 'M'
predict_sequence_4 = generate_dialogue(ReformerLM=model, model_state=STARTING_STATE, start_sentence=sample_sentence, vocab_file=VOCAB_FILE, vocab_dir=VOCAB_DIR, max_len=1334, temperature=1.0)

# Elapsed wall-clock time in seconds (printed without a label in this cell)
print(time.time()-start)

# The reported length is the character count of the returned string, spaces included.
print('The predicted sequences =', predict_sequence_4)
print('The Length of the predict sequences =',len(predict_sequence_4))

In [None]:
def compute_similarity(seq_1, seq_2):
    """
    Computes the similarity between two equal-length sequences.

    The comparison is case-insensitive. A position is left out of the
    comparison only when BOTH sequences hold the placeholder character 'z'
    there. Every compared position that differs is printed with its 1-based
    location.

    Parameters
    ----------
    seq_1, seq_2 : strings
        Sequences to compare. These must be the same length.

    Returns
    -------
    score : float
        Fraction (0.0 - 1.0) of compared positions that are identical.
        0.0 is returned when there is no position to compare (empty
        sequences, or every position is 'z' in both).

    Raises
    ------
    ValueError
        If the two sequences differ in length.
    """
    # Make sure they are the same length.
    if len(seq_1) != len(seq_2):
        raise ValueError('Sequences must be the same length!')

    # Make both sequences lowercase.
    seq_1 = seq_1.lower()
    seq_2 = seq_2.lower()

    # Set up counters of length and similarity.
    comp_length = 0
    num_sim = 0

    # Walk both sequences in lockstep; `position` is 1-based for the report.
    for position, (res_1, res_2) in enumerate(zip(seq_1, seq_2), start=1):

        # Skip the position only when both sequences hold the placeholder.
        if res_1 == 'z' and res_2 == 'z':
            continue

        comp_length += 1

        if res_1 == res_2:
            num_sim += 1
        else:
            print("In location ", position, ", the sequences \"",res_1.upper(), "\" <-> \"", res_2.upper(),"\"")

    # Nothing was comparable: report no similarity instead of dividing by zero.
    if comp_length == 0:
        return 0.0

    return num_sim / comp_length

In [None]:
testoutput = process_outputdata(result_get[0])

In [None]:
testoutput = process_outputdata(result_get[1])

In [None]:
testoutput = process_outputdata(result_get[2])

In [None]:
# NOTE: `testoutput` is reassigned by each of these consecutive cells, so on a
# top-to-bottom run the following comparison uses this value (result_get[3]).
testoutput = process_outputdata(result_get[3])

In [None]:
# Compare `testoutput` (one predicted sequence) with every future record and
# count how many of those records it matches exactly (similarity == 1).
true_positive=0
i=1   # 1-based comparison number used in the printed messages
analysis_per=[]   # similarity score of each comparison, in to_test_list order
for a in to_test_list:
  print("\nFor the", i, "Comparision:")
  sco = compute_similarity(testoutput,a)   # also prints every mismatching position
  analysis_per.append(sco)
  if sco==1:
    true_positive += 1
  print("The similarity percentage for", i, "comparision = ", "{:.3%}".format(sco))
  i+=1

print("\nBased on our prediction, our prediction result got", true_positive, "case match 100% with the 100 future records")


For the 1 Comparision:
In location  259 , the sequences " S " <-> " T "
In location  477 , the sequences " P " <-> " N "
In location  478 , the sequences " S " <-> " T "
In location  901 , the sequences " F " <-> " Q "
In location  902 , the sequences " A " <-> " M "
In location  903 , the sequences " Y " <-> " A "
In location  904 , the sequences " R " <-> " Y "
In location  905 , the sequences " Q " <-> " R "
In location  906 , the sequences " V " <-> " F "
In location  942 , the sequences " G " <-> " A "
In location  1078 , the sequences " T " <-> " A "
In location  1079 , the sequences " A " <-> " P "
In location  1080 , the sequences " I " <-> " A "
In location  1081 , the sequences " C " <-> " I "
In location  1082 , the sequences " H " <-> " C "
In location  1083 , the sequences " D " <-> " H "
In location  1084 , the sequences " G " <-> " D "
In location  1085 , the sequences " K " <-> " G "
In location  1086 , the sequences " G " <-> " K "
In location  1087 , the sequences " 

In [None]:
# Compute the similarity rate of predict sequences and future data
# Work on a sorted COPY: `analysis_per` keeps its values and order, so this
# cell can be re-run and later cells can still use the scores.
per_case = sorted(analysis_per)

print('From the previous comparision between the predict sequence and 100 future data, we found that:')
# One line per distinct similarity value, in ascending order, with the number
# of comparisons that produced it.
for tem in sorted(set(per_case)):
    a = per_case.count(tem)
    print("The similarity percentage of", "{:.3%}".format(tem), "have total", a, "case")

From the previous comparision between the predict sequence and 100 future data, we found that:
The similarity percentage of 91.045% have total 1 case
The similarity percentage of 92.302% have total 1 case
The similarity percentage of 92.616% have total 1 case
The similarity percentage of 93.559% have total 1 case
The similarity percentage of 93.951% have total 1 case
The similarity percentage of 94.423% have total 1 case
The similarity percentage of 94.501% have total 1 case
The similarity percentage of 94.658% have total 1 case
The similarity percentage of 94.973% have total 1 case
The similarity percentage of 95.130% have total 1 case
The similarity percentage of 95.208% have total 1 case
The similarity percentage of 95.758% have total 1 case
The similarity percentage of 96.072% have total 1 case
The similarity percentage of 96.465% have total 1 case
The similarity percentage of 96.544% have total 1 case
The similarity percentage of 97.015% have total 1 case
The similarity percentage