In [1]:
from transformers import AutoTokenizer, AutoModel

# Load the pretrained `nlpaueb/legal-bert-base-uncased` tokenizer and model.
# `output_hidden_states = True` makes the model return the per-layer hidden
# states, which `get_embedding` reads as `outputs[2]`.
tokenizer = AutoTokenizer.from_pretrained("nlpaueb/legal-bert-base-uncased")
model = AutoModel.from_pretrained("nlpaueb/legal-bert-base-uncased", output_hidden_states = True)

Some weights of the model checkpoint at nlpaueb/legal-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
import re
import os
import string
from collections import defaultdict, Counter
import numpy as np
import pandas as pd
import torch

In [3]:
# Relevance judgements: a space-separated file with no header row, so the
# columns are named 0, 1, 2, ... The metric functions read column 0 as the
# query name and column 2 as the relevant document name.
# The path is a raw string so that the Windows backslashes are taken literally
# instead of being parsed as (invalid) escape sequences.
qrels = pd.read_csv(r"D:\Courses\Sem 7 2021-22\COL764\COL764 Project\FIRE2017-IRLeD-track-data\Task_2\irled-qrel.txt", sep = ' ', header=None)

In [4]:
def get_word_indeces(tokenizer, text, word):
    '''
    Locate the token positions that `word` occupies inside `text`.

    `word` may span several words (e.g. "cell biology") and each of them may
    be split into several sub-word tokens, so the positions cannot be found by
    a simple lookup. Instead, every occurrence of `word` in `text` is swapped
    for as many `[MASK]` tokens as `word` tokenizes into, the masked text is
    encoded, and the positions of the mask id in the encoded sequence are
    returned.

    Returns a numpy array of indices into the encoded sequence (which is
    whatever `tokenizer.encode` produces for the masked text).
    '''
    # How many tokens does `word` break into?
    n_pieces = len(tokenizer.tokenize(word))

    # One space-separated `[MASK]` per token, substituted for `word`.
    placeholder = ' '.join('[MASK]' for _ in range(n_pieces))
    encoded = tokenizer.encode(text.replace(word, placeholder))

    # Positions whose id equals the tokenizer's mask id.
    is_mask = np.array(encoded) == tokenizer.mask_token_id
    return np.nonzero(is_mask)[0]


In [10]:
def get_embedding(b_model, b_tokenizer, text, word=''):
    '''
    Embed `text` with the given model and, optionally, a word inside it.

    The text is encoded with special tokens and truncated to 512 tokens, then
    passed through the model once with gradients disabled. The sentence
    embedding is the mean over all token vectors of the second-to-last hidden
    layer. When `word` is non-empty, a "contextualized" word embedding is also
    computed as the mean of the token vectors at the positions reported by
    `get_word_indeces`.

    Parameters
    ----------
    b_model : callable model
        Must expose `eval()` and return the hidden states of all layers as the
        third item of its output (i.e. it was loaded with
        `output_hidden_states = True`).
    b_tokenizer : tokenizer
        Must provide `encode_plus` (and whatever `get_word_indeces` needs).
    text : str
        Text to embed.
    word : str, optional
        Word or phrase inside `text` to embed in context. Empty (the default)
        means only the sentence embedding is produced.

    Returns
    -------
    numpy.ndarray
        The sentence embedding, when `word` is empty.
    (numpy.ndarray, numpy.ndarray)
        `(sentence_embedding, word_embedding)` when `word` is given.
    '''
    want_word = word != ''

    # If a word is provided, figure out which tokens correspond to it.
    # NOTE(review): these positions come from encoding the *untruncated* text,
    # so for inputs longer than 512 tokens they may fall outside the truncated
    # sequence used below -- confirm if long texts are passed with a `word`.
    if want_word:
        word_indeces = get_word_indeces(b_tokenizer, text, word)

    # Encode the text, adding the (required!) special tokens, and converting to
    # PyTorch tensors.
    encoded_dict = b_tokenizer.encode_plus(
        text,                      # Sentence to encode.
        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
        return_tensors = 'pt',     # Return pytorch tensors.
        max_length = 512,
        truncation = True
    )
    input_ids = encoded_dict['input_ids']

    b_model.eval()

    # Single forward pass, with autograd switched off: only the activations are
    # needed, so building a graph (or running the model twice) is wasted work.
    with torch.no_grad():
        outputs = b_model(input_ids)

        # Because the model was configured with `output_hidden_states = True`,
        # the third item holds the hidden states of every layer.
        hidden_states = outputs[2]

    # Second-to-last layer, first (only) item of the batch:
    # one vector per token.
    token_vecs = hidden_states[-2][0]

    # Average over tokens and convert to a numpy array.
    sentence_embedding = torch.mean(token_vecs, dim=0).detach().numpy()

    if not want_word:
        return sentence_embedding

    # Average of the vectors belonging to the tokens of `word`.
    word_embedding = torch.mean(token_vecs[word_indeces], dim=0).detach().numpy()

    return (sentence_embedding, word_embedding)

In [11]:
# Quick check of `get_embedding` on a short multi-line string. `word` is left
# empty, so only the sentence embedding is returned.
text = 'fbfkjb\nnflknf\nuid'
embedding = get_embedding(model, tokenizer, text, word='')

In [104]:
# Second sentence embedding, used for the similarity comparison in the next cell.
text2 = 'gleeful'
embedding2 = get_embedding(model, tokenizer, text2, word='')

In [105]:
# Cosine similarity between `embedding` and `embedding2`.
np.inner(embedding, embedding2) / (np.linalg.norm(embedding) * np.linalg.norm(embedding2))

0.73351175

In [12]:
def sentence_embeddings(filename, model, tokenizer):
    '''
    Embed every file in the directory `filename`.

    Files are processed in sorted name order so that the position of each
    vector is reproducible across runs and platforms (`os.listdir` alone
    returns entries in arbitrary order). Each file is read in full and passed
    to `get_embedding`, which truncates the input to 512 tokens. The file name
    is printed as it is processed.

    Parameters
    ----------
    filename : str
        Path of the directory holding the documents (despite the name, this is
        a directory, not a single file).
    model, tokenizer
        Passed through unchanged to `get_embedding`.

    Returns
    -------
    (list, defaultdict)
        `prior_vec`, the list of document embeddings, and `indexes_of_doc`,
        mapping each position in that list to the file name without its
        extension.
    '''
    prior_vec = []
    indexes_of_doc = defaultdict(lambda:0)

    for count, corpus_file in enumerate(sorted(os.listdir(filename))):
        filepath = os.path.join(filename, corpus_file)

        # Strip the extension whatever its length, rather than assuming ".txt".
        indexes_of_doc[count] = os.path.splitext(corpus_file)[0]
        print(corpus_file)

        with open(filepath, 'r') as f:
            doc_text = f.read()

        prior_vec.append(get_embedding(model, tokenizer, doc_text, word=''))

    return prior_vec , indexes_of_doc

In [13]:
# Embed every prior case. `sent_embeddings` and `indexes_of_doc` are read as
# globals by `bert_query`.
# The path is a raw string so the Windows backslashes are taken literally
# instead of being parsed as (invalid) escape sequences.
sent_embeddings, indexes_of_doc = sentence_embeddings(r"D:\Courses\Sem 7 2021-22\COL764\COL764 Project\FIRE2017-IRLeD-track-data\Task_2\Prior_Cases", model, tokenizer)

prior_case_0001.txt
prior_case_0002.txt
prior_case_0003.txt
prior_case_0004.txt
prior_case_0005.txt
prior_case_0006.txt
prior_case_0007.txt
prior_case_0008.txt
prior_case_0009.txt
prior_case_0010.txt
prior_case_0011.txt
prior_case_0012.txt
prior_case_0013.txt
prior_case_0014.txt
prior_case_0015.txt
prior_case_0016.txt
prior_case_0017.txt
prior_case_0018.txt
prior_case_0019.txt
prior_case_0020.txt
prior_case_0021.txt
prior_case_0022.txt
prior_case_0023.txt
prior_case_0024.txt
prior_case_0025.txt
prior_case_0026.txt
prior_case_0027.txt
prior_case_0028.txt
prior_case_0029.txt
prior_case_0030.txt
prior_case_0031.txt
prior_case_0032.txt
prior_case_0033.txt
prior_case_0034.txt
prior_case_0035.txt
prior_case_0036.txt
prior_case_0037.txt
prior_case_0038.txt
prior_case_0039.txt
prior_case_0040.txt
prior_case_0041.txt
prior_case_0042.txt
prior_case_0043.txt
prior_case_0044.txt
prior_case_0045.txt
prior_case_0046.txt
prior_case_0047.txt
prior_case_0048.txt
prior_case_0049.txt
prior_case_0050.txt


prior_case_0411.txt
prior_case_0412.txt
prior_case_0413.txt
prior_case_0414.txt
prior_case_0415.txt
prior_case_0416.txt
prior_case_0417.txt
prior_case_0418.txt
prior_case_0419.txt
prior_case_0420.txt
prior_case_0421.txt
prior_case_0422.txt
prior_case_0423.txt
prior_case_0424.txt
prior_case_0425.txt
prior_case_0426.txt
prior_case_0427.txt
prior_case_0428.txt
prior_case_0429.txt
prior_case_0430.txt
prior_case_0431.txt
prior_case_0432.txt
prior_case_0433.txt
prior_case_0434.txt
prior_case_0435.txt
prior_case_0436.txt
prior_case_0437.txt
prior_case_0438.txt
prior_case_0439.txt
prior_case_0440.txt
prior_case_0441.txt
prior_case_0442.txt
prior_case_0443.txt
prior_case_0444.txt
prior_case_0445.txt
prior_case_0446.txt
prior_case_0447.txt
prior_case_0448.txt
prior_case_0449.txt
prior_case_0450.txt
prior_case_0451.txt
prior_case_0452.txt
prior_case_0453.txt
prior_case_0454.txt
prior_case_0455.txt
prior_case_0456.txt
prior_case_0457.txt
prior_case_0458.txt
prior_case_0459.txt
prior_case_0460.txt


prior_case_0821.txt
prior_case_0822.txt
prior_case_0823.txt
prior_case_0824.txt
prior_case_0825.txt
prior_case_0826.txt
prior_case_0827.txt
prior_case_0828.txt
prior_case_0829.txt
prior_case_0830.txt
prior_case_0831.txt
prior_case_0832.txt
prior_case_0833.txt
prior_case_0834.txt
prior_case_0835.txt
prior_case_0836.txt
prior_case_0837.txt
prior_case_0838.txt
prior_case_0839.txt
prior_case_0840.txt
prior_case_0841.txt
prior_case_0842.txt
prior_case_0843.txt
prior_case_0844.txt
prior_case_0845.txt
prior_case_0846.txt
prior_case_0847.txt
prior_case_0848.txt
prior_case_0849.txt
prior_case_0850.txt
prior_case_0851.txt
prior_case_0852.txt
prior_case_0853.txt
prior_case_0854.txt
prior_case_0855.txt
prior_case_0856.txt
prior_case_0857.txt
prior_case_0858.txt
prior_case_0859.txt
prior_case_0860.txt
prior_case_0861.txt
prior_case_0862.txt
prior_case_0863.txt
prior_case_0864.txt
prior_case_0865.txt
prior_case_0866.txt
prior_case_0867.txt
prior_case_0868.txt
prior_case_0869.txt
prior_case_0870.txt


prior_case_1231.txt
prior_case_1232.txt
prior_case_1233.txt
prior_case_1234.txt
prior_case_1235.txt
prior_case_1236.txt
prior_case_1237.txt
prior_case_1238.txt
prior_case_1239.txt
prior_case_1240.txt
prior_case_1241.txt
prior_case_1242.txt
prior_case_1243.txt
prior_case_1244.txt
prior_case_1245.txt
prior_case_1246.txt
prior_case_1247.txt
prior_case_1248.txt
prior_case_1249.txt
prior_case_1250.txt
prior_case_1251.txt
prior_case_1252.txt
prior_case_1253.txt
prior_case_1254.txt
prior_case_1255.txt
prior_case_1256.txt
prior_case_1257.txt
prior_case_1258.txt
prior_case_1259.txt
prior_case_1260.txt
prior_case_1261.txt
prior_case_1262.txt
prior_case_1263.txt
prior_case_1264.txt
prior_case_1265.txt
prior_case_1266.txt
prior_case_1267.txt
prior_case_1268.txt
prior_case_1269.txt
prior_case_1270.txt
prior_case_1271.txt
prior_case_1272.txt
prior_case_1273.txt
prior_case_1274.txt
prior_case_1275.txt
prior_case_1276.txt
prior_case_1277.txt
prior_case_1278.txt
prior_case_1279.txt
prior_case_1280.txt


prior_case_1641.txt
prior_case_1642.txt
prior_case_1643.txt
prior_case_1644.txt
prior_case_1645.txt
prior_case_1646.txt
prior_case_1647.txt
prior_case_1648.txt
prior_case_1649.txt
prior_case_1650.txt
prior_case_1651.txt
prior_case_1652.txt
prior_case_1653.txt
prior_case_1654.txt
prior_case_1655.txt
prior_case_1656.txt
prior_case_1657.txt
prior_case_1658.txt
prior_case_1659.txt
prior_case_1660.txt
prior_case_1661.txt
prior_case_1662.txt
prior_case_1663.txt
prior_case_1664.txt
prior_case_1665.txt
prior_case_1666.txt
prior_case_1667.txt
prior_case_1668.txt
prior_case_1669.txt
prior_case_1670.txt
prior_case_1671.txt
prior_case_1672.txt
prior_case_1673.txt
prior_case_1674.txt
prior_case_1675.txt
prior_case_1676.txt
prior_case_1677.txt
prior_case_1678.txt
prior_case_1679.txt
prior_case_1680.txt
prior_case_1681.txt
prior_case_1682.txt
prior_case_1683.txt
prior_case_1684.txt
prior_case_1685.txt
prior_case_1686.txt
prior_case_1687.txt
prior_case_1688.txt
prior_case_1689.txt
prior_case_1690.txt


In [14]:
def findall(p, s):
    '''
    Lazily yield the start index of every occurrence of `p` in `s`.

    The search resumes one character after each hit, so overlapping
    occurrences are reported as well.
    '''
    start = 0
    while True:
        hit = s.find(p, start)
        if hit == -1:
            return
        yield hit
        start = hit + 1
def process_query(doc):
    '''
    Return an iterator over the start offsets of every "[?CITATION?]"
    marker found in `doc`.
    '''
    return findall("[?CITATION?]", doc)

In [15]:
# Build the query text for one citation marker by keeping a window of context
# on each side of it. The window size is counted in spaces and is a
# hyper-parameter (`length`).
def process_markers(s, i, p, length = 100):
    '''
    Return the text surrounding the marker `p` located at offset `i` of `s`.

    The marker itself is dropped. On the left, the result starts at the
    `length`-th space before the marker (that space included); on the right it
    stops just before the `length`-th space after the marker. When a side has
    fewer than `length` spaces, the window is anchored on the outermost space
    available on that side. When a side contains no space at all, the whole
    side is kept (previously this raised IndexError).

    Only spaces are treated as separators; full stops and newlines are not.

    Parameters
    ----------
    s : str
        Full document text.
    i : int
        Offset in `s` at which the marker starts.
    p : str
        The marker text; only its length is used, to skip over it.
    length : int, optional
        Maximum number of spaces to extend on each side. Must be >= 1.

    Returns
    -------
    str
        Left context followed directly by right context.

    Raises
    ------
    ValueError
        If `length` is smaller than 1.
    '''
    if length < 1:
        raise ValueError("length must be at least 1, got {!r}".format(length))

    before = s[:i]
    after = s[i + len(p):]

    # Walk left from the marker one space at a time, at most `length` steps,
    # instead of collecting every space in the document.
    start = None
    limit = len(before)
    for _ in range(length):
        pos = before.rfind(' ', 0, limit)
        if pos == -1:
            break
        start = limit = pos

    # Same walk to the right of the marker.
    stop = None
    cursor = 0
    for _ in range(length):
        pos = after.find(' ', cursor)
        if pos == -1:
            break
        stop = pos
        cursor = pos + 1

    left = before if start is None else before[start:]
    right = after if stop is None else after[:stop]

    return left + right


def process_queries(filename, length = 100):
    '''
    Extract one query text per citation marker from every file in a directory.

    Files are visited in sorted name order so the result (and the printed
    progress) is reproducible; `os.listdir` alone returns entries in arbitrary
    order. For each file, the offsets of all "[?CITATION?]" markers are found
    with `process_query`, the file name and marker count are printed, and the
    context around each marker is extracted with `process_markers`.

    Parameters
    ----------
    filename : str
        Path of the directory holding the query documents.
    length : int, optional
        Context window size forwarded to `process_markers`.

    Returns
    -------
    defaultdict
        Maps each file name (without extension) to the list of query texts
        extracted from it. Files without any marker get no entry.
    '''
    case_queries = defaultdict(lambda:[])

    for query_file in sorted(os.listdir(filename)):
        filepath = os.path.join(filename, query_file)

        with open(filepath, 'r') as f:
            doc_text = f.read()

        citation_marker_indices = list(process_query(doc_text))

        print(query_file, len(citation_marker_indices))

        # Strip the extension whatever its length, rather than assuming ".txt".
        case_name = os.path.splitext(query_file)[0]

        for index in citation_marker_indices:
            query_text = process_markers(doc_text, index, "[?CITATION?]", length = length)
            case_queries[case_name].append(query_text)

    return case_queries


def process_full_queries(filename):
    '''
    Read every file in the directory `filename` as one whole-document query.

    Files are visited in sorted name order so the resulting mapping has a
    reproducible iteration order; `os.listdir` alone returns entries in
    arbitrary order.

    Parameters
    ----------
    filename : str
        Path of the directory holding the query documents.

    Returns
    -------
    defaultdict
        Maps each file name (without extension) to the full text of the file.
        Missing keys default to the empty string.
    '''
    case_queries = defaultdict(lambda: '')

    for query_file in sorted(os.listdir(filename)):
        filepath = os.path.join(filename, query_file)

        with open(filepath, 'r') as f:
            doc_text = f.read()

        # Strip the extension whatever its length, rather than assuming ".txt".
        case_queries[os.path.splitext(query_file)[0]] = doc_text

    return case_queries
    


In [87]:
# remove the pattern "27\."
# Build the per-marker queries with a context window of 400 spaces per side.
# The path is a raw string so the Windows backslashes are taken literally
# instead of being parsed as (invalid) escape sequences.
queries = process_queries(r"D:\Courses\Sem 7 2021-22\COL764\COL764 Project\FIRE2017-IRLeD-track-data\Task_2\Current_Cases", length = 400)

current_case_0001.txt 8
current_case_0002.txt 15
current_case_0003.txt 19
current_case_0004.txt 32
current_case_0005.txt 15
current_case_0006.txt 13
current_case_0007.txt 26
current_case_0008.txt 27
current_case_0009.txt 13
current_case_0010.txt 37
current_case_0011.txt 11
current_case_0012.txt 20
current_case_0013.txt 30
current_case_0014.txt 37
current_case_0015.txt 10
current_case_0016.txt 10
current_case_0017.txt 77
current_case_0018.txt 16
current_case_0019.txt 19
current_case_0020.txt 10
current_case_0021.txt 13
current_case_0022.txt 29
current_case_0023.txt 28
current_case_0024.txt 13
current_case_0025.txt 14
current_case_0026.txt 17
current_case_0027.txt 5
current_case_0028.txt 23
current_case_0029.txt 22
current_case_0030.txt 8
current_case_0031.txt 22
current_case_0032.txt 25
current_case_0033.txt 12
current_case_0034.txt 8
current_case_0035.txt 10
current_case_0036.txt 7
current_case_0037.txt 37
current_case_0038.txt 15
current_case_0039.txt 26
current_case_0040.txt 15
curre

In [17]:
def sim(a, b, metric = 'cosine'):
    '''
    Compare two vectors.

    Parameters
    ----------
    a, b : numpy.ndarray
        Vectors of the same shape.
    metric : str, optional
        'cosine' returns the cosine similarity (larger means more similar);
        'euclidean' returns the Euclidean distance (smaller means more
        similar).

    Returns
    -------
    float
        The similarity or distance, depending on `metric`.

    Raises
    ------
    ValueError
        If `metric` is not one of the supported names (previously the function
        silently returned None).
    '''
    if metric == 'cosine':
        return np.inner(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    if metric == 'euclidean':
        # A norm is never negative, so no absolute value is needed.
        return np.linalg.norm(a - b)
    raise ValueError("unknown metric {!r}; expected 'cosine' or 'euclidean'".format(metric))
def similarity_scores(prior_vec, doc_vec, metric = 'cosine'):
    '''
    Score `doc_vec` against every vector in `prior_vec` using `sim`.

    Returns a list with one score per entry of `prior_vec`, in the same order.
    '''
    return [sim(candidate, doc_vec, metric = metric) for candidate in prior_vec]
        
        
def bert_query(queryname, model, tokenizer, metric = 'cosine'):
    '''
    Rank all prior documents for one query case.

    Every query text stored under `queries[queryname]` is embedded and scored
    against every vector in `sent_embeddings`. Each document keeps its best
    score over all of those query texts, and the documents are returned from
    best to worst.

    "Best" depends on the metric: for 'cosine' a larger score is better; for
    'euclidean' the score is a distance, so a smaller value is better.
    (Previously the largest value was always kept and ranked first, which
    ranked the farthest documents first for 'euclidean', and a default of 0
    hid negative cosine scores.)

    Reads the notebook globals `queries`, `sent_embeddings` and
    `indexes_of_doc`.

    Parameters
    ----------
    queryname : str
        Key into the global `queries` mapping.
    model, tokenizer
        Passed through unchanged to `get_embedding`.
    metric : str, optional
        'cosine' or 'euclidean', forwarded to `similarity_scores`.

    Returns
    -------
    list
        Document names (values of `indexes_of_doc`) ordered from best to worst
        match; empty if the query case has no query texts.
    '''
    lower_is_better = (metric == 'euclidean')

    # Best score seen so far for each document, aligned with `sent_embeddings`.
    best_scores = None

    for query in queries[queryname]:
        doc_vec = get_embedding(model, tokenizer, query , word='')

        doc_scores = np.asarray(
            similarity_scores(sent_embeddings, doc_vec, metric = metric),
            dtype = float
        )

        if best_scores is None:
            best_scores = doc_scores
        elif lower_is_better:
            best_scores = np.minimum(best_scores, doc_scores)
        else:
            best_scores = np.maximum(best_scores, doc_scores)

    if best_scores is None:
        return []

    # argsort is ascending: keep that for distances, flip it for similarities.
    order = np.argsort(best_scores, kind = 'stable')
    if not lower_is_better:
        order = order[::-1]

    return [indexes_of_doc[int(index)] for index in order]




In [18]:
def AP(qrel, rankings, queryname):
    '''
    Average precision of `rankings` for one query.

    Precision is recorded at every rank where a relevant document appears, and
    the sum is divided by the number of documents judged relevant for the
    query (previously a hard-coded 5).

    Parameters
    ----------
    qrel : pandas.DataFrame
        Relevance judgements; column 0 holds the query name and column 2 the
        relevant document name. This argument is now actually used (the
        function previously read the global `qrels` instead).
    rankings : sequence
        Document names ordered from best to worst.
    queryname : str
        Query whose judgements are looked up in `qrel`.

    Returns
    -------
    (float, int)
        The average precision and the number of relevant documents found in
        `rankings`; `(0, 0)` if none was found.
    '''
    # A set gives constant-time membership tests and ignores duplicate rows.
    relevant_docs = set(qrel[qrel[0] == queryname][2].values)

    relevant_docs_retrieved = 0
    precision = []

    for docs_retrieved, doc in enumerate(rankings, start=1):
        if doc in relevant_docs:
            relevant_docs_retrieved += 1
            precision.append(relevant_docs_retrieved / docs_retrieved)

    if not precision:
        return 0, 0

    return np.sum(precision) / len(relevant_docs), len(precision)


def MRR(qrel,  rankings, queryname):
    '''
    Reciprocal rank of the first relevant document in `rankings`.

    Parameters
    ----------
    qrel : pandas.DataFrame
        Relevance judgements; column 0 holds the query name and column 2 the
        relevant document name. This argument is now actually used (the
        function previously read the global `qrels` instead).
    rankings : sequence
        Document names ordered from best to worst.
    queryname : str
        Query whose judgements are looked up in `qrel`.

    Returns
    -------
    float or int
        `1 / rank` of the first relevant document (ranks start at 1), or 0 if
        no relevant document appears in `rankings`.
    '''
    relevant_docs = set(qrel[qrel[0] == queryname][2].values)

    for rank, doc in enumerate(rankings, start=1):
        if doc in relevant_docs:
            return 1 / rank

    return 0

def P_at_10(qrel,  rankings, queryname):
    '''
    Precision at 10: the share of the first ten ranked documents that are
    relevant.

    The denominator is always 10. Rankings shorter than ten documents are
    accepted (previously they raised IndexError); the missing positions simply
    count as non-relevant.

    Parameters
    ----------
    qrel : pandas.DataFrame
        Relevance judgements; column 0 holds the query name and column 2 the
        relevant document name. This argument is now actually used (the
        function previously read the global `qrels` instead).
    rankings : list
        Document names ordered from best to worst.
    queryname : str
        Query whose judgements are looked up in `qrel`.

    Returns
    -------
    float
        Number of relevant documents among the top ten, divided by 10.
    '''
    relevant_docs = set(qrel[qrel[0] == queryname][2].values)

    relevant = sum(1 for doc in rankings[:10] if doc in relevant_docs)

    return relevant / 10
    

In [19]:
def obtain_relevant_docs(queries, model, tokenizer, flag = 1):
    '''
    Rank documents for every query case and report retrieval metrics.

    For each key of `queries`, the documents are ranked with `bert_query`, the
    ranking is scored with `AP`, `MRR` and `P_at_10` against the global
    `qrels`, and one line with the three values is printed.

    NOTE(review): `bert_query` looks the query texts up in the *global*
    `queries` mapping, so this function only uses the keys of the `queries`
    argument -- confirm the two always refer to the same object.

    Parameters
    ----------
    queries : mapping
        Its keys are the query case names to evaluate.
    model, tokenizer
        Passed through unchanged to `bert_query`.
    flag : int, optional
        Selects the metric: 1 for 'cosine', 2 for 'euclidean'.

    Returns
    -------
    (float, float, float)
        The means over all query cases of the average precision, the
        reciprocal rank and the precision at 10.

    Raises
    ------
    ValueError
        If `flag` is not 1 or 2 (previously this surfaced as an unbound
        `rankings` variable, or silently reused the previous ranking).
    '''
    metric_by_flag = {1: 'cosine', 2: 'euclidean'}
    if flag not in metric_by_flag:
        raise ValueError("flag must be 1 (cosine) or 2 (euclidean), got {!r}".format(flag))
    metric = metric_by_flag[flag]

    list_AP = []
    list_MRR = []
    list_P10 = []

    for queryname in queries.keys():
        rankings = bert_query(queryname, model, tokenizer, metric = metric)

        ap = AP(qrels, rankings, queryname)
        mrr = MRR(qrels, rankings, queryname)
        p10 = P_at_10(qrels, rankings, queryname)

        print(queryname, ap, mrr, p10)

        # `AP` returns (average precision, number of relevant docs found).
        list_AP.append(ap[0])
        list_MRR.append(mrr)
        list_P10.append(p10)

    return np.mean(list_AP), np.mean(list_MRR) , np.mean(list_P10)

In [192]:
# Evaluate the current `queries` with cosine ranking (flag = 1). The cell's
# value is the tuple (mean AP, mean reciprocal rank, mean P@10).
obtain_relevant_docs(queries, model, tokenizer, flag = 1)

current_case_0001 (0.005409558635690176, 5) 0.003125 0.0
current_case_0002 (0.00706606626106405, 5) 0.01694915254237288 0.0
current_case_0003 (0.01851944572332112, 5) 0.023809523809523808 0.0
current_case_0004 (0.03574957461005579, 5) 0.1 0.1
current_case_0005 (0.02748846464767496, 5) 0.1111111111111111 0.1
current_case_0006 (0.002555667707299802, 5) 0.0037593984962406013 0.0
current_case_0007 (0.010117518144384986, 5) 0.02631578947368421 0.0
current_case_0008 (0.12256412306882487, 5) 0.5 0.1
current_case_0009 (0.00535455544038114, 5) 0.00546448087431694 0.0
current_case_0010 (0.0075422263905766925, 5) 0.006802721088435374 0.0
current_case_0011 (0.10859600648835992, 5) 0.5 0.1
current_case_0012 (0.012174406918596, 5) 0.015625 0.0
current_case_0013 (0.02002419794049328, 5) 0.023255813953488372 0.0
current_case_0014 (0.022285275336497438, 5) 0.041666666666666664 0.0
current_case_0015 (0.20331329152852967, 5) 1.0 0.1
current_case_0016 (0.00767952684682432, 5) 0.010309278350515464 0.0
curr

current_case_0129 (0.001689219548110956, 5) 0.0008764241893076249 0.0
current_case_0130 (0.10506410466842668, 5) 0.5 0.1
current_case_0131 (0.17637894811807853, 5) 0.5 0.2
current_case_0132 (0.1107840612309472, 5) 0.16666666666666666 0.2
current_case_0133 (0.0602056805653145, 5) 0.125 0.1
current_case_0134 (0.2014514877057918, 5) 0.5 0.2
current_case_0135 (0.017439234413667026, 5) 0.03125 0.0
current_case_0136 (0.006787173075097287, 5) 0.01694915254237288 0.0
current_case_0137 (0.0020172757431669453, 5) 0.0013422818791946308 0.0
current_case_0138 (0.1756631299734748, 5) 0.25 0.2
current_case_0139 (0.016271385269613952, 5) 0.019230769230769232 0.0
current_case_0140 (0.26093897548032213, 5) 1.0 0.1
current_case_0141 (0.022515052260232746, 5) 0.09090909090909091 0.0
current_case_0142 (0.002031143752975638, 5) 0.0013315579227696406 0.0
current_case_0143 (0.025866547028832725, 5) 0.022727272727272728 0.0
current_case_0144 (0.007382397828091783, 5) 0.012987012987012988 0.0
current_case_0145 

(0.056357940496871914, 0.1694281753468374, 0.04472361809045226)

In [20]:
# remove the pattern "27\."
# Rebuild the per-marker queries with a context window of 150 spaces per side.
# The path is a raw string so the Windows backslashes are taken literally
# instead of being parsed as (invalid) escape sequences.
queries = process_queries(r"D:\Courses\Sem 7 2021-22\COL764\COL764 Project\FIRE2017-IRLeD-track-data\Task_2\Current_Cases", length = 150)

current_case_0001.txt 8
current_case_0002.txt 15
current_case_0003.txt 19
current_case_0004.txt 32
current_case_0005.txt 15
current_case_0006.txt 13
current_case_0007.txt 26
current_case_0008.txt 27
current_case_0009.txt 13
current_case_0010.txt 37
current_case_0011.txt 11
current_case_0012.txt 20
current_case_0013.txt 30
current_case_0014.txt 37
current_case_0015.txt 10
current_case_0016.txt 10
current_case_0017.txt 77
current_case_0018.txt 16
current_case_0019.txt 19
current_case_0020.txt 10
current_case_0021.txt 13
current_case_0022.txt 29
current_case_0023.txt 28
current_case_0024.txt 13
current_case_0025.txt 14
current_case_0026.txt 17
current_case_0027.txt 5
current_case_0028.txt 23
current_case_0029.txt 22
current_case_0030.txt 8
current_case_0031.txt 22
current_case_0032.txt 25
current_case_0033.txt 12
current_case_0034.txt 8
current_case_0035.txt 10
current_case_0036.txt 7
current_case_0037.txt 37
current_case_0038.txt 15
current_case_0039.txt 26
current_case_0040.txt 15
curre

In [21]:
# Re-run the cosine evaluation (flag = 1) on the `queries` rebuilt in the
# previous cell. The cell's value is (mean AP, mean reciprocal rank, mean P@10).
obtain_relevant_docs(queries, model, tokenizer, flag = 1)

current_case_0001 (0.003865609150244812, 5) 0.0036900369003690036 0.0
current_case_0002 (0.007302069397791415, 5) 0.00909090909090909 0.0
current_case_0003 (0.11597764464637528, 5) 0.3333333333333333 0.1
current_case_0004 (0.03250874132097111, 5) 0.043478260869565216 0.0
current_case_0005 (0.008739505886979818, 5) 0.005952380952380952 0.0
current_case_0006 (0.002214262061795316, 5) 0.0018450184501845018 0.0
current_case_0007 (0.007260939675889436, 5) 0.014285714285714285 0.0
current_case_0008 (0.06190702153515208, 5) 0.2 0.1
current_case_0009 (0.009583082957353665, 5) 0.018518518518518517 0.0
current_case_0010 (0.005841698627898987, 5) 0.004405286343612335 0.0
current_case_0011 (0.11353802105907798, 5) 0.5 0.1
current_case_0012 (0.010576515255360967, 5) 0.010869565217391304 0.0
current_case_0013 (0.01279465473639618, 5) 0.014084507042253521 0.0
current_case_0014 (0.02161997714564128, 5) 0.05 0.0
current_case_0015 (0.005325472061374147, 5) 0.013888888888888888 0.0
current_case_0016 (0.0

current_case_0130 (0.03042866499368697, 5) 0.125 0.1
current_case_0131 (0.0515565541712318, 5) 0.0625 0.0
current_case_0132 (0.048637618314508094, 5) 0.0625 0.0
current_case_0133 (0.005501827270867142, 5) 0.0032258064516129032 0.0
current_case_0134 (0.1459681799906625, 5) 0.5 0.2
current_case_0135 (0.023849772184155054, 5) 0.022727272727272728 0.0
current_case_0136 (0.005184109221907212, 5) 0.0033333333333333335 0.0
current_case_0137 (0.0021924319925137074, 5) 0.0019646365422396855 0.0
current_case_0138 (0.4663779180737043, 5) 1.0 0.2
current_case_0139 (0.031495575876579036, 5) 0.04 0.0
current_case_0140 (0.06357456799168575, 5) 0.14285714285714285 0.1
current_case_0141 (0.03615836874417904, 5) 0.16666666666666666 0.1
current_case_0142 (0.002050161001711738, 5) 0.001402524544179523 0.0
current_case_0143 (0.01926956287697099, 5) 0.01694915254237288 0.0
current_case_0144 (0.005159315240336884, 5) 0.0038461538461538464 0.0
current_case_0145 (0.0050259553956471794, 5) 0.004405286343612335 

(0.06047672275970297, 0.17572219236205633, 0.04321608040201005)