In [38]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score

In [39]:
categories = {
    "00-10": ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10"],
    "11-13": ["11", "12", "13"],
    "14-20": ["14", "15", "16", "17", "18", "19", "20"],
    "21-23": ["21", "22", "23"],
    "24-27": ["24", "25", "26", "27"],
    "28": ["28"],
    "29-31": ["29", "30", "31"],
    "32-35": ["32", "33", "34", "35"],
    "36-38": ["36", "37", "38"],
    "39-45": ["39", "40", "41", "42", "43", "44", "45"],
    "46-51": ["46", "47", "48", "49", "50", "51"],
    "52-57": ["52", "53", "54", "55", "56", "57"],
    "58-62": ["58", "59", "60", "61", "62"],
    "63-66": ["63", "64", "65", "66"],
    "67": ["67"],
    "68-70": ["68", "69", "70"],
    "71-73": ["71", "72", "73"],
    "74-75": ["74", "75"],
    "76-77": ["76", "77"],
    "78": ["78"],
    "79": ["79"],
    "80-83": ["80", "81", "82", "83"],
    "84-95": ["84", "85", "86", "87", "88", "89", "90", "91", "92", "93", "94", "95"],
    "96-99": ["96", "97", "98", "99"],
    "V01-V09": ["V01", "V02", "V03", "V04", "V05", "V06", "V07", "V08", "V09"],
    "V10-V19": ["V10", "V11", "V12", "V13", "V14", "V15", "V16", "V17", "V18", "V19"],
    "V20-V29": ["V20", "V21", "V22", "V23", "V24", "V25", "V26", "V27", "V28", "V29"],
    "V30-V39": ["V30", "V31", "V32", "V33", "V34", "V35", "V36", "V37", "V38", "V39"],
    "V40-V49": ["V40", "V41", "V42", "V43", "V44", "V45", "V46", "V47", "V48", "V49"],
    "V50-V59": ["V50", "V51", "V52", "V53", "V54", "V55", "V56", "V57", "V58", "V59"],
    "V60-V69": ["V60", "V61", "V62", "V63", "V64", "V65", "V66", "V67", "V68", "V69"],
    "V70-V82": ["V70", "V71", "V72", "V73", "V74", "V75", "V76", "V77", "V78", "V79", "V80", "V81", "V82"],
    "V83-V84": ["V83", "V84"],
    "V85-V85": ["V85"],
    "V86-V86": ["V86"],
    "V87-V87": ["V87"],
    "V88-V88": ["V88"],
    "V89-V89": ["V89"],
    "V90-V90": ["V90"],
    "V91-V91": ["V91"],
    "E00-E09": ["E00", "E01", "E02", "E03", "E04", "E05", "E06", "E07", "E08", "E09"]
}


In [40]:
one_sym = pd.read_json('one.json').reset_index()
one_sym.head()

Unnamed: 0,index,short_codes,Symptoms
0,0,41401997141114273140192720,[exertional chest pain]
1,1,"80508,5070,99739,5180,2939,2513,E8130,8820,401...",[neck pain]
2,2,"1541,99811,V6441,45829,25000,2724,4019,41400,V...",[rectal cancer]
3,3,"44101,42833,4233,5601,5990,4241,7885,2875,3272...",[chest pain radiating to back]
4,4,"41401,4139,4019,2724,45829,V4582,V1582",[chest pain]


In [41]:
one_sym.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   index        100 non-null    int64 
 1   short_codes  100 non-null    object
 2   Symptoms     100 non-null    object
dtypes: int64(1), object(2)
memory usage: 2.5+ KB


In [42]:
two_sym = pd.read_json('two.json').reset_index()
three_sym = pd.read_json('three.json').reset_index()
gt3_sym = pd.read_json('gt3.json').reset_index()

In [43]:
gt3_sym.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   index        100 non-null    int64 
 1   short_codes  100 non-null    object
 2   Symptoms     100 non-null    object
dtypes: int64(1), object(2)
memory usage: 2.5+ KB


In [44]:
def merge_category(df):

    df = df.drop('short_codes', axis=1)

    df = df.groupby('index').agg({
        'Category': lambda x: ','.join(map(str, x)),
        'Symptoms': 'first'
    }).reset_index()
    
    return df

In [45]:
def category_addition(df, categories = categories) : 
    icd9_to_category = {code: category for category, codes in categories.items() for code in codes}
    df['short_codes'] = df['short_codes'].astype(str).str.zfill(2)
    df['Category'] = df['short_codes'].map(icd9_to_category)
    return df

In [46]:
def preprocessing(test_df) : 
    test_df.loc[
        test_df['short_codes'].str.startswith("V"), 'short_codes'] = test_df.short_codes.apply(
        lambda x: x[:3])
    test_df.loc[
        test_df['short_codes'].str.startswith("E"), 'short_codes'] = test_df.short_codes.apply(
        lambda x: x[:3])
    test_df.loc[(~test_df.short_codes.str.startswith("E")) & (
        ~test_df.short_codes.str.startswith("V")), 'short_codes'] = test_df.short_codes.apply(
        lambda x: x[:2])
    return test_df

In [47]:
def xplode(df) : 
    df = df.assign(short =df['short_codes'].str.split(',')).explode('short').reset_index(drop = True)
    df = df.drop('short_codes' ,axis =1)
    df.columns = ['index','Symptoms', 'short_codes']
    return df

one_sym = merge_category(category_addition(preprocessing(xplode(one_sym))))

In [48]:
one_sym 

Unnamed: 0,index,Category,Symptoms
0,0,"39-45,96-99,39-45,39-45,39-45,24-27",[exertional chest pain]
1,1,"80-83,46-51,96-99,46-51,29-31,24-27,nan,84-95,...",[neck pain]
2,2,"14-20,96-99,V60-V69,39-45,24-27,24-27,39-45,39...",[rectal cancer]
3,3,"39-45,39-45,39-45,52-57,58-62,39-45,78,28,32-3...",[chest pain radiating to back]
4,4,"39-45,39-45,39-45,24-27,39-45,V40-V49,V10-V19",[chest pain]
...,...,...,...
95,95,"39-45,74-75,58-62,32-35,39-45,39-45",[mild dyspnea on exertion]
96,96,"39-45,52-57,nan,28,24-27,24-27,V50-V59,36-38,5...",[loose bloody bowel movements]
97,97,"39-45,58-62,46-51,39-45,24-27,39-45,V40-V49",[enlarging 5.6 cm aneurysm of the infrarenal a...
98,98,"39-45,39-45,39-45,39-45,39-45,46-51,39-45,39-4...",[shortness of breath]


In [49]:
two_sym = merge_category(category_addition(preprocessing(xplode(two_sym))))

three_sym = merge_category(category_addition(preprocessing(xplode(three_sym))))

gt3_sym = merge_category(category_addition(preprocessing(xplode(gt3_sym))))

In [50]:
gt3_sym.head()

Unnamed: 0,index,Category,Symptoms
0,0,"84-95,46-51,46-51,39-45,00-10,96-99,80-83,80-8...","[chest pain, back pain, decreased breath sound..."
1,1,"46-51,29-31,24-27,29-31,24-27,V60-V69","[lethargic, unable to answer questions, normal..."
2,2,"39-45,39-45,46-51,39-45,00-10,78,78,39-45","[slurred speech, inability to move left arm an..."
3,3,"46-51,39-45,46-51,46-51,46-51,58-62,24-27,24-2...","[hypercarbic respiratory failure, coughing, in..."
4,4,"39-45,39-45,24-27,39-45,58-62,V40-V49,V10-V19","[unequal pulses in upper extremities, minimall..."


In [56]:
import os
from py2neo import Graph
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
from neo4j import GraphDatabase
from dotenv import load_dotenv
import json
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score


uri = "neo4j://localhost:7687" 
auth = ("neo4j", "neo4j_pass5")

driver_1= GraphDatabase.driver(uri, auth=auth) 
driver_1.verify_connectivity()




In [57]:
def get_prioritized_relationships(symptom_names, weightage=5, limit=20):
    with driver_1.session() as session:
        result = session.run(f"""
            MATCH (s:Symptom)-[r:ASSOCIATED_WITH]->(d:Disease)
            WHERE s.name IN $symptoms AND r.weight >= {weightage}
            WITH d.title AS disease_name, collect(s.name) AS symptoms, max(r.weight) AS max_weight
            RETURN disease_name, symptoms, max_weight
            ORDER BY max_weight DESC
            LIMIT {limit}
        """, symptoms=symptom_names)

        codes = []
        all_info = {}
        for record in result:
            codes.append(record['disease_name'])
            all_info[record['disease_name']] = [record['disease_name'], record['max_weight']]

            
        return codes, all_info
    

def create_binary_matrix(labels, classes):
    binary_matrix = np.zeros((len(labels), len(classes)), dtype=int)
    for i, label_list in enumerate(labels):
        for label in label_list:
            if label in classes:
                idx = classes.index(label)
                binary_matrix[i, idx] = 1       
    return binary_matrix
    
    
def roc_auc(probs, labels, multilabel=False, average='macro', multi_class='ovo'):
    if isinstance(labels, list):
        labels = np.array(labels, dtype=int)
    else:
        labels = labels.astype(int)
    y_score = probs
    if multilabel:
        dim_size = len(labels[0])
        mask = np.ones((dim_size), dtype=bool)
        for c in range(dim_size):
            if max(labels[:, c]) == 0:
                mask[c] = False
        labels = labels[:, mask]
        y_score = np.array(probs)[:, mask]

        filtered_cols = np.count_nonzero(mask == False)
        # print(f"{filtered_cols} columns not considered for ROC AUC calculation!")

    return roc_auc_score(y_true=labels, y_score=y_score, average=average, multi_class=multi_class)


In [94]:
def escape_special_chars(query):
    return query.replace("'", "\\'").replace("/", "\\/")

def search_symptoms(query_strings, expansion_limit =20):
    all_symptoms = []
    for query_string in query_strings:
        escaped_query = escape_special_chars(query_string)  # Escape the query string
        with driver_1.session() as session:
            result = session.run(
                f"CALL db.index.fulltext.queryNodes('symptomIndex', '{escaped_query}') "
                "YIELD node, score "
                "RETURN node.name AS symptom, score "
                "ORDER BY score DESC "
                f"LIMIT {expansion_limit}"
            )
            symptoms = [record['symptom'] for record in result]
            all_symptoms.extend(symptoms)
    return all_symptoms

# Example usage
search_queries = ['headache', "extensive microcalcification's", 'nausea']
search_results = search_symptoms(search_queries)
print(search_results)

['HEADACHE', 'Headache', 'headache', 'Severe headache during headache episodes', 'Massive headache', 'Headache (migraine)', 'Dull headache', 'whole headache', 'worst headache', 'waxing headache', 'worse headache', 'vasospasm headache', 'Light headache', 'Migraine headache', 'headache-frontal', 'Mild Headache', 'holocranial headache', 'Mild headache', 'increased headache', 'worsening headache', 'Extensive fractures', 'Extensive injuries', 'Extensive bruising', 'Extensive supratentorial edema', 'Extensive cardiac history', 'Extensive pulmonary contusions', 'Extensive rib fractures', 'Extensive brain injury', 'Extensive superficial lacerations', 'Extensive blood loss', 'Extensive bilateral PEs', 'Extensive intraparenchymal hemorrhage', 'Extensive facial lacerations', 'Extensive smoking history', 'Extensive front end damage', 'Extensive left-sided injuries', 'Extensive vertebral compression fractures', 'Extensive soft tissue damage', 'extensive bleed during bronchoscopy', 'extensive necrot

In [59]:
classes = classes = ['32-35','79','96-99','52-57','74-75','V20-V29','V60-V69','76-77','21-23','V50-V59','36-38','28','00-10','V01-V09','V40-V49','46-51','80-83','E00-E09','V30-V39','11-13','58-62','24-27','V85-V85','V87-V87','68-70','39-45','78','V83-V84','63-66','14-20','67','V88-V88','V10-V19','V70-V82','84-95','29-31','V90-V90','71-73']

In [63]:


true_labels = []
predicted_labels = []

for i,j in tqdm(one_sym.iterrows()): 
                true_label = j.Category.split(',')
                true_labels.append(list(set(true_label)))
                expanded_query =  search_symptoms(list(j.Symptoms))
                pred_labels = get_prioritized_relationships(expanded_query,weightage=1,limit= 10)[0]
                predicted_labels.append(pred_labels)
    
true_binary_matrix = create_binary_matrix(true_labels, classes)
predicted_binary_matrix = create_binary_matrix(predicted_labels, classes)

auroc_score = roc_auc(predicted_binary_matrix, true_binary_matrix, multilabel=True)
print(f"AUROC Score: {auroc_score}")
               
        

100it [00:02, 42.35it/s]

AUROC Score: 0.5386322640188874





In [64]:
# weightage_range = [2,3,5,7,10,15,20] 
# limit_range = [4,5,6,7,8,9,10,12,15,20,25,30]
weightage_range = [1]
limit_range = [5,10,15,20,25,30,35]

best_weightage = None
best_limit = None
best_precision_score = -np.inf

for limit in tqdm(limit_range, desc='Limit', leave=False):
    for weightage in tqdm(weightage_range, desc='Weightage'):
    
        
        true_labels = []
        predicted_labels = []
        
        for i, j in one_sym.iterrows(): 
           if len(j.Symptoms) ==1 : 
                true_label = j.Category.split(',')
                true_labels.append(true_label)
                expanded_query =  search_symptoms(list(j.Symptoms))
                pred_labels = get_prioritized_relationships(expanded_query, weightage=weightage, limit=limit)[0]
                predicted_labels.append(pred_labels)

        
        true_binary_matrix = create_binary_matrix(true_labels, classes)
        predicted_binary_matrix = create_binary_matrix(predicted_labels, classes)
        
        precision = f1_score(true_binary_matrix, predicted_binary_matrix, average='micro')
        
   
        if precision > best_precision_score:
            best_precision_score = precision
            best_weightage = weightage
            best_limit = limit

print(f"Best Weightage: {best_weightage}")
print(f"Best Limit: {best_limit}")
print(f"Best Precision Score: {best_precision_score}")


Weightage: 100%|██████████| 1/1 [00:02<00:00,  2.43s/it]
Weightage: 100%|██████████| 1/1 [00:02<00:00,  2.40s/it]
Weightage: 100%|██████████| 1/1 [00:02<00:00,  2.54s/it]
Weightage: 100%|██████████| 1/1 [00:04<00:00,  4.69s/it]
Weightage: 100%|██████████| 1/1 [00:05<00:00,  5.06s/it]
Weightage: 100%|██████████| 1/1 [00:05<00:00,  5.13s/it]
Weightage: 100%|██████████| 1/1 [00:05<00:00,  5.13s/it]
                                                    

Best Weightage: 1
Best Limit: 5
Best Precision Score: 0.4730290456431535




In [85]:
uri = "neo4j://localhost:8687" 
auth = ("neo4j", "neo4j_pass7")

driver = GraphDatabase.driver(uri, auth=auth) 
driver.verify_connectivity()



In [86]:
def get_prioritized_relationships_pubmed(symptom_names, weightage=1, limit=40):
    with driver.session() as session:
        result = session.run(f"""
            MATCH (s:Symptom)-[r:ASSOCIATED_WITH]->(d:Disease)
            WHERE s.name IN $symptoms AND r.pos_weight >= $weightage
            RETURN s.name AS symptom, d.disease_code AS disease_name, r.pos_weight AS weight, r.euclidean_distance AS distance
            ORDER BY r.pos_weight DESC, r.euclidean_distance ASC
            LIMIT $limit
        """, symptoms=symptom_names, weightage=weightage, limit=limit)
        
        codes = []
        all_info = {}

        for record in result:
            codes.append(record['disease_name'])
            all_info[record['disease_name']] = [record['disease_name'], record['weight']]
            
        return codes, all_info


In [102]:
from tqdm import tqdm


def only_kg(df,weight = 1, limit_kg = 5, limit_pub = 7,expansion_limit = 14) : 

        classes = ['32-35','79','96-99','52-57','74-75','V20-V29','V60-V69','76-77','21-23','V50-V59','36-38','28','00-10','V01-V09','V40-V49','46-51','80-83','E00-E09','V30-V39','11-13','58-62','24-27','V85-V85','V87-V87','68-70','39-45','78','V83-V84','63-66','14-20','67','V88-V88','V10-V19','V70-V82','84-95','29-31','V90-V90','71-73']
        true_labels = []
        predicted_labels = []

        for i,j in tqdm(df.iterrows()): 
                    true_label = j.Category.split(',')
                    true_labels.append(list(set(true_label)))
                    expanded_query =  search_symptoms(list(j.Symptoms), expansion_limit=expansion_limit)
                    pred_labels_kg = get_prioritized_relationships(expanded_query,weightage=weight,limit= limit_kg )[0]
                    pred_labels_pubmed = get_prioritized_relationships(expanded_query,weightage=weight,limit= limit_pub )[0]
                    pred_labels = list(set(pred_labels_kg + pred_labels_pubmed))
                    predicted_labels.append(pred_labels)

        print(f"Number of rows : {len(true_labels)}")

        true_binary_matrix = create_binary_matrix(true_labels, classes)
        predicted_binary_matrix = create_binary_matrix(predicted_labels, classes)

        auroc_score = roc_auc(predicted_binary_matrix, true_binary_matrix, multilabel=True)
        print(f"Macro Averaged AUROC Score: {auroc_score}")


        precision = precision_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)
        recall = recall_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)
        f1 = f1_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)

        precision_m = precision_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)
        recall_m = recall_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)
        f1_m = f1_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)


        print(f"Micro Averaged Precision: {precision}")
        print(f"Micro Averaged Recall: {recall}")
        print(f"Micro Averaged F1-Score: {f1}")

        print(f"Macro Averaged Precision: {precision_m}")
        print(f"Macro Averaged Recall: {recall_m}")
        print(f"Macro Averaged F1-Score: {f1_m}")

only_kg(one_sym)

100it [00:04, 22.21it/s]

Number of rows : 100
Macro Averaged AUROC Score: 0.5309862742113977
Micro Averaged Precision: 0.4014285714285714
Micro Averaged Recall: 0.6056034482758621
Micro Averaged F1-Score: 0.48281786941580757
Macro Averaged Precision: 0.1576460786870042
Macro Averaged Recall: 0.21797058612236545
Macro Averaged F1-Score: 0.15213267097413832





In [103]:
import optuna
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
from tqdm import tqdm

# Define the objective function to optimize
def objective(trial):
    weight = trial.suggest_int('weight', 0.1, 10.0)
    limit_kg = trial.suggest_int('limit_kg', 5, 20)
    limit_pub = trial.suggest_int('limit_pub', 5, 20)
    expansion_limit = trial.suggest_int('expansion_limit', 1, 15)

    true_labels = []
    predicted_labels = []

    for i, j in tqdm(one_sym.iterrows()):
        true_label = j.Category.split(',')
        true_labels.append(list(set(true_label)))
        expanded_query = search_symptoms(list(j.Symptoms), expansion_limit=expansion_limit)
        pred_labels_kg = get_prioritized_relationships(expanded_query, weightage=weight, limit=limit_kg)[0]
        pred_labels_pubmed = get_prioritized_relationships(expanded_query, weightage=weight, limit=limit_pub)[0]
        pred_labels = list(set(pred_labels_kg + pred_labels_pubmed))
        predicted_labels.append(pred_labels)

    classes = ['32-35','79','96-99','52-57','74-75','V20-V29','V60-V69','76-77','21-23','V50-V59','36-38','28','00-10','V01-V09','V40-V49','46-51','80-83','E00-E09','V30-V39','11-13','58-62','24-27','V85-V85','V87-V87','68-70','39-45','78','V83-V84','63-66','14-20','67','V88-V88','V10-V19','V70-V82','84-95','29-31','V90-V90','71-73']

    true_binary_matrix = create_binary_matrix(true_labels, classes)
    predicted_binary_matrix = create_binary_matrix(predicted_labels, classes)

    # Calculate the F1 score to optimize
    f1 = f1_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)

    return f1

# Create an Optuna study to maximize F1 score
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# Print the best parameters found
print("Best Parameters:", study.best_params)
print("Best F1 Score:", study.best_value)


In [None]:
weight_range = range(1,5)
limit_kg = range(10,15)
limit_pub = range(5,15)
expansion_limit = range(5,20)

In [81]:
only_kg(two_sym)

100it [00:08, 12.48it/s]

Number of rows : 100
Macro Averaged AUROC Score: 0.5154808582464904
Micro Averaged Precision: 0.2638100154878678
Micro Averaged Recall: 0.9393382352941176
Micro Averaged F1-Score: 0.41193067311567916
Macro Averaged Precision: 0.14489542402621045
Macro Averaged Recall: 0.5284636725130893
Macro Averaged F1-Score: 0.2102061330839318





In [82]:
only_kg(three_sym)

100it [00:09, 10.47it/s]

Number of rows : 100
Macro Averaged AUROC Score: 0.5208514441264436
Micro Averaged Precision: 0.2825413223140496
Micro Averaged Recall: 0.9366438356164384
Micro Averaged F1-Score: 0.43412698412698414
Macro Averaged Precision: 0.17709513005222785
Macro Averaged Recall: 0.5372208931419457
Macro Averaged F1-Score: 0.24571572508421505





In [83]:
only_kg(gt3_sym)

100it [00:12,  8.02it/s]

Number of rows : 100
Macro Averaged AUROC Score: 0.5309378995502019
Micro Averaged Precision: 0.32852729145211124
Micro Averaged Recall: 0.9423929098966026
Micro Averaged F1-Score: 0.48720885834287897
Macro Averaged Precision: 0.2124467769880395
Macro Averaged Recall: 0.5562769496980023
Macro Averaged F1-Score: 0.28380086742830485





### PubMED KG


In [70]:
import os
from py2neo import Graph
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
from neo4j import GraphDatabase
from dotenv import load_dotenv
import json


driver = GraphDatabase.driver(uri, auth=auth) 
driver.verify_connectivity()

with open('mapped_terms_icd9.json', 'r') as file:
    data = file.read()  # Read the content of the file

# Parse the JSON data
parsed_data = json.loads(data)

result = {}

for key, values in parsed_data.items():
    filtered_diseases = {}
    for value in values:
        disease_info = value['disease'].split(' - ')
        disease_code = disease_info[0].strip()
        disease_desc = disease_info[1].strip()
        euclidean_distance = float(disease_info[2].strip())
        
        if disease_code not in filtered_diseases or filtered_diseases[disease_code]['euclidean_distance'] > euclidean_distance:
            filtered_diseases[disease_code] = {
                'disease_code': disease_code,
                'disease_desc': disease_desc,
                'euclidean_distance': euclidean_distance,
                'pos_weight': value['pos_weight']
            }
    
    result[key] = list(filtered_diseases.values())

with open('output_pubmed.json', 'w') as f:
    json.dump(result, f, indent=4)

In [71]:
def delete_all_data():
    with driver.session() as session:
        session.run("MATCH (n) DETACH DELETE n")
        print("All data has been deleted.")

# delete_all_data()

In [72]:
for i,j in result.items():
    for k in j : 
        if  k['disease_code'].startswith("V") : 
            k['disease_code'] = k['disease_code'][:3]
        elif k['disease_code'].startswith("E") : 
            k['disease_code'] = k['disease_code'][:3]
        else :  k['disease_code'] =  k['disease_code'][:2]


In [78]:
one_sym.head()

Unnamed: 0,index,Category,Symptoms
0,0,"39-45,96-99,39-45,39-45,39-45,24-27",[exertional chest pain]
1,1,"80-83,46-51,96-99,46-51,29-31,24-27,nan,84-95,...",[neck pain]
2,2,"14-20,96-99,V60-V69,39-45,24-27,24-27,39-45,39...",[rectal cancer]
3,3,"39-45,39-45,39-45,52-57,58-62,39-45,78,28,32-3...",[chest pain radiating to back]
4,4,"39-45,39-45,39-45,24-27,39-45,V40-V49,V10-V19",[chest pain]


### Single Symptom

In [79]:
from tqdm import tqdm


def only_kg_pubmed(df) : 

        classes = ['32-35','79','96-99','52-57','74-75','V20-V29','V60-V69','76-77','21-23','V50-V59','36-38','28','00-10','V01-V09','V40-V49','46-51','80-83','E00-E09','V30-V39','11-13','58-62','24-27','V85-V85','V87-V87','68-70','39-45','78','V83-V84','63-66','14-20','67','V88-V88','V10-V19','V70-V82','84-95','29-31','V90-V90','71-73']
        true_labels = []
        predicted_labels = []

        for i,j in tqdm(df.iterrows()): 
                    true_label = j.Category.split(',')
                    true_labels.append(list(set(true_label)))
                    expanded_query =  search_symptoms(list(j.Symptoms))
                    pred_labels = get_prioritized_relationships_pubmed(expanded_query,weightage=1,limit= 20 )[0]
                    predicted_labels.append(pred_labels)

        print(f"Number of rows : {len(true_labels)}")

        true_binary_matrix = create_binary_matrix(true_labels, classes)
        predicted_binary_matrix = create_binary_matrix(predicted_labels, classes)

        auroc_score = roc_auc(predicted_binary_matrix, true_binary_matrix, multilabel=True)
        print(f"Macro Averaged AUROC Score: {auroc_score}")

        precision = precision_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)
        recall = recall_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)
        f1 = f1_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)

        precision_m = precision_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)
        recall_m = recall_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)
        f1_m = f1_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)


        print(f"Micro Averaged Precision: {precision}")
        print(f"Micro Averaged Recall: {recall}")
        print(f"Micro Averaged F1-Score: {f1}")

        print(f"Macro Averaged Precision: {precision_m}")
        print(f"Macro Averaged Recall: {recall_m}")
        print(f"Macro Averaged F1-Score: {f1_m}")

only_kg_pubmed(one_sym)

100it [00:00, 136.46it/s]

Number of rows : 100
Macro Averaged AUROC Score: 0.5018102785646645
Micro Averaged Precision: 0.5
Micro Averaged Recall: 0.004310344827586207
Micro Averaged F1-Score: 0.008547008547008548
Macro Averaged Precision: 0.039473684210526314
Macro Averaged Recall: 0.003205128205128205
Macro Averaged F1-Score: 0.005928282244071718





In [75]:
only_kg_pubmed(two_sym)

100it [00:03, 27.44it/s]

Number of rows : 100
Macro Averaged AUROC Score: 0.5001813436791932
Micro Averaged Precision: 0.2
Micro Averaged Recall: 0.001838235294117647
Micro Averaged F1-Score: 0.0036429872495446266
Macro Averaged Precision: 0.008771929824561403
Macro Averaged Recall: 0.001547987616099071
Macro Averaged F1-Score: 0.002631578947368421





In [76]:
only_kg_pubmed(three_sym)

100it [00:03, 26.55it/s]

Number of rows : 100
Macro Averaged AUROC Score: 0.5000369713102633
Micro Averaged Precision: 0.2857142857142857
Micro Averaged Recall: 0.003424657534246575
Micro Averaged F1-Score: 0.00676818950930626
Macro Averaged Precision: 0.010526315789473684
Macro Averaged Recall: 0.0017543859649122807
Macro Averaged F1-Score: 0.003007518796992481





In [77]:
only_kg_pubmed(gt3_sym)

100it [00:09, 10.17it/s]

Number of rows : 100
Macro Averaged AUROC Score: 0.49922711058263974
Micro Averaged Precision: 0.2
Micro Averaged Recall: 0.0029542097488921715
Micro Averaged F1-Score: 0.005822416302765648
Macro Averaged Precision: 0.005847953216374269
Macro Averaged Recall: 0.0017543859649122807
Macro Averaged F1-Score: 0.002699055330634278





### MIMIC KG (RAG)

In [115]:
from groq import Groq

In [116]:
disease_df = pd.read_csv('disease_classes.csv')
disease_df.head()

expanded_rows = []

for index, row in disease_df.iterrows():
    code_range = row['Code Range']
    description = row['Description']
    

    codes = categories.get(code_range, [])
    
 
    for code in codes:
        expanded_rows.append({"Code": code, "Description": description})

expanded_df = pd.DataFrame(expanded_rows)

expanded_df.head()

Unnamed: 0,Code,Description
0,0,Infectious disease
1,1,Infectious disease
2,2,Infectious disease
3,3,Infectious disease
4,4,Infectious disease


In [117]:
total_disease_df = pd.merge(expanded_df, disease_df, how = 'inner', on = 'Description')
total_disease_df.head()

Unnamed: 0,Code,Description,Code Range
0,0,Infectious disease,00-10
1,1,Infectious disease,00-10
2,2,Infectious disease,00-10
3,3,Infectious disease,00-10
4,4,Infectious disease,00-10


In [118]:
disease_df.columns = ['Code', 'Description']

In [119]:
filtered_df = disease_df[disease_df.Code.isin(['39-45', '24-27', '96-99', '28', '52-57', '78', 'V50-V59', '58-62', '79', 'V10-V19', '74-75', '32-35', 'V40-V49', '68-70', 'V60-V69', '71-73', '00-10', '46-51', 'V01-V09', 'V70-V82'])][['Code', 'Description']].reset_index(drop = True)
code_desc_tuples = list(set(filtered_df.itertuples(index=False, name=None)))


In [120]:

def aug_and_gen(codes, symptom, model ='mistral', method = 'rag') : 

    prompt_start = f"""You are an AI medical assistant tasked with identifying the top most relevant ICD-9 code categories (e.g.'11-13','14-20','21-23','24-27',etc) for a given symptom, based on a provided list of code categories and category name
Symptoms: {symptom}"""
    kg_prompt = """
Task:
Output only a Python list of relevant ICD-9 code categories.

Format:
['XX-XX', 'XX-XX', 'XX-XX', 'XX-XX', 'XX-XX', 'XX-XX', 'XX-XX', 'XX-XX']

Important:
Don't consider the code to be included in JSON if it doesn't relate to the symptoms provided.

Rules:
1. Use only code categories in the format 'XX-XX' or 'XX' (e.g., '39-45', '78').
2. Include only most relevant categories from the provided outputs for the symptoms.
3. Output only the list, nothing else.
4. Do not include any explanations, comments, or additional text before or after the list.
5. Use single quotes for each category.
6. Separate categories with commas and spaces.
7. The entire list must be on a single line.
8. Do not mention the symptom in your output.
9. Do not describe or explain the categories.
10. Do not mention the number of categories or rows.
11. Make sure the chosen categories are one of them : ['28', '71-73', 'V88-V88', '68-70', 'V20-V29', '39-45', 'V90-V90', '21-23', 'V60-V69', 'V10-V19', '24-27', '80-83', '78', '52-57', '36-38', '58-62', '74-75', '67', '76-77', '63-66', '11-13', 'V87-V87', '46-51', '79', 'V70-V82', '32-35', 'E00-E09', 'V40-V49', 'V85-V85', '14-20', '29-31', '00-10', 'V01-V09', 'V50-V59', 'V83-V84', '84-95', '96-99', 'V30-V39']

Relevant Outputs (list only):"""
    
    only_llm_prompt_start = f"""You are an AI medical assistant tasked with identifying the top most relevant ICD-9 code categories (e.g.'11-13','14-20','21-23','24-27',etc) for a given symptom, based on a provided list of code categories and category name
Symptoms: {symptom}"""
    only_llm_prompt = """
Given these code categories and their meanings below in a list of tuples, in which of these categories do you think the above symptoms of a patient lie in: 
>>> (Categories, Meanings) : [('14-20', 'Malignant neoplasms'), ('V30-V39', 'Liveborn Infants According To Type Of Birth'), ('V88-V88', 'Acquired Absence Of Other Organs And Tissue'), ('V40-V49', 'Persons With A Condition Influencing Their Health Status'), ('29-31', 'Mental disorders'), ('78', 'Symptoms'), ('67', 'Complications Of puerperium'), ('46-51', 'Diseases Of The Respiratory System'), ('96-99', 'Poisoning'), ('71-73', 'Diseases Of The Musculoskeletal System And Connective Tissue'), ('V85-V85', 'Body Mass Index'), ('21-23', 'Benign neoplasms'), ('58-62', 'Diseases Of The Genitourinary System'), ('76-77', 'Certain Conditions Originating In The Perinatal Period'), ('32-35', 'Disorders of central nervous system'), ('63-66', 'Complications Of Pregnancy and Childbirth'), ('V50-V59', 'Persons Encountering Health Services For Specific Procedures And Aftercare'), ('68-70', 'Diseases Of The Skin And Subcutaneous Tissue'), ('39-45', 'Diseases Of The Circulatory System'), ('80-83', 'Fracture'), ('V10-V19', 'Persons With Potential Health Hazards Related To Personal And Family History'), ('V60-V69', 'Persons Encountering Health Services In Other Circumstances'), ('V91-V91', 'Multiple Gestation Placenta Status'), ('74-75', 'Congenital anomalies'), ('52-57', 'Diseases Of The Digestive System'), ('V90-V90', 'Retained Foreign Body'), ('36-38', 'Organ diseases'), ('28', 'Diseases of blood'), ('V89-V89', 'Other Suspected Conditions Not Found'), ('11-13', 'Parasitic diseases'), ('79', 'Findings and conditions'), ('24-27', 'Endocrine nutritional and metabolic diseases and immunity disorders'), ('V20-V29', 'Persons Encountering Health Services In Circumstances Related To Reproduction And Development'), ('V86-V86', 'Estrogen Receptor Status'), ('E00-E09', 'Supplementary Classification Of External Causes Of Injury And Poisoning'), ('00-10', 'Infectious disease'), ('V70-V82', 'Persons Without Reported Diagnosis Encountered During Examination And Investigation Of Individuals And Populations'), ('V87-V87', 'Other Specified Personal Exposures And History Presenting Hazards To Health'), ('V83-V84', 'Genetics'), ('84-95', 'Injury'), ('V01-V09', 'Persons With Potential Health Hazards Related To Communicable Diseases')]

Task:
Output only a Python list of relevant ICD-9 code categories.

Format:
['XX-XX', 'XX-XX', 'XX-XX', 'XX-XX', 'XX-XX', 'XX-XX', 'XX-XX', 'XX-XX']

Important:
Don't consider the code to be included in JSON if it doesn't relate to the symptoms provided.

Rules:
1. Use only code categories in the format 'XX-XX' or 'XX' (e.g., '39-45', '78').
2. Include only most relevant categories .
3. Output only the list, nothing else.
4. Do not include any explanations, comments, or additional text before or after the list.
5. Use single quotes for each category.
6. Separate categories with commas and spaces.
7. The entire list must be on a single line.
8. Do not mention the symptom in your output.
9. Do not describe or explain the categories.
10. Do not mention the number of categories or rows.
11. Make sure the chosen categories are one of them : ['28', '71-73', 'V88-V88', '68-70', 'V20-V29', '39-45', 'V90-V90', '21-23', 'V60-V69', 'V10-V19', '24-27', '80-83', '78', '52-57', '36-38', '58-62', '74-75', '67', '76-77', '63-66', '11-13', 'V87-V87', '46-51', '79', 'V70-V82', '32-35', 'E00-E09', 'V40-V49', 'V85-V85', '14-20', '29-31', '00-10', 'V01-V09', 'V50-V59', 'V83-V84', '84-95', '96-99', 'V30-V39']

Relevant Outputs (list only):
"""

    if model == 'mistral' : 
        model= "open-mistral-nemo-2407" 
        MISTRAL_API_KEY =  "WRhxY4qx7jsun5iYThrdS3Dk4dubsjnV"
        client = MistralClient(api_key=MISTRAL_API_KEY)

        if method == 'rag' : 

            prompt_template = prompt_start + kg_prompt + str(codes)

            chat_response = client.chat(
                model=model,
                max_tokens=200,
                temperature = 0.1,
                messages=[ChatMessage(role="user", content=prompt_template)])
            
            response = chat_response.choices[0].message.content

        if method == 'llm' : 

            prompt_template = only_llm_prompt_start + only_llm_prompt

            chat_response = client.chat(
                model=model,
                max_tokens=200,
                temperature = 0.1,
                messages=[ChatMessage(role="user", content=prompt_template)])
            
            response = chat_response.choices[0].message.content
        
    elif model == 'groq' : 
        #"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"
        # backup=
        client = Groq(
        api_key="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",
    )
        if method == 'rag' : 
            prompt_template = prompt_start + kg_prompt + str(codes)
            chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": f"{prompt_template}",
                }
            ],
            model="llama-3.1-70b-versatile",
        )

            response = chat_completion.choices[0].message.content

        if method == 'llm' : 
            prompt_template = only_llm_prompt_start + only_llm_prompt
            chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": f"{prompt_template}",
                }
            ],
            model="llama-3.1-70b-versatile",
        )

            response = chat_completion.choices[0].message.content



    return response

def codes_n_desc(list_codes, results) : 
    
    try : 
        five_codes = list_codes[2:-2].split("', '")
    except KeyError as s: 
        five_codes = list_codes[2:-2].split(", ")
    except : 
        
        five_codes = list_codes[2:-2].split('", "')
    
    return [(i,results.get(i)) for i in five_codes]

In [126]:
def metrics_calc( raw_test_df = one_sym, method = 'llm', model_name = 'groq'):

    true_labels = []
    predicted_labels = []

    for i,j in tqdm(raw_test_df.iterrows()): 

                    true_label = j.Category.split(',')
                    true_labels.append(list(set(true_label)))
                    expanded_query =  search_symptoms(list(j.Symptoms), expansion_limit=15)
                    pred_labels_kg = get_prioritized_relationships(expanded_query,weightage=6,limit= 16)[0]
                    pred_labels_pubmed = get_prioritized_relationships(expanded_query,weightage=1,limit= 16 )[0]
                    pred_labels = list(set(pred_labels_kg + pred_labels_pubmed))
                    code_desc_tuples = list(set(disease_df[disease_df.Code.isin(pred_labels)][['Code', 'Description']].reset_index(drop = True).itertuples(index=False, name=None)))
                    refined_pred_labels = aug_and_gen(code_desc_tuples,j.Symptoms, model=model_name, method = method)
                    refined_pred_labels = refined_pred_labels[2:-2].split("', '")    
                    predicted_labels.append(list(set(refined_pred_labels)))

    print(f"Number of rows : {len(true_labels)}")
    print(true_labels)
    print(predicted_labels)

    true_binary_matrix = create_binary_matrix(true_labels, classes)
    predicted_binary_matrix = create_binary_matrix(predicted_labels, classes)
    

    auroc_score = roc_auc(predicted_binary_matrix, true_binary_matrix, multilabel=True)
    print(f"Macro Averaged AUROC Score: {auroc_score}")


    precision = precision_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)
    recall = recall_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)
    f1 = f1_score(true_binary_matrix, predicted_binary_matrix, average='micro', zero_division=0)

    precision_m = precision_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)
    recall_m = recall_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)
    f1_m = f1_score(true_binary_matrix, predicted_binary_matrix, average='macro', zero_division=0)


    print(f"Micro Averaged Precision: {precision}")
    print(f"Micro Averaged Recall: {recall}")
    print(f"Micro Averaged F1-Score: {f1}")

    print(f"Macro Averaged Precision: {precision_m}")
    print(f"Macro Averaged Recall: {recall_m}")
    print(f"Macro Averaged F1-Score: {f1_m}")


metrics_calc(raw_test_df=one_sym,method='llm', model_name= 'groq')

In [124]:
metrics_calc(raw_test_df=one_sym,method='rag', model_name= 'groq')

100it [01:36,  1.03it/s]

Number of rows : 100
[['96-99', '39-45', '24-27'], ['71-73', '39-45', '29-31', '96-99', '46-51', '84-95', 'nan', '80-83', '24-27'], ['V60-V69', '39-45', '96-99', '14-20', 'V40-V49', '24-27'], ['32-35', '39-45', '52-57', '78', '28', '58-62', '24-27'], ['V10-V19', '39-45', 'V40-V49', '24-27'], ['74-75', '71-73', '32-35', '39-45', '52-57', '96-99', 'V10-V19', '46-51', 'nan', '24-27'], ['71-73', '39-45', '29-31', '52-57', 'V10-V19', '78', '46-51', '58-62'], ['32-35', '71-73', '39-45', 'V10-V19', '78', '21-23'], ['71-73', '39-45', '96-99', 'nan', '24-27'], ['39-45', 'V50-V59', '46-51', 'V70-V82', 'V40-V49', '24-27'], ['39-45', 'V50-V59', '00-10', '58-62', 'V40-V49', '24-27'], ['28', '39-45', 'V40-V49'], ['39-45', 'V50-V59', '78', '96-99', 'V10-V19', 'V70-V82', '79', 'nan', 'V40-V49', '24-27'], ['39-45', '46-51', 'nan', 'V40-V49', '24-27'], ['32-35', '39-45', '96-99', '46-51', '24-27'], ['32-35', '71-73', '39-45', '24-27'], ['32-35', '39-45', '52-57', '58-62', 'nan', '24-27'], ['96-99', '39-




In [128]:
metrics_calc(raw_test_df=two_sym,method='rag', model_name= 'groq')

100it [01:47,  1.08s/it]

Number of rows : 100
[['71-73', '39-45', '58-62', 'nan', 'V40-V49', '24-27'], ['V10-V19', '39-45', '29-31'], ['32-35', '39-45', 'V50-V59', '29-31', 'V10-V19', '46-51', '84-95', 'nan', '80-83', '24-27'], ['84-95'], ['V01-V09', '39-45'], ['71-73', '39-45', '58-62', 'V40-V49', '24-27'], ['39-45', '52-57', '28', '84-95', '36-38', 'nan', '24-27'], ['39-45', '29-31', '96-99', '28', '46-51', 'V10-V19', '24-27'], ['39-45', 'V40-V49', '24-27'], ['39-45', '29-31', '28', '79', '58-62'], ['32-35', '71-73', '39-45', '29-31', '96-99', '28', '78', '58-62', 'nan', 'V40-V49'], ['39-45', '84-95', 'nan', '80-83', 'V40-V49', '24-27'], ['V60-V69', '11-13', '39-45', '52-57', '68-70', '96-99', '28', '46-51', '21-23', '00-10', '78', '84-95', '79', '58-62', 'nan', '24-27'], ['39-45', '68-70', '28', '46-51', '00-10', 'V40-V49', '24-27'], ['78', '39-45', '46-51', 'nan'], ['32-35', '39-45', 'V50-V59', '52-57', '78', '58-62', 'V40-V49', '24-27'], ['39-45', 'V40-V49', '24-27'], ['28', '24-27', '52-57', '00-10'], ['




In [129]:
metrics_calc(raw_test_df=two_sym,method='llm', model_name= 'groq')

100it [03:01,  1.82s/it]

Number of rows : 100
[['71-73', '39-45', '58-62', 'nan', 'V40-V49', '24-27'], ['V10-V19', '39-45', '29-31'], ['32-35', '39-45', 'V50-V59', '29-31', 'V10-V19', '46-51', '84-95', 'nan', '80-83', '24-27'], ['84-95'], ['V01-V09', '39-45'], ['71-73', '39-45', '58-62', 'V40-V49', '24-27'], ['39-45', '52-57', '28', '84-95', '36-38', 'nan', '24-27'], ['39-45', '29-31', '96-99', '28', '46-51', 'V10-V19', '24-27'], ['39-45', 'V40-V49', '24-27'], ['39-45', '29-31', '28', '79', '58-62'], ['32-35', '71-73', '39-45', '29-31', '96-99', '28', '78', '58-62', 'nan', 'V40-V49'], ['39-45', '84-95', 'nan', '80-83', 'V40-V49', '24-27'], ['V60-V69', '11-13', '39-45', '52-57', '68-70', '96-99', '28', '46-51', '21-23', '00-10', '78', '84-95', '79', '58-62', 'nan', '24-27'], ['39-45', '68-70', '28', '46-51', '00-10', 'V40-V49', '24-27'], ['78', '39-45', '46-51', 'nan'], ['32-35', '39-45', 'V50-V59', '52-57', '78', '58-62', 'V40-V49', '24-27'], ['39-45', 'V40-V49', '24-27'], ['28', '24-27', '52-57', '00-10'], ['




In [1120]:
metrics_calc(type = 'three symptoms',method='llm', model_name= 'groq')

4it [00:00,  4.60it/s]

['39-45', '74-75', '21-23', '78']


10it [00:01,  6.54it/s]

['78', '32-35', '24-27', '39-45', '46-51']


29it [00:02, 14.70it/s]

['46-51', '78', '96-99']


31it [00:10,  2.09it/s]

['78', '32-35', '39-45']


32it [00:21,  1.20s/it]

['39-45', '52-57', '24-27', '78']


47it [00:33,  1.03it/s]

['46-51', '39-45', '24-27', '78']


49it [00:45,  1.45s/it]

['71-73', '78', '80-83', '32-35', '58-62', '79']


51it [00:57,  2.04s/it]

['46-51', '39-45', '24-27', '78']


52it [01:09,  2.88s/it]

['14-20', '39-45', '21-23']


55it [01:20,  3.10s/it]

['46-51', '78', '39-45', '24-27']


63it [01:33,  2.31s/it]

['46-51', '78', '39-45']


70it [01:45,  2.11s/it]

['39-45', '52-57', '24-27', '11-13', '68-70', '46-51']


75it [01:56,  2.15s/it]

['46-51', '39-45', '78']


79it [02:08,  2.31s/it]

['39-45', '46-51', '78']


87it [02:20,  2.00s/it]

['32-35', '39-45', '78', '24-27']


114it [02:31,  1.06it/s]

['39-45', '46-51', '78']


139it [02:43,  1.37it/s]

['39-45', '46-51', '78']


159it [02:55,  1.46it/s]

['71-73', '68-70', '58-62', '39-45', '21-23', '78', '79']


163it [03:07,  1.11it/s]

['39-45', '46-51', '78']


179it [03:19,  1.20it/s]

['46-51', '39-45', '78']


192it [03:30,  1.18it/s]

['46-51', '39-45', '24-27', '78']


196it [03:43,  1.10s/it]

['39-45', '78', '46-51', '29-31', '79']


197it [03:54,  1.50s/it]

['46-51', '71-73', '78']


200it [04:06,  1.23s/it]

['39-45', '46-51', '78', '24-27']
Number of rows : 24
[['78', '74-75', '39-45'], ['24-27', '78', 'V50-V59', '46-51', '79', '58-62', '39-45', 'V40-V49', '29-31', 'V10-V19'], ['24-27', '71-73', '96-99', '46-51', '79', '58-62', 'V40-V49', '14-20', '29-31', 'V10-V19', '00-10'], ['24-27', '78', '32-35', '39-45', 'V10-V19'], ['78', '28', '52-57', '96-99', '58-62', 'V40-V49', '29-31', 'V10-V19', '00-10'], ['46-51', '71-73', '39-45'], ['80-83', '28', '84-95'], ['V40-V49', '39-45'], ['21-23', '14-20', '39-45'], ['24-27', '71-73', '52-57', '58-62', '39-45', 'V40-V49', 'V10-V19'], ['24-27', '46-51', '32-35', '39-45'], ['24-27', '52-57', '39-45'], ['24-27', 'V10-V19', '58-62', '39-45'], ['V50-V59', '28', '32-35', '39-45'], ['24-27', '78', 'V50-V59', '96-99', '46-51', '39-45', '21-23', 'V10-V19'], ['24-27', '28', '71-73', '46-51', '39-45', 'V10-V19'], ['24-27', '39-45'], ['24-27', '28', '71-73', '96-99', '58-62', '32-35', '39-45', 'V10-V19'], ['24-27', '28', '39-45'], ['28', '78', '39-45', '14-20',




In [1121]:
metrics_calc(type = 'three symptoms',method='rag', model_name= 'groq')

4it [00:06,  1.56s/it]

['39-45', '74-75', '78']


10it [00:14,  1.42s/it]

['39-45', '24-27', '78', '28', '11-13']


29it [00:22,  1.48it/s]

['78', '46-51', '96-99', '39-45', '28', '79']


31it [00:30,  1.02s/it]

['78', '39-45', '21-23', '32-35', '80-83']


32it [00:39,  1.51s/it]

['11-13', '24-27', '39-45', '52-57', '78']


47it [00:47,  1.06it/s]

['46-51', '28', '39-45', '24-27', '78', '96-99']


49it [00:55,  1.26s/it]

['72-73', '78', '80-83', '29-31', '21-23', '52-57', '58-62']


51it [01:03,  1.62s/it]

['46-51', '78', '24-27', '39-45', '28']


52it [01:09,  2.00s/it]

['174-175', '39-45', '394-398']


55it [01:17,  2.19s/it]

['46-51', '78', '39-45', '29-31', '24-27']


63it [01:26,  1.60s/it]

['46-51', '78', '96-99', '39-45', '24-27']


70it [01:32,  1.32s/it]

['39-45', '46-51', 'V10-V19', '24-27', 'V60-V69']


75it [01:40,  1.41s/it]

['78', '46-51', '39-45', '29-31', '24-27', '68-70', '52-57', '14-20']


79it [01:49,  1.60s/it]

['39-45', '21-23', '46-51', '78', '29-31', '24-27']


87it [01:57,  1.37s/it]

['78', '21-23', '14-20', '39-45', '79', '24-27']


114it [02:05,  1.51it/s]

['39-45', '46-51', '24-27', '78', '79', 'V10-V19', 'V40-V49']


139it [02:13,  1.98it/s]

['39-45', '28', '24-27', '46-51', '79', '14-20']


159it [02:20,  2.22it/s]

['39-45', '46-51', '78', '58-62', '96-99']


163it [02:28,  1.68it/s]

['39-45', '46-51', '32-35', '78', '28', '24-27']


179it [02:37,  1.77it/s]

['46-51', '28', '39-45', '78', '79']


192it [02:45,  1.71it/s]

['46-51', '39-45', '28', '78', '24-27', '96-99', '14-20', 'V40-V49', 'V10-V19']


196it [02:53,  1.34it/s]

['39-45', '78', '46-51', '29-31', '96-99', '71-73', '79', '24-27']


197it [02:59,  1.05it/s]

['21-23', '46-51', '79']


200it [03:06,  1.07it/s]

['39-45', '24-27', '78', '29-31', '46-51']
Number of rows : 24
[['78', '74-75', '39-45'], ['24-27', '78', 'V50-V59', '46-51', '79', '58-62', '39-45', 'V40-V49', '29-31', 'V10-V19'], ['24-27', '71-73', '96-99', '46-51', '79', '58-62', 'V40-V49', '14-20', '29-31', 'V10-V19', '00-10'], ['24-27', '78', '32-35', '39-45', 'V10-V19'], ['78', '28', '52-57', '96-99', '58-62', 'V40-V49', '29-31', 'V10-V19', '00-10'], ['46-51', '71-73', '39-45'], ['80-83', '28', '84-95'], ['V40-V49', '39-45'], ['21-23', '14-20', '39-45'], ['24-27', '71-73', '52-57', '58-62', '39-45', 'V40-V49', 'V10-V19'], ['24-27', '46-51', '32-35', '39-45'], ['24-27', '52-57', '39-45'], ['24-27', 'V10-V19', '58-62', '39-45'], ['V50-V59', '28', '32-35', '39-45'], ['24-27', '78', 'V50-V59', '96-99', '46-51', '39-45', '21-23', 'V10-V19'], ['24-27', '28', '71-73', '46-51', '39-45', 'V10-V19'], ['24-27', '39-45'], ['24-27', '28', '71-73', '96-99', '58-62', '32-35', '39-45', 'V10-V19'], ['24-27', '28', '39-45'], ['28', '78', '39-45',




In [1122]:
metrics_calc(type = 'more than 3 symptoms',method='llm', model_name= 'groq')

1it [00:01,  1.02s/it]

['58-62', '52-57', '46-51', '24-27', '21-23', '14-20']


3it [00:01,  1.84it/s]

['46-51', '24-27', '78', '39-45', '68-70']


5it [00:02,  2.13it/s]

['52-57', '11-13', '78', '36-38', '46-51', '24-27']


6it [00:03,  1.89it/s]

['46-51', '39-45', '24-27', '78', '32-35']


7it [00:04,  1.65it/s]

['78', '46-51', '39-45', '52-57', '24-27', '32-35']


8it [00:14,  3.39s/it]

['78', '32-35', '39-45', '46-51', '52-57', '71-73', '24-27', '29-31', '84-95']


11it [00:26,  3.79s/it]

['39-45', '52-57', '78', '24-27', '46-51', '36-38']


12it [00:45,  6.83s/it]

['39-45', '46-51', '24-27', '32-35']


13it [00:52,  7.01s/it]

['00-10', '11-13', '21-23', '24-27', '39-45', '52-57', '58-62', '68-70', '78']


16it [01:05,  5.56s/it]

['00-10', '46-51', '52-57', '58-62', '24-27', '78']


17it [01:18,  7.00s/it]

['32-35', '39-45', '52-57', '68-70', '78']


18it [01:29,  7.89s/it]

['46-51', '39-45', '52-57', '24-27', '78']


19it [01:54, 11.85s/it]

['46-51', '24-27', '78', '79']


20it [01:55,  9.08s/it]

['39-45', '32-35', '78', '79', '84-95']


21it [02:07,  9.92s/it]

['39-45', '46-51', '24-27', '32-35', '78']


22it [02:20, 10.79s/it]

['68-70', '71-73', '24-27', '78', '52-57', '39-45', '00-10']


23it [02:32, 11.17s/it]

['71-73', '39-45', '24-27', '78', '84-95']


24it [02:44, 11.51s/it]

['24-27', '39-45', '46-51', '52-57', '58-62', '71-73', '78']


25it [02:56, 11.45s/it]

['32-35', '71-73', '78', '24-27']


26it [03:08, 11.68s/it]

['39-45', '24-27', '78', '32-35']


28it [03:20,  9.16s/it]

['39-45', '46-51', '52-57', '78', '24-27']


30it [03:33,  7.97s/it]

['39-45', '46-51', '78', '24-27', '36-38', '79']


33it [03:44,  6.06s/it]

['52-57', '78', '24-27']


34it [03:56,  7.18s/it]

['32-35', '78', '52-57', '14-20', '79']


35it [04:09,  8.25s/it]

['46-51', '52-57', '58-62', '68-70', '39-45', '24-27', '78']


36it [04:20,  8.96s/it]

['46-51', '29-31', '24-27', '78', '00-10']


37it [04:33,  9.92s/it]

['78', '32-35', '39-45', '46-51', '71-73']


38it [04:46, 10.65s/it]

['32-35', '78', '46-51', '52-57', '29-31', '24-27', '39-45']


39it [04:57, 10.78s/it]

['46-51', '24-27', '39-45', '52-57', '78']


40it [05:10, 11.43s/it]

['39-45', '46-51', '58-62', '21-23', '78', '79']


41it [05:22, 11.62s/it]

['39-45', '46-51', '52-57', '24-27', '78', '79']


43it [05:34,  9.13s/it]

['28', '39-45', '52-57', '63-66', '78', '24-27', '46-51']


45it [05:46,  7.93s/it]

['58-62', '52-57', '39-45', '29-31', '24-27']


46it [05:58,  8.82s/it]

['39-45', '46-51', '29-31', '78', '24-27']


48it [06:11,  7.77s/it]

['39-45', '52-57', '46-51', '24-27', '78']


50it [06:23,  7.16s/it]

['78', '32-35', '39-45', '46-51', '71-73', '84-95']


54it [06:35,  5.10s/it]

['32-35', '52-57', '78', '79', '84-95', '96-99']


56it [06:47,  5.40s/it]

['52-57', '58-62', '39-45', '24-27', '78', '36-38', '11-13', '00-10']


57it [07:00,  6.59s/it]

['24-27', '29-31', '32-35', '39-45', '46-51', '52-57', '58-62', '78']


58it [07:12,  7.63s/it]

['46-51', '71-73', '39-45', '78']


59it [07:25,  8.61s/it]

['00-10', '11-13', '52-57', '58-62', '63-66', '78', '24-27', '39-45', '71-73']


60it [07:37,  9.46s/it]

['78', '71-73', '52-57', '24-27', '46-51', '39-45', '32-35', '58-62']


64it [07:49,  5.83s/it]

['39-45', '46-51', '71-73', '78', '32-35']


65it [08:02,  6.90s/it]

['39-45', '46-51', '24-27', '78', '79']


66it [08:14,  7.93s/it]

['78', '29-31', '32-35', '24-27']


67it [08:25,  8.64s/it]

['39-45', '46-51', '52-57', '24-27', '78']


68it [08:37,  9.51s/it]

['29-31', '24-27', '52-57', '80-83', '96-99']


69it [08:50, 10.27s/it]

['32-35', '39-45', '46-51', '52-57', '58-62', '78', '79', '84-95']


72it [09:03,  7.23s/it]

['52-57', '24-27', '78', '39-45', '21-23', '46-51', '58-62', '63-66', '29-31']


73it [09:15,  8.22s/it]

['52-57', '39-45', '78', '46-51', '24-27']


74it [09:26,  8.86s/it]

['52-57', '58-62', '11-13', '24-27', '78']


76it [09:39,  7.79s/it]

['39-45', '46-51', '78']


77it [09:52,  8.97s/it]

['24-27', '39-45', '46-51', '58-62', '78']


78it [10:04,  9.70s/it]

['39-45', '24-27', '32-35', '29-31', '78']


80it [10:16,  8.31s/it]

['24-27', '39-45', '52-57', '58-62', '78']


82it [10:29,  7.53s/it]

['46-51', '39-45', '71-73', '78', '24-27']


83it [10:40,  8.30s/it]

['46-51', '39-45', '24-27', '78']


84it [10:53,  9.27s/it]

['46-51', '24-27', '78', '29-31', '32-35']


85it [11:05, 10.04s/it]

['46-51', '78', '36-38', '24-27', '79']


86it [11:17, 10.61s/it]

['32-35', '39-45', '78', '29-31']


88it [11:29,  8.70s/it]

['32-35', '58-62', '29-31', '24-27', '68-70', '80-83']


89it [11:42,  9.51s/it]

['32-35', '39-45', '46-51', '58-62', '78']


91it [11:54,  8.20s/it]

['78', '46-51', '39-45', '24-27', '36-38', '58-62', '32-35', '79', '84-95']


92it [12:06,  9.08s/it]

['32-35', '24-27', '39-45', '46-51', '78', '84-95', '96-99']


93it [12:19,  9.99s/it]

['39-45', '46-51', '78', '24-27']


96it [12:30,  6.79s/it]

['52-57', '24-27', '46-51', '78', '39-45']


97it [12:43,  8.02s/it]

['80-83', '46-51', '39-45', '84-95']


98it [12:55,  8.91s/it]

['39-45', '46-51', '24-27', '78', '79']


99it [13:08,  9.69s/it]

['39-45', '46-51', '24-27', '52-57', '28', '36-38']


100it [13:21, 10.55s/it]

['39-45', '46-51', '52-57', '78', '24-27']


101it [13:33, 10.99s/it]

['68-70', '71-73', '24-27', '39-45', '52-57', '58-62', '78', '79']


102it [13:45, 11.33s/it]

['39-45', '46-51', '24-27']


103it [13:58, 11.85s/it]

['46-51', '52-57', '39-45', '58-62', '24-27', '78']


104it [14:10, 11.70s/it]

['78', '46-51', '52-57', '24-27', '39-45', '58-62']


105it [14:21, 11.57s/it]

['78', '52-57', '24-27', '39-45', '58-62', '63-66']


106it [14:33, 11.78s/it]

['46-51', '78', '39-45']


107it [14:45, 11.95s/it]

['46-51', '52-57', '29-31', '78', '39-45', '00-10']


108it [14:58, 12.08s/it]

['46-51', '78', '39-45', '29-31', '36-38', '79']


110it [15:10,  9.40s/it]

['46-51', '39-45', '78']


111it [15:23, 10.18s/it]

['46-51', '29-31', '32-35', '00-10', '24-27', '39-45']


112it [15:34, 10.50s/it]

['46-51', '24-27', '78', '39-45']


113it [15:47, 10.97s/it]

['39-45', '78', '24-27', '46-51', '68-70', '80-83', '52-57', '36-38']


115it [15:59,  8.88s/it]

['39-45', '46-51', '32-35', '78', '24-27']


116it [16:11,  9.65s/it]

['46-51', '39-45', '24-27', '78', '79']


117it [16:23, 10.29s/it]

['52-57', '36-38', '11-13', '78', '24-27', '96-99']


119it [16:36,  8.63s/it]

['39-45', '46-51', '32-35', '78', '24-27']


120it [16:48,  9.43s/it]

['39-45', '24-27', '78', '46-51', '32-35']


121it [17:00, 10.10s/it]

['32-35', '39-45', '78', '84-95']


122it [17:12, 10.63s/it]

['52-57', '39-45', '58-62', '24-27', '46-51', '78']


123it [17:26, 11.39s/it]

['78', '46-51', '39-45', '68-70', '71-73', '84-95']


124it [17:39, 11.86s/it]

['46-51', '28', '78', '24-27', '39-45']


125it [17:50, 11.77s/it]

['39-45', '78', '52-57', '80-83', '24-27', '79']


126it [18:02, 11.64s/it]

['78', '32-35', '39-45', '24-27', '46-51', '79', '84-95', '96-99']


127it [18:14, 11.86s/it]

['39-45', '68-70', '58-62', '24-27', '78', '46-51']


128it [18:28, 12.50s/it]

['32-35', '78', '79', '46-51', '39-45', '84-95']


129it [18:41, 12.69s/it]

['39-45', '46-51', '52-57', '24-27', '78', '79']


130it [18:53, 12.37s/it]

['52-57', '39-45', '78', '58-62', '24-27', '68-70', '32-35', '46-51', '11-13']


131it [19:05, 12.38s/it]

['46-51', '32-35', '79', '24-27', '36-38', '39-45']


132it [19:18, 12.62s/it]

['46-51', '52-57', '24-27', '39-45', '78']


133it [19:30, 12.20s/it]

['46-51', '39-45', '78']


134it [19:42, 12.16s/it]

['46-51', '52-57', '24-27', '39-45', '78']


136it [19:55,  9.66s/it]

['39-45', '46-51', '52-57', '78', '24-27', '32-35', '68-70']


137it [20:07, 10.10s/it]

['78', '39-45', '52-57', '24-27', '36-38', '46-51', '29-31', '32-35']


140it [20:19,  6.98s/it]

['39-45', '46-51', '21-23', '78', '24-27', '32-35']


141it [20:31,  8.04s/it]

['52-57', '39-45', '58-62', '78', '24-27', '36-38']


142it [20:43,  8.98s/it]

['46-51', '39-45', '24-27', '78', '00-10']


143it [20:55,  9.73s/it]

['39-45', '46-51', '78', '79']


144it [21:08, 10.36s/it]

['39-45', '46-51', '24-27', '78', 'V88-V88']


145it [21:21, 11.11s/it]

['78', '80-83', '52-57', '32-35', '29-31', '58-62', '84-95']


146it [21:31, 10.85s/it]

['39-45', '46-51', '78', '24-27']


147it [21:45, 11.70s/it]

['46-51', '39-45', '78', '24-27', '79']


148it [21:57, 11.68s/it]

['71-73', '46-51', '80-83', '39-45', '29-31', '32-35', '84-95']


149it [22:11, 12.47s/it]

['24-27', '39-45', '46-51', '68-70', '71-73', '78', '80-83']


150it [22:23, 12.41s/it]

['52-57', '39-45', '24-27', '78', '58-62', '46-51']


151it [22:36, 12.40s/it]

['39-45', '46-51', '78', '24-27', '79']


153it [22:48,  9.52s/it]

['78', '29-31', '32-35', '58-62', '24-27']


154it [23:00, 10.21s/it]

['52-57', '39-45', '46-51', '24-27', '36-38', '58-62']


155it [23:12, 10.76s/it]

['78', '39-45', '46-51', '52-57', '32-35', '29-31', '00-10']


156it [23:26, 11.50s/it]

['68-70', '39-45', '24-27', '28', '46-51', '78', '11-13', '32-35']


157it [23:38, 11.69s/it]

['52-57', '78', '39-45', '24-27', '58-62', '46-51', '84-95']


158it [23:51, 12.06s/it]

['46-51', '52-57', '24-27', '39-45', '78']


160it [24:03,  9.36s/it]

['14-20', '21-23', '46-51', '58-62', '78']


161it [24:15, 10.06s/it]

['28', '52-57', '39-45', '24-27', 'V40-V49', '21-23', '58-62', '11-13', '14-20']


162it [24:28, 10.59s/it]

['32-35', '78', '39-45', '79', '29-31']


164it [24:41,  8.89s/it]

['39-45', '78', '46-51', '52-57', '24-27', '29-31', '32-35', '36-38']


165it [24:53,  9.57s/it]

['39-45', '46-51', '78', '96-99']


167it [25:06,  8.40s/it]

['39-45', '32-35', '71-73', '68-70', '78', '58-62', '46-51', '29-31', '24-27']


168it [25:18,  9.25s/it]

['52-57', '39-45', '24-27', '78', '29-31', '32-35', '14-20']


169it [25:30,  9.95s/it]

['58-62', '63-66', '67', '24-27', '78', '79']


170it [25:42, 10.53s/it]

['24-27', '29-31', '32-35', '39-45', '58-62', '78']


171it [25:55, 11.07s/it]

['39-45', '71-73', '24-27', '78', '46-51', '52-57', '58-62', '67', '63-66', '79']


172it [26:07, 11.42s/it]

['78', '52-57', '36-38', '24-27']


173it [26:21, 11.97s/it]

['00-10', '46-51', '78', '39-45', '24-27']


174it [26:34, 12.43s/it]

['39-45', '78', '32-35', '46-51', '52-57', '24-27', '84-95']


175it [26:45, 12.08s/it]

['46-51', '39-45', '78', '79']


176it [26:58, 12.14s/it]

['52-57', '36-38', '39-45', '24-27', '78', '46-51', '32-35', '79']


177it [27:10, 12.16s/it]

['39-45', '46-51', '78', '52-57', '24-27']


178it [27:22, 12.16s/it]

['52-57', '39-45', '28', '46-51', '24-27', '63-66', '58-62']


180it [27:34,  9.40s/it]

['39-45', '46-51', '24-27', '52-57', '58-62', '78', '79']


181it [27:47, 10.23s/it]

['46-51', '78', '68-70', '71-73']


182it [27:59, 10.53s/it]

['71-73', '78', '21-23', '39-45', '80-83', '58-62', '32-35', '29-31', 'V87-V87']


183it [28:12, 11.27s/it]

['32-35', '78', '29-31', '79', '71-73']


184it [28:24, 11.57s/it]

['78', '29-31', '52-57', '24-27', '39-45', '46-51', '68-70', '32-35']


185it [28:36, 11.78s/it]

['52-57', '39-45', '46-51', '24-27', '78']


186it [28:49, 11.93s/it]

['29-31', '39-45', '32-35', '78', '24-27']


187it [29:00, 11.75s/it]

['78', '46-51', '52-57', '21-23', '39-45', '00-10']


188it [29:12, 11.89s/it]

['52-57', '28', '46-51', '78', '79']


189it [29:26, 12.33s/it]

['32-35', '39-45', '78', '84-95', '96-99']


190it [29:37, 12.02s/it]

['46-51', '32-35', '78', '24-27', 'V88-V88', 'V40-V49']


193it [29:49,  7.57s/it]

['32-35', '39-45', '78', '24-27', '84-95']


194it [30:02,  8.68s/it]

['39-45', '46-51', '52-57', '24-27', '78']


199it [30:13,  4.83s/it]

['78', '32-35', '71-73', '39-45', '84-95']


200it [30:26,  5.92s/it]

['32-35', '78', '29-31', '24-27', '46-51', '39-45', '84-95']


200it [30:38,  9.19s/it]

['39-45', '46-51', '78', '24-27', '32-35']
Number of rows : 154
[['24-27', '14-20'], ['24-27', '52-57', '46-51', '74-75', '32-35', '39-45', 'V40-V49', '29-31'], ['28', '84-95', '52-57', '96-99', '46-51', '39-45', '00-10'], ['24-27', '28', '96-99', '52-57', '58-62', '39-45', '29-31'], ['24-27', '28', '78', '58-62', '39-45', 'V40-V49'], ['24-27', '78', '32-35', '39-45', '29-31'], ['78', '52-57', '96-99', '58-62', '29-31', '00-10'], ['24-27', '78', '46-51', '39-45', '00-10'], ['24-27', '78', '29-31', '39-45'], ['78', '96-99', '46-51', '39-45', '14-20', '00-10'], ['28', '11-13', '96-99', '46-51', '58-62', '39-45', '00-10'], ['24-27', '52-57', '46-51', '58-62', '39-45', '00-10'], ['46-51', '28', '58-62', '39-45'], ['32-35', '84-95'], ['24-27', '28', '11-13', '36-38', '39-45', '21-23'], ['24-27', '78', '71-73', '84-95', '96-99', '39-45'], ['24-27', '28', '71-73', '78', '96-99', '46-51', '79', '58-62', '68-70', '29-31'], ['24-27', '28', '78', '52-57', '46-51', '79', '58-62', '68-70', '39-45',




In [1123]:
metrics_calc(type = 'more than 3 symptoms',method='rag', model_name= 'groq')

1it [00:08,  8.26s/it]

['14-20', '78', '46-51', '39-45', '52-57', '29-31', 'V40-V49', '24-27', '11-13']


3it [00:17,  5.55s/it]

['46-51', '28', '78', '24-27', '39-45', '79', '14-20', '29-31']


5it [00:26,  5.06s/it]

['11-13', '39-45', '52-57', '14-20', '71-73', '79']


6it [00:34,  5.89s/it]

['39-45', '46-51', '28', '24-27', '78']


7it [00:42,  6.51s/it]

['78', '14-20', '39-45', '46-51', '24-27', '52-57', '68-70', '28', 'V40-V49']


8it [00:52,  7.29s/it]

['78', '96-99', '46-51', '39-45', '29-31', '28', '24-27', '14-20', '71-73', '32-35']


11it [01:01,  4.96s/it]

['52-57', '24-27', '14-20', '39-45', '78', '46-51']


12it [01:11,  6.06s/it]

['39-45', '46-51', '28', '24-27', '78']


13it [01:20,  6.77s/it]

['00-10', '24-27', '28', '39-45', '52-57', '58-62', '68-70', '71-73', '78', '79', '96-99']


16it [01:29,  4.93s/it]

['14-20', 'V40-V49', '29-31', '78', '46-51', '96-99', '39-45', 'V10-V19', '52-57', '36-38', '28', '24-27', '11-13']


17it [01:39,  5.75s/it]

['39-45', '78', '71-73', '58-62', '32-35', '24-27', '84-95', '46-51', '79', '14-20']


18it [01:47,  6.26s/it]

['78', '39-45', '24-27', '46-51', '52-57', '11-13', '29-31', '79']


19it [01:56,  6.90s/it]

['78', '46-51', 'V40-V49', '96-99']


20it [02:04,  7.21s/it]

['32-35', '39-45', '14-20', '46-51', '78', '80-83', '28']


21it [02:13,  7.72s/it]

['39-45', '46-51', '24-27', '11-13', '29-31', '78']


22it [02:23,  8.36s/it]

['78', '29-31', '39-45', '46-51', '96-99', '71-73', '68-70', '52-57', '24-27', '11-13']


23it [02:31,  8.32s/it]

['78', '39-45', '71-73', '24-27', '79', '46-51']


24it [02:41,  8.62s/it]

['78', '46-51', '29-31', '32-35', '52-57', '36-38', '24-27', '39-45', 'V40-V49', 'V10-V19']


25it [02:47,  7.93s/it]

['36-38', '21-23', '11-13', '14-20']


26it [02:55,  8.01s/it]

['78', '46-51', '39-45', '32-35', '29-31', '24-27', '14-20']


28it [03:03,  6.25s/it]

['V40-V49', '78', '46-51', '58-62', '39-45', '52-57', '24-27', '11-13', '28']


30it [03:12,  5.45s/it]

['78', '46-51', '39-45', '24-27', '68-70', '79', '28', '84-95']


33it [03:20,  4.25s/it]

['52-57', '39-45', '78', '11-13', '24-27', '36-38', '79', '84-95']


34it [03:29,  4.98s/it]

['14-20', '39-45', '58-62', '32-35', 'V40-V49', '24-27', '29-31', '78', '46-51', 'V50-V59', 'V10-V19', '52-57', '36-38']


35it [03:38,  5.88s/it]

['46-51', '39-45', '32-35', '68-70', '78', '58-62', '24-27']


36it [03:46,  6.42s/it]

['24-27', '46-51', '78', '28', '39-45', '11-13']


37it [03:55,  6.87s/it]

['32-35', '39-45', '78', '79', '80-83', '96-99']


38it [04:04,  7.45s/it]

['78', '32-35', '39-45', '52-57', '29-31', '24-27', '36-38', '28']


39it [04:12,  7.65s/it]

['78', '46-51', '39-45', '24-27', '52-57', '29-31', 'V40-V49', 'V50-V59']


40it [04:21,  8.06s/it]

['39-45', '46-51', '52-57', '58-62', '78', '79', '11-13', '14-20', '21-23', '24-27']


41it [04:30,  8.34s/it]

['78', '11-13', '24-27', '39-45', '52-57', '68-70']


43it [04:38,  6.45s/it]

['28', '39-45', '46-51', '52-57', '78', '96-99']


45it [04:47,  5.59s/it]

['78', '29-31', '52-57', '39-45', '21-23', '24-27', '58-62', '11-13', '14-20']


46it [04:56,  6.35s/it]

['78', 'V40-V49', '46-51', '39-45', '24-27', 'V60-V69', '28', '79']


48it [05:04,  5.47s/it]

['39-45', '24-27', '52-57', '36-38', '46-51', '78', '79']


50it [05:13,  5.15s/it]

['14-20', '39-45', '46-51', '71-73', '78', '32-35', '68-70', '24-27', '79', '96-99']


54it [05:22,  3.70s/it]

['78', '32-35', '46-51', 'V50-V59', '39-45', '24-27', '96-99']


56it [05:31,  3.88s/it]

['52-57', '39-45', '14-20', '46-51', '78', '24-27', '29-31', '58-62', '36-38']


57it [05:39,  4.56s/it]

['14-20', 'V40-V49', '29-31', '78', '46-51', '96-99', '32-35', '39-45', '52-57', '28', '79', '24-27']


58it [05:49,  5.50s/it]

['39-45', '46-51', '71-73', '78']


59it [05:57,  6.07s/it]

['78', '24-27', '39-45', '52-57', '58-62', '71-73', '28', '14-20', '46-51', '96-99']


60it [06:06,  6.67s/it]

['78', '59-59', '52-57', '29-31', '24-27', '21-23', '14-20']


64it [06:14,  4.05s/it]

['39-45', '71-73', '46-51', '78', '29-31', 'V40-V49', '24-27']


65it [06:22,  4.77s/it]

['39-45', '46-51', '28', '71-73', '24-27', '79']


66it [06:31,  5.50s/it]

['78', '29-31', '32-35', '46-51', '11-13', '24-27']


67it [06:46,  7.66s/it]

['39-45', '52-57', '71-73', '78', 'V40-V49']


68it [06:47,  6.05s/it]

['V40-V49', '29-31', '78', '96-99', '71-73', '39-45', '80-83', 'V60-V69', '52-57', '28', '79', '24-27', '00-10']


69it [06:57,  7.13s/it]

['78', '32-35', '39-45', '29-31', '46-51', '96-99', '79', '28', '24-27', '79']


72it [07:06,  4.87s/it]

['52-57', '39-45', '24-27', '78', '32-35']


73it [07:15,  5.75s/it]

['39-45', '46-51', '28', '52-57', '24-27', '78']


74it [07:23,  6.32s/it]

['78', '52-57', '24-27', '36-38', '58-62', '39-45', '28', '11-13']


76it [07:32,  5.47s/it]

['39-45', '78', '46-51', '71-73', '29-31']


77it [07:41,  6.30s/it]

['29-31', '78', '39-45', '32-35', '52-57', '79', '28', '46-51', '71-73', '58-62']


78it [07:50,  7.01s/it]

['39-45', '28', '24-27', '46-51', 'V40-V49']


80it [07:59,  5.87s/it]

['11-13', '24-27', '28', '39-45', '52-57', '68-70', '78', 'V40-V49', 'V60-V69']


82it [08:07,  5.23s/it]

['78', '46-51', '71-73', '39-45', '24-27', '11-13', 'V40-V49', 'V10-V19']


83it [08:16,  6.05s/it]

['46-51', '28', '39-45', '78', '96-99', '79', '24-27']


84it [08:24,  6.57s/it]

['78', '46-51', '24-27', '11-13', '28', '39-45', '79', '29-31', 'V40-V49', 'V10-V19']


85it [08:33,  7.18s/it]

['46-51', '78', '29-31', '39-45', '24-27', '28']


86it [08:42,  7.43s/it]

['32-35', '39-45', '46-51', '52-57', '78']


88it [08:51,  6.21s/it]

['78', '21-23', '39-45', '58-62', '32-35', '29-31', '14-20', '96-99', '24-27', '79']


89it [08:59,  6.70s/it]

['78', '39-45', '46-51', '32-35', '29-31', '71-73', '24-27', '96-99', 'V40-V49']


91it [09:08,  5.87s/it]

['78', '46-51', '39-45', '24-27', '79', '96-99', '84-95']


92it [09:16,  6.38s/it]

['29-31', '78', '96-99', '32-35', '39-45', '80-83', '24-27', '28', '79']


93it [09:26,  7.05s/it]

['39-45', '52-57', '46-51', '36-38', '29-31', '78']


96it [09:34,  4.83s/it]

['52-57', '39-45', '24-27', '78', '11-13', 'V10-V19', '58-62', '28']


97it [09:43,  5.74s/it]

['46-51', '24-27', '39-45', '78', '21-23', '32-35', '84-95', '96-99']


98it [09:51,  6.29s/it]

['39-45', '46-51', '28', '24-27', '11-13']


99it [10:01,  7.00s/it]

['39-45', '46-51', '28', '52-57', '11-13', '24-27', '78']


100it [10:09,  7.36s/it]

['39-45', '52-57', '71-73', '29-31', '46-51', '78', '14-20', 'V40-V49', '32-35', '68-70']


101it [10:19,  7.97s/it]

['39-45', '24-27', 'V40-V49', '78', '58-62', '46-51', '68-70', '28', '79', '14-20', '84-95']


102it [10:27,  8.05s/it]

['39-45', '28', '46-51', '11-13', '24-27', '79', 'V10-V19', 'V40-V49']


103it [10:36,  8.39s/it]

['78', '46-51', '39-45', '52-57', '24-27', '14-20', 'V40-V49']


104it [10:45,  8.39s/it]

['78', '39-45', '24-27', '46-51', '52-57', 'V40-V49']


105it [10:53,  8.35s/it]

['52-57', '39-45', '24-27', '11-13', '28', '78', '14-20', '58-62']


106it [11:01,  8.38s/it]

['46-51', '14-20', '24-27', '78', '79', '29-31', '96-99']


107it [11:11,  8.71s/it]

['78', '29-31', '46-51', '52-57', '39-45', '11-13', '24-27', '28']


108it [11:19,  8.59s/it]

['78', '46-51', '39-45', '24-27', '11-13', 'V40-V49']


110it [11:28,  6.55s/it]

['39-45', '46-51', '24-27', '28', '78']


111it [11:36,  7.01s/it]

['32-35', '39-45', '46-51', '78', '79', '24-27', '29-31', '14-20']


112it [11:44,  7.31s/it]

['28', '78', '46-51', '24-27', '79', '39-45', 'V40-V49']


113it [11:53,  7.83s/it]

['39-45', '46-51', '14-20', '24-27', '28', '39-45', '58-62', '52-57', '68-70', '71-73', '78', '79']


115it [12:02,  6.41s/it]

['78', '46-51', '39-45', '32-35', '11-13', '24-27', '52-57', '68-70']


116it [12:11,  6.84s/it]

['78', '24-27', '39-45', '46-51', '79']


117it [12:20,  7.46s/it]

['52-57', '21-23', '39-45', '24-27', '78', '14-20', '79']


119it [12:29,  6.12s/it]

['39-45', '46-51', '29-31', '78', 'V10-V19', 'V40-V49', '24-27', '36-38']


120it [12:37,  6.59s/it]

['39-45', '24-27', '78', '28', '46-51', 'V40-V49', 'V10-V19', '79']


121it [12:44,  6.71s/it]

['38-39', '21-23', '39-45', '67', '79']


122it [12:52,  7.10s/it]

['52-57', '39-45', '24-27', '71-73', '58-62', '14-20', '78', '79']


123it [13:01,  7.69s/it]

['39-45', '46-51', '28', '78', '24-27', '79', '11-13', '14-20', '68-70', '32-35']


124it [13:10,  8.12s/it]

['78', '46-51', '29-31', '96-99', '39-45', 'V40-V49', '11-13', '24-27', '14-20', '28']


125it [13:19,  8.12s/it]

['78', '79', '24-27', '39-45', '52-57', '68-70', '71-73', '80-83']


126it [13:28,  8.43s/it]

['28', '29-31', '32-35', '39-45', '46-51', '52-57', '71-73', '78', '79', '84-95', '96-99']


127it [13:36,  8.37s/it]

['39-45', '46-51', '58-62', '68-70', '78', 'V10-V19', 'V40-V49', '79']


128it [13:45,  8.64s/it]

['78', '32-35', '39-45', '29-31', '24-27', '46-51', '14-20', '21-23', '68-70', 'V40-V49']


129it [13:56,  9.19s/it]

['78', '24-27', '71-73', '46-51', '39-45', '52-57', '28', '11-13', '14-20', '58-62']


130it [14:04,  8.94s/it]

['78', '58-62', '39-45', '52-57', '28', '11-13', '24-27', '00-10', '79']


131it [14:13,  9.05s/it]

['46-51', '78', '29-31', '96-99', '32-35', '39-45', '52-57', '28', '24-27', '46-51']


132it [14:23,  9.18s/it]

['52-57', '46-51', '39-45', '14-20', '28', '68-70', '78', 'V40-V49', 'V50-V59', '96-99']


133it [14:31,  8.91s/it]

['39-45', '46-51', '78', '24-27', '14-20', '29-31']


134it [14:39,  8.71s/it]

['78', '52-57', '39-45', '46-51', '24-27', '00-10', '21-23', '28', '36-38']


136it [14:49,  6.83s/it]

['39-45', '46-51', '24-27', '52-57', '78', '29-31', '32-35', '58-62']


137it [14:57,  7.23s/it]

['39-45', '71-73', 'V40-V49', '29-31', '78', '46-51', '96-99', '58-62', '32-35', 'V50-V59', '68-70', 'V10-V19', '52-57', '36-38', '28', '79', '24-27', '00-10']


140it [15:06,  5.08s/it]

['46-51', '39-45', '24-27', 'V78-V78', '28', '78', 'V10-V19', 'V40-V49']


141it [15:15,  5.69s/it]

['28', '52-57', '21-23', '39-45', '78', '14-20']


142it [15:23,  6.26s/it]

['46-51', '14-20', '39-45', '11-13', '78', '24-27', '28', 'V40-V49']


143it [15:32,  6.97s/it]

['39-45', '46-51', '24-27', '78', 'V40-V49', 'V60-V69']


144it [15:40,  7.26s/it]

['46-51', '39-45', '24-27', '28', '79']


145it [15:48,  7.50s/it]

['80-83', '14-20', '96-99', '46-51', '28', '78', '79', '24-27', '84-95']


146it [15:58,  8.03s/it]

['39-45', '46-51', '78', 'V40-V49', 'V70-V82']


147it [16:07,  8.32s/it]

['39-45', '46-51', '78', '68-70', '24-27', '11-13']


148it [16:15,  8.29s/it]

Here is the output:

['78', '39-45', '46-51', '71-73', '84-95']


149it [16:25,  8.84s/it]

['14-20', '46-51', '39-45', '78', '71-73', '24-27', '11-13', '28']


150it [16:34,  8.95s/it]

['78', '79', '52-57', '39-45', '28', '24-27', '68-70', '32-35', '46-51', '80-83']


151it [16:44,  9.06s/it]

['78', '39-45', '46-51', '24-27', '29-31', 'V40-V49', 'V60-V69']


153it [16:52,  6.83s/it]

['14-20', '29-31', '78', '46-51', '96-99', '71-73', '58-62', '32-35', '39-45', 'V10-V19', '52-57', '28', '79', '11-13', '24-27', '00-10']


154it [17:01,  7.46s/it]

['52-57', '39-45', '28', '24-27', '46-51', '79', '78', '11-13', '29-31']


155it [17:11,  7.90s/it]

['78', '24-27', '39-45', '46-51', '29-31', '52-57', '79', '32-35', '74-75', '84-95', '96-99']


156it [17:20,  8.23s/it]

['14-20', '39-45', '46-51', '58-62', '68-70', '71-73', '78', '79', '96-99']


157it [17:29,  8.48s/it]

['00-10', '24-27', '28', '39-45', '52-57', '78', '80-83', '84-95', '96-99']


158it [17:50, 12.16s/it]

['39-45', '78', '46-51', '28', '52-57', '24-27', 'V40-V49']


160it [17:51,  6.89s/it]

['14-20', '21-23', 'V40-V49', '78', '46-51', '32-35', '39-45', 'V10-V19', '28', '11-13', '24-27', '52-57']


161it [17:56,  6.47s/it]

['52-57', '28', '39-45', '58-62', '21-23', '46-51', '52-57', '14-20']


162it [18:04,  6.91s/it]

['32-35', '39-45', '28', '78', '24-27']


164it [18:14,  5.98s/it]

['V40-V49', '78', '39-45', '46-51', '96-99', '32-35', 'V50-V59', '68-70', '28', '79', '11-13', '24-27', '00-10']


165it [18:22,  6.51s/it]

['39-45', '46-51', '24-27', '36-38', '79', '78', 'V40-V49']


167it [18:32,  5.81s/it]

['39-45', '29-31', '71-73', '46-51', '96-99', '32-35', '14-20', '58-62', '68-70', '78', '80-83', 'V10-V19', '52-57', '28', '79', '11-13', '24-27', '00-10']


168it [18:41,  6.58s/it]

['78', '39-45', '71-73', '52-57', '46-51', '29-31', '32-35', '11-13', '24-27', '14-20']


169it [18:50,  7.27s/it]

['58-62', '14-20', '78', '32-35', '39-45', '28', '24-27', '52-57']


170it [18:59,  7.52s/it]

['29-31', '39-45', '46-51', '52-57', '58-62', '78', '79', '96-99']


171it [19:07,  7.80s/it]

['39-45', '46-51', 'V40-V49', 'V50-V59', '68-70', '78', '79', '28', '24-27', '52-57']


172it [19:15,  7.91s/it]

['52-57', '24-27', '39-45', '78', '14-20', '46-51', '11-13', '79']


173it [19:26,  8.60s/it]

['46-51', '39-45', '24-27', '78', '14-20', '28', '96-99']


174it [19:36,  9.13s/it]

['39-45', '29-31', '78', '46-51', '96-99', '71-73', '58-62', '32-35', 'V50-V59', '68-70', '39-45', 'V60-V69', 'V10-V19', '52-57', '36-38', '28', '79', '24-27']


175it [19:44,  8.86s/it]

['46-51', '39-45', '14-20', '78', '29-31']


176it [19:53,  8.67s/it]

['52-57', '39-45', '24-27', '28', '78', '46-51', '96-99', '58-62', '68-70', '79']


177it [20:02,  8.82s/it]

['39-45', '46-51', '24-27', '52-57', '78', 'V40-V49']


178it [20:10,  8.60s/it]

['52-57', '39-45', '28', '24-27', '46-51', '58-62', '78', '32-35']


180it [20:19,  6.74s/it]

['39-45', '46-51', '78', '52-57', '28', '24-27', '96-99']


181it [20:28,  7.31s/it]

['46-51', '24-27', '14-20', '71-73', '28', '78', '39-45', '52-57']


182it [20:34,  6.98s/it]

['71-73', '78', '36-38', '24-27', '29-31']


183it [20:43,  7.56s/it]

['14-20', '78', '32-35', '39-45', '80-83', '28', '24-27', '79', '14-20', '68-70', '96-99']


184it [20:52,  8.01s/it]

['78', '39-45', '24-27', '28', '29-31', '52-57', '58-62', '68-70', '79', '80-83', '84-95', 'V40-V49', 'V50-V59']


185it [21:01,  8.08s/it]

['11-13', '78', '52-57', '39-45', '24-27', '36-38', '46-51', 'V10-V19']


186it [21:10,  8.40s/it]

['39-45', '78', '32-35', '28', '24-27', '46-51']


187it [21:18,  8.39s/it]

['78', '21-23', '39-45', '52-57', '24-27', '36-38', '11-13']


188it [21:26,  8.33s/it]

['28', '78', '46-51', '24-27', '39-45', '52-57', '28', '79']


189it [21:36,  8.63s/it]

['32-35', '39-45', '46-51', '52-57', '78', '79', 'V40-V49', 'V50-V59', 'V60-V69']


190it [21:42,  7.90s/it]

['39-45', '21-23', '11-13', '78', '14-20', '46-51']


193it [21:51,  5.17s/it]

['78', '32-35', '39-45', '79', '24-27', '11-13', '68-70']


194it [21:58,  5.64s/it]

['57-57', '52-57', '786-79', '21-23', '46-51', '39-45', '14-20', '24-27']


199it [22:07,  3.36s/it]

['78', '32-35', '39-45', '52-57', '68-70', '71-73', '79', '84-95', '96-99']


200it [41:50, 176.10s/it]

['78', '32-35', '68-70', '39-45', '79', '24-27', '11-13']


200it [41:51, 12.56s/it] 

['39-45', '46-51', '52-57', '24-27', '78', '79', 'V10-V19', 'V40-V49']
Number of rows : 154
[['24-27', '14-20'], ['24-27', '52-57', '46-51', '74-75', '32-35', '39-45', 'V40-V49', '29-31'], ['28', '84-95', '52-57', '96-99', '46-51', '39-45', '00-10'], ['24-27', '28', '96-99', '52-57', '58-62', '39-45', '29-31'], ['24-27', '28', '78', '58-62', '39-45', 'V40-V49'], ['24-27', '78', '32-35', '39-45', '29-31'], ['78', '52-57', '96-99', '58-62', '29-31', '00-10'], ['24-27', '78', '46-51', '39-45', '00-10'], ['24-27', '78', '29-31', '39-45'], ['78', '96-99', '46-51', '39-45', '14-20', '00-10'], ['28', '11-13', '96-99', '46-51', '58-62', '39-45', '00-10'], ['24-27', '52-57', '46-51', '58-62', '39-45', '00-10'], ['46-51', '28', '58-62', '39-45'], ['32-35', '84-95'], ['24-27', '28', '11-13', '36-38', '39-45', '21-23'], ['24-27', '78', '71-73', '84-95', '96-99', '39-45'], ['24-27', '28', '71-73', '78', '96-99', '46-51', '79', '58-62', '68-70', '29-31'], ['24-27', '28', '78', '52-57', '46-51', '79'




Macro Averaged AUROC Score: 0.5109374285884725
Micro Averaged Precision: 0.20837487537387836
Micro Averaged Recall: 0.40543161978661496
Micro Averaged F1-Score: 0.2752716496542641
Macro Averaged Precision: 0.1178654710859196
Macro Averaged Recall: 0.20021469407742604
Macro Averaged F1-Score: 0.11538564445445447