diff --git a/README.md b/README.md index abc5879..269d01a 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ The following additional libraries need to be installed in order to use the prog + scipy (v1.10.1) + scikit-learn (v1.3.0) -If a newer scikit-learn version is used the models provided should still work (they were created using the version v0.19.1 and the import was tested with v0.21.3 and v0.22.2). +If a newer scikit-learn version is used, it is advised to create a new model with that scikit-learn version. ## Annotation resources and tools diff --git a/aidiva/helper_modules/convert_indels_to_snps_and_create_vcf.py b/aidiva/helper_modules/convert_indels_to_snps_and_create_vcf.py index 66c314d..b159325 100644 --- a/aidiva/helper_modules/convert_indels_to_snps_and_create_vcf.py +++ b/aidiva/helper_modules/convert_indels_to_snps_and_create_vcf.py @@ -5,27 +5,31 @@ import random +RANDOM_SEED = 14038 + logger = logging.getLogger(__name__) def write_data_information_to_file(input_data, outfile, ref_sequence, header): - data_grouped = [group for key, group in input_data.groupby("CHROM")] + data_grouped = [group for key, group in input_data.groupby("#CHROM")] in_fasta = pysam.FastaFile(ref_sequence) - random.seed(14038) + random.seed(RANDOM_SEED) for line in header: if line.startswith("#CHROM"): outfile.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n") + else: outfile.write(line) for group in data_grouped: - if "chr" in str(group["CHROM"].iloc[0]): - chrom_id = str(group["CHROM"].iloc[0]) + if "chr" in str(group["#CHROM"].iloc[0]): + chrom_id = str(group["#CHROM"].iloc[0]) + else: - chrom_id = "chr" + str(group["CHROM"].iloc[0]) + chrom_id = "chr" + str(group["#CHROM"].iloc[0]) - for row in group.itertuples(): + for row in group.itertuples(index=False): # make sure that the window starts at the beginning of the reference sequence window_start = max(int(row.POS) - 3, 1) @@ -37,47 +41,51 @@ def write_data_information_to_file(input_data, outfile, ref_sequence, header): alt_variant = "" if (extended_ref_seq[i] == "A") or (extended_ref_seq[i] == "T"): alt_variant = random.choice(["G", "C"]) + elif (extended_ref_seq[i] == "G") or (extended_ref_seq[i] == "C"): alt_variant = random.choice(["A", "T"]) + elif (extended_ref_seq[i] == "N"): logger.debug("Reference base was skipped because it was 'N'!") continue + else: logger.error("The given reference sequence seems to be corrupted!") - outfile.write(str(row.CHROM).strip() + "\t" + str(window_start + i + 1).strip() + "\t" + "." + "\t" + str(extended_ref_seq[i]).strip() + "\t" + str(alt_variant).strip() + "\t" + "." + "\t" + "." + "\t" + str(row.INFO).strip() + "\n") + outfile.write(str(chrom_id).strip() + "\t" + str(window_start + i + 1).strip() + "\t" + "." + "\t" + str(extended_ref_seq[i]).strip() + "\t" + str(alt_variant).strip() + "\t" + "." + "\t" + "."
+ "\t" + str(row.INFO).strip() + "\n") def import_vcf_data(in_data): header_line = "" comment_lines = [] + # WARNING: reset file pointer to begin after reading header to start processing from the start again (is done by closing the file before reading again) with open(in_data, "r") as input_vcf: # extract header from vcf file for line in input_vcf: if line.strip().startswith("##"): comment_lines.append(line) - if line.strip().startswith("#CHROM"): + + elif line.strip().startswith("#CHROM"): header_line = line.strip() comment_lines.append(line) break # now the variant entries are coming + else: continue if header_line == "": logger.error("The VCF seems to be corrupted, missing header line!") - # reset file pointer to begin reading at the beginning (is done by closing the file before reading again) - data = pd.read_csv(in_data, names=header_line.split("\t"), sep="\t", comment="#", low_memory=False) data.fillna(".", inplace=True) - data = data.rename(columns={"#CHROM": "CHROM"}) return data, comment_lines def convert_indel_vcf_to_expanded_indel_vcf(in_data, out_data, ref_folder): input_data, header = import_vcf_data(in_data) + with open(out_data, "w", newline="") as outfile: write_data_information_to_file(input_data, outfile, ref_folder, header) diff --git a/aidiva/helper_modules/convert_vcf_to_csv.py b/aidiva/helper_modules/convert_vcf_to_csv.py index fc8848b..e3e6077 100644 --- a/aidiva/helper_modules/convert_vcf_to_csv.py +++ b/aidiva/helper_modules/convert_vcf_to_csv.py @@ -2,6 +2,7 @@ import logging import multiprocessing as mp import numpy as np +import os import pandas as pd import tempfile @@ -77,11 +78,11 @@ "RF_SCORE", "SpliceAI", "oe_lof", + "oe_mis", + "oe_syn", "SegDup", "SimpleRepeats", "CLINVAR_DETAILS", - #"DETAILS", - #"CLASS", "HGMD_CLASS", "HGMD_RANKSCORE", "OMIM", @@ -96,17 +97,20 @@ def reformat_vcf_file_and_read_into_pandas_and_extract_header(filepath): with open(filepath, "r") as vcf_file_to_reformat, tempfile.NamedTemporaryFile(mode="w+") as tmp: # make sure that there are no unwanted linebreaks in the variant entries - tmp.write(vcf_file_to_reformat.read().replace(r"(\n(?!((((((chr)?[0-9]{1,2}|(chr)?[xXyY]{1}|(chr)?(MT|mt){1})\t)(.+\t){6,}(.+(\n|\Z))))|(#{1,2}.*(\n|\Z))|(\Z))))", "")) - tmp.seek(0) + ## TODO: remove regular expression, should not be needed anymore + #tmp.write(vcf_file_to_reformat.read().replace(r"(\n(?!((((((chr)?[0-9]{1,2}|(chr)?[xXyY]{1}|(chr)?(MT|mt){1})\t)(.+\t){6,}(.+(\n|\Z))))|(#{1,2}.*(\n|\Z))|(\Z))))", "")) + #tmp.seek(0) # extract header from vcf file - for line in tmp: + for line in vcf_file_to_reformat: if line.strip().startswith("##"): comment_lines.append(line.strip()) - if line.strip().startswith("#CHROM"): + + elif line.strip().startswith("#CHROM"): header_line = line.strip() comment_lines.append(header_line) break # now the variant entries are coming + else: continue @@ -116,8 +120,6 @@ def reformat_vcf_file_and_read_into_pandas_and_extract_header(filepath): vcf_header = header_line.strip().split("\t") vcf_as_dataframe = pd.read_csv(tmp.name, names=vcf_header, sep="\t", comment="#", low_memory=False) - #vcf_as_dataframe = vcf_as_dataframe.rename(columns={"#CHROM": "CHROM"}) - return comment_lines, vcf_as_dataframe @@ -171,6 +173,8 @@ def extract_columns(cell, process_indel): spliceAI = np.nan spliceAI_raw = [] oe_lof = np.nan + oe_mis = np.nan + oe_syn = np.nan simpleRepeat = "" clinvar_details = "" hgmd_class = "" @@ -179,9 +183,6 @@ def extract_columns(cell, process_indel): annotation = "" repeat_masker = "" - #details = "" - 
#class_orig = "" - for field in info_fields: field_splitted = field.split("=") @@ -202,132 +203,154 @@ def extract_columns(cell, process_indel): elif field_splitted[0] == "FATHMM_XF": if "&" in field_splitted[1]: fathmm_xf = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: fathmm_xf = float(field_splitted[1]) elif field_splitted[0] == "CONDEL": if "&" in field_splitted[1]: condel = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: condel = float(field_splitted[1]) elif field_splitted[0] == "EIGEN_PHRED": if "&" in field_splitted[1]: eigen_phred = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: eigen_phred = float(field_splitted[1]) elif field_splitted[0] == "MutationAssessor": if "&" in field_splitted[1]: mutation_assessor = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: mutation_assessor = float(field_splitted[1]) elif field_splitted[0] == "REVEL": if "&" in field_splitted[1]: revel = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: revel = float(field_splitted[1]) elif field_splitted[0] == "phyloP_primate": if "&" in field_splitted[1]: phyloP_primate = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: phyloP_primate = float(field_splitted[1]) elif field_splitted[0] == "phyloP_mammal": if "&" in field_splitted[1]: phyloP_mammal = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: phyloP_mammal = float(field_splitted[1]) elif field_splitted[0] == "phyloP_vertebrate": if "&" in field_splitted[1]: phyloP_vertebrate = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: phyloP_vertebrate = float(field_splitted[1]) elif field_splitted[0] == "phastCons_primate": if "&" in field_splitted[1]: phastCons_primate = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: phastCons_primate = float(field_splitted[1]) elif field_splitted[0] == "phastCons_mammal": if "&" in field_splitted[1]: phastCons_mammal = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: phastCons_mammal = float(field_splitted[1]) elif field_splitted[0] == "phastCons_vertebrate": if "&" in field_splitted[1]: phastCons_vertebrate = max([float(value) for value in field_splitted[1].split("&") if (value != "." 
and value != "nan" and value !="")], default=np.nan) + else: phastCons_vertebrate = float(field_splitted[1]) elif field_splitted[0] == "gnomAD_Hom": if "&" in field_splitted[1]: gnomAD_hom = float(field_splitted[1].split("&")[0]) + else: gnomAD_hom = float(field_splitted[1]) elif field_splitted[0] == "gnomAD_AN": if "&" in field_splitted[1]: gnomAD_an = float(field_splitted[1].split("&")[0]) + else: gnomAD_an = float(field_splitted[1]) elif field_splitted[0] == "gnomAD_AFR_AF": if "&" in field_splitted[1]: gnomAD_afr_af = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: gnomAD_afr_af = float(field_splitted[1]) elif field_splitted[0] == "gnomAD_AMR_AF": if "&" in field_splitted[1]: gnomAD_amr_af = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: gnomAD_amr_af = float(field_splitted[1]) elif field_splitted[0] == "gnomAD_EAS_AF": if "&" in field_splitted[1]: gnomAD_eas_af = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: gnomAD_eas_af = float(field_splitted[1]) elif field_splitted[0] == "gnomAD_NFE_AF": if "&" in field_splitted[1]: gnomAD_nfe_af = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: gnomAD_nfe_af = float(field_splitted[1]) elif field_splitted[0] == "gnomAD_SAS_AF": if "&" in field_splitted[1]: gnomAD_sas_af = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: gnomAD_sas_af = float(field_splitted[1]) elif field_splitted[0] == "CAPICE": if "&" in field_splitted[1]: capice = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: capice = float(field_splitted[1]) elif field_splitted[0] == "CADD": if "&" in field_splitted[1]: cadd = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: cadd = float(field_splitted[1]) elif field_splitted[0] == "ADA_SCORE": if "&" in field_splitted[1]: ada = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: ada = float(field_splitted[1]) elif field_splitted[0] == "RF_SCORE": if "&" in field_splitted[1]: rf = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: rf = float(field_splitted[1]) @@ -338,6 +361,14 @@ def extract_columns(cell, process_indel): elif field_splitted[0] == "oe_lof": oe_lof = min([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + ## currently not used + elif field_splitted[0] == "oe_mis": + oe_mis = min([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + + ## currently not used + elif field_splitted[0] == "oe_syn": + oe_syn = min([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + elif field_splitted[0] == "SegDup": segDup = max([float(value) for value in field_splitted[1].split("&") if (value != "." 
and value != "nan" and value !="")], default=np.nan) @@ -347,18 +378,13 @@ def extract_columns(cell, process_indel): elif field_splitted[0] == "CLINVAR_DETAILS": clinvar_details = field_splitted[1] - #elif field_splitted[0] == "DETAILS": - # details = field_splitted[1] - - #elif field_splitted[0] == "CLASS": - # class_orig = field_splitted[1] - elif field_splitted[0] == "HGMD_CLASS": hgmd_class = field_splitted[1] elif field_splitted[0] == "HGMD_RANKSCORE": if "&" in field_splitted[1]: hgmd_rankscore = max([float(value) for value in field_splitted[1].split("&") if (value != "." and value != "nan" and value !="")], default=np.nan) + else: hgmd_rankscore = float(field_splitted[1]) @@ -383,28 +409,40 @@ def extract_columns(cell, process_indel): max_af = max([gnomAD_afr_af, gnomAD_amr_af, gnomAD_eas_af, gnomAD_nfe_af, gnomAD_sas_af], default=np.nan) if process_indel: - extracted_columns = [indel_ID, annotation, fathmm_xf, condel, eigen_phred, mutation_assessor, revel, phyloP_primate, phyloP_mammal, phyloP_vertebrate, phastCons_primate, phastCons_mammal, phastCons_vertebrate, gnomAD_homAF, gnomAD_afr_af, gnomAD_amr_af, gnomAD_eas_af, gnomAD_nfe_af, gnomAD_sas_af, max_af, capice, cadd, oe_lof, segDup, simpleRepeat, ada, rf, spliceAI, repeat_masker, clinvar_details, hgmd_class, hgmd_rankscore, omim_details] #, details, class_orig] #, abb_score] + extracted_columns = [indel_ID, annotation, fathmm_xf, condel, eigen_phred, mutation_assessor, revel, phyloP_primate, phyloP_mammal, phyloP_vertebrate, phastCons_primate, phastCons_mammal, phastCons_vertebrate, gnomAD_homAF, gnomAD_afr_af, gnomAD_amr_af, gnomAD_eas_af, gnomAD_nfe_af, gnomAD_sas_af, max_af, capice, cadd, oe_lof, oe_mis, oe_syn, segDup, simpleRepeat, ada, rf, spliceAI, repeat_masker, clinvar_details, hgmd_class, hgmd_rankscore, omim_details] #, details, class_orig] #, abb_score] else: - extracted_columns = [annotation, fathmm_xf, condel, eigen_phred, mutation_assessor, revel, phyloP_primate, phyloP_mammal, phyloP_vertebrate, phastCons_primate, phastCons_mammal, phastCons_vertebrate, gnomAD_homAF, gnomAD_afr_af, gnomAD_amr_af, gnomAD_eas_af, gnomAD_nfe_af, gnomAD_sas_af, max_af, capice, cadd, oe_lof, segDup, simpleRepeat, ada, rf, spliceAI, repeat_masker, clinvar_details, hgmd_class, hgmd_rankscore, omim_details] #, details, class_orig, abb_score] + extracted_columns = [annotation, fathmm_xf, condel, eigen_phred, mutation_assessor, revel, phyloP_primate, phyloP_mammal, phyloP_vertebrate, phastCons_primate, phastCons_mammal, phastCons_vertebrate, gnomAD_homAF, gnomAD_afr_af, gnomAD_amr_af, gnomAD_eas_af, gnomAD_nfe_af, gnomAD_sas_af, max_af, capice, cadd, oe_lof, oe_mis, oe_syn, segDup, simpleRepeat, ada, rf, spliceAI, repeat_masker, clinvar_details, hgmd_class, hgmd_rankscore, omim_details] #, details, class_orig, abb_score] return extracted_columns -def extract_vep_annotation(cell, annotation_header): +def extract_vep_annotation(cell, annotation_header, canonical_transcripts=[]): annotation_fields = str(cell["CSQ"]).split(",") new_cols = [] - # choose the most severe annotation variant - # if new consequence terms were added to the database that are not yet handled from aiDIVA use default consequence "unknown" with lowest severity value + + if (len(annotation_fields) >= 1) and (annotation_fields[0] != ""): - consequences = [min([VARIANT_CONSEQUENCES.get(consequence) if consequence in VARIANT_CONSEQUENCES.keys() else VARIANT_CONSEQUENCES.get("unknown") for consequence in field.split("|")[annotation_header.index("Consequence")].split("&")]) 
for field in annotation_fields] - target_index = min(enumerate(consequences), key=itemgetter(1))[0] - new_cols = annotation_fields[target_index].strip().split("|") + + if canonical_transcripts: + for annotation in annotation_fields: + transcript_index = annotation_header.index("Feature") + + if annotation.split("|")[transcript_index] in canonical_transcripts: + new_cols = annotation.split("|") + break + + if new_cols == []: + # choose the most severe annotation variant + # if new consequence terms were added to the database that are not yet handled from aiDIVA use default consequence "unknown" with lowest severity value + consequences = [min([VARIANT_CONSEQUENCES.get(consequence) if consequence in VARIANT_CONSEQUENCES.keys() else VARIANT_CONSEQUENCES.get("unknown") for consequence in field.split("|")[annotation_header.index("Consequence")].split("&")]) for field in annotation_fields] + target_index = min(enumerate(consequences), key=itemgetter(1))[0] + new_cols = annotation_fields[target_index].strip().split("|") else: # can happen with the expanded InDels - logger.warn("Empty VEP annotation!") + logger.warning("Empty VEP annotation!") return new_cols @@ -422,18 +460,23 @@ def extract_sample_information(row, sample, sample_header=None): splitted_sample_information = sample_dict[sample_id].split("|") if splitted_sample_information[0] == "wt": sample_information.append("0/0") + elif splitted_sample_information[0] == "hom": sample_information.append("1/1") + elif splitted_sample_information[0] == "het": sample_information.append("0/1") + else: logger.warning("Genotype not recognized! (%s)" % (splitted_sample_information[0])) sample_information.append(splitted_sample_information[1]) sample_information.append(splitted_sample_information[2]) sample_information.append(sample_id + "=" + sample_dict[sample_id]) + else: logger.error("Format is MULTI but no sample_header is given!") + else: format_entries = str(row["FORMAT"]).strip().split(":") sample_fields = str(row[sample + ".full"]).strip().split(":") @@ -445,23 +488,27 @@ def extract_sample_information(row, sample, sample_header=None): if "GT" in format_entries: sample_gt_information = sample_fields[format_entries.index("GT")] + else: sample_gt_information = "./." if "DP" in format_entries: sample_dp_information = sample_fields[format_entries.index("DP")] + else: sample_dp_information = "." if "AD" in format_entries: sample_ref_information = sample_fields[format_entries.index("AD")].split(",")[0] sample_alt_information = sample_fields[format_entries.index("AD")].split(",")[1] + else: sample_ref_information = "." sample_alt_information = "." if "GQ" in format_entries: sample_gq_information = sample_fields[format_entries.index("GQ")] + else: sample_gq_information = "." 
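Note on the canonical-transcript handling added in extract_vep_annotation() above: the selection order is (1) prefer the first VEP CSQ block whose Feature (transcript ID) appears in the supplied canonical transcript list, and (2) otherwise fall back to the block with the most severe consequence. The following standalone Python sketch is illustrative only; pick_annotation and the severity values are placeholders, while the real code uses the module's VARIANT_CONSEQUENCES mapping (lower value = more severe).

# illustrative sketch, not part of the patch
VARIANT_CONSEQUENCES = {"stop_gained": 4, "missense_variant": 11, "synonymous_variant": 26, "unknown": 40}  # placeholder severities

def pick_annotation(annotation_fields, annotation_header, canonical_transcripts=None):
    feature_index = annotation_header.index("Feature")
    consequence_index = annotation_header.index("Consequence")

    # 1) prefer the first CSQ block whose transcript ID is canonical (e.g. MANE Select)
    for field in (annotation_fields if canonical_transcripts else []):
        if field.split("|")[feature_index] in canonical_transcripts:
            return field.split("|")

    # 2) otherwise take the block with the most severe (lowest-valued) consequence
    severities = [min(VARIANT_CONSEQUENCES.get(cons, VARIANT_CONSEQUENCES["unknown"])
                      for cons in field.split("|")[consequence_index].split("&"))
                  for field in annotation_fields]
    return annotation_fields[severities.index(min(severities))].split("|")

# example with two CSQ blocks of the form "Consequence|Feature"
blocks = ["missense_variant|ENST00000000001", "synonymous_variant|ENST00000000002"]
print(pick_annotation(blocks, ["Consequence", "Feature"], canonical_transcripts=["ENST00000000002"]))
# -> ['synonymous_variant', 'ENST00000000002'] (the canonical transcript wins over severity)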
@@ -469,6 +516,7 @@ def extract_sample_information(row, sample, sample_header=None): divisor = (int(sample_ref_information) + int(sample_alt_information)) if divisor == 0: sample_af_information = 0 + else: sample_af_information = (int(sample_alt_information) / divisor) @@ -568,28 +616,22 @@ def convert_variant_representation(row): def add_INFO_fields_to_dataframe(process_indel, expanded_indel, vcf_as_dataframe): - indel_annotation_columns = ["INDEL_ID", "CSQ", "FATHMM_XF", "CONDEL", "EIGEN_PHRED", "MutationAssessor", "REVEL", "phyloP_primate", "phyloP_mammal", "phyloP_vertebrate", "phastCons_primate", "phastCons_mammal", "phastCons_vertebrate", "homAF", "gnomAD_AFR_AF", "gnomAD_AMR_AF", "gnomAD_EAS_AF", "gnomAD_NFE_AF", "gnomAD_SAS_AF", "MAX_AF", "CAPICE", "CADD_PHRED", "oe_lof", "segmentDuplication", "simpleRepeat", "ada_score", "rf_score", "SpliceAI", "REPEATMASKER", "CLINVAR_DETAILS", "HGMD_CLASS", "HGMD_RANKSCORE", "OMIM"] - snp_annotation_columns = ["CSQ", "FATHMM_XF", "CONDEL", "EIGEN_PHRED", "MutationAssessor", "REVEL", "phyloP_primate", "phyloP_mammal", "phyloP_vertebrate", "phastCons_primate", "phastCons_mammal", "phastCons_vertebrate", "homAF", "gnomAD_AFR_AF", "gnomAD_AMR_AF", "gnomAD_EAS_AF", "gnomAD_NFE_AF", "gnomAD_SAS_AF", "MAX_AF", "CAPICE", "CADD_PHRED", "oe_lof", "segmentDuplication", "simpleRepeat", "ada_score", "rf_score", "SpliceAI", "REPEATMASKER", "CLINVAR_DETAILS", "HGMD_CLASS", "HGMD_RANKSCORE", "OMIM"] + indel_annotation_columns = ["INDEL_ID", "CSQ", "FATHMM_XF", "CONDEL", "EIGEN_PHRED", "MutationAssessor", "REVEL", "phyloP_primate", "phyloP_mammal", "phyloP_vertebrate", "phastCons_primate", "phastCons_mammal", "phastCons_vertebrate", "homAF", "gnomAD_AFR_AF", "gnomAD_AMR_AF", "gnomAD_EAS_AF", "gnomAD_NFE_AF", "gnomAD_SAS_AF", "MAX_AF", "CAPICE", "CADD_PHRED", "oe_lof", "oe_mis", "oe_syn", "segmentDuplication", "simpleRepeat", "ada_score", "rf_score", "SpliceAI", "REPEATMASKER", "CLINVAR_DETAILS", "HGMD_CLASS", "HGMD_RANKSCORE", "OMIM"] + snp_annotation_columns = ["CSQ", "FATHMM_XF", "CONDEL", "EIGEN_PHRED", "MutationAssessor", "REVEL", "phyloP_primate", "phyloP_mammal", "phyloP_vertebrate", "phastCons_primate", "phastCons_mammal", "phastCons_vertebrate", "homAF", "gnomAD_AFR_AF", "gnomAD_AMR_AF", "gnomAD_EAS_AF", "gnomAD_NFE_AF", "gnomAD_SAS_AF", "MAX_AF", "CAPICE", "CADD_PHRED", "oe_lof", "oe_mis", "oe_syn", "segmentDuplication", "simpleRepeat", "ada_score", "rf_score", "SpliceAI", "REPEATMASKER", "CLINVAR_DETAILS", "HGMD_CLASS", "HGMD_RANKSCORE", "OMIM"] if process_indel: if not expanded_indel: vcf_as_dataframe["GSVAR_VARIANT"] = vcf_as_dataframe.apply(lambda row: convert_variant_representation(row), axis=1) vcf_as_dataframe[indel_annotation_columns] = vcf_as_dataframe["INFO"].apply(lambda x: pd.Series(extract_columns(x, process_indel))) vcf_as_dataframe["IS_INDEL"] = 1 - #vcf_as_dataframe["HIGH_IMPACT"] = vcf_as_dataframe.apply(lambda row: specify_impact_class(row), axis=1) - #vcf_as_dataframe["MOST_SEVERE_CONSEQUENCE"] = vcf_as_dataframe.apply(lambda row: get_most_severe_consequence(row), axis=1) else: vcf_as_dataframe[indel_annotation_columns] = vcf_as_dataframe["INFO"].apply(lambda x: pd.Series(extract_columns(x, process_indel))) - #vcf_as_dataframe["HIGH_IMPACT"] = vcf_as_dataframe.apply(lambda row: specify_impact_class(row), axis=1) - #vcf_as_dataframe["MOST_SEVERE_CONSEQUENCE"] = vcf_as_dataframe.apply(lambda row: get_most_severe_consequence(row), axis=1) else: vcf_as_dataframe["GSVAR_VARIANT"] = vcf_as_dataframe.apply(lambda row: 
convert_variant_representation(row), axis=1) vcf_as_dataframe[snp_annotation_columns] = vcf_as_dataframe["INFO"].apply(lambda x: pd.Series(extract_columns(x, process_indel))) vcf_as_dataframe["IS_INDEL"] = 0 - #vcf_as_dataframe["HIGH_IMPACT"] = vcf_as_dataframe.apply(lambda row: specify_impact_class(row), axis=1) - #vcf_as_dataframe["MOST_SEVERE_CONSEQUENCE"] = vcf_as_dataframe.apply(lambda row: get_most_severe_consequence(row), axis=1) vcf_as_dataframe = vcf_as_dataframe.drop(columns=["INFO"]) @@ -620,12 +662,13 @@ def specify_impact_class(row): if variant_impact == "HIGH": return 1 + else: return 0 -def add_VEP_annotation_to_dataframe(annotation_header, vcf_as_dataframe): - vcf_as_dataframe[annotation_header] = vcf_as_dataframe.apply(lambda x: pd.Series(extract_vep_annotation(x, annotation_header)), axis=1) +def add_VEP_annotation_to_dataframe(annotation_header, canonical_transcripts, vcf_as_dataframe): + vcf_as_dataframe[annotation_header] = vcf_as_dataframe.apply(lambda x: pd.Series(extract_vep_annotation(x, annotation_header, canonical_transcripts)), axis=1) vcf_as_dataframe = vcf_as_dataframe.drop(columns=["CSQ"]) vcf_as_dataframe = vcf_as_dataframe.rename(columns={"am_class": "ALPHA_MISSENSE_CLASS"}) vcf_as_dataframe = vcf_as_dataframe.rename(columns={"am_pathogenicity": "ALPHA_MISSENSE_SCORE"}) @@ -645,6 +688,7 @@ def add_sample_information_to_dataframe(sample_ids, sample_header, vcf_as_datafr sample_header_multi.append("DP." + sample_id) sample_header_multi.append("ALT." + sample_id) sample_header_multi.append(sample_id + ".full") + vcf_as_dataframe[sample_header_multi] = vcf_as_dataframe.apply(lambda x: pd.Series(extract_sample_information(x, sample, sample_header)), axis=1) else: @@ -658,11 +702,22 @@ def add_sample_information_to_dataframe(sample_ids, sample_header, vcf_as_datafr return vcf_as_dataframe -def convert_vcf_to_pandas_dataframe(input_file, process_indel, expanded_indel, num_cores): +def convert_vcf_to_pandas_dataframe(input_file, process_indel, expanded_indel, transcript_file_path, num_cores): header, vcf_as_dataframe = reformat_vcf_file_and_read_into_pandas_and_extract_header(input_file) logger.debug("Convert VCF file") + canonical_transcripts = [] + + if os.path.isfile(transcript_file_path): + with open(transcript_file_path, "r") as transcript_file: + for line in transcript_file: + if line.startswith("#"): + continue + + else: + canonical_transcripts.append(line.strip()) + sample_ids = [] # FORMAT column has index 8 (counted from 0) and sample columns follow afterwards (sample names are unique) # Check if FORMAT column exists @@ -676,7 +731,7 @@ def convert_vcf_to_pandas_dataframe(input_file, process_indel, expanded_indel, n if not vcf_as_dataframe.empty: vcf_as_dataframe = parallelize_dataframe_processing(vcf_as_dataframe, partial(add_INFO_fields_to_dataframe, process_indel, expanded_indel), num_cores) - vcf_as_dataframe = parallelize_dataframe_processing(vcf_as_dataframe, partial(add_VEP_annotation_to_dataframe, annotation_header), num_cores) + vcf_as_dataframe = parallelize_dataframe_processing(vcf_as_dataframe, partial(add_VEP_annotation_to_dataframe, annotation_header, canonical_transcripts), num_cores) if len(vcf_as_dataframe.columns) > 8: if "FORMAT" in vcf_as_dataframe.columns: @@ -728,6 +783,7 @@ def write_vcf_to_csv(vcf_as_dataframe, out_file): parser.add_argument("--out_data", type=str, dest="out_data", metavar="output.csv", required=True, help="CSV file containing the converted VCF file\n") parser.add_argument("--indel", action="store_true", 
required=False, help="Flag to indicate whether the file to convert consists of indel variants or not.\n") parser.add_argument("--expanded", action="store_true", required=False, help="Flag to indicate whether the file to convert consists of expanded indel variants.\n") + parser.add_argument("--transcript_ids", type=str, dest="transcript_ids", metavar="mane_v13_transcripts_ids.txt", required=True, help="File specifying the ensembl transcripts that should be used (e.g. MANE-SELECT).\n") parser.add_argument("--threads", type=int, dest="threads", metavar="1", required=False, help="Number of threads to use.") args = parser.parse_args() @@ -737,5 +793,8 @@ def write_vcf_to_csv(vcf_as_dataframe, out_file): else: num_cores = 1 - vcf_as_dataframe = convert_vcf_to_pandas_dataframe(args.in_data, args.indel, args.expanded, num_cores) + # TODO: add as parameter if it works + #transcript_file_path = "/mnt/storage2/users/ahboced1/phd_project/mane_grch38_v13_transcript_ids.txt" + + vcf_as_dataframe = convert_vcf_to_pandas_dataframe(args.in_data, args.indel, args.expanded, args.transcript_ids, num_cores) write_vcf_to_csv(vcf_as_dataframe, args.out_data) diff --git a/aidiva/helper_modules/create_result_vcf.py b/aidiva/helper_modules/create_result_vcf.py index efe5c5f..a6812c9 100644 --- a/aidiva/helper_modules/create_result_vcf.py +++ b/aidiva/helper_modules/create_result_vcf.py @@ -10,13 +10,14 @@ def write_header(out_file, assembly_build, single): out_file.write("##fileformat=VCFv4.2\n") if not single: - out_file.write("##INFO=\n") - out_file.write("##INFO=\n") - out_file.write("##INFO=\n") + out_file.write("##INFO=\n") + out_file.write("##INFO=\n") + out_file.write("##INFO=\n") + else: - out_file.write("##INFO=\n") - out_file.write("##INFO=\n") - out_file.write("##INFO=\n") + out_file.write("##INFO=\n") + out_file.write("##INFO=\n") + out_file.write("##INFO=\n") if assembly_build == "GRCh38": out_file.write("##reference=GRCh38.fa\n") @@ -44,6 +45,7 @@ def write_header(out_file, assembly_build, single): out_file.write("##contig=\n") out_file.write("##contig=\n") out_file.write("##contig=\n") + elif assembly_build == "GRCh37": out_file.write("##reference=GRCh37.fa\n") out_file.write("##contig=\n") @@ -70,6 +72,7 @@ def write_header(out_file, assembly_build, single): out_file.write("##contig=\n") out_file.write("##contig=\n") out_file.write("##contig=\n") + else: logger.error(f"Unsupported assembly build: {assembly_build}") @@ -82,112 +85,136 @@ def write_result_vcf(input_data, vcf_file, assembly_build, single): input_data = input_data.sort_values(["CHROM", "POS"], ascending=[True, True]) input_data = input_data.reset_index(drop=True) colnames = input_data.columns - + write_header(out_file, assembly_build, single) for row in input_data.itertuples(): if ("AIDIVA_SCORE" in colnames): if (str(row.AIDIVA_SCORE) == "nan"): aidiva_score = "." + else: aidiva_score = str(row.AIDIVA_SCORE) + else: aidiva_score = "." if ("FINAL_AIDIVA_SCORE" in colnames): if (str(row.FINAL_AIDIVA_SCORE) == "nan"): final_aidiva_score = "." + else: final_aidiva_score = str(row.FINAL_AIDIVA_SCORE) + else: final_aidiva_score = "." if ("HPO_RELATEDNESS" in colnames): if (str(row.HPO_RELATEDNESS) == "nan"): hpo_relatedness = "." + else: hpo_relatedness = str(row.HPO_RELATEDNESS) + else: hpo_relatedness = "." if ("HPO_RELATEDNESS_INTERACTING" in colnames): if (str(row.HPO_RELATEDNESS_INTERACTING) == "nan"): hpo_relatedness_interacting = "." 
+ else: hpo_relatedness_interacting = str(row.HPO_RELATEDNESS_INTERACTING) + else: hpo_relatedness_interacting = "." - + if ("FILTER_PASSED" in colnames): if (str(row.FILTER_PASSED) == "nan"): filter_passed = "." + else: filter_passed = str(row.FILTER_PASSED) + else: filter_passed = "." - + if ("DOMINANT" in colnames): if (str(row.DOMINANT) == "nan"): dominant = "." + else: dominant = str(row.DOMINANT) + else: dominant = "." - + if ("DOMINANT_DENOVO" in colnames): if (str(row.DOMINANT_DENOVO) == "nan"): dominant_denovo = "." + else: dominant_denovo = str(row.DOMINANT_DENOVO) + else: dominant_denovo = "." - + if ("RECESSIVE" in colnames): if (str(row.RECESSIVE) == "nan"): recessive = "." + else: recessive = str(row.RECESSIVE) + else: recessive = "." - + if ("XLINKED" in colnames): if (str(row.XLINKED) == "nan"): xlinked = "." + else: xlinked = str(row.XLINKED) + else: xlinked = "." - + if ("COMPOUND" in colnames): if (str(row.COMPOUND) == "nan"): compound = "." + else: compound = str(row.COMPOUND) + else: compound = "." - + if ("INHERITANCE" in colnames): if (str(row.INHERITANCE) == "nan") or (str(row.INHERITANCE) == ""): inheritance_comment = "." + else: inheritance_comment = str(row.INHERITANCE) + else: inheritance_comment = "." if not single: info_entry = "AIDIVA=" + aidiva_score + "," + final_aidiva_score + "," + hpo_relatedness + "," + hpo_relatedness_interacting + "," + filter_passed + ";AIDIVA_INHERITANCE=" + dominant + "," + dominant_denovo + "," + recessive + "," + xlinked + "," + compound + ";AIDIVA_INHERITANCE_COMMENT=" + inheritance_comment + else: info_entry = "AIDIVA=" + aidiva_score + "," + final_aidiva_score + "," + hpo_relatedness + "," + hpo_relatedness_interacting + "," + filter_passed + ";AIDIVA_INHERITANCE=" + recessive + "," + compound + ";AIDIVA_INHERITANCE_COMMENT=" + inheritance_comment out_file.write(str(row.CHROM).strip() + "\t" + str(row.POS) + "\t" + "." + "\t" + str(row.REF) + "\t" + str(row.ALT) + "\t" + "." + "\t" + "." 
+ "\t" + info_entry + "\n") + else: write_header(out_file, assembly_build, single) - logger.warn(f"Wrote an empty result file!") + logger.warning(f"Wrote an empty result file!") if __name__=="__main__": parser = argparse.ArgumentParser() - parser.add_argument("--in", type=str, dest="in_data", metavar="in.csv", required=True, help="CSV file with the results from the AIdiva run that should be converted to a VCF file\n") - parser.add_argument("--out", type=str, dest="out_data", metavar="out.vcf", required=True, help="VCF file witht the results of the AIdiva run\n") + parser.add_argument("--in", type=str, dest="in_data", metavar="in.csv", required=True, help="CSV file with the results from the aiDIVA run that should be converted to a VCF file\n") + parser.add_argument("--out", type=str, dest="out_data", metavar="out.vcf", required=True, help="VCF file with the results of the aiDIVA run\n") parser.add_argument("--not_single", action="store_false", required=False, help="Flag to indicate whether the file is single sample or not.\n") args = parser.parse_args() diff --git a/aidiva/helper_modules/filter_vcf.py b/aidiva/helper_modules/filter_vcf.py index dd4a703..49616cb 100644 --- a/aidiva/helper_modules/filter_vcf.py +++ b/aidiva/helper_modules/filter_vcf.py @@ -31,8 +31,10 @@ def filter_coding_variants(filepath, filepath_out, annotation_field_name): if filepath.endswith(".gz"): vcf_file_to_reformat = gzip.open(filepath, "rt") + else: vcf_file_to_reformat = open(filepath, "r") + outfile = open(filepath_out, "w") consequence_index = 0 @@ -43,32 +45,40 @@ # extract header from vcf file for line in tmp: - splitted_line = line.split("\t") + splitted_line = line.replace("\n", "").split("\t") if line.strip().startswith("##"): if line.strip().startswith("##fileformat="): outfile.write(line) continue + elif line.strip().startswith("##filedate="): outfile.write(line) continue + elif line.strip().startswith("##source="): outfile.write(line) continue + elif line.strip().startswith("##reference="): outfile.write(line) continue + elif line.strip().startswith("##contig="): outfile.write(line) continue + elif line.strip().startswith("##FORMAT="): outfile.write(line) continue + elif line.strip().startswith("##FILTER="): outfile.write(line) continue + elif line.strip().startswith("##ANALYSISTYPE="): outfile.write(line) continue + elif line.strip().startswith("##SAMPLE="): outfile.write(line) continue @@ -79,7 +89,9 @@ def filter_coding_variants(filepath, filepath_out, annotation_field_name): if annotation_header[i] == "Consequence": consequence_index = i break + continue + continue if line.strip().startswith("#CHROM"): diff --git a/aidiva/helper_modules/split_vcf_in_indel_and_snp_set.py b/aidiva/helper_modules/split_vcf_in_indel_and_snp_set.py index 5d90c28..2730fb8 100644 --- a/aidiva/helper_modules/split_vcf_in_indel_and_snp_set.py +++ b/aidiva/helper_modules/split_vcf_in_indel_and_snp_set.py @@ -12,8 +12,10 @@ def split_vcf_file_in_indel_and_snps_set(filepath, filepath_snp, filepath_indel): if filepath.endswith(".gz"): vcf_file_to_reformat = gzip.open(filepath, "rt") + else: vcf_file_to_reformat = open(filepath, "r") + outfile_snps = open(filepath_snp, "w") outfile_indel = open(filepath_indel, "w") @@ -30,7 +32,8 @@ def split_vcf_file_in_indel_and_snps_set(filepath, filepath_snp, filepath_indel) outfile_snps.write(line) outfile_indel.write(line) continue - if line.strip().startswith("#CHROM"): + + elif line.strip().startswith("#CHROM"):
outfile_snps.write(line) outfile_indel.write(line) continue @@ -43,19 +46,24 @@ def split_vcf_file_in_indel_and_snps_set(filepath, filepath_snp, filepath_indel) if "," in splitted_line[4]: logger.debug(f"Processed variant was removed, too many alternative alleles reported (ALT={splitted_line[4]})!") continue + else: ref_length = len(splitted_line[3]) alt_length = max([len(alt) for alt in splitted_line[4].split(",")]) if (ref_length == 1) and (alt_length == 1): outfile_snps.write(line) + elif (ref_length > 1 and alt_length == 1) or (alt_length > 1 and ref_length == 1): indel_ID += 1 if splitted_line[7].endswith("\n"): splitted_line[7] = splitted_line[7].replace("\n", "") + ";INDEL_ID=" + str(indel_ID) + "\n" + else: splitted_line[7] = splitted_line[7].replace("\n", "") + ";INDEL_ID=" + str(indel_ID) + outfile_indel.write("\t".join(splitted_line)) + else: logger.critical("Something bad happened!") diff --git a/aidiva/run_aiDIVA.py b/aidiva/run_aiDIVA.py index 1762ad9..11aa19c 100644 --- a/aidiva/run_aiDIVA.py +++ b/aidiva/run_aiDIVA.py @@ -13,21 +13,22 @@ if __name__=="__main__": - parser = argparse.ArgumentParser(description = "aiDIVA -- augmented intelligence-based Disease Variant Analysis") + parser = argparse.ArgumentParser(description = "AIdiva -- Augmented Intelligence Disease Variant Analysis") parser.add_argument("--snp_vcf", type=str, dest="snp_vcf", metavar="snp.vcf", required=True, help="VCF file with the annotated SNP variants [required]") parser.add_argument("--indel_vcf", type=str, dest="indel_vcf", metavar="indel.vcf", required=True, help="VCF file with the annotated (only basic annotation) InDel variants [required]") parser.add_argument("--expanded_indel_vcf", type=str, dest="expanded_indel_vcf", metavar="expanded_indel.vcf", required=True, help="VCF file with the annotated expanded InDel variants [required]") parser.add_argument("--out_prefix", type=str, dest="out_prefix", metavar="/output_path/aidiva_result", required=True, help="Prefix that is used to save the results [required]") - parser.add_argument("--config", type=str, dest="config", metavar="config.yaml", required=True, help="Config file specifying the parameters for AIdiva [required]") + parser.add_argument("--config", type=str, dest="config", metavar="config.yaml", required=True, help="Config file specifying the parameters for aiDIVA [required]") parser.add_argument("--workdir", type=str, dest="workdir", metavar="/tmp/aidiva_workdir/", required=False, help="Path to the working directory, here all intermediate files are saved (if not specified a temporary folder will be created and used)") parser.add_argument("--hpo_list", type=str, dest="hpo_list", metavar="hpo.txt", required=False, help="TXT file containing the HPO terms reported for the current patient") parser.add_argument("--gene_exclusion", type=str, dest="gene_exclusion", metavar="gene_exclusion.txt", required=False, help="Tab separated file containing the genes to exclude in the analysis. 
Genes are assumed to be in the first column.") parser.add_argument("--family_file", type=str, dest="family_file", metavar="family.txt", required=False, help="TXT file showing the sample relations of the current data") parser.add_argument("--family_type", type=str, dest="family_type", metavar="SINGLE", required=False, help="In case of multisample data the kind of sample relation [SINGLE, TRIO, MULTI]") parser.add_argument("--skip_db_check", dest="skip_db_check", action="store_true", required=False, help="Flag to skip database (ClinVar, HGMD) lookup") - parser.add_argument("--only_top_results", dest="only_top_results", action="store_true", required=False, help="Report only the top 25 variants as result") + parser.add_argument("--only_top_results", dest="only_top_results", action="store_true", required=False, help="Report only the top ranking variants as result. The desired rank can be given as parameter with '--top_rank' (default: 25)") + parser.add_argument("--top_rank", type=str, dest="top_rank", metavar="25", required=False, help="Rank parameter for '--only_top_results' (default: 25)") parser.add_argument("--threads", type=int, dest="threads", metavar="1", required=False, help="Number of threads to use (default: 1)") - parser.add_argument("--log_file", type=str, dest="log_file", metavar="/tmp/aidiva_workdir/sample_id_aidiva.log", required=False, help="Custom log file, if not specified the log file is saved in the working directory") + parser.add_argument("--log_file", type=str, dest="log_file", metavar="/output_path/logs/aidiva_log.txt", required=False, help="Path plus name of the log file to be saved, if not specified the log file is saved in the working directory") parser.add_argument("--log_level", type=str, dest="log_level", metavar="INFO", required=False, help="Define logging level, if unsure just leave the default [DEBUG, INFO] (default: INFO)") parser.add_argument("--save_as_vcf", dest="save_as_vcf", action="store_true", required=False, help="Flag to create additional result files in VCF format.") args = parser.parse_args() @@ -37,6 +38,7 @@ snp_vcf = args.snp_vcf indel_vcf = args.indel_vcf expanded_indel_vcf = args.expanded_indel_vcf + else: raise SystemExit("The input VCF files were not specified!") @@ -44,17 +46,21 @@ if args.config is not None: try: config_file = open(args.config, "r") + except OSError: raise SystemExit("The given config file could not be opened!") + else: configuration = yaml.load(config_file, Loader=yaml.SafeLoader) config_file.close() + else: raise SystemExit("The parameter 'config' was not specified!") # parse output files if args.out_prefix is not None: output_filename = args.out_prefix + else: raise SystemExit("The parameter 'out_prefix' was not specified!") @@ -63,6 +69,7 @@ if not working_directory.endswith("/"): working_directory = working_directory + "/" + else: workdir = tempfile.TemporaryDirectory(suffix="", prefix="aidiva_workdir_", dir=None) working_directory = workdir.name @@ -73,17 +80,20 @@ # parse disease and inheritance information if args.hpo_list is not None: hpo_file = args.hpo_list + else: hpo_file = None if args.gene_exclusion is not None: gene_exclusion_file = args.gene_exclusion + else: gene_exclusion_file = None if (args.family_file is not None) and (args.family_type is not None): family_file = args.family_file family_type = args.family_type + else: family_file = None family_type = "SINGLE" @@ -91,44 +101,57 @@ skip_db_check = args.skip_db_check only_top_results = args.only_top_results save_as_vcf = args.save_as_vcf - + if 
args.threads is not None: num_cores = int(args.threads) + else: num_cores = 1 - + + if args.top_rank is not None: + top_rank = int(args.top_rank) + + else: + top_rank = 25 + # use log level INFO as default if args.log_level is not None: if args.log_level == "DEBUG": log_level = logging.DEBUG log_format = "%(asctime)s -- %(name)s - %(levelname)s - %(message)s" + elif args.log_level == "INFO": log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" - #elif args.log_level == "WARN": - # log_level = logging.WARN + + #elif args.log_level == "WARNING": + # log_level = logging.WARNING # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + #elif args.log_level == "ERROR": # log_level = logging.ERROR # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + #elif args.log_level == "CRITICAL": # log_level = logging.CRITICAL # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + else: log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" + else: log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" if args.log_file is not None: log_file = args.log_file + else: timestamp = time.strftime("%Y%m%d-%H%M%S") - log_file = str(working_directory + "/" + "aidiva_" + timestamp + ".log") + log_file = str(working_directory + "/" + "aidiva_" + timestamp + ".txt") # set up logger - logging.basicConfig(filename=log_file, filemode="a", format=log_format, @@ -136,16 +159,10 @@ level=log_level) logger = logging.getLogger() - logger.info("Running aiDIVA on already annotated data") + logger.info("Running aiDIVA on annotated data") logger.info("Start program") logger.info(f"Working directory: {working_directory}") - # load SNP ML model - #scoring_model_snp = configuration["Analysis-Input"]["scoring-model-snp"] - - # load InDel ML model - #scoring_model_indel = configuration["Analysis-Input"]["scoring-model-indel"] - # load ML model scoring_model = configuration["Analysis-Input"]["scoring-model"] @@ -157,43 +174,50 @@ assembly_build = configuration["Assembly-Build"] ref_path = configuration["Analysis-Input"]["ref-path"] + transcript_path = configuration["Canonical-Transcripts"] + # convert splitted input data to vcf and annotate if snp_vcf is not None: - input_data_snp = convert_vcf.convert_vcf_to_pandas_dataframe(snp_vcf, False, False, num_cores) + input_data_snp = convert_vcf.convert_vcf_to_pandas_dataframe(snp_vcf, False, False, transcript_path, num_cores) + else: input_data_snp = pd.DataFrame() if indel_vcf is not None and expanded_indel_vcf is not None: - input_data_indel = convert_vcf.convert_vcf_to_pandas_dataframe(indel_vcf, True, False, num_cores) - input_data_expanded_indel = convert_vcf.convert_vcf_to_pandas_dataframe(expanded_indel_vcf, True, True, num_cores) + input_data_indel = convert_vcf.convert_vcf_to_pandas_dataframe(indel_vcf, True, False, transcript_path, num_cores) + input_data_expanded_indel = convert_vcf.convert_vcf_to_pandas_dataframe(expanded_indel_vcf, True, True, transcript_path, num_cores) + else: input_data_indel = pd.DataFrame() input_data_expanded_indel = pd.DataFrame() + #logger.debug(f"Condition-Check: {input_data_snp.dropna(how='all').empty}, {input_data_indel.dropna(how='all').empty}, {input_data_expanded_indel.dropna(how='all').empty}") + #logger.debug(f"Condition: {(not input_data_snp.dropna(how='all').empty) or ((not input_data_indel.dropna(how='all').empty) and (not input_data_expanded_indel.dropna(how='all').empty))}") + if (not input_data_snp.dropna(how='all').empty) or ((not 
input_data_indel.dropna(how='all').empty) and (not input_data_expanded_indel.dropna(how='all').empty)): if ((not input_data_indel.empty) and (not input_data_expanded_indel.empty)): logger.info("Combine InDel variants ...") input_data_combined_indel = combine_expanded_indels.parallelized_indel_combination(input_data_indel, input_data_expanded_indel, feature_list, num_cores) - + else: - logger.warn("No InDel variants given move on to SNP processing!") + logger.warning("No InDel variants given, moving on to SNV processing!") input_data_combined_indel = pd.DataFrame() # predict pathogenicity score logger.info("Score variants ...") - + if not input_data_snp.dropna(how='all').empty: predicted_data_snp = predict.perform_pathogenicity_score_prediction(scoring_model, input_data_snp, allele_frequency_list, feature_list, num_cores) - + else: - logger.warn("No SNP variants, skip SNP prediction!") + logger.warning("No SNV variants, skip SNV prediction!") predicted_data_snp = pd.DataFrame() if not input_data_combined_indel.dropna(how='all').empty: predicted_data_indel = predict.perform_pathogenicity_score_prediction(scoring_model, input_data_combined_indel, allele_frequency_list, feature_list, num_cores) - + else: - logger.warn("No InDel variants, skip InDel prediction!") + logger.warning("No InDel variants, skip InDel prediction!") predicted_data_indel = pd.DataFrame() if (not predicted_data_snp.dropna(how='all').empty) and (not predicted_data_indel.dropna(how='all').empty): @@ -214,10 +238,12 @@ # prioritize and filter variants logger.info("Prioritize variants and finalize score ...") prioritized_data = prio.prioritize_variants(predicted_data, internal_parameter_dict, ref_path, num_cores, assembly_build, feature_list, skip_db_check, family_file, family_type, hpo_file, gene_exclusion_file) + prioritized_data["AIDIVA_RANK"] = prioritized_data["FINAL_AIDIVA_SCORE"].rank(method='min', ascending=False) + ## TODO: create additional output files according to the inheritance information (only filtered data) if only_top_results: - prioritized_data[prioritized_data["FILTER_PASSED"] == 1].head(n=25).to_csv(str(output_filename + "_aidiva_result_filtered.tsv"), sep="\t", index=False) - logger.info("Only 25 best variants are reported as result!") + prioritized_data[(prioritized_data["FILTER_PASSED"] == 1) & (prioritized_data["AIDIVA_RANK"] <= top_rank)].to_csv(str(output_filename + "_aidiva_result_filtered.tsv"), sep="\t", index=False) + logger.info(f"Only the top ranked variants (up to rank {top_rank}) are reported as result!") else: if save_as_vcf: @@ -242,4 +268,4 @@ write_result.write_result_vcf(None, str(output_filename + "_aidiva_result.vcf"), assembly_build, bool(family_type == "SINGLE")) write_result.write_result_vcf(None, str(output_filename + "_aidiva_result_filtered.vcf"), assembly_build, bool(family_type == "SINGLE")) - logger.warn("The given input files were empty!") + logger.warning("The given input files were empty!") diff --git a/aidiva/run_annotation.py b/aidiva/run_annotation.py index 685572b..e3ba96f 100644 --- a/aidiva/run_annotation.py +++ b/aidiva/run_annotation.py @@ -1,9 +1,12 @@ import argparse +import helper_modules.combine_expanded_indels_and_create_csv as combine_expanded_indels import helper_modules.convert_indels_to_snps_and_create_vcf as expand_indels_and_create_vcf +import helper_modules.convert_vcf_to_csv as convert_vcf import helper_modules.filter_vcf as filt_vcf import helper_modules.split_vcf_in_indel_and_snp_set as split_vcf import logging import os +import pandas as pd import tempfile import
time import variant_annotation.annotate_with_vep as annotate @@ -11,58 +14,67 @@ if __name__=="__main__": - parser = argparse.ArgumentParser(description = "aiDIVA -- augmented intelligence-based Disease Variant Analysis") + parser = argparse.ArgumentParser(description = "AIdiva -- Augmented Intelligence Disease Variant Analysis") parser.add_argument("--vcf", type=str, dest="vcf", metavar="input.vcf(.gz)", required=True, help="VCF file with the variants to annotate [required]") - parser.add_argument("--config", type=str, dest="config", metavar="config.yaml", required=True, help="Config file specifying the parameters for AIdiva [required]") + parser.add_argument("--config", type=str, dest="config", metavar="config.yaml", required=True, help="Config file specifying the parameters for aiDIVA [required]") parser.add_argument("--out_folder", type=str, dest="out_folder", metavar="/output_path/aidiva_result", required=True, help="Prefix that is used to save the annotated files [required]") parser.add_argument("--filtered", dest="filtered", action="store_true", required=False, help="Flag indicating that the filtered files already exist in the result folder (skips the prefiltering step to save time)") parser.add_argument("--filtered_folder", type=str, dest="filtered_folder", metavar="/output_path/aidiva_filtered", required=False, help="Path to the prefiltered input VCF files") parser.add_argument("--inhouse_sample", dest="inhouse_sample", action="store_true", required=False, help="Flag to indicate that we are annotating an inhouse sample (skips leftNormalize since it is already performed)") + parser.add_argument("--output_table", dest="output_table", action="store_true", required=False, help="Flag to indicate that we want to save the annotation additionally as a TSV file") parser.add_argument("--threads", type=int, dest="threads", metavar="1", required=False, help="Number of threads to use. 
(default: 1)") - parser.add_argument("--log_file", type=str, dest="log_file", metavar="/tmp/aidiva_workdir/sample_id_aidiva.log", required=False, help="Custom log file, if not specified the log file is saved in the working directory") + parser.add_argument("--log_file", type=str, dest="log_file", metavar="/output_path/logs/aidiva_log.txt", required=False, help="Path plus name of the log file to be saved, if not specified the log file is saved in the working directory") parser.add_argument("--log_level", type=str, dest="log_level", metavar="INFO", required=False, help="Define logging level, if unsure just leave the default [DEBUG, INFO] (default: INFO)") args = parser.parse_args() - + workdir = tempfile.TemporaryDirectory(suffix="", prefix="aidiva_workdir_", dir=None) working_directory = workdir.name if not working_directory.endswith("/"): working_directory = working_directory + "/" - + inhouse_sample = args.inhouse_sample + is_filtered = args.filtered + output_table = args.output_table # use log level INFO as default if args.log_level is not None: if args.log_level == "DEBUG": log_level = logging.DEBUG log_format = "%(asctime)s -- %(name)s - %(levelname)s - %(message)s" + elif args.log_level == "INFO": log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" - #elif args.log_level == "WARN": - # log_level = logging.WARN + + #elif args.log_level == "WARNING": + # log_level = logging.WARNING # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + #elif args.log_level == "ERROR": # log_level = logging.ERROR # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + #elif args.log_level == "CRITICAL": # log_level = logging.CRITICAL # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + else: log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" + else: log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" - + if args.log_file is not None: log_file = args.log_file + else: timestamp = time.strftime("%Y%m%d-%H%M%S") - log_file = str(working_directory + "/" + "aidiva_" + timestamp + ".log") + log_file = str(working_directory + "/" + "aidiva_" + timestamp + ".txt") # set up logger - logging.basicConfig(filename=log_file, filemode="a", format=log_format, @@ -76,6 +88,7 @@ # parse input files if os.path.isfile(args.vcf): input_vcf = args.vcf + else: raise SystemExit("The given input file could not be opened!") @@ -83,17 +96,21 @@ if args.config is not None: try: config_file = open(args.config, "r") + except OSError: raise SystemExit("The given config file could not be opened!") + else: configuration = yaml.load(config_file, Loader=yaml.SafeLoader) config_file.close() + else: raise SystemExit("The parameter 'config' was not specified!") # parse output files if args.out_folder is not None: output_folder = args.out_folder + else: raise SystemExit("The parameter 'out_prefix' was not specified!") @@ -101,6 +118,7 @@ if args.filtered and args.filtered_folder is not None: filtered_folder = args.filtered_folder + else: logger.info("Pre-filtering will be performed and filtered files are saved to the output folder.") filtered_folder = output_folder @@ -108,6 +126,7 @@ # obtain number of threads to use during computation if args.threads is not None: num_cores = int(args.threads) + else: num_cores = 1 @@ -117,6 +136,10 @@ ref_path = configuration["Analysis-Input"]["ref-path"] + feature_list = configuration["Model-Features"]["feature-list"] + + transcript_path = configuration["Canonical-Transcripts"] + # convert 
splitted input data to vcf and annotate input_file = os.path.splitext(input_vcf)[0] input_filename = os.path.basename(input_file) @@ -124,7 +147,7 @@ logger.info("Starting VCF preparation...") # sorting and filtering step to remove unsupported variants - if not args.filtered: + if not is_filtered: annotate.left_normalize_and_sort_vcf(input_vcf, str(working_directory + "/" + input_filename + "_sorted.vcf"), annotation_dict, ref_path, inhouse_sample) annotate.annotate_consequence_information(str(working_directory + "/" + input_filename + "_sorted.vcf"), str(working_directory + "/" + input_filename + "_consequence.vcf"), annotation_dict, assembly_build, num_cores) filt_vcf.filter_coding_variants(str(working_directory + "/" + input_filename + "_consequence.vcf"), str(filtered_folder + "/" + input_filename + "_filtered.vcf"), "CONS") @@ -157,3 +180,16 @@ annotate.filter_regions(str(working_directory + "/" + input_filename + "_snp_vep_annotated_bed_bw.vcf"), str(output_folder + "/" + input_filename + "_snp_annotated.vcf"), annotation_dict) annotate.filter_regions(str(working_directory + "/" + input_filename + "_indel_vep_annotated_bed.vcf"), str(output_folder + "/" + input_filename + "_indel_annotated.vcf"), annotation_dict) + if output_table: + # Convert annotated VCF files to pandas dataframes + input_data_snp_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(output_folder + "/" + input_filename + "_snp_annotated.vcf"), False, False, transcript_path, num_cores) + input_data_indel_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(output_folder + "/" + input_filename + "_indel_annotated.vcf"), True, False, transcript_path, num_cores) + input_data_indel_expanded_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(output_folder + "/" + input_filename + "_indelExpanded_annotated.vcf"), True, True, transcript_path, num_cores) + + # Combine InDel variants + input_data_indel_combined_annotated = combine_expanded_indels.parallelized_indel_combination(input_data_indel_annotated, input_data_indel_expanded_annotated, feature_list, num_cores) + + # Create combined annotated CSV file + input_data_combined = pd.concat([input_data_snp_annotated, input_data_indel_combined_annotated]).sort_values(["#CHROM", "POS"], ascending=[True, True]) + input_data_combined.to_csv(str(output_folder + "/" + input_filename + "_annotated.csv"), index=False, sep="\t") + diff --git a/aidiva/run_annotation_and_aiDIVA.py b/aidiva/run_annotation_and_aiDIVA.py index 2ad77f5..147d5ad 100644 --- a/aidiva/run_annotation_and_aiDIVA.py +++ b/aidiva/run_annotation_and_aiDIVA.py @@ -17,9 +17,9 @@ if __name__=="__main__": - parser = argparse.ArgumentParser(description = "aiDIVA -- augmented intelligence-based Disease Variant Analysis") + parser = argparse.ArgumentParser(description = "AIdiva -- Augmented Intelligence Disease Variant Analysis") parser.add_argument("--vcf", type=str, dest="vcf", metavar="input.vcf(.gz)", required=True, help="VCF file with the variants to analyze [required]") - parser.add_argument("--config", type=str, dest="config", metavar="config.yaml", required=True, help="Config file specifying the parameters for AIdiva [required]") + parser.add_argument("--config", type=str, dest="config", metavar="config.yaml", required=True, help="Config file specifying the parameters for aiDIVA [required]") parser.add_argument("--out_prefix", type=str, dest="out_prefix", metavar="/output_path/aidiva_result", required=True, help="Prefix that is used to save the results [required]") 
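Note: the canonical-transcript file passed as transcript_path in run_annotation.py above (config entry Canonical-Transcripts, exposed as --transcript_ids in convert_vcf_to_csv.py) is read line by line with '#'-prefixed lines skipped, so it is expected to be plain text with one Ensembl transcript ID per line, for example (hypothetical IDs):

# MANE Select transcript IDs, one per line
ENST00000000001
ENST00000000002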
parser.add_argument("--workdir", type=str, dest="workdir", metavar="/tmp/aidiva_workdir/", required=False, help="Path to the working directory, here all intermediate files are saved (if not specified a temporary folder will be created and used)") parser.add_argument("--hpo_list", type=str, dest="hpo_list", metavar="hpo.txt", required=False, help="TXT file containing the HPO terms reported for the current patient") @@ -27,10 +27,11 @@ parser.add_argument("--family_file", type=str, dest="family_file", metavar="family.txt", required=False, help="TXT file showing the family relation of the current patient") parser.add_argument("--family_type", type=str, dest="family_type", metavar="SINGLE", required=False, help="String indicating the present family type [SINGLE, TRIO]") parser.add_argument("--skip_db_check", dest="skip_db_check", action="store_true", required=False, help="Flag to skip DB lookup of variants") - parser.add_argument("--only_top_results", dest="only_top_results", action="store_true", required=False, help="Report only the top 25 variants as result") + parser.add_argument("--only_top_results", dest="only_top_results", action="store_true", required=False, help="Report only the top ranking variants as result. The desired rank can be given as parameter with '--top_rank' (default: 25)") + parser.add_argument("--top_rank", type=str, dest="top_rank", metavar="25", required=False, help="Rank parameter for '--only_top_results' (default: 25)") parser.add_argument("--inhouse_sample", dest="inhouse_sample", action="store_true", required=False, help="Flag to indicate that we are annotating an inhouse sample (skips leftNormalize since it is already performed)") parser.add_argument("--threads", type=int, dest="threads", metavar="1", required=False, help="Number of threads to use. 
(default: 1)") - parser.add_argument("--log_file", type=str, dest="log_file", metavar="/tmp/aidiva_workdir/sample_id_aidiva.log", required=False, help="Custom log file, if not specified the log file is saved in the working directory") + parser.add_argument("--log_file", type=str, dest="log_file", metavar="/output_path/logs/aidiva_log.txt", required=False, help="Path plus name of the log file to be saved, if not specified the log file is saved in the working directory") parser.add_argument("--log_level", type=str, dest="log_level", metavar="INFO", required=False, help="Define logging level, if unsure just leave the default [DEBUG, INFO] (default: INFO)") parser.add_argument("--save_as_vcf", dest="save_as_vcf", action="store_true", required=False, help="Flag to create additional result files in VCF format.") args = parser.parse_args() @@ -38,6 +39,7 @@ # parse input files if os.path.isfile(args.vcf): input_vcf = args.vcf + else: raise SystemExit("The given input file could not be opened!") @@ -45,17 +47,21 @@ if args.config is not None: try: config_file = open(args.config, "r") + except OSError: raise SystemExit("The given config file could not be opened!") + else: configuration = yaml.load(config_file, Loader=yaml.SafeLoader) config_file.close() + else: raise SystemExit("The parameter 'config' was not specified!") # parse output files if args.out_prefix is not None: output_filename = args.out_prefix + else: raise SystemExit("The parameter 'out_prefix' was not specified!") @@ -64,6 +70,7 @@ if not working_directory.endswith("/"): working_directory = working_directory + "/" + else: workdir = tempfile.TemporaryDirectory(suffix="", prefix="aidiva_workdir_", dir=None) working_directory = workdir.name @@ -74,11 +81,13 @@ # parse disease and inheritance information if args.hpo_list is not None: hpo_file = args.hpo_list + else: hpo_file = None if args.gene_exclusion is not None: gene_exclusion_file = args.gene_exclusion + else: gene_exclusion_file = None @@ -87,15 +96,15 @@ if args.family_type is not None: family_type = args.family_type + else: family_type = "SINGLE" - + else: family_file = None family_type = "SINGLE" save_as_vcf = args.save_as_vcf - skip_db_check = args.skip_db_check only_top_results = args.only_top_results inhouse_sample = args.inhouse_sample @@ -103,41 +112,54 @@ # obtain number of threads to use during computation if args.threads is not None: num_cores = int(args.threads) + else: num_cores = 1 + if args.top_rank is not None: + top_rank = int(args.top_rank) + + else: + top_rank = 25 + # use log level INFO as default if args.log_level is not None: if args.log_level == "DEBUG": log_level = logging.DEBUG log_format = "%(asctime)s -- %(name)s - %(levelname)s - %(message)s" + elif args.log_level == "INFO": log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" - #elif args.log_level == "WARN": - # log_level = logging.WARN + + #elif args.log_level == "WARNING": + # log_level = logging.WARNING # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + #elif args.log_level == "ERROR": # log_level = logging.ERROR # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + #elif args.log_level == "CRITICAL": # log_level = logging.CRITICAL # log_format = "%(asctime)s -- %(levelname)s - %(message)s" + else: log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" + else: log_level = logging.INFO log_format = "%(asctime)s -- %(levelname)s - %(message)s" - + if args.log_file is not None: log_file = args.log_file + else: timestamp = 
time.strftime("%Y%m%d-%H%M%S") - log_file = str(working_directory + "/" + "aidiva_" + timestamp + ".log") + log_file = str(working_directory + "/" + "aidiva_" + timestamp + ".txt") # set up logger - logging.basicConfig(filename=log_file, filemode="a", format=log_format, @@ -145,16 +167,10 @@ level=log_level) logger = logging.getLogger() - logger.info("Running annotation and aiDIVA") + logger.info("Running annotation and AIdiva") logger.info("Start program") logger.info(f"Working directory: {working_directory}") - # load SNP ML model - #scoring_model_snp = configuration["Analysis-Input"]["scoring-model-snp"] - - # load InDel ML model - #scoring_model_indel = configuration["Analysis-Input"]["scoring-model-indel"] - # load ML model scoring_model = configuration["Analysis-Input"]["scoring-model"] @@ -168,6 +184,8 @@ ref_path = configuration["Analysis-Input"]["ref-path"] + transcript_path = configuration["Canonical-Transcripts"] + # convert splitted input data to vcf and annotate input_file = os.path.splitext(input_vcf)[0] input_filename = os.path.basename(input_file) @@ -208,9 +226,9 @@ annotate.filter_regions(str(working_directory + "/" + input_filename + "_indel_vep_annotated_bed.vcf"), str(working_directory + "/" + input_filename + "_indel_vep_annotated_bed_filtered.vcf"), annotation_dict) # convert annotated vcfs back to pandas dataframes - input_data_snp_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(working_directory + "/" + input_filename + "_snp_vep_annotated_bed_bw_filtered.vcf"), False, False, num_cores) - input_data_indel_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(working_directory + "/" + input_filename + "_indel_vep_annotated_bed_filtered.vcf"), True, False, num_cores) - input_data_indel_expanded_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(working_directory + "/" + input_filename + "_indel_expanded_vep_annotated_bw.vcf"), True, True, num_cores) + input_data_snp_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(working_directory + "/" + input_filename + "_snp_vep_annotated_bed_bw_filtered.vcf"), False, False, transcript_path, num_cores) + input_data_indel_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(working_directory + "/" + input_filename + "_indel_vep_annotated_bed_filtered.vcf"), True, False, transcript_path, num_cores) + input_data_indel_expanded_annotated = convert_vcf.convert_vcf_to_pandas_dataframe(str(working_directory + "/" + input_filename + "_indel_expanded_vep_annotated_bw.vcf"), True, True, transcript_path, num_cores) if (not input_data_snp_annotated.dropna(how='all').empty) or ((not input_data_indel_annotated.dropna(how='all').empty) and (not input_data_indel_expanded_annotated.dropna(how='all').empty)): if ((not input_data_indel_annotated.empty) and (not input_data_indel_expanded_annotated.empty)): @@ -219,7 +237,7 @@ input_data_indel_combined_annotated = combine_expanded_indels.parallelized_indel_combination(input_data_indel_annotated, input_data_indel_expanded_annotated, feature_list, num_cores) else: - logger.warn("No InDel variants given move on to SNP processing!") + logger.warning("No InDel variants given move on to SNV processing!") input_data_indel_combined_annotated = pd.DataFrame() # predict pathogenicity score @@ -229,14 +247,14 @@ predicted_data_snp = predict.perform_pathogenicity_score_prediction(scoring_model, input_data_snp_annotated, allele_frequency_list, feature_list, num_cores) else: - logger.warn("No SNP variants, skip SNP prediction!") + logger.warning("No SNV variants, skip 
SNV prediction!") predicted_data_snp = pd.DataFrame() - + if not input_data_indel_combined_annotated.dropna(how='all').empty: predicted_data_indel = predict.perform_pathogenicity_score_prediction(scoring_model, input_data_indel_combined_annotated, allele_frequency_list, feature_list, num_cores) else: - logger.warn("No InDel variants, skip InDel prediction!") + logger.warning("No InDel variants, skip InDel prediction!") predicted_data_indel = pd.DataFrame() if (not predicted_data_snp.dropna(how='all').empty) and (not predicted_data_indel.dropna(how='all').empty): @@ -247,7 +265,7 @@ elif (predicted_data_snp.dropna(how='all').empty) and (not predicted_data_indel.dropna(how='all').empty): predicted_data = predicted_data_indel - + elif (predicted_data_indel.dropna(how='all').empty) and (not predicted_data_snp.dropna(how='all').empty): predicted_data = predicted_data_snp @@ -257,16 +275,19 @@ # prioritize and filter variants logger.info("Filter variants and finalize score...") prioritized_data = prio.prioritize_variants(predicted_data, internal_parameter_dict, ref_path, num_cores, assembly_build, feature_list, skip_db_check, family_file, family_type, hpo_file, gene_exclusion_file) + prioritized_data["AIDIVA_RANK"] = prioritized_data["FINAL_AIDIVA_SCORE"].rank(method='min', ascending=False) + ## TODO: create additional output files according to the inheritance information (only filtered data) if only_top_results: - prioritized_data[prioritized_data["FILTER_PASSED"] == 1].head(n=25).to_csv(str(output_filename + "_aidiva_result_filtered.tsv"), sep="\t", index=False) - logger.info("Only 25 best variants are reported as result!") + prioritized_data[prioritized_data["FILTER_PASSED"] == 1 & prioritized_data["AIDIVA_RANK"] <= top_rank].to_csv(str(output_filename + "_aidiva_result_filtered.tsv"), sep="\t", index=False) + logger.info(f"Only top ranking variants are reported as result (rank: {top_rank})!") else: if save_as_vcf: write_result.write_result_vcf(prioritized_data, str(output_filename + "_aidiva_result.vcf"), assembly_build, bool(family_type == "SINGLE")) write_result.write_result_vcf(prioritized_data[prioritized_data["FILTER_PASSED"] == 1], str(output_filename + "_aidiva_result_filtered.vcf"), assembly_build, bool(family_type == "SINGLE")) + #prioritized_data = prioritized_data.rename(columns={"CHROM": "#CHROM"}) # not needed anymore prioritized_data.to_csv(str(output_filename + "_aidiva_result.tsv"), sep="\t", index=False) prioritized_data[prioritized_data["FILTER_PASSED"] == 1].to_csv(str(output_filename + "_aidiva_result_filtered.tsv"), sep="\t", index=False) @@ -284,4 +305,4 @@ write_result.write_result_vcf(None, str(output_filename + "_aidiva_result.vcf"), assembly_build, bool(family_type == "SINGLE")) write_result.write_result_vcf(None, str(output_filename + "_aidiva_result_filtered.vcf"), assembly_build, bool(family_type == "SINGLE")) - logger.warn("The given input files were empty!") + logger.warning("The given input files were empty!") diff --git a/aidiva/variant_annotation/annotate_with_vep.py b/aidiva/variant_annotation/annotate_with_vep.py index fae03b3..223600e 100644 --- a/aidiva/variant_annotation/annotate_with_vep.py +++ b/aidiva/variant_annotation/annotate_with_vep.py @@ -16,46 +16,54 @@ def call_vep_and_annotate_vcf(input_vcf_file, output_vcf_file, vep_annotation_di # set the correct paths to the needed perl modules if "PERL5LIB" in os.environ: - #os.environ["PERL5LIB"] = vep_annotation_dict["vep"] + "/" + "Bio/:" + vep_annotation_dict["vep"] + "/" + "cpan/lib/perl5/:" + 
os.environ["PERL5LIB"] - os.environ["PERL5LIB"] = f"{vep_annotation_dict['vep']}/Bio/:/mnt/storage2/share/opt/perl_cpan_ubuntu20.04/lib/perl5/:{vep_annotation_dict['vep-plugin-path']}:{os.environ['PERL5LIB']}" + os.environ["PERL5LIB"] = f"{os.environ['PERL5LIB']}:{vep_annotation_dict['vep']}/Bio/:{vep_annotation_dict['vep-plugin-path']}:{vep_annotation_dict['vep-cpan']}" + else: - #os.environ["PERL5LIB"] = vep_annotation_dict["vep"] + "/" + "Bio/:" + vep_annotation_dict["vep"] + "/" + "cpan/lib/perl5/" - os.environ["PERL5LIB"] = f"{vep_annotation_dict['vep']}/Bio/:/mnt/storage2/share/opt/perl_cpan_ubuntu20.04/lib/perl5/:{vep_annotation_dict['vep-plugin-path']}" + os.environ["PERL5LIB"] = f"{vep_annotation_dict['vep']}/Bio/:{vep_annotation_dict['vep-plugin-path']}:{vep_annotation_dict['vep-cpan']}" cache_path = vep_annotation_dict['vep-cache'] + "/" + plugin_path = vep_annotation_dict['vep-plugin-path'] + "/" # add essential parameters vep_command = f"{vep_command} --species homo_sapiens --assembly {build} " vep_command = f"{vep_command} --offline" vep_command = f"{vep_command} --cache" vep_command = f"{vep_command} --dir_cache {cache_path}" + vep_command = f"{vep_command} --dir_plugins {plugin_path}" vep_command = f"{vep_command} --gencode_basic" vep_command = f"{vep_command} --symbol" vep_command = f"{vep_command} --biotype" vep_command = f"{vep_command} --variant_class" - if not expanded: - pass # deprecated + #if not expanded: + # pass + # deprecated -> to remove # allele frequencies to include #vep_command = f"{vep_command} --af" #vep_command = f"{vep_command} --af_1kg" #vep_command = f"{vep_command} --af_esp" - if build == "GRCh37": - vep_command = f"{vep_command} --af_gnomad" - elif build == "GRCh38": - vep_command = f"{vep_command} --af_gnomadg" + #if build == "GRCh37": + # vep_command = f"{vep_command} --af_gnomad" + #elif build == "GRCh38": + # vep_command = f"{vep_command} --af_gnomadg" # vep plugins to use if not basic: vep_command = f"{vep_command} --sift s" vep_command = f"{vep_command} --polyphen s" - #vep_command = f"{vep_command} --plugin EVE,file={vep_annotation_dict['plugin-files']['EVE']}" vep_command = f"{vep_command} --plugin AlphaMissense,file={vep_annotation_dict['plugin-files']['AlphaMissense']}" vep_command = f"{vep_command} -i " + input_vcf_file + " " vep_command = f"{vep_command} -o " + output_vcf_file + " " - vep_command = f"{vep_command} --fork " + str(num_cores) + " " + + # the developers of VEP recommend to use 4 threads + if num_cores >= 4: + vep_command = f"{vep_command} --fork 4 " + + elif num_cores == 2: + vep_command = f"{vep_command} --fork 2 " + vep_command = f"{vep_command} --format vcf" + " " # we need this to prevent vep from not working if the VCF file has no variant entries vep_command = f"{vep_command} --vcf" + " " vep_command = f"{vep_command} --no_stats" + " " @@ -72,11 +80,10 @@ def annotate_consequence_information(input_vcf_file, output_vcf_file, vep_annota # set the correct paths to the needed perl modules if "PERL5LIB" in os.environ: - #os.environ["PERL5LIB"] = vep_annotation_dict["vep"] + "/" + "Bio/:" + vep_annotation_dict["vep"] + "/" + "cpan/lib/perl5/:" + os.environ["PERL5LIB"] - os.environ["PERL5LIB"] = f"{vep_annotation_dict['vep']}/Bio/:/mnt/storage2/share/opt/perl_cpan_ubuntu20.04/lib/perl5/:{vep_annotation_dict['vep-plugin-path']}:{os.environ['PERL5LIB']}" + os.environ["PERL5LIB"] = f"{vep_annotation_dict['vep']}/Bio/:{vep_annotation_dict['vep-cpan']}:{vep_annotation_dict['vep-plugin-path']}:{os.environ['PERL5LIB']}" + else: - 
#os.environ["PERL5LIB"] = vep_annotation_dict["vep"] + "/" + "Bio/:" + vep_annotation_dict["vep"] + "/" + "cpan/lib/perl5/" - os.environ["PERL5LIB"] = f"{vep_annotation_dict['vep']}/Bio/:/mnt/storage2/share/opt/perl_cpan_ubuntu20.04/lib/perl5/:{vep_annotation_dict['vep-plugin-path']}" + os.environ["PERL5LIB"] = f"{vep_annotation_dict['vep']}/Bio/:{vep_annotation_dict['vep-cpan']}:{vep_annotation_dict['vep-plugin-path']}" cache_path = vep_annotation_dict['vep-cache'] + "/" @@ -91,7 +98,14 @@ def annotate_consequence_information(input_vcf_file, output_vcf_file, vep_annota vep_command = f"{vep_command} -o {output_vcf_file}" vep_command = f"{vep_command} --fields Consequence" vep_command = f"{vep_command} --vcf_info_field CONS" - vep_command = f"{vep_command} --fork {num_cores}" + + # the developers of VEP recommend to use 4 threads + if num_cores >= 4: + vep_command = f"{vep_command} --fork 4 " + + elif num_cores == 2: + vep_command = f"{vep_command} --fork 2 " + vep_command = f"{vep_command} --vcf" vep_command = f"{vep_command} --no_stats" vep_command = f"{vep_command} --force_overwrite" @@ -112,9 +126,6 @@ def annotate_from_vcf(input_vcf_file, output_vcf_file, annotation_dict, expanded tmp.write(f"{vcf_annotation['EIGEN_PHRED']}\t\tEIGEN_PHRED\t\ttrue\n".encode()) tmp.write(f"{vcf_annotation['FATHMM_XF']}\t\tFATHMM_XF\t\ttrue\n".encode()) tmp.write(f"{vcf_annotation['MutationAssessor']}\t\tMutationAssessor\t\ttrue\n".encode()) - # add allele frequencies to gnomAD annotation (instead of annotation from VEP) - #tmp.write(f"{vcf_annotation['gnomAD']}\tgnomAD\tAN,Hom\t\ttrue\n".encode()) - #tmp.write(f"{vcf_annotation['gnomAD']}\tgnomAD\tAN,Hom,AFR_AF,AMR_AF,EAS_AF,NFE_AF,SAS_AF\t\ttrue\n".encode()) tmp.write(f"{vcf_annotation['CAPICE']}\t\tCAPICE\t\ttrue\n".encode()) tmp.write(f"{vcf_annotation['dbscSNV']}\t\tADA=ADA_SCORE,RF=RF_SCORE\t\ttrue\n".encode()) tmp.write(f"{vcf_annotation['CADD']}\t\tCADD\t\ttrue\n".encode()) @@ -129,7 +140,7 @@ def annotate_from_vcf(input_vcf_file, output_vcf_file, annotation_dict, expanded tmp.write(f"{vcf_annotation['hgmd']}\tHGMD\tCLASS,RANKSCORE\t\ttrue\n".encode()) else: - logger.warn("HGMD file is not found! Skip HGMD annotation!") + logger.warning("HGMD file is not found! 
Skip HGMD annotation!") # switch between SNV and InDel file if basic: @@ -157,17 +168,25 @@ def annotate_from_bed(input_vcf_file, output_vcf_file, annotation_dict, num_core tmp_segDup = tempfile.NamedTemporaryFile(mode="w+b", suffix="_segDup.vcf", delete=False) tmp_simpleRepeat = tempfile.NamedTemporaryFile(mode="w+b", suffix="_simpleRepeat.vcf", delete=False) tmp_oe_lof = tempfile.NamedTemporaryFile(mode="w+b", suffix="_oe_lof.vcf", delete=False) + #tmp_oe_mis = tempfile.NamedTemporaryFile(mode="w+b", suffix="_oe_mis.vcf", delete=False) ## currently not used + #tmp_oe_syn = tempfile.NamedTemporaryFile(mode="w+b", suffix="_oe_syn.vcf", delete=False) ## currently not used tmp_repeatmasker = tempfile.NamedTemporaryFile(mode="w+b", suffix="_repeatmasker.vcf", delete=False) # close temporary files to make them accessible tmp_segDup.close() tmp_simpleRepeat.close() tmp_oe_lof.close() + #tmp_oe_mis.close() ## currently not used + #tmp_oe_syn.close() ## currently not used tmp_repeatmasker.close() subprocess.run(f"{command} -bed {bed_annotation['segmentDuplication']} -name SegDup -sep '&' -in {input_vcf_file} -out {tmp_segDup.name} -threads {num_cores}", shell=True, check=True) subprocess.run(f"{command} -bed {bed_annotation['simpleRepeat']} -name SimpleRepeats -sep '&' -in {tmp_segDup.name} -out {tmp_simpleRepeat.name} -threads {num_cores}", shell=True, check=True) subprocess.run(f"{command} -bed {bed_annotation['oe_lof']} -name oe_lof -sep '&' -in {tmp_simpleRepeat.name} -out {tmp_oe_lof.name} -threads {num_cores}", shell=True, check=True) + + ## currently not used + #subprocess.run(f"{command} -bed {bed_annotation['oe_mis']} -name oe_mis -sep '&' -in {tmp_oe_lof.name} -out {tmp_oe_mis.name} -threads {num_cores}", shell=True, check=True) + #subprocess.run(f"{command} -bed {bed_annotation['oe_syn']} -name oe_syn -sep '&' -in {tmp_oe_mis.name} -out {tmp_oe_syn.name} -threads {num_cores}", shell=True, check=True) if os.path.isfile(f"{bed_annotation['omim']}"): subprocess.run(f"{command} -bed {bed_annotation['repeatMasker']} -name REPEATMASKER -sep '&' -in {tmp_oe_lof.name} -out {tmp_repeatmasker.name} -threads {num_cores}", shell=True, check=True) @@ -182,6 +201,8 @@ def annotate_from_bed(input_vcf_file, output_vcf_file, annotation_dict, num_core os.remove(tmp_segDup.name) os.remove(tmp_simpleRepeat.name) os.remove(tmp_oe_lof.name) + #os.remove(tmp_oe_mis.name) ## currently not used + #os.remove(tmp_oe_syn.name) ## currently not used os.remove(tmp_repeatmasker.name) @@ -287,7 +308,7 @@ def left_normalize_and_sort_vcf(input_vcf_file, output_vcf_file, vep_annotation_ tmp_bigwig_annot = tempfile.NamedTemporaryFile(mode="w+b", suffix="_bigwigAnnot.vcf", delete=False) # perform annotations - left_normalize_and_sort_vcf(input_vcf_file, tmp_sorted.name, annotation_dict, reference, inhouse_sample) + left_normalize_and_sort_vcf(input_vcf_file, tmp_sorted.name, annotation_dict, args.reference, inhouse_sample) call_vep_and_annotate_vcf(tmp_sorted.name, tmp_vep_annot.name, annotation_dict, assembly_build, basic_annotation, expanded_annotation, num_threads) annotate_from_vcf(tmp_vep_annot.name, tmp_vcf_annot.name, annotation_dict, expanded_annotation, basic_annotation, num_threads) diff --git a/aidiva/variant_prioritization/get_HPO_similarity_score.py b/aidiva/variant_prioritization/get_HPO_similarity_score.py index 43baf40..a5b3358 100644 --- a/aidiva/variant_prioritization/get_HPO_similarity_score.py +++ b/aidiva/variant_prioritization/get_HPO_similarity_score.py @@ -1,4 +1,3 @@ - import numpy as np import 
logging @@ -9,7 +8,7 @@ def find_mica_IC(HPO_term_a, HPO_term_b, ic_per_nodes, node_ancestor_mapping): hpo_nodes_shared_ancestors = node_ancestor_mapping[HPO_term_a].intersection(node_ancestor_mapping[HPO_term_b]) mica_ic = max([float(ic_per_nodes[node]) for node in hpo_nodes_shared_ancestors], default=0.0) - + return mica_ic @@ -17,19 +16,20 @@ def compute_similarity_between_nodes(hpo_term_a, hpo_term_b, ic_per_nodes, node_ node_a_ic = float(ic_per_nodes[hpo_term_a]) node_b_ic = float(ic_per_nodes[hpo_term_b]) mica_ic = find_mica_IC(hpo_term_a, hpo_term_b, ic_per_nodes, node_ancestor_mapping) - + if similarity_measure == "Lin": if mica_ic == 0.0 or (node_a_ic == 0.0 and node_b_ic == 0.0): nodes_similarity = 0.0 + else: nodes_similarity = (2.0 * mica_ic) / (node_a_ic + node_b_ic) - + elif similarity_measure == "Resnik": nodes_similarity = mica_ic - + elif similarity_measure == "Jiang-Conrath": nodes_similarity = (1 - (node_a_ic + node_b_ic - 2.0 * mica_ic)) - + elif similarity_measure == "Relevance": #nodes_similarity = ((2.0 * mica_ic) / (node_a_ic + node_b_ic)) * (1 - p(mica)) pass @@ -42,69 +42,78 @@ def compute_similarity_between_nodes(hpo_term_a, hpo_term_b, ic_per_nodes, node_ elif similarity_measure == "Wang": pass - + else: logger.error("An error occured while determining the similarity measure!") - + return nodes_similarity def calculate_hpo_set_similarity(hpo_graph, hpo_term_set_a, hpo_term_set_b, ic_per_nodes, node_ancestor_mapping, hpo_replacement_information): checked_term_set_a = [] checked_term_set_b = [] - + # alternatives and/or considerations are ignored for now alternatives = hpo_replacement_information["alternatives"] considerations = hpo_replacement_information["considerations"] replacements = hpo_replacement_information["replacements"] - + for term_a in hpo_term_set_a: if term_a not in hpo_graph: if term_a in replacements.keys(): checked_term_set_a.append(replacements[term_a]) logger.debug(f"{term_a} (sample) not in HPO graph! Replacement ({replacements[term_a]}) found will use this term instead!") + elif term_a in alternatives.keys(): #checked_term_set_a.extend(alternatives[term_a]) logger.debug(f"{term_a} (sample) not in HPO graph! Alternatives ({alternatives[term_a]}) found! HPO term will be skipped!") + elif term_a in considerations.keys(): #checked_term_set_a.extend(considerations[term_a]) logger.debug(f"{term_a} (sample) not in HPO graph! Considerations ({considerations[term_a]}) found! HPO term will be skipped!") + else: logger.debug(f"{term_a} (sample) not in HPO graph! HPO term will be skipped!") + else: checked_term_set_a.append(term_a) - + for term_b in hpo_term_set_b: if term_b not in hpo_graph: if term_b in replacements.keys(): checked_term_set_b.append(replacements[term_b]) logger.debug(f"{term_b} (gene) not in HPO graph! Replacement ({replacements[term_b]}) found will use this term instead!") + elif term_b in considerations.keys(): #checked_term_set_b.extend(considerations[term_b]) logger.debug(f"{term_b} (gene) not in HPO graph! Considerations ({considerations[term_b]}) found! HPO term will be skipped!") + elif term_b in alternatives.keys(): #checked_term_set_b.extend(alternatives[term_b]) logger.debug(f"{term_b} (gene) not in HPO graph! Alternatives ({alternatives[term_b]}) found! HPO term will be skipped!") + else: logger.debug(f"{term_b} (gene) not in HPO graph! 
HPO term will be skipped!") + else: checked_term_set_b.append(term_b) - + if checked_term_set_a and checked_term_set_b: similarities_a_to_b = [max([compute_similarity_between_nodes(term_a, term_b, ic_per_nodes, node_ancestor_mapping) for term_b in checked_term_set_b], default=0.0) for term_a in checked_term_set_a] #similarities_b_to_a = [max([compute_similarity_between_nodes(term_b, term_a, ic_per_nodes, node_ancestor_mapping) for term_a in checked_term_set_a], default=0.0) for term_b in checked_term_set_b] - + set_a_to_b_similarity = np.median(similarities_a_to_b) #set_b_to_a_similarity = np.median(similarities_b_to_a) #if set_a_to_b_similarity != 0.0 or set_b_to_a_similarity != 0.0: # hpo_set_similarity = (set_a_to_b_similarity + set_b_to_a_similarity) / 2 + #else: # hpo_set_similarity = 0.0 - + return set_a_to_b_similarity else: logger.debug(f"Sample HPO set ({hpo_term_set_a}) and/or Gene HPO set ({hpo_term_set_b}) was empty after checking if the terms are part of the used HPO graph! Maybe no supported term was in the set! Similarity set to 0.0!") - + return 0.0 diff --git a/aidiva/variant_prioritization/prioritize_variants.py b/aidiva/variant_prioritization/prioritize_variants.py index 34008db..31925a0 100644 --- a/aidiva/variant_prioritization/prioritize_variants.py +++ b/aidiva/variant_prioritization/prioritize_variants.py @@ -126,7 +126,7 @@ def parse_ped_file(family_file): else: logger.error("The specified family file %s is not a valid file" % (family_file)) - logger.warn("Inheritance assessment will be skipped!") + logger.warning("Inheritance assessment will be skipped!") return family_dict @@ -153,7 +153,7 @@ def parse_hpo_list(hpo_list_file): #logger.error("The specified HPO list %s is not a valid file" % (hpo_list_file)) else: - logger.warn("HPO score finalization will be skipped!") + logger.warning("HPO score finalization will be skipped!") return list(hpo_query) @@ -176,7 +176,7 @@ def parse_gene_list(gene_exclusion_file): else: logger.error("The specified gene exclusion list %s is not a valid file" % (gene_exclusion_file)) - logger.warn("No genes will be excluded during filtering!") + logger.warning("No genes will be excluded during filtering!") return genes2exclude @@ -283,6 +283,7 @@ def parallelized_variant_processing(skip_db_check, transcript_dict, family, fami if genotype_column: variant_data = check_inheritance(variant_data, family_type, family) + else: logger.info(f"Skip inheritance check!") @@ -315,6 +316,7 @@ def investigate_transcript_cds_position(variant, transcript_dict): if "-" in str(variant["CDS_position"]): cds_start = variant["CDS_position"].split("-")[0] + else: cds_start = variant["CDS_position"] @@ -327,14 +329,19 @@ def investigate_transcript_cds_position(variant, transcript_dict): if 0.95 <= start_percentage < 0.96: adjusted_score = float(predicted_score) - 0.01 + elif 0.96 <= start_percentage < 0.97: adjusted_score = float(predicted_score) - 0.02 + elif 0.97 <= start_percentage < 0.98: adjusted_score = float(predicted_score) - 0.03 + elif 0.98 <= start_percentage < 0.99: adjusted_score = float(predicted_score) - 0.04 + elif 0.99 <= start_percentage <= 1: adjusted_score = float(predicted_score) - 0.05 + else: adjusted_score = predicted_score @@ -365,11 +372,14 @@ def check_databases_for_pathogenicity_classification(variant): clinvar_classification = str(variant["CLINVAR_DETAILS"]).split("%")[0] if "(replaced" in clinvar_classification: clinvar_classification = clinvar_classification.split("(replaced")[0] + elif "/" in clinvar_classification: if 
clinvar_classification.lower() == "benign/likely_benign" or clinvar_classification.lower() == "likely_benign/benign": clinvar_classification = "likely_benign" + elif clinvar_classification.lower() == "pathogenic/likely_pathogenic" or clinvar_classification.lower() == "likely_pathogenic/pathogenic": clinvar_classification = "likely_pathogenic" + else: logger.debug(f"Found unknown ClinVar classification {clinvar_classification}") @@ -377,33 +387,44 @@ def check_databases_for_pathogenicity_classification(variant): if clinvar_classification.lower() == "benign": clinvar_score = 0.0 + elif clinvar_classification.lower() == "likely_benign": clinvar_score = 0.25 + elif clinvar_classification.lower() == "uncertain_significance": clinvar_score = 0.5 + elif clinvar_classification.lower() == "likely_pathogenic": clinvar_score = 0.75 + elif clinvar_classification.lower() == "pathogenic": clinvar_score = 1.0 + else: # ignore missing database scores (if both scores missing use unmodified AIDIVA_SCORE) clinvar_score = np.nan - + if hgmd_classification == "DM": hgmd_score = 1.0 #if float(variant["HGMD_RANKSCORE"]) > 0.5: # hgmd_score = 1.0 + elif hgmd_classification == "DM?": hgmd_score = 0.75 + #elif hgmd_classification == "DP": # hgmd_score = 0.5 + #elif hgmd_classification == "FP": # hgmd_score = 0.5 + #elif hgmd_classification == "DFP": # hgmd_score = 0.5 + # completely ignore removed entries database entries elif hgmd_classification == "R": hgmd_score = np.nan + else: # ignore missing database scores (if both scores missing use unmodified AIDIVA_SCORE) hgmd_score = np.nan @@ -411,24 +432,29 @@ def check_databases_for_pathogenicity_classification(variant): # log a warning message if hgmd score and clinvar_score are both not missing but differ if (hgmd_score != clinvar_score) and not (np.isnan(hgmd_score) or np.isnan(clinvar_score)): - logger.warn(f"The scores of the used databases (ClinVar, HGMD) are different: {clinvar_score}, {hgmd_score} (Variant: {variant['#CHROM']}:{variant['POS']}_{variant['REF']}_{variant['ALT']}) \n If the HGMD database was used during the annotation you may want to further investigate this matter, otherwise you can ignore this warning since we just use the predicted value as default value if the entry is missing.") + logger.warning(f"The scores of the used databases (ClinVar, HGMD) are different: {clinvar_score}, {hgmd_score} (Variant: {variant['#CHROM']}:{variant['POS']}_{variant['REF']}_{variant['ALT']}) \n If the HGMD database was used during the annotation you may want to further investigate this matter, otherwise you can ignore this warning since we just use the predicted value as default value if the entry is missing.") if np.isnan(clinvar_score) and not np.isnan(hgmd_score): database_score = hgmd_score + elif np.isnan(hgmd_score) and not np.isnan(clinvar_score): database_score = clinvar_score + elif not np.isnan(hgmd_score) and not np.isnan(clinvar_score): database_score = (hgmd_score + clinvar_score) / 2 + else: database_score = np.nan - + if not np.isnan(float(variant["AIDIVA_SCORE"])): if not np.isnan(database_score): # update the predicted score with known information from the databases improved_score = 0.8 * float(variant["AIDIVA_SCORE"]) + 0.2 * database_score + else: # use unmodified AIDIVA_SCORE if no database entries were found improved_score = float(variant["AIDIVA_SCORE"]) + else: improved_score = np.nan @@ -437,7 +463,6 @@ def check_databases_for_pathogenicity_classification(variant): def compute_hpo_relatedness_and_final_score(variant, genes2exclude, 
gene_2_HPO, hgnc_2_gene, gene_2_interacting, HPO_graph, HPO_query, ic_per_nodes, node_ancestor_mapping, hpo_replacement_information): if HPO_query: - #if np.isnan(variant["AIDIVA_SCORE"]) and ((str(variant["rf_score"]) == "nan") or (str(variant["rf_score"]) == "")) and ((str(variant["ada_score"]) == "nan") or (str(variant["ada_score"]) == "")): if np.isnan(variant["AIDIVA_SCORE"]) and ((str(variant["SpliceAI"]) == "nan") or (str(variant["SpliceAI"]) == "")): final_score = np.nan hpo_relatedness = np.nan @@ -445,22 +470,13 @@ def compute_hpo_relatedness_and_final_score(variant, genes2exclude, gene_2_HPO, else: if np.isnan(variant["AIDIVA_SCORE"]): - # if both scores are present use maximum to benefit of the strengths of each model - #if ((str(variant["rf_score"]) != "nan") and (str(variant["rf_score"]) != "")) and ((str(variant["ada_score"]) != "nan") and (str(variant["ada_score"]) != "")): - # pathogenictiy_prediction = max([float(variant["rf_score"]), float(variant["ada_score"])], default=np.nan) - #elif ((str(variant["rf_score"]) != "") and (str(variant["rf_score"]) != "nan")) and ((str(variant["ada_score"]) == "") or (str(variant["ada_score"]) == "nan")): - # pathogenictiy_prediction = float(variant["rf_score"]) - #elif ((str(variant["ada_score"]) != "") and (str(variant["ada_score"]) != "nan")) and ((str(variant["rf_score"]) == "") or (str(variant["rf_score"]) == "nan")): - # pathogenictiy_prediction = float(variant["ada_score"]) - #else: - # pathogenictiy_prediction = np.nan - if ((str(variant["SpliceAI"]) != "nan") and (str(variant["SpliceAI"]) != "")): pathogenictiy_prediction = float(variant["SpliceAI"]) + else: pathogenictiy_prediction = np.nan - - logger.debug("Using SpliceAI prediction instead of AIdiva prediction for splicing variant!") + + logger.debug("Using SpliceAI prediction instead of aiDIVA prediction for splicing variant!") else: pathogenictiy_prediction = float(variant["AIDIVA_SCORE"]) @@ -473,6 +489,7 @@ def compute_hpo_relatedness_and_final_score(variant, genes2exclude, gene_2_HPO, if variant_gene in gene_2_interacting.keys(): interacting_genes = [gene_interaction["interacting_gene"] for gene_interaction in gene_2_interacting[variant_gene]] + else: interacting_genes = [] @@ -523,26 +540,12 @@ def compute_hpo_relatedness_and_final_score(variant, genes2exclude, gene_2_HPO, else: if np.isnan(variant["AIDIVA_SCORE"]): - #if ((str(variant["rf_score"]) == "") or (str(variant["rf_score"]) == "nan")) and ((str(variant["ada_score"]) == "") or (str(variant["ada_score"]) == "nan")): if ((str(variant["SpliceAI"]) == "") or (str(variant["SpliceAI"]) == "nan")): pathogenictiy_prediction = np.nan else: - #if (str(variant["rf_score"]) != "") and (str(variant["rf_score"]) != "nan"): - # rf_score = float(variant["rf_score"]) - - #else: - # rf_score = np.nan - - #if (str(variant["ada_score"]) != "") and (str(variant["ada_score"]) != "nan"): - # ada_score = float(variant["ada_score"]) - - #else: - # ada_score = np.nan - - #pathogenictiy_prediction = max([rf_score, ada_score], default=np.nan) pathogenictiy_prediction = float(variant["SpliceAI"]) - logger.debug("Using SpliceAI prediction instead of AIdiva prediction for splicing variant!") + logger.debug("Using SpliceAI prediction instead of aiDIVA prediction for splicing variant!") else: pathogenictiy_prediction = float(variant["AIDIVA_SCORE"]) @@ -564,6 +567,7 @@ def check_inheritance(variant_data, family_type="SINGLE", family=None): for group in variant_data_grouped: check_compound_single(group, variant_columns) + variant_data = 
pd.concat(variant_data_grouped) elif family_type == "TRIO": @@ -585,19 +589,23 @@ def check_inheritance(variant_data, family_type="SINGLE", family=None): for name in family.keys(): if family[name] == 1: affected_child = name + elif family[name] == 0: if not parent_1: parent_1 = name continue + elif not parent_2: parent_2 = name continue + else: logger.error("There was a problem processing the loaded PED file") if affected_child and parent_1 and parent_2: for group in variant_data_grouped: check_compound(group, affected_child, parent_1, parent_2) + else: logger.error(f"Something went wrong while checking the TRIO for compound heterozygosity!") @@ -605,8 +613,8 @@ def check_inheritance(variant_data, family_type="SINGLE", family=None): else: logger.error("If family type (TRIO) is used a proper PED file defining the family relations is required!") - logger.warn("Inheritance assessment will be skipped!") - + logger.warning("Inheritance assessment will be skipped!") + elif (family_type == "MULTI") and (family is not None): if family is not None: variant_data["DOMINANT"] = variant_data.apply(lambda variant: check_dominant(variant, family), axis=1) @@ -615,11 +623,11 @@ def check_inheritance(variant_data, family_type="SINGLE", family=None): else: logger.error("If family type (MULTI) is used a proper PED file defining the family relations (if known) is required!") - logger.warn("Inheritance assessment will be skipped!") - + logger.warning("Inheritance assessment will be skipped!") + else: logger.error("Unsupported family type (%s) was given!" % family_type) - logger.warn("Inheritance assessment will be skipped!") + logger.warning("Inheritance assessment will be skipped!") variant_columns = variant_data.columns variant_data["INHERITANCE"] = variant_data.apply(lambda variant: add_inheritance_mode(variant, variant_columns), axis=1) @@ -710,7 +718,8 @@ def homopolymer_filter(sequence): def check_filters(variant, genes2exclude, HPO_query, reference): - variant_genes = re.sub("\(.*?\)", "", str(variant["SYMBOL"])) + #variant_genes = re.sub("\(.*?\)", "", str(variant["SYMBOL"])) + variant_genes = str(variant["SYMBOL"]) genenames = set(variant_genes.split(";")) in_fasta = pysam.FastaFile(reference) @@ -724,12 +733,14 @@ def check_filters(variant, genes2exclude, HPO_query, reference): try: seg_dup = float(variant[duplication_identifier]) + except Exception as e: logger.debug("Use 0.0 for missing segment duplication entries!") seg_dup = 0.0 try: maf = float(variant["MAX_AF"]) + except Exception as e: logger.debug("Allele frequency entry could not be identified, use 0.0 instead!") maf = 0.0 @@ -762,29 +773,31 @@ def check_filters(variant, genes2exclude, HPO_query, reference): filter_comment = "tandem repeat" return filter_passed, filter_comment - + # let variants with a high FINAL_AIDIVA_SCORE (>=0.7) pass to be more sensitive if (seg_dup > 0) and (float(variant["FINAL_AIDIVA_SCORE"]) < 0.7): filter_passed = 0 # segmental duplication filter_comment = "segmental duplication" return filter_passed, filter_comment - + # let variants with a high FINAL_AIDIVA_SCORE (>=0.7) pass to be more sensitive if ((len(variant["REF"]) > 1 or len(variant["ALT"]) > 1)) and (float(variant["FINAL_AIDIVA_SCORE"]) < 0.7): # make sure to use the correct internal chromsome notation (with chr) if "chr" in str(variant["#CHROM"]): chrom_id = str(variant["#CHROM"]) + else: chrom_id = "chr" + str(variant["#CHROM"]) - + # Get sequence context (vicinity) of a variant for homopolymer check (5 bases up- and down-stream) # Get fewer bases when 
variant is at the start or end of the sequence if "chr" in str(variant["#CHROM"]): chrom_id = str(variant["#CHROM"]) + else: chrom_id = "chr" + str(variant["#CHROM"]) - + num_bases = 5 pos_start = max(int(variant["POS"]) - (num_bases + 1), 1) pos_end = min(int(variant["POS"]) + num_bases, in_fasta.get_reference_length(chrom_id)) @@ -792,9 +805,10 @@ def check_filters(variant, genes2exclude, HPO_query, reference): try: sequence_context = in_fasta.fetch(chrom_id, pos_start, pos_end) sequence_context = sequence_context.upper() + except FileNotFoundError: sequence_context = '.' - + homopolymer_flag, low_complexity_flag = homopolymer_filter(sequence_context) if low_complexity_flag and homopolymer_flag: @@ -922,6 +936,7 @@ def check_denovo(variant, family): # sample info complete? if name in check_samples: check_samples[name] = 1 + else: logger.debug(f"It seems that your pedigree is incomplete. The following sample: {name} could not be found in the pedigree!") @@ -953,7 +968,7 @@ def check_denovo(variant, family): else: # reject missing genotype here or beforehand pass - + for vals in check_samples.values(): if vals == 0: judgement = 0 @@ -975,6 +990,7 @@ def check_dominant(variant, family): if name in check_samples: check_samples[name] = 1 + else: logger.debug(f"It seems that your pedigree is incomplete. The following sample: {name} could not be found in the pedigree!") @@ -1008,7 +1024,7 @@ def check_dominant(variant, family): elif zygosity == "1/1" and family[name] == 0: judgement = 0 break - + else: # reject missing genotype here or beforehand pass @@ -1044,6 +1060,7 @@ def check_recessive(variant, family, family_type): if name in check_samples: check_samples[name] = 1 + else: logger.debug(f"It seems that your pedigree is incomplete. The following sample: {name} could not be found in the pedigree!") @@ -1080,7 +1097,7 @@ def check_recessive(variant, family, family_type): else: # reject missing genotype here or beforehand pass - + for vals in check_samples.values(): if vals == 0: judgement = 0 @@ -1098,7 +1115,7 @@ def check_recessive_single(variant, variant_columns): # recessive variants have to be homozygous in affected samples if (variant_genotype == "1/1"): is_recessive = 1 - + return is_recessive @@ -1118,6 +1135,7 @@ def check_xlinked(variant, family): if name in check_samples: check_samples[name] = 1 + else: logger.debug(f"It seems that your pedigree is incomplete. The following sample: {name} could not be found in the pedigree!") @@ -1152,7 +1170,7 @@ def check_xlinked(variant, family): else: # reject missing genotype here or beforehand pass - + # sanity check het_checker = 0 hom_checker = 0 @@ -1161,13 +1179,14 @@ def check_xlinked(variant, family): # mother if values == "0/1": het_checker = 1 - + # father if values == "0/0": hom_checker = 1 if het_checker == 1 and hom_checker == 1: judgement = 1 + else: judgement = 0 @@ -1187,14 +1206,52 @@ def check_xlinked(variant, family): parser = argparse.ArgumentParser(description = "Filter variants and finalize the AIDIVA_SCORE based on the given HPO terms (if this information is present)") parser.add_argument("--in_file", type=str, dest="in_file", required=True, help="Tab separated input annotated and scored file [required]") parser.add_argument("--out_file", type=str, dest="out_filename", required=True, help="Name to save the results [required]") - parser.add_argument("--family", type=str, dest="family", required=False, help="Tab separated list of samples annotated with affection status. 
[required]") - parser.add_argument("--family_type", type=str, choices=["TRIO", "FAMILY", "SINGLE"], dest="family_type", required=False, help="Choose if the data you provide is a trio or a larger family [required]") + parser.add_argument("--family", type=str, dest="family", required=False, help="Tab separated list of samples annotated with affection status.") + parser.add_argument("--family_type", type=str, choices=["TRIO", "FAMILY", "SINGLE"], dest="family_type", required=False, help="Choose if the data you provide is a trio or a larger family") parser.add_argument("--gene_exclusion", type=str, dest="gene_exclusion_list", required=False, help="List of genes that should be excluded in the prioritization") parser.add_argument("--hpo_list", type=str, dest="hpo_list", default=None, required=False, help="List of HPO terms that are observed in the patient. These terms are used to adjust the AIDIVA_SCORE\n") - parser.add_argument("--hpo_resources", type=str, dest="hpo_resources", default="../../data/", required=True, help="Folder where the HPO resources (HPO_graph,...) are found\n") + #parser.add_argument("--hpo_resources", type=str, dest="hpo_resources", default="../../data/", required=True, help="Folder where the HPO resources (HPO_graph,...) are found\n") + parser.add_argument("--genome_build", type=str, dest="genome_build", default="GRCh38", required=True, help="Version of the genome build to use [GRCh37, GRCH38]\n") + parser.add_argument("--feature_list", type=str, dest="feature_list", required=True, help="Feature list used in the AIDIVA_SCORE prediction.\n") + parser.add_argument("--reference", type=str, dest="reference", required=True, help="Path to the refernce genome.\n") + parser.add_argument("--skip_db_check", action="store_true", required=False, help="Skip db check.\n") + parser.add_argument("--threads", type=str, dest="threads", default=1, required=False, help="Number of threads to use.\n") args = parser.parse_args() input_data = pd.read_csv(args.in_file, sep="\t", low_memory=False) - prioritized_variants = prioritize_variants(input_data, args.hpo_resources, args.family, args.family_type, args.hpo_list, args.gene_exclusion_list, args.low_conf_region) + if args.family: + family_file = args.family + + else: + family_file = None + + if args.family_type and args.family_file is not None: + family_type = args.family_type + + else: + family_type="SINGLE" + + if args.genome_build: + genome_build = args.genome_build + + else: + genome_build = "GRCh38" + + internal_parameter_dict = {"gene2hpo-mapping": "../../data/hpo_resources/gene2hpo.json", + "gene2interacting-mapping": "../../data/hpo_resources/gene2interacting.json", + "transcript2length-mapping": "../../data/hpo_resources/grch38transcript2length.json", + "hgnc2gene-mapping": "../../data/hpo_resources/hgnc2gene.json", + "hpo-graph": "../../data/hpo_resources/hpo_graph.gexf", + "hpo2replacement-mapping": "../../data/hpo_resources/hpo2replacement.json"} + + if args.threads: + num_threads = int(args.threads) + + else: + num_threads = 1 + + feature_list = args.feature_list.split(",") + + prioritized_variants = prioritize_variants(input_data, internal_parameter_dict, args.reference, num_threads, genome_build, feature_list, args.skip_db_check, family_file, family_type, args.hpo_list, args.gene_exclusion_list) prioritized_variants.to_csv(args.out_filename, sep="\t", index=False) diff --git a/aidiva/variant_scoring/score_variants.py b/aidiva/variant_scoring/score_variants.py index cd5cd3f..41480f2 100644 --- 
a/aidiva/variant_scoring/score_variants.py +++ b/aidiva/variant_scoring/score_variants.py @@ -27,9 +27,10 @@ "REVEL": 0.28019263637740743, "PolyPhen": 0.5169017014355943, "oe_lof": 0.53667483333333332, + "oe_mis": 0.8742450611581991, ## currently not used + "oe_syn": 1.0262797714053697, ## currently not used "CAPICE": 0.04487945928377704, "ALPHA_MISSENSE_SCORE": 0.4074365673385914} - #"EVE_SCORE": 0.4866676824933756} MEDIAN_DICT = {"MutationAssessor": 1.87, "CONDEL": 0.4805749233199981, @@ -40,9 +41,10 @@ "REVEL": 0.193, "PolyPhen": 0.547, "oe_lof": 0.48225, + "oe_mis": 0.87952, ## currently not used + "oe_syn": 1.004, ## currently not used "CAPICE": 0.0006, "ALPHA_MISSENSE_SCORE": 0.2509} - #"EVE_SCORE": 0.4946792317600748} SUPPORTED_CODING_VARIANTS = ["stop_gained", "stop_lost", @@ -66,7 +68,8 @@ "splice_donor_region_variant", "splice_polypyrimidine_tract_variant"] -random_seed = 14038 +RANDOM_SEED = 14038 + logger = logging.getLogger(__name__) @@ -87,9 +90,9 @@ def read_input_data(input_file): # make sure to use the same features, that were used for the training of the model -# fill SegDup missing values with -> 0 -# fill ABB_SCORE missing values with -> 0 # fill Allele Frequency missing values with -> 0 +# fill homAF missing values with -> 0 +# fill HIGH_IMPACT missing values with -> 0 # fill missing values from other features with -> median or mean def prepare_input_data(feature_list, allele_frequency_list, input_data): for feature in feature_list: @@ -106,39 +109,16 @@ def prepare_input_data(feature_list, allele_frequency_list, input_data): elif (feature == "IS_INDEL"): continue - #elif feature == "CAPICE": - # TODO: compute mean and median of CAPICE database - #input_data[feature] = input_data[feature].fillna(0.5) - elif "SIFT" == feature: input_data[feature] = input_data.apply(lambda row: min([float(value) for value in str(row[feature]).split("&") if ((value != ".") and (value != "nan") and (value != ""))], default=np.nan), axis=1) input_data[feature] = input_data[feature].fillna(MEDIAN_DICT["SIFT"]) - elif feature == "oe_lof": + elif feature == "oe_lof" or feature == "oe_mis" or feature == "oe_syn": input_data[feature] = input_data.apply(lambda row: min([float(value) for value in str(row[feature]).split("&") if ((value != ".") and (value != "nan") and (value != "") and (":" not in value) and ("-" not in value))], default=np.nan), axis=1) - input_data[feature] = input_data[feature].fillna(MEDIAN_DICT["oe_lof"]) - - #elif feature == "REVEL": - # if input_data["IMPACT"] == "HIGH": - # input_data[feature] = input_data[feature].fillna(1.0) - # else: - # input_data[feature] = input_data[feature].fillna(1.0) - - #elif feature == "EVE_SCORE": - # if input_data["IMPACT"] == "HIGH": - # input_data[feature] = input_data[feature].fillna(1.0) - - # else: - # input_data[feature] = input_data[feature].fillna(MEDIAN_DICT["EVE_SCORE"]) - - #elif feature == "ALPHA_MISSENSE_SCORE": - # if input_data["IMPACT"] == "HIGH" and not "splice" in input_data["Consequence"]: - # input_data[feature] = input_data[feature].fillna(1.0) - - # else: - # input_data[feature] = input_data[feature].fillna(MEDIAN_DICT["ALPHA_MISSENSE_SCORE"]) + input_data[feature] = input_data[feature].fillna(MEDIAN_DICT[feature]) - elif feature == "REVEL" or feature == "ALPHA_MISSENSE_SCORE": #or feature == "EVE_SCORE": + # REVEL and ALPHA_MISSENSE score only missense variants so for missing high impact variants we fill with 1 instead of median/mean value + elif feature == "REVEL" or feature == "ALPHA_MISSENSE_SCORE": 
input_data[feature] = input_data.apply(lambda row: max([float(value) for value in str(row[feature]).split("&") if ((value != ".") & (value != "nan") & (value != "") & (not ":" in value) & (not "-" in value))], default=np.nan), axis=1) input_data.loc[(~(input_data["MOST_SEVERE_CONSEQUENCE"].str.contains("|".join(SPLICE_VARIANTS))) & (input_data["IMPACT"] == "HIGH") & (input_data[feature].isna())), feature] = 1.0 input_data[feature] = input_data[feature].fillna(MEDIAN_DICT[feature]) @@ -194,7 +174,7 @@ def parallelize_dataframe_processing(dataframe, function, num_cores): return dataframe -def perform_pathogenicity_score_prediction(rf_model, input_data, allele_frequency_list, feature_list, num_cores): +def perform_pathogenicity_score_prediction(rf_model, input_data, allele_frequency_list, feature_list, num_cores=1): rf_model = import_model(rf_model) # compute maximum Minor Allele Frequency (MAF) from population frequencies if MAX_AF not present @@ -218,18 +198,12 @@ def perform_pathogenicity_score_prediction(rf_model, input_data, allele_frequenc # set splicing donor/acceptor variants to NaN if not additionally a supported consequence is reported for the variant # add filter for splice_region variants - # later in the pipeline the score from dbscSNV for splicing variants will be used - # TODO: change to use SpliceAI instead of dbscSNV for splice variants - #predicted_data.loc[(~(any(term for term in SUPPORTED_CODING_VARIANTS if term in predicted_data["Consequence"])) & (~(predicted_data["rf_score"].isna()) | ~(predicted_data["ada_score"].isna()))), "AIDIVA_SCORE"] = np.nan - #predicted_data.loc[(~(any(term for term in SUPPORTED_CODING_VARIANTS if term in predicted_data["Consequence"])) & (~(predicted_data["SpliceAI"].isna()))), "AIDIVA_SCORE"] = np.nan - #predicted_data.loc[(~(predicted_data["Consequence"].str.contains("|".join(SUPPORTED_CODING_VARIANTS))) & (~(predicted_data["SpliceAI"].isna()))), "AIDIVA_SCORE"] = np.nan + # later in the pipeline the score from SpliceAI for splicing variants will be used predicted_data.loc[(~(predicted_data["MOST_SEVERE_CONSEQUENCE"].str.contains("|".join(SUPPORTED_CODING_VARIANTS))) & (predicted_data["MOST_SEVERE_CONSEQUENCE"].str.contains("|".join(SPLICE_VARIANTS)))), "AIDIVA_SCORE"] = np.nan - #predicted_data.loc[((predicted_data["Consequence"].str.contains("splice")) & (~(predicted_data["SpliceAI"].isna()))), "AIDIVA_SCORE"] = np.nan # set synonymous variants to 0.0 if they are not at a splicing site # TODO check how to handle overlapping consequences - #predicted_data.loc[(predicted_data["Consequence"].str.contains("synonymous") & ~(predicted_data["Consequence"].str.contains("splice")) & ~(any(term for term in SUPPORTED_CODING_VARIANTS if term in predicted_data["Consequence"]))), "AIDIVA_SCORE"] = 0.0 predicted_data.loc[(predicted_data["MOST_SEVERE_CONSEQUENCE"].str.contains("|".join(SYNONYMOUS_VARIANTS)) & ~(predicted_data["MOST_SEVERE_CONSEQUENCE"].str.contains("|".join(SUPPORTED_CODING_VARIANTS))) & ~(predicted_data["MOST_SEVERE_CONSEQUENCE"].str.contains("|".join(SPLICE_VARIANTS)))), "AIDIVA_SCORE"] = 0.0 return predicted_data @@ -242,6 +216,7 @@ def perform_pathogenicity_score_prediction(rf_model, input_data, allele_frequenc parser.add_argument("--model", type=str, dest="model", metavar="model.pkl", required=True, help="Specifies the name of the trained model to import\n") parser.add_argument("--feature_list", type=str, dest="feature_list", metavar="feature1,feature2,feature3", required=True, help="Comma separated list of the features used to train 
the model\n") parser.add_argument("--allele_frequency_list", type=str, dest="allele_frequency_list", metavar="frequency1,frequecy2,frequency3", required=False, help="Comma separated list of allele frequency sources that should be used as basis to get the maximum allele frequency\n") + parser.add_argument("--threads", type=str, dest="threads", metavar="1", required=False, help="Number of threads to use.\n") args = parser.parse_args() input_data = read_input_data(args.in_data) @@ -253,5 +228,11 @@ def perform_pathogenicity_score_prediction(rf_model, input_data, allele_frequenc else: allele_frequency_list = [] - predicted_data = perform_pathogenicity_score_prediction(args.model, input_data, allele_frequency_list, feature_list) + if args.threads: + num_threads = int(args.threads) + + else: + num_threads = 1 + + predicted_data = perform_pathogenicity_score_prediction(args.model, input_data, allele_frequency_list, feature_list, num_threads) predicted_data.to_csv(args.out_data, index=False, sep="\t", na_rep="NA") diff --git a/data/aiDIVA_example_configuration_annotated.yaml b/data/aiDIVA_example_configuration_annotated.yaml index c85321b..0acab26 100644 --- a/data/aiDIVA_example_configuration_annotated.yaml +++ b/data/aiDIVA_example_configuration_annotated.yaml @@ -1,18 +1,20 @@ -## aiDIVA -- augmented intelligence-based Disease Variant Analysis +## aiDIVA -- augmented intelligence-based disease variant analysis # Configuration file specifiying all the parameters that are needed to run aiDIVA in the different modes # if you modify the key names make sure to also update them in the run_aiDIVA.py script --- -Assembly-Build: GRCh38 +Assembly-Build: GRCh37 +Canonical-Transcripts: "/mnt/storage2/users/ahboced1/phd_project/mane_grch37_v13_transcript_ids.txt" Analysis-Input: # path to the reference assembly used during the expansion of the indels - # instead of GRCh38 you can also use hg38 the differences between the two should not matter here - ref-path: /GRCh38.fa + # instead of GRCh37 you can also use hg19 the differences between the two should not matter here + ref-path: /GRCh37.fa - # trained scoring model (random forest) used to predict the pathogenicity score + # trained scoring models used to predict the pathogenicity score # if no trained model is present you can use the train_model.py script to train a new custom model - scoring-model: /rf_model_combined.pkl + scoring-model-snp: /rf_model_snp.pkl + scoring-model-indel: /rf_model_indel.pkl prioritization-information: # Identifier to get the score from the annotated file @@ -22,16 +24,23 @@ Analysis-Input: Model-Features: - # List containing the names of the allele frequency sources (populations) that are present in the data set (the MaxAF will be based on these) + # List containing the names of the allele frequency sources (populations) that are present in the data set (the MaxAF will be based on these) if not wanted use a empty list "[]" instead + # NOTE: currently not used, we use directly the MAX_AF annotation from VEP allele-frequency-list: - gnomAD_AFR_AF - - gnomAD_AMR_AF + - gnomAD_ASJ_AF - gnomAD_EAS_AF + - gnomAD_FIN_AF - gnomAD_NFE_AF + - gnomAD_OTH_AF - gnomAD_SAS_AF - #- gnomAD_ASJ_AF - #- gnomAD_FIN_AF - #- gnomAD_OTH_AF + - AA_AF + - EA_AF + - AFR_AF + - AMR_AF + - EAS_AF + - EUR_AF + - SAS_AF # List containing the names of the features used for the model training # the exact order of the features is crucial (make sure to have the exact same order as in the training step) @@ -54,9 +63,6 @@ Model-Features: - oe_lof - homAF - CAPICE 
- ALPHA_MISSENSE_SCORE - HIGH_IMPACT - IS_INDEL # DO NOT CHANGE IF YOU DON'T KNOW WHAT YOU DO @@ -64,7 +70,7 @@ Model-Features: Internal-Parameters: gene2hpo-mapping: ../../data/hpo_resources/gene2hpo.json gene2interacting-mapping: ../../data/hpo_resources/gene2interacting.json - transcript2length-mapping: ../../data/hpo_resources/grch38transcript2length.json + transcript2length-mapping: ../../data/hpo_resources/grch37transcript2length.json hgnc2gene-mapping: ../../data/hpo_resources/hgnc2gene.json hpo-graph: ../../data/hpo_resources/hpo_graph.gexf hpo2replacement-mapping: ../../data/hpo_resources/hpo2replacement.json diff --git a/data/aiDIVA_example_configuration_annotated_grch38.yaml b/data/aiDIVA_example_configuration_annotated_grch38.yaml new file mode 100644 index 0000000..066e057 --- /dev/null +++ b/data/aiDIVA_example_configuration_annotated_grch38.yaml @@ -0,0 +1,71 @@ +## aiDIVA -- augmented intelligence-based disease variant analysis +# Configuration file specifying all the parameters that are needed to run aiDIVA in the different modes +# if you modify the key names make sure to also update them in the run_aiDIVA.py script +--- +Assembly-Build: GRCh38 +Canonical-Transcripts: "/mnt/storage2/users/ahboced1/phd_project/mane_grch38_v13_transcript_ids.txt" + + +Analysis-Input: + # path to the reference assembly used during the expansion of the indels + # instead of GRCh38 you can also use hg38 the differences between the two should not matter here + ref-path: /GRCh38.fa + + # trained scoring model (random forest) used to predict the pathogenicity score + # if no trained model is present you can use the train_model.py script to train a new custom model + scoring-model: /rf_model_combined.pkl + + prioritization-information: + # Identifier to get the score from the annotated file + cadd-identifier: CADD_PHRED + repeat-identifier: simpleRepeat + duplication-identifier: segmentDuplication + + +Model-Features: + # List containing the names of the allele frequency sources (populations) that are present in the data set (the MaxAF will be based on these) + allele-frequency-list: + - gnomAD_AFR_AF + - gnomAD_AMR_AF + - gnomAD_EAS_AF + - gnomAD_NFE_AF + - gnomAD_SAS_AF + #- gnomAD_ASJ_AF + #- gnomAD_FIN_AF + #- gnomAD_OTH_AF + + # List containing the names of the features used for the model training + # the exact order of the features is crucial (make sure to have the exact same order as in the training step) + feature-list: + - SIFT + - PolyPhen + - CADD_PHRED + - REVEL + - MAX_AF + - EIGEN_PHRED + - CONDEL + - FATHMM_XF + - MutationAssessor + - phastCons_mammal + - phastCons_primate + - phastCons_vertebrate + - phyloP_mammal + - phyloP_primate + - phyloP_vertebrate + - oe_lof + - homAF + - CAPICE + - ALPHA_MISSENSE_SCORE + - HIGH_IMPACT + - IS_INDEL + + +# DO NOT CHANGE IF YOU DON'T KNOW WHAT YOU DO +# In case you change the following parameters (e.g. 
if you recomputed the hpo resources) make sure to specify the full path to the newly created resources +Internal-Parameters: + gene2hpo-mapping: ../../data/hpo_resources/gene2hpo.json + gene2interacting-mapping: ../../data/hpo_resources/gene2interacting.json + transcript2length-mapping: ../../data/hpo_resources/grch38transcript2length.json + hgnc2gene-mapping: ../../data/hpo_resources/hgnc2gene.json + hpo-graph: ../../data/hpo_resources/hpo_graph.gexf + hpo2replacement-mapping: ../../data/hpo_resources/hpo2replacement.json diff --git a/data/aiDIVA_example_configuration_with_annotation.yaml b/data/aiDIVA_example_configuration_with_annotation.yaml index b96b872..5ea5ac0 100644 --- a/data/aiDIVA_example_configuration_with_annotation.yaml +++ b/data/aiDIVA_example_configuration_with_annotation.yaml @@ -1,18 +1,20 @@ -## aiDIVA -- augmented intelligence-based Disease Variant Analysis +## aiDIVA -- augmented intelligence-based disease variant analysis # Configuration file specifiying all the parameters that are needed to run aiDIVA in the different modes # if you modify the key names make sure to also update them in the run_aiDIVA.py script --- -Assembly-Build: GRCh38 +Assembly-Build: GRCh37 +Canonical-Transcripts: "/mnt/storage2/users/ahboced1/phd_project/mane_grch37_v13_transcript_ids.txt" Analysis-Input: # path to the reference assembly used during the expansion of the indels - # instead of GRCh38 you can also use hg38 the differences between the two should not matter here - ref-path: /GRCh38.fa + # instead of GRCh37 you can also use hg19 the differences between the two should not matter here + ref-path: /GRCh37.fa - # trained scoring model (random forest) used to predict the pathogenicity score + # trained scoring models used to predict the pathogenicity score # if no trained model is present you can use the train_model.py script to train a new custom model - scoring-model: /rf_model_combined.pkl + scoring-model-snp: /rf_model_snp.pkl + scoring-model-indel: /rf_model_indel.pkl prioritization-information: # Identifier to get the score from the annotated file @@ -23,15 +25,22 @@ Analysis-Input: Model-Features: # List containing the names of the allele frequency sources (populations) that are present in the data set (the MaxAF will be based on these) + # NOTE: currently not used, we use directly the MAX_AF annotation from VEP allele-frequency-list: - gnomAD_AFR_AF - - gnomAD_AMR_AF + - gnomAD_ASJ_AF - gnomAD_EAS_AF + - gnomAD_FIN_AF - gnomAD_NFE_AF + - gnomAD_OTH_AF - gnomAD_SAS_AF - #- gnomAD_ASJ_AF - #- gnomAD_FIN_AF - #- gnomAD_OTH_AF + - AA_AF + - EA_AF + - AFR_AF + - AMR_AF + - EAS_AF + - EUR_AF + - SAS_AF # List containing the names of the features used for the model training # the exact order of the features is crucial (make sure to have the exact same order as in the training step) @@ -54,69 +63,62 @@ Model-Features: - oe_lof - homAF - CAPICE - - ALPHA_MISSENSE_SCORE - - HIGH_IMPACT - - IS_INDEL Annotation-Resources: # VEP: path to the VEP executable - vep: /ensembl-vep-release-109.3/ + vep: /ensembl-vep-release-103.1/vep - # path to the direcrory where the additional perl libraries are installed during the installation step - vep-cpan: /perl_cpan/ - - # VEP Plugin direcrory - vep-plugin-path: /vep_plugins/ - - # VEP Cache directory - vep-cache: /ensembl-vep-109/ + # Cache directory and plugin directory + vep-cache: /ensembl-vep-103/cache # ngs-bits install path - ngs-bits: /ngs-bits-2023_06/ + ngs-bits: /ngs-bits/ # low-confidence filter file - low-confidence: 
/grch38_low_conf_region.bed + low-confidence: /grch37_low_conf_regions.bed # BED like files here the key of the dictionary is used as the name to present the feature in the annotated file bed-files: - simpleRepeat: /hg38_simpleRepeat.bed - segmentDuplication: /hg38_genomicSuperDups.bed - oe_lof: /gnomAD_OE_grch38_sorted.bed - repeatMasker: /RepeatMasker_GRCh38.bed - - plugin-files: - AlphaMissense: /AlphaMissense_hg38.tsv.gz + simpleRepeat: /grch37_simpleRepeat.bed + segmentDuplication: /grch37_genomicSuperDups.bed + oe_lof: /gnomAD_oe_lof_sorted.bed + oe_mis: /gnomAD_oe_mis_grch38_sorted.bed + oe_syn: /gnomAD_oe_syn_grch38_sorted.bed + repeatMasker: /grch37_repeatmasker.bed + + # OPTIONAL: only used if DB check is not skipped + # NOTE: currently not implemented + # NOTE: the following annotation needs a valid license + omim: /omim.bed # VCF files the key of the dictionary is used to identify the feature in the INFO column of hte VCF file vcf-files: - EIGEN_PHRED: /grch38liftOver_precomputed_Eigen-phred_coding_chrom1-22.vcf.gz - CONDEL: /grch38liftOver_precomputed_Condel.vcf.gz - FATHMM_XF: /hg38_fathmm_xf_coding.vcf.gz - MutationAssessor: /grch38liftOver_precomputed_MutationAssessor.vcf.gz - gnomAD: /gnomAD_genome_v3.1.2_GRCh38.vcf.gz - CADD: /CADD_SNVs_1.6_GRCh38.vcf.gz - dbscSNV: /dbscSNV1.1_GRCh38.vcf.gz - CAPICE: /grch38liftOver_capice_v1_snvs.vcf.gz - REVEL: /REVEL_1.3.vcf.gz - SpliceAI-SNV: /spliceai_scores.masked.snv.hg38.vcf.gz - SpliceAI-InDel: /spliceai_scores.masked.indel.hg38.vcf.gz - + EIGEN_PHRED: /grch37_eigen_phred_coding.vcf.gz + CONDEL: /grch37_fannsDB_Condel.vcf.gz + FATHMM_XF: /grch37_fathmm_xf_coding.vcf.gz + MutationAssessor: /grch37_precomputed_MutationAssessor.vcf.gz + gnomAD: /grch37_gnomAD_genomes_r211.vcf.gz + CADD: /grch37_CADD_snvs_v16.vcf.gz + dbscSNV: /grch37_dbscSNV_scores.vcf.gz + CAPICE: /capice_v1_grch37_snvs.vcf.gz + REVEL: /grch37_revel_v13.vcf.gz + # OPTIONAL: only used if DB check is not skipped - clinvar: /clinvar_GRCh38.vcf.gz - + clinvar: /grch37_clinvar.vcf.gz + # OPTIONAL: only used if DB check is not skipped # NOTE: the following annotation needs a valid license hgmd: /HGMD.vcf.gz # Bigwig files the key of the dictionary is used as the name to present the feature in the annotated file bigwig-files: - phastCons_mammal: /hg38_phastCons17way_primate.bw - phastCons_primate: /hg38_phastCons30way_mammal.bw - phastCons_vertebrate: /hg38_phastCons100way_vertebrate.bw - phyloP_mammal: /hg38_phyloP17way_primate.bw - phyloP_primate: /hg38_phyloP30way_mammal.bw - phyloP_vertebrate: /hg38_phyloP100way_vertebrate.bw + phastCons_mammal: /primates.phastCons46way.bw + phastCons_primate: /placentalMammals.phastCons46way.bw + phastCons_vertebrate: /vertebrate.phastCons46way.bw + phyloP_mammal: /primates.phyloP46way.bw + phyloP_primate: /placentalMammals.phyloP46way.bw + phyloP_vertebrate: /vertebrate.phyloP46way.bw # DO NOT CHANGE THE FOLLOWING IF YOU DON'T KNOW WHAT YOU DO @@ -124,7 +126,7 @@ Annotation-Resources: Internal-Parameters: gene2hpo-mapping: ../../data/hpo_resources/gene2hpo.json gene2interacting-mapping: ../../data/hpo_resources/gene2interacting.json - transcript2length-mapping: ../../data/hpo_resources/grch38transcript2length.json + transcript2length-mapping: ../../data/hpo_resources/grch37transcript2length.json hgnc2gene-mapping: ../../data/hpo_resources/hgnc2gene.json hpo-graph: ../../data/hpo_resources/hpo_graph.gexf hpo2replacement-mapping: ../../data/hpo_resources/hpo2replacement.json diff --git 
a/data/aiDIVA_example_configuration_with_annotation_grch38.yaml b/data/aiDIVA_example_configuration_with_annotation_grch38.yaml new file mode 100644 index 0000000..bc04c6f --- /dev/null +++ b/data/aiDIVA_example_configuration_with_annotation_grch38.yaml @@ -0,0 +1,133 @@ +## aiDIVA -- augmented intelligence-based disease variant analysis +# Configuration file specifying all the parameters that are needed to run aiDIVA in the different modes +# if you modify the key names make sure to also update them in the run_aiDIVA.py script +--- +Assembly-Build: GRCh38 +Canonical-Transcripts: "/mnt/storage2/users/ahboced1/phd_project/mane_grch38_v13_transcript_ids.txt" + + +Analysis-Input: + # path to the reference assembly used during the expansion of the indels + # instead of GRCh38 you can also use hg38 the differences between the two should not matter here + ref-path: /GRCh38.fa + + # trained scoring model (random forest) used to predict the pathogenicity score + # if no trained model is present you can use the train_model.py script to train a new custom model + scoring-model: /rf_model_combined.pkl + + prioritization-information: + # Identifier to get the score from the annotated file + cadd-identifier: CADD_PHRED + repeat-identifier: simpleRepeat + duplication-identifier: segmentDuplication + + +Model-Features: + # List containing the names of the allele frequency sources (populations) that are present in the data set (the MaxAF will be based on these) + allele-frequency-list: + - gnomAD_AFR_AF + - gnomAD_AMR_AF + - gnomAD_EAS_AF + - gnomAD_NFE_AF + - gnomAD_SAS_AF + #- gnomAD_ASJ_AF + #- gnomAD_FIN_AF + #- gnomAD_OTH_AF + + # List containing the names of the features used for the model training + # the exact order of the features is crucial (make sure to have the exact same order as in the training step) + feature-list: + - SIFT + - PolyPhen + - CADD_PHRED + - REVEL + - MAX_AF + - EIGEN_PHRED + - CONDEL + - FATHMM_XF + - MutationAssessor + - phastCons_mammal + - phastCons_primate + - phastCons_vertebrate + - phyloP_mammal + - phyloP_primate + - phyloP_vertebrate + - oe_lof + - homAF + - CAPICE + - ALPHA_MISSENSE_SCORE + - HIGH_IMPACT + - IS_INDEL + + +Annotation-Resources: + # VEP: path to the VEP executable + vep: /ensembl-vep-release-109.3/ + + # path to the directory where the additional perl libraries are installed during the installation step + vep-cpan: /perl_cpan/ + + # VEP Plugin directory + vep-plugin-path: /vep_plugins/ + + # VEP Cache directory + vep-cache: /ensembl-vep-109/ + + # ngs-bits install path + ngs-bits: /ngs-bits-2023_06/ + + # low-confidence filter file + low-confidence: /grch38_low_conf_region.bed + + # BED like files here the key of the dictionary is used as the name to present the feature in the annotated file + bed-files: + simpleRepeat: /hg38_simpleRepeat.bed + segmentDuplication: /hg38_genomicSuperDups.bed + oe_lof: /gnomAD_oe_lof_grch38_sorted.bed + oe_mis: /gnomAD_oe_mis_grch38_sorted.bed + oe_syn: /gnomAD_oe_syn_grch38_sorted.bed + repeatMasker: /RepeatMasker_GRCh38.bed + + plugin-files: + AlphaMissense: /AlphaMissense_hg38.tsv.gz + + # VCF files the key of the dictionary is used to identify the feature in the INFO column of the VCF file + vcf-files: + EIGEN_PHRED: /grch38liftOver_precomputed_Eigen-phred_coding_chrom1-22.vcf.gz + CONDEL: /grch38liftOver_precomputed_Condel.vcf.gz + FATHMM_XF: /hg38_fathmm_xf_coding.vcf.gz + MutationAssessor: /grch38liftOver_precomputed_MutationAssessor.vcf.gz + gnomAD: /gnomAD_genome_v3.1.2_GRCh38.vcf.gz + CADD: 
/CADD_SNVs_1.6_GRCh38.vcf.gz + dbscSNV: /dbscSNV1.1_GRCh38.vcf.gz + CAPICE: /grch38liftOver_capice_v1_snvs.vcf.gz + REVEL: /REVEL_1.3.vcf.gz + SpliceAI-SNV: /spliceai_scores.masked.snv.hg38.vcf.gz + SpliceAI-InDel: /spliceai_scores.masked.indel.hg38.vcf.gz + + # OPTIONAL: only used if DB check is not skipped + clinvar: /clinvar_GRCh38.vcf.gz + + # OPTIONAL: only used if DB check is not skipped + # NOTE: the following annotation needs a valid license + hgmd: /HGMD.vcf.gz + + # Bigwig files the key of the dictionary is used as the name to present the feature in the annotated file + bigwig-files: + phastCons_mammal: /hg38_phastCons17way_primate.bw + phastCons_primate: /hg38_phastCons30way_mammal.bw + phastCons_vertebrate: /hg38_phastCons100way_vertebrate.bw + phyloP_mammal: /hg38_phyloP17way_primate.bw + phyloP_primate: /hg38_phyloP30way_mammal.bw + phyloP_vertebrate: /hg38_phyloP100way_vertebrate.bw + + +# DO NOT CHANGE THE FOLLOWING IF YOU DON'T KNOW WHAT YOU DO +# In case you change the following parameters (e.g. if you recomputed the hpo resources) make sure to specify the full path to the newly created resources +Internal-Parameters: + gene2hpo-mapping: ../../data/hpo_resources/gene2hpo.json + gene2interacting-mapping: ../../data/hpo_resources/gene2interacting.json + transcript2length-mapping: ../../data/hpo_resources/grch38transcript2length.json + hgnc2gene-mapping: ../../data/hpo_resources/hgnc2gene.json + hpo-graph: ../../data/hpo_resources/hpo_graph.gexf + hpo2replacement-mapping: ../../data/hpo_resources/hpo2replacement.json diff --git a/doc/prepare_annotation_resources.md b/doc/prepare_annotation_resources.md index 0acf71e..b3c18ad 100644 --- a/doc/prepare_annotation_resources.md +++ b/doc/prepare_annotation_resources.md @@ -6,7 +6,7 @@ For some of the annotation sources there are no GRCh38/hg38 files available in t ## Reference Genome Assembly -Note hg19 and GRCh37 differ in the mitochondrial DNA, but since mitchondrial variants are not supported by aiDIVA it should not matter which of the two assemblies is used as reference. +Note that hg19 and GRCh37 differ in the mitochondrial DNA, but since mitochondrial variants are not supported by aiDIVA it does not matter which of the two assemblies is used as reference. GRCh37:
@@ -32,9 +32,9 @@ samtools faidx hg38.fa.gz ``` ## Annotation Databases -INFO: Please be adviced that some of the annotation sources aiDIVA uses are only free for non-commercial and academic use!!! +INFO: Please be advised that some of the annotation sources aiDIVA uses are only free for non-commercial and academic use! -If for some reason you choose to exclude one or more of the Annotation sources shown here you have to make sure to modify the source code accordingly. If an annotation source is used as a feature for the random forest and you exclude it in your analysis you have to train a new model before you can use aiDIVA. +If for some reason you choose to exclude one or more of the annotation sources shown here, make sure to modify the source code accordingly. If an annotation source is used as a feature for the random forest and you exclude it from your analysis, you have to train a new model before you can use aiDIVA. ### CADD @@ -280,7 +280,7 @@ https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/vcf/genomes (https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz) \ \ -From the complete gnomAD database we are only interested in a few entries from the VCF files, therefor we suggest to prepare the VCF and remove the unnecessary stuff from the gnomAD annotation VCF. The following code snippet uses the `prepare_gnomAD_vcf.py` script to reduce it to a minimal set of INFO entries that we need for the aiDIVA annotations: +From the complete gnomAD database we are only interested in a few entries from the VCF files, therefore we suggest preparing the VCF and removing the unnecessary entries from the gnomAD annotation VCF. The following code snippet uses the `prepare_gnomAD_vcf.py` script to reduce it to a minimal set of INFO entries that we need for the aiDIVA annotations: ``` wget -c https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz @@ -311,7 +311,12 @@ tabix -p vcf grch38_gnomAD_genomes_r211.vcf.gz ### \[optional\] HGMD (needs license) -The possibility to use HGMD in aiDIVA is optional due to the fact that you need a license for it. If you choose to include HGMD in the aiDIVA analysis you can use the public/professional version of HGMD. +Using HGMD in aiDIVA is optional because it requires a license. If you choose to include HGMD in the aiDIVA analysis you can use the public/professional version of HGMD. + + +### \[optional\] OMIM (needs license) +Using OMIM in aiDIVA is optional because it requires a license. 
+**Currently not implemented!** ### Low Confidence Regions diff --git a/scripts/generate_HPO_resources.py b/scripts/generate_HPO_resources.py index b14ac29..94168f7 100644 --- a/scripts/generate_HPO_resources.py +++ b/scripts/generate_HPO_resources.py @@ -29,6 +29,7 @@ def generate_gene2hpo_dict(gene2phenotype_list, gene2hpo_dict): for line in rd: if line.startswith("#"): pass + else: splitted_line = line.strip().split("\t") # Format: HPO-idHPO labelentrez-gene-identrez-gene-symbolAdditional Info from G-D sourceG-D sourcedisease-ID for link @@ -41,6 +42,7 @@ def generate_gene2hpo_dict(gene2phenotype_list, gene2hpo_dict): with open(gene2hpo_dict, "w") as gene2hpo_f: json.dump(gene_2_HPO, gene2hpo_f) + logger.info(f"Gene to HPO mapping successfully generated and saved as {gene2hpo_dict}") @@ -72,25 +74,31 @@ def create_hpo_graph(hpo_ontology, phenotype_hpoa, hpo_graph_file, hpo_replaceme elif line.startswith("is_a:"): parents.append(line.strip().split("is_a: ")[1].split(" !")[0]) + elif line.startswith('is_obsolete:'): obsolete =True + elif line.startswith('replaced_by:'): replacement = line.strip().split('replaced_by: ')[1] obsolete =False + elif line.startswith('alt_id:'): alt = line.strip().split('alt_id: ')[1] if hpo_term in alternatives.keys(): current_alternatives = alternatives[hpo_term] current_alternatives.append(alt) alternatives[hpo_term] = current_alternatives + else: alternatives[hpo_term] = [alt] + elif line.startswith('consider:'): consider = line.strip().split('consider: ')[1] if hpo_term in considerations.keys(): current_considerations = considerations[hpo_term] current_considerations.append(consider) considerations[hpo_term] = current_considerations + else: considerations[hpo_term] = [consider] @@ -99,7 +107,7 @@ def create_hpo_graph(hpo_ontology, phenotype_hpoa, hpo_graph_file, hpo_replaceme hpo_edges[hpo_term] = parents if replacement != "": replacements[hpo_term] = replacement - + hpo_edges["replacements"] = replacements logger.info("Generate HPO graph...") @@ -116,17 +124,6 @@ def create_hpo_graph(hpo_ontology, phenotype_hpoa, hpo_graph_file, hpo_replaceme counts_dict[hpo_term] = count_value total_counts += count_value - #total_counts = 0 - #with open(hpo_counts, "r") as count_file: - # for line in count_file: - # if line.startswith("HPO-ID"): - # continue - # splitted_line = line.strip().split("\t") - # hpo_term = splitted_line[0] - # count = int(splitted_line[1]) - # counts_dict[hpo_term] = count - # total_counts += count - # get replacements of obsolete nodes replacements = dict(hpo_edges.get("replacements", [])) @@ -139,40 +136,49 @@ def create_hpo_graph(hpo_ontology, phenotype_hpoa, hpo_graph_file, hpo_replaceme hpo_replacement_information = {"alternatives": alternatives, "considerations": considerations, "replacements": replacements} # TODO: remove support for networkx v1.x and get rid of the duplicated if-else conditions (networkx v2.x and v3.x use the same syntax for the graphs) - for node in hpo_edges.keys(): hpo_graph.add_node(node) if str(nx.__version__).startswith("1."): hpo_graph.node[node]["count"] = 0.0 + elif str(nx.__version__).startswith("2."): hpo_graph.nodes[node]["count"] = 0.0 + elif str(nx.__version__).startswith("3."): hpo_graph.nodes[node]["count"] = 0.0 + else: logger.error("There seems to be a problem with your installation of NetworkX, make sure that you have either v1, v2 or v3 installed!") ancestors = [(x,node) for x in hpo_edges[node]] hpo_graph.add_edges_from(ancestors) + if node in replacements.keys(): if 
str(nx.__version__).startswith("1."): hpo_graph.node[node]['replaced_by'] = replacements[node] hpo_graph.node[node]['IC'] = -math.log(1.0 / total_counts) + elif str(nx.__version__).startswith("2."): hpo_graph.nodes[node]['replaced_by'] = replacements[node] hpo_graph.nodes[node]['IC'] = -math.log(1.0 / total_counts) + elif str(nx.__version__).startswith("3."): hpo_graph.nodes[node]["replaced_by"] = replacements[node] hpo_graph.nodes[node]["IC"] = -math.log(1.0 /total_counts) + else: logger.error("There seems to be a problem with your installation of NetworkX, make sure that you have either v1, v2 or v3 installed!") for node in hpo_graph.nodes(): if str(nx.__version__).startswith("1."): hpo_graph.node[node]["count"] = 0.0 + elif str(nx.__version__).startswith("2."): hpo_graph.nodes[node]["count"] = 0.0 + elif str(nx.__version__).startswith("3."): hpo_graph.nodes[node]["count"] = 0.0 + else: logger.error("There seems to be a problem with your installation of NetworkX, make sure that you have either v1, v2 or v3 installed!") @@ -180,53 +186,69 @@ def create_hpo_graph(hpo_ontology, phenotype_hpoa, hpo_graph_file, hpo_replaceme for node in counts_dict.keys(): if str(nx.__version__).startswith("1."): hpo_graph.node[node]["count"] = counts_dict[node] + elif str(nx.__version__).startswith("2."): hpo_graph.nodes[node]["count"] = counts_dict[node] + elif str(nx.__version__).startswith("3."): hpo_graph.nodes[node]["count"] = counts_dict[node] + else: logger.error("There seems to be a problem with your installation of NetworkX, make sure that you have either v1, v2 or v3 installed!") hpo_graph.nodes(data="count") - # now fill it with the actual value. + # now fill it with the actual value. for node in hpo_edges.keys(): descendants = nx.descendants(hpo_graph,node) if str(nx.__version__).startswith("1."): count = hpo_graph.node[node]["count"] + elif str(nx.__version__).startswith("2."): count = hpo_graph.nodes[node]["count"] + elif str(nx.__version__).startswith("3."): count = hpo_graph.nodes[node]["count"] + else: logger.error("There seems to be a problem with your installation of NetworkX, make sure that you have either v1, v2 or v3 installed!") for descendant in descendants: if str(nx.__version__).startswith("1."): count += hpo_graph.node[descendant]["count"] + elif str(nx.__version__).startswith("2."): count += hpo_graph.nodes[descendant]["count"] + elif str(nx.__version__).startswith("3."): count += hpo_graph.nodes[descendant]["count"] + else: logger.error("There seems to be a problem with your installation of NetworkX, make sure that you have either v1, v2 or v3 installed!") if count > 0 : if str(nx.__version__).startswith("1."): hpo_graph.node[node]["IC"] = -math.log(float(count) / total_counts) + elif str(nx.__version__).startswith("2."): hpo_graph.nodes[node]["IC"] = -math.log(float(count) / total_counts) + elif str(nx.__version__).startswith("3."): hpo_graph.nodes[node]["IC"] = -math.log(float(count) / total_counts) + else: logger.error("There seems to be a problem with your installation of NetworkX, make sure that you have either v1, v2 or v3 installed!") + else : if str(nx.__version__).startswith("1."): hpo_graph.node[node]["IC"] = -math.log(1.0 / total_counts) # missing nodes, set as rare as possible + elif str(nx.__version__).startswith("2."): hpo_graph.nodes[node]["IC"] = -math.log(1.0 / total_counts) # missing nodes, set as rare as possible + elif str(nx.__version__).startswith("3."): hpo_graph.nodes[node]["IC"] = -math.log(1.0 / total_counts) # missing nodes, set as rare as possible + 
else: logger.error("There seems to be a problem with your installation of NetworkX, make sure that you have either v1, v2 or v3 installed!") @@ -289,13 +311,16 @@ def create_gene2interacting_mapping(string_mapping, string_db_links, string_inte if ((protein_a in string2name.keys()) and (protein_b in string2name.keys())) and ((confidence_experimental >= 900) or (confidence_database >= 900)): gene_name = string2name[protein_a] interacting_gene = string2name[protein_b] + if gene_name in string_interaction_mapping.keys(): string_interaction_mapping[gene_name].append({"interacting_gene": interacting_gene, "confidence_experimental": confidence_experimental, "confidence_database": confidence_database}) + else: string_interaction_mapping[gene_name] = [{"interacting_gene": interacting_gene, "confidence_experimental": confidence_experimental, "confidence_database": confidence_database}] with open(string_interactions, "w") as string_interactions_f: json.dump(string_interaction_mapping, string_interactions_f) + logger.info(f"Gene to interacting gene mapping successfully generated and saved as {string_interactions}") # this file can be obtained through ensembl biomart @@ -308,6 +333,7 @@ def create_transcript_length_mapping(transcript_lengths, transcript_information) for line in transc: if line.startswith("Gene") or line.startswith("#"): continue + else: # header: # #Gene stable ID [TAB] Transcript stable ID [TAB] CDS Length [TAB] Transcript length (including UTRs and CDS) [TAB] Strand @@ -320,11 +346,12 @@ def create_transcript_length_mapping(transcript_lengths, transcript_information) with open(transcript_lengths, "w") as transcript_lengths_f: json.dump(transcript_mapping, transcript_lengths_f) + logger.info(f"Transcript to CDS length mapping successfully generated and saved as {transcript_lengths}") if __name__=="__main__": - parser = argparse.ArgumentParser("Script to generate the HPO resources needed in the prioritization step of AIdiva") + parser = argparse.ArgumentParser("Script to generate the HPO resources needed in the prioritization step of aiDIVA") parser.add_argument("--hpo_ontology", type=str, dest="hpo_ontology", metavar="hp.obo", required=False, help="File containing the HPO ontology\n") parser.add_argument("--gene_phenotype", type=str, dest="gene_phenotype", metavar="phenotype_to_genes.txt", required=False, help="File that contains information about the genes and the associated phenotypes\n") parser.add_argument("--gene_hpo", type=str, dest="gene_hpo", metavar="gene2hpo.json", required=False, help="File to save the generated gene2hpo_dict\n") @@ -345,12 +372,12 @@ def create_transcript_length_mapping(transcript_lengths, transcript_information) if args.hpo_ontology and args.phenotype_hpoa and args.hpo_graph and args.hpo_replacements: create_hpo_graph(args.hpo_ontology, args.phenotype_hpoa, args.hpo_graph, args.hpo_replacements) - + if args.hgnc_symbols and args.hgnc_gene: create_gene2hgnc_mapping(args.hgnc_symbols, args.hgnc_gene) - + if args.string_mapping and args.string_links and args.gene_interacting: create_gene2interacting_mapping(args.string_mapping, args.string_links, args.gene_interacting) - + if args.transcript_information and args.transcript_length_mapping: create_transcript_length_mapping(args.transcript_length_mapping, args.transcript_information) diff --git a/scripts/prepare_CADD_vcf.py b/scripts/prepare_CADD_vcf.py index d1c79e7..89aee71 100644 --- a/scripts/prepare_CADD_vcf.py +++ b/scripts/prepare_CADD_vcf.py @@ -9,14 +9,15 @@ writer.write("##fileformat=VCFv4.1\n") 
writer.write("##INFO=\n") writer.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n") - + for line in infile: if line.startswith("#"): continue split = line.replace("\n", "").split("\t") - + if str(split[4]) != "": writer.write(f"{split[0]}\t{split[1]}\t.\t{split[2]}\t{split[3]}\t.\t.\tCADD_PHRED={str(split[5])}\n") + else: print("WARNING: Skip missing CADD phred score!") diff --git a/scripts/prepare_Capice_vcf.py b/scripts/prepare_Capice_vcf.py index ca0f5c0..9254b1f 100644 --- a/scripts/prepare_Capice_vcf.py +++ b/scripts/prepare_Capice_vcf.py @@ -9,14 +9,15 @@ writer.write("##fileformat=VCFv4.1\n") writer.write("##INFO=\n") writer.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n") - + for line in infile: if line.startswith("#"): continue split = line.replace("\n", "").split("\t") - + if str(split[4]) != "": writer.write(f"{split[0]}\t{split[1]}\t.\t{split[2]}\t{split[3]}\t.\t.\tCAPICE={str(split[4])}\n") + else: print("WARNING: Skip missing Capice score!") diff --git a/scripts/prepare_ClinVar_vcf.py b/scripts/prepare_ClinVar_vcf.py index 8bd4f90..9ca5478 100644 --- a/scripts/prepare_ClinVar_vcf.py +++ b/scripts/prepare_ClinVar_vcf.py @@ -9,7 +9,7 @@ writer.write("##fileformat=VCFv4.1\n") writer.write("##INFO=\n") writer.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n") - + for line in infile: if line.startswith("#"): continue @@ -20,7 +20,7 @@ continue info_fields = split[7].split(";") - + disease_name = "" disease_name_incl = "" sig_info = "" @@ -30,20 +30,26 @@ for field in info_fields: if field.startswith("CLNDN="): disease_name = field.split("=")[1] + elif field.startswith("CLNDNINCL="): disease_name_incl = field.split("=")[1] + elif field.startswith("CLNSIG="): sig_info = field.split("=")[1] + elif field.startswith("CLNSIGCONF="): sig_info_conf = field.split("=")[1] + elif field.startswith("CLNSIGINCL="): sig_info_incl = field.split("=")[1] + else: continue - + # TODO: resolve conflicting interpretations of pathogenicity if possible and include them in the resulting file if str(sig_info) != "" and sig_info != "Conflicting_interpretations_of_pathogenicity": writer.write(f"{split[0]}\t{split[1]}\t{split[2]}\t{split[3]}\t{split[4]}\t{split[5]}\t{split[6]}\tDETAILS={str(sig_info)}%20{str(disease_name)}%3D{str(disease_name_incl)}\n") + else: print("WARNING: Skip conflicting or empty significance entry!") \ No newline at end of file diff --git a/scripts/prepare_Condel_vcf.py b/scripts/prepare_Condel_vcf.py index 55aa245..7ccf70f 100644 --- a/scripts/prepare_Condel_vcf.py +++ b/scripts/prepare_Condel_vcf.py @@ -16,5 +16,6 @@ for row in reader: if str(row["CONDEL"]) != "": writer.write(f"{row['CHR']}\t{row['START']}\t.\t{row['REF']}\t{row['ALT']}\t.\t.\tEIGEN_PHRED={str(row['CONDEL'])}\n") + else: print("WARNING: Skip missing Condel score!") diff --git a/scripts/prepare_EigenPhred_vcf.py b/scripts/prepare_EigenPhred_vcf.py index 46b7992..8cabd36 100644 --- a/scripts/prepare_EigenPhred_vcf.py +++ b/scripts/prepare_EigenPhred_vcf.py @@ -16,5 +16,6 @@ for row in reader: if str(row["Eigen-phred"]) != "": writer.write(f"{row['chr']}\t{row['position']}\t.\t{row['ref']}\t{row['alt']}\t.\t.\tEIGEN_PHRED={str(row['Eigen-phred'])}\n") + else: print("WARNING: Skip missing Eigen-phred score!") diff --git a/scripts/prepare_FathmmXF_vcf.py b/scripts/prepare_FathmmXF_vcf.py index 3cae935..604004a 100644 --- a/scripts/prepare_FathmmXF_vcf.py +++ b/scripts/prepare_FathmmXF_vcf.py @@ -16,5 +16,6 @@ # skip missing scores to keep the annotation file small if str(row["FATHMM_XF"]) != "": 
writer.write(f"{row['#CHROM']}\t{row['POS']}\t.\t{row['REF']}\t{row['ALT']}\t.\t.\FATHMM_XF={str(row['FATHMM_XF'])}\n") + else: print("WARNING: Skip missing FathmmXF score!") \ No newline at end of file diff --git a/scripts/prepare_MutationAssessor_vcf.py b/scripts/prepare_MutationAssessor_vcf.py index 5e338e6..03ea219 100644 --- a/scripts/prepare_MutationAssessor_vcf.py +++ b/scripts/prepare_MutationAssessor_vcf.py @@ -17,5 +17,6 @@ # skip missing scores to keep the annotation file small if str(row["FI score"]) != "": writer.write(f"{mutation[1]}\t{mutation[2]}\t.\t{mutation[3]}\t{mutation[4]}\t.\t.\tMutationAssessor={str(row['FI score'])}\n") + else: print("WARNING: Skip missing MutationAssessor score!") \ No newline at end of file diff --git a/scripts/prepare_REVEL_vcf.py b/scripts/prepare_REVEL_vcf.py index e96b471..4880e54 100644 --- a/scripts/prepare_REVEL_vcf.py +++ b/scripts/prepare_REVEL_vcf.py @@ -24,7 +24,9 @@ grch37_writer.write(f"{row['chr']}\t{row['hg19_pos']}\t.\t{row['ref']}\t{row['alt']}\t.\t.\tREVEL={str(row['REVEL'])}\n") if row["grch38_pos"] != ".": grch38_writer.write(f"{row['chr']}\t{row['grch38_pos']}\t.\t{row['ref']}\t{row['alt']}\t.\t.\tREVEL={str(row['REVEL'])}\n") + else: print("WARNING: Skip GRCh38 missing REVEL score!") + else: print("WARNING: Skip missing REVEL score!") diff --git a/scripts/prepare_RepeatMasker_bed.py b/scripts/prepare_RepeatMasker_bed.py index 5ca95e1..3bfd8bd 100644 --- a/scripts/prepare_RepeatMasker_bed.py +++ b/scripts/prepare_RepeatMasker_bed.py @@ -7,7 +7,7 @@ outfile = sys.argv[2] with gzip.open(infile, "rt") as infile, open(outfile, "w") as writer: - + for line in infile: current_line = line.lstrip().replace("\n", "") current_line = re.sub(r"\s+", " ", current_line) @@ -22,8 +22,9 @@ continue split = current_line.split(" ") - + if str(split[9]) != "" and str(split[10]) != "": writer.write(f"{split[4]}\t{str(int(split[5])-1)}\t{split[6]}\t{split[9]} ({split[10]})\n") + else: print("WARNING: Skip missing repeat!") \ No newline at end of file diff --git a/scripts/prepare_dbscSNV_vcf.py b/scripts/prepare_dbscSNV_vcf.py index 5f4ce67..60970f0 100644 --- a/scripts/prepare_dbscSNV_vcf.py +++ b/scripts/prepare_dbscSNV_vcf.py @@ -1,3 +1,4 @@ +## TODO: Delete file it is not needed anymore import csv import sys @@ -26,7 +27,9 @@ writer_grch37.write(f"{row['chr']}\t{row['pos']}\t.\t{row['ref']}\t{row['alt']}\t.\t.\tADA_SCORE={str(row['ada_score'])};RF_SCORE={str(row['rf_score'])}\n") if row["hg38_pos"] != ".": writer_grch38.write(f"{row['hg38_chr']}\t{row['hg38_pos']}\t.\t{row['ref']}\t{row['alt']}\t.\t.\tADA_SCORE={str(row['ada_score'])};RF_SCORE={str(row['rf_score'])}\n") + else: print("WARNING: Skip GRCh38 missing dbscSNV score!") + else: print("WARNING: Skip missing scores!") \ No newline at end of file diff --git a/scripts/prepare_gnomAD_vcf.py b/scripts/prepare_gnomAD_vcf.py index 7cc4178..219a1a0 100644 --- a/scripts/prepare_gnomAD_vcf.py +++ b/scripts/prepare_gnomAD_vcf.py @@ -1,6 +1,4 @@ import gzip -from re import S -from statistics import quantiles import sys @@ -13,35 +11,42 @@ if line.startswith("#"): if line.startswith("##fileformat="): prepared_gnomad.write(line) + elif line.startswith("##hailversion="): prepared_gnomad.write(line) + elif line.startswith("##INFO=\n") + elif line.startswith("##INFO=") + elif line.startswith("##INFO=") + elif line.startswith("##INFO=") + elif line.startswith("##INFO=") + elif line.startswith("##INFO=") + elif line.startswith("##contig="): prepared_gnomad.write(line) + elif line.startswith("#CHROM"): 
prepared_gnomad.write(line) + else: continue - - - - - else: splitted_line = line.replace("\n", "").split("\t") @@ -59,20 +64,28 @@ for entry in info_fields: if entry.startswith("AN="): an_entry = entry.split("=")[1] + elif entry.startswith("AF="): af_entry = entry.split("=")[1] + elif entry.startswith("nhomalt="): hom_entry = entry.split("=")[1] + elif entry.startswith("AFR_AF="): afr_af_entry = entry.split("=")[1] + elif entry.startswith("AMR_AF="): amr_af_entry = entry.split("=")[1] + elif entry.startswith("EAS_AF="): eas_af_entry = entry.split("=")[1] + elif entry.startswith("NFE_AF="): nfe_af_entry = entry.split("=")[1] + elif entry.startswith("SAS_AF="): sas_af_entry = entry.split("=")[1] + else: continue @@ -87,5 +100,6 @@ if str(an_entry) != "" and str(af_entry) != "" and str(hom_entry) != "": prepared_gnomad.write(f"{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filt}\t{info}\n") + else: print("WARNING: Skip missing gnomAD entry!")
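All of the `prepare_*` helper scripts touched above, including `prepare_gnomAD_vcf.py`, follow the same pattern: read the raw annotation source, keep only the fields aiDIVA actually needs, and write a slim VCF that can then be bgzipped and tabix-indexed for the annotation step. A minimal standalone sketch of the INFO-reduction idea used for gnomAD is shown below; the input/output file names and the exact key whitelist are illustrative assumptions, not values taken from the repository.

```
# Sketch only: reduce a gnomAD VCF to a whitelist of INFO keys.
# File names and the key set are placeholders, adjust them to your setup.
import gzip

KEEP_KEYS = {"AN", "AF", "nhomalt", "AFR_AF", "AMR_AF", "EAS_AF", "NFE_AF", "SAS_AF"}

def reduce_info(info_column):
    # keep only whitelisted key=value pairs and preserve their original order
    kept = [entry for entry in info_column.split(";") if entry.split("=", 1)[0] in KEEP_KEYS]
    return ";".join(kept) if kept else "."

with gzip.open("gnomad.genomes.sites.vcf.bgz", "rt") as raw_vcf, open("gnomAD_reduced.vcf", "w") as reduced_vcf:
    for line in raw_vcf:
        if line.startswith("#"):
            # header lines would additionally need their ##INFO definitions filtered,
            # as done in the prepare script above; they are copied unchanged here
            reduced_vcf.write(line)
            continue

        fields = line.rstrip("\n").split("\t")
        fields[7] = reduce_info(fields[7])  # column 8 of a VCF is the INFO column
        reduced_vcf.write("\t".join(fields) + "\n")
```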