In [8]:
import pandas as pd
import distinctipy
import configs.itol_text as itol_text
import os

def get_info_list(df, col, id_field):
    """Pair every row's identifier with its value in an annotation column.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table; must contain both ``id_field`` and ``col``.
    col : str
        Name of the annotation column to read.
    id_field : str
        Name of the column holding the row identifiers.

    Returns
    -------
    list of tuple
        One ``(identifier, value)`` tuple per row, in row order. Values that
        ``pd.notna`` reports as missing are replaced by the string ``"None"``.
    """
    # Walk the two columns positionally instead of looking each cell up by
    # index label: label lookups through ``df.at`` return a Series (and make
    # the missing-value check ambiguous) as soon as the index has duplicates.
    return [
        (row_id, value if pd.notna(value) else "None")
        for row_id, value in zip(df[id_field], df[col])
    ]


def get_colour_dict(df, col):
    """Map every distinct value of an annotation column to a hex colour.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table containing ``col``.
    col : str
        Name of the annotation column to colour.

    Returns
    -------
    dict
        ``{value: hex_colour}`` with one entry per distinct value of the
        column, in order of first appearance. Missing values are represented
        by the key ``"None"``, which is always mapped to white (``"#FFFFFF"``).
        All other colours come from ``distinctipy.get_colors``.
    """
    # Fill only the requested column. Filling the whole frame copies every
    # column on each call and can fail on columns unrelated to ``col`` (for
    # example a categorical column that has no "None" category).
    unique_values = df[col].fillna("None").unique()

    # One generated colour per distinct value (the "None" slot is overridden below)
    colour_set = [
        distinctipy.get_hex(colour)
        for colour in distinctipy.get_colors(len(unique_values))
    ]

    colour_dict = dict(zip(unique_values, colour_set))

    # If 'None' exists, assign it white color
    if "None" in colour_dict:
        colour_dict["None"] = "#FFFFFF"

    return colour_dict


def generate_itol_colorstrip(col, colour_dict, info_list, output_filename):
    """Write a colour-strip dataset file for one annotation column.

    The file starts with ``itol_text.dataset_colorstrip_text`` in which the
    ``<custom_dataset_label>`` placeholder is replaced by ``col``, followed by
    one ``identifier,colour,label`` line per entry of ``info_list``.

    Parameters
    ----------
    col : str
        Dataset label inserted into the template header.
    colour_dict : dict
        Maps each label to its colour; every label in ``info_list`` must be a key.
    info_list : iterable of tuple
        ``(identifier, label)`` pairs.
    output_filename : str
        Path of the file to create (overwritten if it exists).
    """
    with open(output_filename, "w") as handle:
        header = itol_text.dataset_colorstrip_text.replace("<custom_dataset_label>", col)
        handle.write(header)
        # Each data line keeps the trailing space before the newline
        handle.writelines(
            f"{node_id},{colour_dict[value]},{value} \n"
            for node_id, value in info_list
        )

    print(f"File '{output_filename}' has been created.\n")


def generate_itol_ranges(col, colour_dict, info_list, output_filename):
    """Write a ranges dataset file for one annotation column.

    The file starts with ``itol_text.dataset_ranges_text`` in which the
    ``<custom_dataset_label>`` placeholder is replaced by ``col``. Each entry
    of ``info_list`` then becomes one comma-separated line: the identifier
    twice (a range that starts and ends on the same node), the label's colour
    three times, the fixed fields ``dashed,2``, the label, and ``black,italic``.

    Parameters
    ----------
    col : str
        Dataset label inserted into the template header.
    colour_dict : dict
        Maps each label to its colour; every label in ``info_list`` must be a key.
    info_list : iterable of tuple
        ``(identifier, label)`` pairs.
    output_filename : str
        Path of the file to create (overwritten if it exists).
    """
    with open(output_filename, "w") as handle:
        handle.write(itol_text.dataset_ranges_text.replace("<custom_dataset_label>", col))
        for node_id, value in info_list:
            # Same colour is used for all three colour fields of the range
            colour = colour_dict[value]
            handle.write(
                f"{node_id},{node_id},{colour},{colour},{colour},dashed,2,{value},black,italic\n"
            )

    print(f"File '{output_filename}' has been created.\n")


def generate_itol_text(col, info_list, output_filename):
    """Write a text-label dataset file for one annotation column.

    The file starts with ``itol_text.dataset_text_text`` in which the
    ``<custom_dataset_label>`` placeholder is replaced by ``col``. Every entry
    of ``info_list`` whose label is not ``"None"`` then becomes one
    ``identifier,label,position,#0000ff,bold-italic,1,0`` line.

    Parameters
    ----------
    col : str
        Dataset label inserted into the template header.
    info_list : iterable of tuple
        ``(identifier, label)`` pairs; entries labelled ``"None"`` are skipped.
    output_filename : str
        Path of the file to create (overwritten if it exists).
    """
    with open(output_filename, "w") as f:
        f.write(itol_text.dataset_text_text.replace("<custom_dataset_label>", col))
        for info, label in info_list:
            if label != "None":

                # Identifiers can arrive as non-strings (e.g. integer IDs
                # parsed by pandas) or be empty, so normalise to text and use
                # startswith rather than indexing the first character.
                node_id = str(info)
                is_ancestor = node_id.startswith("N") and node_id[1:].isdigit()

                # Set ancestors / leaf labels differently: identifiers of the
                # form N<digits> get position 1, everything else position -1.
                # NOTE(review): presumably iTOL's "along the branch" vs
                # "external label" placement; confirm against the template.
                position = 1 if is_ancestor else -1
                f.write(f"{info},{label},{position},#0000ff,bold-italic,1,0 \n")

    print(f"File '{output_filename}' has been created.\n")



In [9]:
df = pd.read_csv("./kari_example.csv")  # Dataframe with annotations

# Columns you want to generate itol annotations for.
# pandas names a header-less CSV column (typically a saved row index)
# "Unnamed: <n>"; such a column holds row numbers rather than annotations,
# so it is left out here.
annotation_cols = [x for x in df.columns if not str(x).startswith("Unnamed:")]
outpath = "itol_output"  # Folder to write out itol annotations
id_field = 'truncated_info'  # ID field (will be skipped when making annotations)

df

Unnamed: 0,truncated_info,xref_orthodb,xref_supfam,lineage_superkingdom,lineage_phylum,value
0,tr|A0A2R6AP45|A0A2R6AP45_9ARCH,,SSF48179;SSF51735;,Archaea,Candidatus Marsarchaeota,0.23
1,tr|A0A7C5ICI9|A0A7C5ICI9_9ARCH,,SSF48179;SSF51735;,Archaea,Nitrososphaerota,0.24
2,tr|A0A6B2C402|A0A6B2C402_9ARCH,,SSF48179;SSF51735;,Archaea,Nitrososphaerota,
3,tr|A0A151BJ71|A0A151BJ71_9ARCH,,SSF48179;SSF51735;,Archaea,Candidatus Bathyarchaeota,
4,tr|A0A511RIY9|A0A511RIY9_9DEIN,9804088at2;,SSF48179;SSF51735;,Bacteria,Deinococcota,
5,tr|D3PT81|D3PT81_MEIRD,9804088at2;,SSF48179;SSF51735;,Bacteria,Deinococcota,
6,tr|A0A0K9HJH1|A0A0K9HJH1_GEOSE,9804088at2;,SSF48179;SSF51735;,Bacteria,Bacillota,
7,tr|F5L972|F5L972_CALTT,9804088at2;,SSF48179;SSF51735;,Bacteria,Bacillota,
8,tr|A0A5T0UG45|A0A5T0UG45_CAMJU,,SSF48179;SSF51735;,Bacteria,Campylobacterota,
9,N0,,,,,


In [15]:
# Create the output folder if needed; exist_ok makes a separate (racy)
# existence check unnecessary.
os.makedirs(outpath, exist_ok=True)

# Colour-based annotations: one colour strip and one ranges file per column
for col in annotation_cols:

    # Skip the info column, which won't be informative, and any requested
    # column that is not present in the dataframe
    if col == id_field or col not in df:
        continue

    colour_dict = get_colour_dict(df, col)
    info_list = get_info_list(df, col, id_field)

    # Can overwrite colour_dict here and pass custom colours.

    generate_itol_colorstrip(
        col, colour_dict, info_list, os.path.join(outpath, f"{col}_itol_colorstrip.txt")
    )

    generate_itol_ranges(
        col, colour_dict, info_list, os.path.join(outpath, f"{col}_itol_ranges.txt")
    )

# Columns whose values are written onto the tree as text labels
text_cols = ['value']

for text_col in text_cols:

    # Same guard as for the colour annotations: ignore columns that are
    # missing from the dataframe instead of failing with a KeyError
    if text_col not in df:
        continue

    info_list = get_info_list(df, text_col, id_field)

    generate_itol_text(
        text_col, info_list, os.path.join(outpath, f"{text_col}_itol_text.txt")
    )
        


{'None': '#FFFFFF', '9804088at2;': '#f100f3'}
[('tr|A0A2R6AP45|A0A2R6AP45_9ARCH', 'None'), ('tr|A0A7C5ICI9|A0A7C5ICI9_9ARCH', 'None'), ('tr|A0A6B2C402|A0A6B2C402_9ARCH', 'None'), ('tr|A0A151BJ71|A0A151BJ71_9ARCH', 'None'), ('tr|A0A511RIY9|A0A511RIY9_9DEIN', '9804088at2;'), ('tr|D3PT81|D3PT81_MEIRD', '9804088at2;'), ('tr|A0A0K9HJH1|A0A0K9HJH1_GEOSE', '9804088at2;'), ('tr|F5L972|F5L972_CALTT', '9804088at2;'), ('tr|A0A5T0UG45|A0A5T0UG45_CAMJU', 'None'), ('N0', 'None'), ('N1', 'None'), ('N2', 'None'), ('N3', 'None'), ('N4', 'None'), ('N5', 'None'), ('N6', 'None'), ('N7', 'None')]

File 'itol_output/xref_orthodb_itol_colorstrip.txt' has been created.

File 'itol_output/xref_orthodb_itol_ranges.txt' has been created.

{'SSF48179;SSF51735;': '#00ff00', 'None': '#FFFFFF'}
[('tr|A0A2R6AP45|A0A2R6AP45_9ARCH', 'SSF48179;SSF51735;'), ('tr|A0A7C5ICI9|A0A7C5ICI9_9ARCH', 'SSF48179;SSF51735;'), ('tr|A0A6B2C402|A0A6B2C402_9ARCH', 'SSF48179;SSF51735;'), ('tr|A0A151BJ71|A0A151BJ71_9ARCH', 'SSF48179;SSF51