In [15]:
import pandas as pd
import distinctipy
import scripts.itol_text as itol_text
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

def get_info_list(df, col, id_field):
    """Pair every row's identifier with its value in ``col``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both the identifier column and the annotation column.
    col : str
        Name of the column whose values become the labels.
    id_field : str
        Name of the column holding the row identifiers.

    Returns
    -------
    list of tuple
        One ``(identifier, value)`` tuple per row, in row order. Missing
        values (as judged by ``pd.notna``) are replaced by the string
        ``"None"``.
    """
    # Walk the two columns positionally rather than looking rows up by index
    # label: label lookups return a Series (not a scalar) when the index holds
    # duplicate labels, which made the missing-value test ambiguous.
    ids = df[id_field].to_numpy()
    values = df[col].to_numpy()
    return [
        (row_id, value if pd.notna(value) else "None")
        for row_id, value in zip(ids, values)
    ]


# def get_colour_dict(df, col):
#     df = df.fillna("None")

#     unique_values = df[col].unique()

#     colour_set = [
#         distinctipy.get_hex(x) for x in distinctipy.get_colors(len(unique_values))
#     ]

#     colour_dict = {
#         value: colour_set[i % len(colour_set)] for i, value in enumerate(unique_values)
#     }
    
    
#     # If 'None' exists, assign it white color
#     if "None" in colour_dict:
#         colour_dict["None"] = "#FFFFFF"     

#     return colour_dict
        

def generate_itol_colorstrip(col, colour_dict, info_list, output_filename):
    """Write a colour-strip annotation file for one column.

    The file starts with the ``itol_text.dataset_colorstrip_text`` template,
    in which the ``<custom_dataset_label>`` placeholder is replaced by ``col``,
    followed by one ``id,colour,label`` line per entry of ``info_list``.

    Parameters
    ----------
    col : str
        Column name, used as the dataset label in the header.
    colour_dict : dict
        Maps each label to its colour string. A label missing from the
        mapping raises ``KeyError``.
    info_list : iterable of (id, label)
        Pairs to write, in order.
    output_filename : str
        Path of the file to create (overwritten if it exists).
    """
    with open(output_filename, "w") as out_file:
        header = itol_text.dataset_colorstrip_text.replace(
            "<custom_dataset_label>", col
        )
        out_file.write(header)
        # Lines are produced lazily, so they reach the file one at a time.
        out_file.writelines(
            f"{node_id},{colour_dict[value]},{value} \n"
            for node_id, value in info_list
        )

    print(f"File '{output_filename}' has been created.\n")


def generate_itol_ranges(col, colour_dict, info_list, output_filename):
    """Write a ranges annotation file for one column.

    The file starts with the ``itol_text.dataset_ranges_text`` template, in
    which the ``<custom_dataset_label>`` placeholder is replaced by ``col``.
    Each entry of ``info_list`` then becomes one line in which the identifier
    is used as both the start and the end of the range and the same colour
    fills all three colour fields.

    Parameters
    ----------
    col : str
        Column name, used as the dataset label in the header.
    colour_dict : dict
        Maps each label to its colour string. A label missing from the
        mapping raises ``KeyError``.
    info_list : iterable of (id, label)
        Pairs to write, in order.
    output_filename : str
        Path of the file to create (overwritten if it exists).
    """
    with open(output_filename, "w") as out_file:
        header = itol_text.dataset_ranges_text.replace("<custom_dataset_label>", col)
        out_file.write(header)

        for node_id, value in info_list:
            colour = colour_dict[value]
            row = (
                f"{node_id},{node_id},{colour},{colour},{colour},"
                f"dashed,2,{value},black,italic\n"
            )
            out_file.write(row)

    print(f"File '{output_filename}' has been created.\n")


def generate_itol_text(col, info_list, output_filename):
    """Write a text-label annotation file for one column.

    The file starts with the ``itol_text.dataset_text_text`` template, in
    which the ``<custom_dataset_label>`` placeholder is replaced by ``col``,
    followed by one line per entry of ``info_list`` whose label is not the
    ``"None"`` placeholder.

    Parameters
    ----------
    col : str
        Column name, used as the dataset label in the header.
    info_list : iterable of (id, label)
        Pairs to write, in order. Identifiers are expected to be strings.
    output_filename : str
        Path of the file to create (overwritten if it exists).
    """
    with open(output_filename, "w") as f:
        f.write(itol_text.dataset_text_text.replace("<custom_dataset_label>", col))
        for info, label in info_list:
            # Entries without a value get no text label at all.
            if label == "None":
                continue

            # Ancestors and leaves are labelled differently: identifiers of
            # the form "N<digits>" are treated as ancestor nodes.
            # startswith() is used instead of info[0] so that an empty
            # identifier is handled as a leaf rather than raising IndexError.
            is_ancestor = info.startswith("N") and info[1:].isdigit()
            position = 1 if is_ancestor else -1
            f.write(f"{info},{label},{position},#0000ff,bold-italic,1,0 \n")

    print(f"File '{output_filename}' has been created.\n")



### Adjust values here for each different dataframe

In [34]:
# Available annotation tables: dataset key -> CSV path.
# Each CSV is read without a header row and its two columns are named ID and Temp.
DATASETS = {
    # dhad dataframes
    "dhad_high": "../../../parsed_outputs/dhad_highest_formatted.csv",
    "dhad_low": "../../../parsed_outputs/dhad_lowest_formatted.csv",
    # als dataframes
    "als_high": "../../../parsed_outputs/als_highest.csv",
    "als_low": "../../../parsed_outputs/als_lowest.csv",
    # kari dataframes
    "kari_high": "../../../parsed_outputs/kari_highest.csv",
    "kari_low": "../../../parsed_outputs/kari_lowest.csv",
}

# Change this key (and nothing else) to switch dataset.
dataset = "dhad_high"

df = pd.read_csv(DATASETS[dataset], names=['ID', 'Temp'])  # Dataframe with annotations
outpath = f"itol_output_{dataset}"  # Folder to write out itol annotations

annotation_cols = list(df.columns)  # Columns you want to generate itol annotations for
id_field = 'ID'  # ID field (will be skipped when making annotations)

# Preview only the first rows instead of rendering the whole table.
df.head(10)

Unnamed: 0,ID,Temp
0,sp_C1DFH7_ILVC_AZOVD,7.212238
1,tr_L0E0X7_L0E0X7_THIND,7.212238
2,tr_A0A0C1KLA6_A0A0C1KLA6_9PSED,23.0
3,tr_A0A0B6WSC0_A0A0B6WSC0_9BACT,1.0
4,tr_A0A258V9Q8_A0A258V9Q8_9SPHN,4.216697
5,tr_A0A8J7MZV9_A0A8J7MZV9_9PROT,24.69625
6,tr_A0A2S6RJP7_A0A2S6RJP7_9PROT,4.216697
7,tr_A0A7V8IQ16_A0A7V8IQ16_9PROT,5.791448
8,tr_A0A2D6ZRJ2_A0A2D6ZRJ2_9PROT,6.2402
9,tr_A0A2A4VAT5_A0A2A4VAT5_9PROT,14.11885


In [35]:
# Sanity check of the loaded table: row count, column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID      45 non-null     object 
 1   Temp    45 non-null     float64
dtypes: float64(1), object(1)
memory usage: 852.0+ bytes


In [36]:
# Colour scale for the annotated values, one entry per bin:
# (lower edge, inclusive; upper edge, exclusive; hex colour).
# An upper edge of None leaves the last bin open-ended.
# Edit this table to pass custom colours.
VALUE_COLOUR_BINS = [
    # low range
    (-3, 0, "#03045E"),
    (0, 5, "#023E8A"),
    (5, 10, "#0077B6"),
    (10, 15, "#0096C7"),
    (15, 20, "#00B4D8"),
    (20, 25, "#48CAE4"),
    (25, 30, "#90E0EF"),
    (30, 36, "#ADE8F4"),
    (36, 60, "#CAF0F8"),
    # high range
    (60, 70, "#FEF001"),
    (70, 71, "#FFCE03"),
    (71, 75, "#FD9A01"),
    (75, 80, "#FD6104"),
    (80, 81, "#FF2C05"),
    (81, 82, "#F00505"),
    (82, None, "#B80000"),
]

# Colour used for rows whose value is missing (written with the "None" label).
MISSING_COLOUR = "#FFFFFF"


def colour_for_value(value):
    """Return the hex colour of the bin in VALUE_COLOUR_BINS containing ``value``.

    Raises
    ------
    ValueError
        If ``value`` falls outside every bin, so the problem is reported
        before any output file is written.
    """
    for lower, upper, colour in VALUE_COLOUR_BINS:
        if lower <= value and (upper is None or value < upper):
            return colour
    raise ValueError(
        f"No colour bin covers the value {value!r}; extend VALUE_COLOUR_BINS."
    )


# Create the output folder; exist_ok makes re-running the cell harmless.
os.makedirs(outpath, exist_ok=True)

for col in annotation_cols:

    # Skip the info column, which won't be informative
    if col == id_field or col not in df:
        continue

    info_list = get_info_list(df, col, id_field)

    # Missing values appear in info_list under the "None" label, so that label
    # needs a colour of its own; without it the writers fail with KeyError.
    colour_dict = {"None": MISSING_COLOUR}
    for value in df[col].dropna().unique():
        colour_dict[value] = colour_for_value(value)

    generate_itol_colorstrip(
        col, colour_dict, info_list, f"{outpath}/{col}_itol_colorstrip.txt"
    )

    generate_itol_ranges(
        col, colour_dict, info_list, f"{outpath}/{col}_itol_ranges.txt"
    )

# Columns whose values are also written out as text labels.
text_cols = ['Temp']

for text_col in text_cols:

    info_list = get_info_list(df, text_col, id_field)

    generate_itol_text(
        text_col, info_list, f"{outpath}/{text_col}_itol_text.txt"
    )
        


File 'itol_output_kari_low/Temp_itol_colorstrip.txt' has been created.

File 'itol_output_kari_low/Temp_itol_ranges.txt' has been created.

File 'itol_output_kari_low/Temp_itol_text.txt' has been created.

