In [1]:
## Tree visualization with marked nodes

import toytree
import re
import toyplot.pdf
import toyplot.png
import toyplot.svg

# Load the tree from the Newick file
tree_url = "tree.tre"
tree = toytree.tree(tree_url)

# Collect the tip labels that contain the substring 'outgroup'
matching_tips = [name for name in tree.get_tip_labels() if 'outgroup' in name]

# Root the tree on the MRCA of the matched tips. If no tip matches, keep the
# tree as loaded so that `rooted_tree` is always defined; otherwise the next
# statement would raise NameError on a fresh kernel (or silently reuse a stale
# value left behind by an earlier run).
if matching_tips:
    mrca = tree.get_mrca_node(*matching_tips)
    rooted_tree = tree.root(mrca)
else:
    print("Warning: no tip label contains 'outgroup'; the tree was not re-rooted.")
    rooted_tree = tree

# Ladderize the tree
ladderized_tree = rooted_tree.ladderize()

# Per-node drawing attributes, indexed by node.idx (internal and terminal nodes).
# Defaults: size 0, no marker, black fill.
n_nodes = ladderized_tree.nnodes
node_sizes = [0] * n_nodes
node_markers = [''] * n_nodes
node_colors = ['black'] * n_nodes

# (minimum support, marker, fill colour), checked from highest to lowest.
# The first threshold the support value reaches wins; supports below 50
# keep the defaults above.
support_styles = (
    (99, 'o', "black"),  # support >= 99
    (70, 's', "gray"),   # 70 <= support < 99
    (60, 'd', "white"),  # 60 <= support < 70
    (50, 'v', "white"),  # 50 <= support < 60
)

# Assign a size-12 marker to every internal node with support >= 50
for node in ladderized_tree.treenode.traverse():
    if node.is_leaf():
        continue  # Terminal nodes keep the defaults
    support_value = node.support
    if support_value is None:
        continue
    for min_support, marker, color in support_styles:
        if support_value >= min_support:
            node_sizes[node.idx] = 12
            node_markers[node.idx] = marker
            node_colors[node.idx] = color
            break


######## Render the styled tree and write it to an SVG file

# Draw the tree with custom tip labels, node sizes, markers, and colors
canvas, axes, mark = ladderized_tree.draw(
    width=1200,
    height=1200,
    tip_labels_align=False,
    tip_labels_style={  # Font style for tip labels
        "fill": "#262626",          # Text color
        "font-size": "18px",        # Font size in pixels
        "-toyplot-anchor-shift": "15px",  # Adjust label position
    },
    node_labels=None,          # No text labels on nodes; support is shown by the markers
    node_sizes=node_sizes,     # 0 hides the marker (support < 50), 12 otherwise
    node_markers=node_markers, # Marker shape per node ('o', 's', 'd', 'v')
    node_colors=node_colors,   # Marker fill per node (black, gray, white)
    edge_style={
        "stroke": "black", 
        "stroke-width": 1,
    },
)

# Save the canvas. The output folder is created first so that rendering does
# not fail on a fresh checkout where "out/" does not exist yet.
import os
os.makedirs("out", exist_ok=True)
#toyplot.pdf.render(canvas, "tree_visualization.pdf")
#toyplot.png.render(canvas, "tree_visualization.png")
toyplot.svg.render(canvas, "out/tree_markers.svg")


In [2]:
# Requires an internet connection: genus names are validated against MyCoPortal.
# Words containing "type" are also bolded.

import re
import xml.etree.ElementTree as ET
import os
import requests

# Define SVG namespace
SVG_NS = "http://www.w3.org/2000/svg"
ET.register_namespace('', SVG_NS)  # Ensure correct namespace handling

# Genus validation function
def check_genus(genus_name):
    """Return True if the MyCoPortal taxon query response mentions ``genus_name``.

    The check is a case-insensitive substring test on the response body.
    NOTE(review): this presumably relies on the service not echoing the query
    term back for unknown names -- confirm against the actual response format.

    Args:
        genus_name: Candidate genus name to look up.

    Returns:
        True when the request succeeds with HTTP 200 and the name occurs in
        the response text; False for an empty name, a non-200 status, or a
        failed request (the failure is printed, not raised).
    """
    # An empty string is a substring of every response; never report it as valid.
    if not genus_name:
        return False

    url = "https://www.mycoportal.org/fdex/query.php"
    try:
        # `params` URL-encodes the query; `timeout` keeps an unresponsive
        # server from blocking the notebook indefinitely.
        response = requests.get(
            url,
            params={"qText": genus_name, "qField": "taxon"},
            timeout=10,
        )
    except requests.RequestException as e:
        print(f"Exception occurred: {e}")
        return False

    if response.status_code == 200:
        return genus_name.lower() in response.text.lower()

    print(f"Error accessing MyCoPortal: Status {response.status_code}")
    return False

# Patterns to detect 'type' words and genus/species names.

# Any whole word that contains "type" (case-insensitive), e.g. "holotype".
type_pattern = re.compile(r'\b(\w*type\w*)\b', re.IGNORECASE)

# genus      : capitalised word of at least four letters, followed by whitespace
# qualifier  : optional "sp", "cf" or "aff" (with optional trailing dot)
# species    : optional run of letters/hyphens
# voucher    : optional capitalised token followed by digits and the rest of the text
#
# The qualifier must end at a word boundary (\b). Without it, an epithet that
# merely starts with "sp"/"cf"/"aff" (e.g. "spinosa", "affinis") was split
# into a qualifier plus a truncated species.
genus_pattern = re.compile(
    r'(?P<genus>\b[A-Z][a-z]{3,})\s+'  
    r'(?:(?P<qualifier>sp|cf|aff)\b\.?)?\s*'
    r'(?P<species>[A-Za-z-]+)?\s*'
    r'(?P<voucher>[A-Z][\w]*\s*\d+.*)?'
)

def italicize_genus_species(svg_file, output_file):
    """Rewrite every SVG <text> element as styled <tspan> children.

    For each <text> element the concatenated text is read, underscores are
    replaced by spaces, the element's content is removed and
    ``process_text_elements`` rebuilds it (italic genus/species, bold 'type'
    words). The result is written to ``output_file`` as UTF-8 with an XML
    declaration.

    Args:
        svg_file: Path of the SVG file to read.
        output_file: Path of the SVG file to write.
    """
    tree = ET.parse(svg_file)
    root = tree.getroot()
    genus_cache = {}  # genus -> validation result, shared across all labels in this file

    for text_elem in root.findall(f'.//{{{SVG_NS}}}text'):
        text = ''.join(text_elem.itertext()).strip()
        adjusted_text = text.replace('_', ' ')

        # Element.clear() drops children, text, tail AND every attribute.
        # Restore all attributes (not only x/y) so that style, transform and
        # similar attributes of the label survive the rewrite.
        saved_attrib = dict(text_elem.attrib)
        saved_tail = text_elem.tail
        text_elem.clear()
        text_elem.attrib.update(saved_attrib)
        text_elem.tail = saved_tail

        # Process genus/species with italics and 'type' words with bold
        process_text_elements(text_elem, adjusted_text, genus_cache)

    tree.write(output_file, encoding='utf-8', xml_declaration=True)

def process_text_elements(text_elem, text, genus_cache):
    """Append <tspan> children to ``text_elem`` that render ``text`` with styling.

    Each ``genus_pattern`` match is emitted as: genus (italic only when
    ``check_genus`` accepted it), qualifier followed by ". " (plain), species
    (italic, omitted when the qualifier is "sp") and voucher (plain). Text
    between and after matches is passed to ``process_type_words``.

    Args:
        text_elem: The <text> element that receives the new children.
        text: Label text to render.
        genus_cache: Dict mapping genus -> bool; filled in on cache misses.
    """
    tspan_tag = f'{{{SVG_NS}}}tspan'
    italic = {'style': 'font-style:italic'}

    def emit(content, attrib=None):
        # Append one <tspan> carrying `content`, with optional attributes.
        node = ET.SubElement(text_elem, tspan_tag, attrib=attrib or {})
        node.text = content

    cursor = 0  # end offset of the previous match
    for hit in genus_pattern.finditer(text):
        genus = hit.group('genus')
        qualifier = hit.group('qualifier')
        species = hit.group('species')
        voucher = hit.group('voucher')

        # Validate each genus only once per cache
        if genus not in genus_cache:
            genus_cache[genus] = check_genus(genus)

        # Text between the previous match and this one
        if hit.start() > cursor:
            process_type_words(text_elem, text[cursor:hit.start()])

        # Genus: italic only when validated
        emit(genus + ' ', italic if genus_cache[genus] else None)

        if qualifier:
            emit(f"{qualifier}. ")
            if qualifier.lower() == 'sp':
                species = None  # "sp." labels carry no species epithet

        if species:
            emit(species + ' ', italic)

        if voucher:
            emit(voucher + ' ')

        cursor = hit.end()

    # Whatever follows the last match
    if cursor < len(text):
        process_type_words(text_elem, text[cursor:])

def process_type_words(text_elem, text):
    """Append ``text`` to ``text_elem`` as <tspan>s, bolding words that contain 'type'.

    Stretches between ``type_pattern`` matches become plain <tspan> children;
    each match becomes a <tspan> with ``font-weight:bold``.
    """
    tspan_tag = f'{{{SVG_NS}}}tspan'
    cursor = 0  # end offset of the previous match

    for hit in type_pattern.finditer(text):
        start, end = hit.span()

        # Plain text preceding the 'type' word
        if start > cursor:
            plain = ET.SubElement(text_elem, tspan_tag)
            plain.text = text[cursor:start]

        # The 'type' word itself, in bold
        bold = ET.SubElement(text_elem, tspan_tag, attrib={'style': 'font-weight:bold'})
        bold.text = hit.group(1)

        cursor = end

    # Plain text after the last match (or the whole text when nothing matched)
    if cursor < len(text):
        rest = ET.SubElement(text_elem, tspan_tag)
        rest.text = text[cursor:]

def process_svg_folder(input_folder):
    """Run ``italicize_genus_species`` on every source SVG in ``input_folder``.

    For each ``<name>.svg`` the result is written next to it as
    ``<name>_output.svg``. Files that already end in ``_output.svg`` are
    skipped: they are results of an earlier run, and processing them again
    would produce ``<name>_output_output.svg``.

    Args:
        input_folder: Folder that is scanned (non-recursively) for ``.svg`` files.
    """
    output_suffix = "_output.svg"

    # sorted() makes the processing order deterministic across runs
    for file_name in sorted(os.listdir(input_folder)):
        if not file_name.endswith('.svg') or file_name.endswith(output_suffix):
            continue

        input_file = os.path.join(input_folder, file_name)
        base_name = os.path.splitext(file_name)[0]
        output_file = os.path.join(input_folder, f"{base_name}{output_suffix}")

        print(f"Processing file: {input_file}")
        italicize_genus_species(input_file, output_file)
        print(f"Output saved to: {output_file}")

# Example usage: post-process every SVG found in the 'out/' folder
svg_input_folder = 'out/'  # Adjust path as needed
process_svg_folder(svg_input_folder)


Processing file: out/tree_markers.svg
Output saved to: out/tree_markers_output.svg


In [3]:
## Tree visualization with support values
##
import html  # To escape XML-invalid characters
import toytree
import re
import toyplot.pdf
import toyplot.png
import toyplot.svg

# Load the tree from the Newick file
tree_url = "tree.tre"
tree = toytree.tree(tree_url)

# Collect the tip labels that contain the substring 'militaris'
matching_tips = [name for name in tree.get_tip_labels() if 'militaris' in name]

# Root the tree on the MRCA of the matched tips. If no tip matches, keep the
# tree as loaded so that `rooted_tree` is always defined; otherwise the next
# statement would raise NameError on a fresh kernel (or silently reuse a stale
# value left behind by an earlier cell).
if matching_tips:
    mrca = tree.get_mrca_node(*matching_tips)
    rooted_tree = tree.root(mrca)
else:
    print("Warning: no tip label contains 'militaris'; the tree was not re-rooted.")
    rooted_tree = tree

# Ladderize the tree
ladderized_tree = rooted_tree.ladderize()

# Per-node drawing attributes, indexed by node.idx (internal and terminal nodes).
# Defaults: empty label, size 0, no marker, black fill.
n_nodes = ladderized_tree.nnodes
node_labels = [''] * n_nodes
node_sizes = [0] * n_nodes
node_markers = [''] * n_nodes  # e.g. "s" squares, "t" triangles, "d" diamonds
node_colors = ['black'] * n_nodes

# Label internal nodes whose support is 50 or higher and give them a square marker
for node in ladderized_tree.treenode.traverse():
    if not node.is_leaf():  # Terminal nodes keep the defaults
        support_value = node.support
        if support_value is not None and support_value >= 50:
            # int() drops the decimal part of the support value
            node_labels[node.idx] = str(int(support_value))
            node_sizes[node.idx] = 15
            node_markers[node.idx] = "s"

# Draw the tree with custom tip labels, support-value node labels, markers, and colors
canvas, axes, mark = ladderized_tree.draw(
    width=2000,
    height=5000,
    tip_labels_align=False,
    tip_labels_style={  # Font style for tip labels
        "fill": "#262626",          # Text color
        "font-size": "18px",        # Font size in pixels
        "-toyplot-anchor-shift": "15px",  # Adjust label position
    },
    node_labels=node_labels,          # Support values (>= 50) as text; '' elsewhere
    node_labels_style={  # Font style for node labels
        "fill": "#262626",          # Text color
        "font-size": "15px",        # Font size in pixels
    },
    # NOTE(review): the node_sizes list built above is NOT passed here, so
    # toytree's default sizing applies -- confirm whether node_sizes=node_sizes
    # was intended.
    node_sizes=None,
    node_markers=node_markers, # Marker shape per node ("s" where support >= 50)
    node_colors=node_colors,   # Marker fill per node (all black)
    edge_style={
        "stroke": "black", 
        "stroke-width": 1,
    },
)

# Save the canvas. The output folder is created first so that rendering does
# not fail on a fresh checkout where "out/" does not exist yet.
import os
os.makedirs("out", exist_ok=True)
#toyplot.pdf.render(canvas, "tree_visualization.pdf")
#toyplot.png.render(canvas, "tree_visualization.png")
toyplot.svg.render(canvas, "out/tree_supportvalue.svg")

In [4]:
# Use this cell to italicize names when no internet connection is available.
# No internet connection is needed: genus names are not validated online.
# Words containing "type" are bolded as well.
#

import re
import xml.etree.ElementTree as ET
import os

# Define SVG namespace
SVG_NS = "http://www.w3.org/2000/svg"
ET.register_namespace('', SVG_NS)  # Ensure correct namespace handling

# Regex for genus/species names.
# The qualifier must end at a word boundary (\b). Without it, an epithet that
# merely starts with "sp"/"cf"/"aff" (e.g. "spinosa", "affinis") was split
# into a qualifier plus a truncated species.
pattern = re.compile(
    r'\b(?!\w*type\b)'  # Skip a word that ends in lowercase "type" (e.g. "Holotype")
    r'(?P<genus>[A-Z][a-z]{3,})\s+'  # Genus: uppercase letter + at least three lowercase letters
    r'(?:(?P<qualifier>sp|cf|aff)\b\.?)?\s*'  # Optional qualifier (sp., cf., aff.)
    r'(?P<species>[\w-]+)?\s*'  # Optional species name
    r'(?P<voucher>(?:[A-Z][\w]*\s*\d+.*)?)'  # Voucher: capitalised token + number (may be empty)
)

# Regex to detect any word with "type" (case-insensitive)
type_pattern = re.compile(r'\b(\w*type\w*)\b', re.IGNORECASE)

def italicize_genus_and_bold_types(svg_file, output_file):
    """Italicize genus/species names and bold 'type' words in an SVG file.

    For every <text> element the concatenated text is read and underscores are
    replaced by spaces. Elements that need no change (no genus match, no
    'type' word, no underscore) are left untouched. All others are emptied and
    rebuilt from <tspan> children: genus and species in italics, qualifier and
    voucher plain, and the text around the matches handled by
    ``process_type_words``. The result is written to ``output_file`` as UTF-8
    with an XML declaration.

    Args:
        svg_file: Path of the SVG file to read.
        output_file: Path of the SVG file to write.
    """
    tree = ET.parse(svg_file)  # Parse the SVG file
    root = tree.getroot()

    # Iterate through all text elements in the SVG
    for text_elem in root.findall(f'.//{{{SVG_NS}}}text'):
        text = ''.join(text_elem.itertext()).strip()
        adjusted_text = text.replace('_', ' ')  # Replace underscores with spaces

        # Search for genus/species matches
        matches = list(pattern.finditer(adjusted_text))

        # Nothing to restyle: keep the element exactly as it is
        if not matches and '_' not in text and not type_pattern.search(adjusted_text):
            continue

        # Element.clear() drops children, text, tail AND every attribute.
        # Restore all attributes (not only x/y) so that style, transform and
        # similar attributes of the label survive the rewrite. Clearing must
        # happen on every rewrite path: appending tspans to an element that
        # still holds its original text would render the label twice.
        saved_attrib = dict(text_elem.attrib)
        saved_tail = text_elem.tail
        text_elem.clear()
        text_elem.attrib.update(saved_attrib)
        text_elem.tail = saved_tail

        last_index = 0  # Track the last match's end position

        # Process genus/species matches and interspersed text
        for match in matches:
            if match.start() > last_index:
                process_type_words(text_elem, adjusted_text[last_index:match.start()])

            genus = match.group('genus')
            qualifier = match.group('qualifier')
            species = match.group('species')
            voucher = match.group('voucher')

            # Italicize genus
            add_tspan(text_elem, genus + ' ', italic=True)

            # Add qualifier if present
            if qualifier:
                add_tspan(text_elem, f"{qualifier}. ")
                if qualifier.lower() == 'sp':
                    species = None  # Skip species if "sp." is found

            # Italicize species if present
            if species:
                add_tspan(text_elem, species + ' ', italic=True)

            # Add voucher if present
            if voucher:
                add_tspan(text_elem, voucher + ' ')

            last_index = match.end()  # Update last processed index

        # Handle remaining text after the last match (the whole text when
        # there was no genus match at all)
        if last_index < len(adjusted_text):
            process_type_words(text_elem, adjusted_text[last_index:])

    # Save the modified SVG
    tree.write(output_file, encoding='utf-8', xml_declaration=True)

def process_type_words(text_elem, text):
    """Append ``text`` to ``text_elem`` via ``add_tspan``, bolding 'type' words.

    Every ``type_pattern`` match (case-insensitive) is added as a bold tspan;
    the text between and after matches is added as unstyled tspans.
    """
    cursor = 0  # end offset of the previous match

    for hit in type_pattern.finditer(text):
        begin, finish = hit.span()

        # Unstyled text in front of the "type" word
        if begin > cursor:
            add_tspan(text_elem, text[cursor:begin])

        add_tspan(text_elem, hit.group(1), bold=True)
        cursor = finish

    # Unstyled text after the last match
    remainder = text[cursor:]
    if remainder:
        add_tspan(text_elem, remainder)

def add_tspan(text_elem, content, italic=False, bold=False):
    """Append a <tspan> holding ``content`` to ``text_elem``.

    The tspan always carries a ``style`` attribute: it lists
    ``font-style:italic;`` and/or ``font-weight:bold;`` as requested and is
    an empty string when neither flag is set.
    """
    declarations = []
    if italic:
        declarations.append("font-style:italic;")
    if bold:
        declarations.append("font-weight:bold;")

    tspan = ET.SubElement(
        text_elem,
        f'{{{SVG_NS}}}tspan',
        attrib={'style': " ".join(declarations)},
    )
    tspan.text = content

def process_svg_folder(input_folder):
    """Run ``italicize_genus_and_bold_types`` on every source SVG in a folder.

    For each ``<name>.svg`` the result is written next to it as
    ``<name>_output.svg``. Files that already end in ``_output.svg`` are
    skipped: they are results of an earlier run, and processing them again
    would produce ``<name>_output_output.svg``.

    Args:
        input_folder: Folder that is scanned (non-recursively) for ``.svg`` files.
    """
    output_suffix = "_output.svg"

    # sorted() makes the processing order deterministic across runs
    for file_name in sorted(os.listdir(input_folder)):
        if not file_name.endswith('.svg') or file_name.endswith(output_suffix):
            continue

        input_file = os.path.join(input_folder, file_name)
        output_file = os.path.join(input_folder, f"{os.path.splitext(file_name)[0]}{output_suffix}")

        print(f"Processing: {input_file}")
        italicize_genus_and_bold_types(input_file, output_file)
        print(f"Saved: {output_file}")

# Example usage: post-process every SVG found in the 'out/' folder;
# replace with the appropriate folder path
svg_input_folder = 'out/'  # Adjust as needed
process_svg_folder(svg_input_folder)


Processing: out/tree_markers.svg
Saved: out/tree_markers_output.svg
