# In [None]:
import subprocess
import os

def reroot_tree(input_tree, output_tree, outgroups):
    """Reroot a tree on one or more outgroup tips by shelling out to gotree.

    Runs ``gotree reroot outgroup <outgroups...> -i <input_tree> -o <output_tree>``
    and prints gotree's captured stdout. Failures are reported on stdout and
    never raised: a non-zero exit prints gotree's stderr, and any other
    exception (including a problem while assembling the command) prints the
    exception text.

    Args:
        input_tree: Path handed to gotree's ``-i`` option.
        output_tree: Path handed to gotree's ``-o`` option.
        outgroups: Iterable of tip names appended after the ``outgroup``
            sub-command.
    """
    try:
        # Assembled inside the try so that a bad ``outgroups`` value is
        # reported through the generic handler below instead of propagating.
        gotree_args = ["gotree", "reroot", "outgroup"]
        gotree_args += outgroups
        gotree_args += ["-i", input_tree, "-o", output_tree]

        completed = subprocess.run(
            gotree_args,
            check=True,
            capture_output=True,
            text=True,
        )
        print(f"Gotree output:\n{completed.stdout}")
    except subprocess.CalledProcessError as failure:
        # gotree ran but exited non-zero; show what it wrote to stderr.
        print(f"Error rerooting tree: {failure.stderr}")
    except Exception as problem:
        print(f"An unexpected error occurred: {problem}")

def _read_kaptive_call(kaptive_out_file):
    """Return ``(match_confidence, best_match_locus)`` from a Kaptive table.

    Only the header line and the first result line are read; the two are
    paired column-by-column. Returns ``None`` when the file has no usable
    result line or lacks either the ``Match confidence`` or the
    ``Best match locus`` column.
    """
    with open(kaptive_out_file) as handle:
        columns = handle.readline().rstrip("\n").split("\t")
        values = handle.readline().rstrip("\n").split("\t")
    record = dict(zip(columns, values))
    try:
        return record["Match confidence"], record["Best match locus"]
    except KeyError:
        return None


def generate_kaptive_tips(path_klebsiella, output_file, header="strain,K-type"):
    """Generate the comma-separated tips table for PastML from Kaptive output.

    Walks ``<path_klebsiella>/<specie>/refseq/bacteria/<rep>/`` for every
    ``specie`` entry whose name starts with ``"k"`` and reads
    ``<rep>_Kaptive_out.txt`` when present. One ``<rep>,<Best match locus>``
    line is written per genome whose ``Match confidence`` is neither
    ``"None"`` nor ``"Low"``.

    Differences from the previous implementation:

    * A header row is written first. ``run_pastml`` in this file requests
      ``--columns K-type``, which PastML looks up among the column names of
      the annotation table, so a header-less table cannot satisfy it.
    * Species directories without ``refseq/bacteria`` and Kaptive files
      without a usable result row are skipped instead of aborting the whole
      run and leaving a partial table behind.
    * Each Kaptive file is closed right after it is read.
    * Directory listings are sorted so the output is reproducible.

    Errors are reported on stdout and never raised.

    Args:
        path_klebsiella: Root directory that holds the per-species folders.
        output_file: Path of the table to write (overwritten if it exists).
        header: Header line written before the data rows. Pass ``None`` (or
            an empty string) to write no header, as earlier versions did.
    """
    try:
        with open(output_file, "w") as outfile:
            if header:
                outfile.write(f"{header}\n")

            for specie in sorted(os.listdir(path_klebsiella)):
                genomes_dir = os.path.join(
                    path_klebsiella, specie, "refseq", "bacteria"
                )
                # Only species folders that actually contain genomes.
                if not (specie.startswith("k") and os.path.isdir(genomes_dir)):
                    continue

                for rep in sorted(os.listdir(genomes_dir)):
                    kaptive_out_file = os.path.join(
                        genomes_dir, rep, f"{rep}_Kaptive_out.txt"
                    )
                    if not os.path.isfile(kaptive_out_file):
                        continue

                    call = _read_kaptive_call(kaptive_out_file)
                    if call is None:
                        print(f"Skipping malformed Kaptive output: {kaptive_out_file}")
                        continue

                    confidence, locus = call
                    # Unreliable locus calls are left out of the table.
                    if confidence not in ("None", "Low"):
                        outfile.write(f"{rep},{locus}\n")
        print(f"Kaptive tips file written to {output_file}")

    except Exception as e:
        # Best-effort contract: report and return rather than raise.
        print(f"Error generating kaptive tips: {e}")

def run_pastml(tree_file, data_file, html_output, html_compressed, threads):
    """Invoke the ``pastml`` command-line tool on a tree and a tips table.

    The analysed column is fixed to ``K-type``, the table separator to a
    comma, the prediction method to ``MPPA`` and the model to ``F81``.
    PastML's captured stdout is printed on success. Failures are reported on
    stdout and never raised: a non-zero exit prints PastML's stderr, any
    other exception prints the exception text.

    Args:
        tree_file: Value for ``--tree``.
        data_file: Value for ``--data``.
        html_output: Value for ``--html``.
        html_compressed: Value for ``--html_compressed``.
        threads: Value for ``--threads``; converted with ``str()``.
    """
    try:
        input_options = ["--tree", tree_file, "--data", data_file]
        column_options = ["--columns", "K-type"]
        report_options = ["--html", html_output, "--html_compressed", html_compressed]
        run_options = ["--data_sep", ",", "-v", "--threads", str(threads)]
        model_options = ["--prediction_method", "MPPA", "-m", "F81"]

        # Concatenated in the same order the options were originally listed.
        pastml_args = (
            ["pastml"]
            + input_options
            + column_options
            + report_options
            + run_options
            + model_options
        )

        completed = subprocess.run(
            pastml_args,
            check=True,
            capture_output=True,
            text=True,
        )
        print(f"PastML output:\n{completed.stdout}")
    except subprocess.CalledProcessError as failure:
        # PastML ran but exited non-zero; show what it wrote to stderr.
        print(f"Error running PastML: {failure.stderr}")
    except Exception as problem:
        print(f"An unexpected error occurred: {problem}")
        

def main():
    """Run the pipeline end to end: reroot the tree, build the tips table, run PastML.

    All paths, the outgroup tip names and the thread count are hard-coded
    below. The three stages run unconditionally, one after another; each stage
    reports its own errors on stdout.
    """
    # Unrooted input tree and the rerooted tree written by step 1.
    unrooted_tree = "/home/conchae/prediction_depolymerase_tropism/iqtree_local/tree_files/Klensiella_genomes_fixed.2.1.treefile"
    rooted_tree = "/home/conchae/prediction_depolymerase_tropism/iqtree_local/tree_files/Klensiella_genomes_fixed.2.1.africana_rooted.treefile"
    outgroup_tips = ["GCF_016804125.1", "GCF_020526085.1"]

    # Project root scanned for Kaptive results; PastML files live under it.
    project_root = "/home/conchae/prediction_depolymerase_tropism"
    tips_table = f"{project_root}/pastML/pastML_KL_tips.comma.txt"
    full_report = f"{project_root}/pastML/k_genomes.real_tree.pastML.html"
    compressed_report = f"{project_root}/pastML/k_genomes.real_tree.pastML.comp.html"
    n_threads = 30

    # Step 1: reroot the tree on the outgroup tips.
    print("Starting tree rerooting...")
    reroot_tree(unrooted_tree, rooted_tree, outgroup_tips)

    # Step 2: collect Kaptive calls into the tips table PastML reads.
    print("Generating Kaptive tips file...")
    generate_kaptive_tips(project_root, tips_table)

    # Step 3: PastML takes the rerooted tree from step 1 and the table from step 2.
    print("Running PastML analysis...")
    run_pastml(rooted_tree, tips_table, full_report, compressed_report, n_threads)

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()