# Create meta alignment

In [2]:
# Load libs
import os, subprocess, sys,glob
from Bio.Seq import Seq
from Bio import SeqIO,AlignIO, Phylo
from collections import defaultdict

## Individual alignments + Gblocks

The 1-to-1 orthologs will be aligned, and their alignments polished with GBlocks.

In [3]:
maindir=os.path.join(os.getcwd(),"21_hemimetabolan_orthofinder_blast_renamed")
inputdir=os.path.join(maindir,"Single_Copy_Orthologue_Sequences")
output_align_dir=os.path.join(maindir,"MUSCLE_out")


In [None]:
# for each file on the directory 

for file in os.listdir(inputdir):
    print(os.path.join( inputdir,file))
    inputfas=os.path.join(inputdir,file)

    muscle_out=os.path.join(output_align_dir,os.path.splitext(file)[0])
    

    MUSCLEcommand="~/data_disk/Software/muscle3.8.31_i86linux64 -in %s -out %s"

    #run MUSCLE 
    #subprocess.run(MUSCLEcommand % (inputfas, muscle_out) , shell=True)

    # run GBlocks
    Gblockcommand="~/data_disk/Software/Gblocks_0.91b/Gblocks %s -t=p  -b4=5 -b5=a"
    #Gblockcommand="~/data_disk/Software/Gblocks_0.91b/Gblocks %s -t=p -b1=11 -b2=12 -b4=2 -b5=n"
    subprocess.run(Gblockcommand % muscle_out , shell=True)

    

## Join the alignments in a single file

In [None]:
# Concatenate Alignments
## https://yueyvettehao.github.io/2018/09/using-biopython-to-concatenate-aligned-sequences-with-the-same-name/

#with open("example.fasta", "rU") as handle:
#    for record in SeqIO.parse(handle, "fasta"):
#        print(record.id)      
#output_align_dir="21_hemimetabolan_orthofinder_blast_renamed/MUSCLE_out/"

Alignemntfileslist=[]

for file in os.listdir(output_align_dir):
    if file.endswith('-gb'):
        print(file)
        Alignemntfileslist.append(os.path.join(output_align_dir,file))

Alignemntfileslist
        
# Join all fasta alignments  in single file
All_alignments=os.path.join(output_align_dir,"All_alignments.fa")

catcommand="cat %s > %s" % (' '.join(Alignemntfileslist),All_alignments )

print(catcommand)

subprocess.run(catcommand, shell=True)



## Concatenate the alignments in a Metaalignment

In [None]:
sequence_map = defaultdict(str)


#All_alignments="/home/guillem/data_disk/Cricket_genome_annotation/Comparative_Genomics/Phylogenetic_trees/Proba.fa"
#output_handle = open("Example.fa", "w")
output_handle = open(os.path.join(output_align_dir,"Meta_alignment.fa"), "w")

for sequence in SeqIO.parse(All_alignments, "fasta"):
    sequence.name=sequence.name.split("_")[0]  #  only retain spp name
    sequence_map[sequence.name] += str(sequence.seq)

    
tmp= open("tmp","w+")

for key, seq in sequence_map.items() :
    #print (key)
    tmp.write("".join((">",key) ))
    tmp.write("\n")
    tmp.write(seq)
    tmp.write("\n")
    #print ("".join((">",key) ))
    #print (seq)
tmp.close() 

## Reformat to good fasta file    
alignments = AlignIO.parse("tmp", "fasta")
#print("Alignment length %i" % alignment.get_alignment_length())
#for record in alignment :
#    print(record.seq + " " + record.id)
AlignIO.write(alignments, output_handle, "fasta")
output_handle.close()
os.remove("tmp")

## Run FastTree

I used the FastTreMP whichuses multiple threads


In [None]:
#metalignment_file=os.path.join(os.getcwd(), "Example.fa")
#outtree=os.path.join(os.getcwd(), "Example_Tree.tree")

metalignment_file= os.path.join(output_align_dir,"Meta_alignment.fa")
outtree=os.path.join(maindir, "Fasttree_out.tree")

Fasttree_Command="/home/guillem/data_disk/Software/FastTreeMP -gamma < %s > %s" % (metalignment_file, outtree)

print(Fasttree_Command)

Fasttree_result =subprocess.run(Fasttree_Command , shell=True,  stdout=subprocess.PIPE, stderr=subprocess.PIPE)


In [None]:
print("Stdout:\n",str(Fasttree_result))
print("Stderr:\n",str(Fasttree_result))

## Visualize FastTree Tree

In [None]:
tree = Phylo.read(outtree, 'newick')
#print(tree)

import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = [10, 10]

Phylo.draw(tree, branch_labels=lambda c: c.branch_length)

Phylo.draw_ascii(tree)

## Run RaxML


In [5]:
## Run rRaxML

metalignment_file= os.path.join(output_align_dir,"Meta_alignment.fa")
Raxmlouttree="21hemi" # if whole path raxmlcrashes!

#-# boostrtaps  -p  randomseet  -T numberOfThreads
RaxML_Command="/home/guillem/data_disk/Software/standard-RAxML/raxmlHPC-PTHREADS -T 16 -m PROTGAMMAAUTO -p 123 -# 2 -s %s -n %s " % (metalignment_file, Raxmlouttree)

print(RaxML_Command)

RaxML_result =subprocess.run(RaxML_Command , shell=True,  stdout=subprocess.PIPE, stderr=subprocess.PIPE)

/home/guillem/data_disk/Software/standard-RAxML/raxmlHPC-PTHREADS -T 16 -m PROTGAMMAAUTO -p 123 -# 2 -s /data/Cricket_genome_annotation/Comparative_Genomics/Phylogenetic_trees/21_hemimetabolan_orthofinder_blast_renamed/MUSCLE_out/Meta_alignment.fa -n RaxMLout 


In [6]:
print("Stdout:\n",str(RaxML_result))
print("Stderr:\n",str(RaxML_result))

Stdout:
 CompletedProcess(args='/home/guillem/data_disk/Software/standard-RAxML/raxmlHPC-PTHREADS -T 16 -m PROTGAMMAAUTO -p 123 -# 2 -s /data/Cricket_genome_annotation/Comparative_Genomics/Phylogenetic_trees/21_hemimetabolan_orthofinder_blast_renamed/MUSCLE_out/Meta_alignment.fa -n RaxMLout ', returncode=255, stdout=b"\nRAxML can't, parse the alignment file as phylip file \nit will now try to parse it as FASTA file\n\nRAxML output files with the run ID <RaxMLout> already exist \nin directory /data/Cricket_genome_annotation/Comparative_Genomics/Phylogenetic_trees/ ...... exiting\n", stderr=b'')
Stderr:
 CompletedProcess(args='/home/guillem/data_disk/Software/standard-RAxML/raxmlHPC-PTHREADS -T 16 -m PROTGAMMAAUTO -p 123 -# 2 -s /data/Cricket_genome_annotation/Comparative_Genomics/Phylogenetic_trees/21_hemimetabolan_orthofinder_blast_renamed/MUSCLE_out/Meta_alignment.fa -n RaxMLout ', returncode=255, stdout=b"\nRAxML can't, parse the alignment file as phylip file \nit will now try to par

## Visualize RaxML Tree

In [None]:
RaxMLtree = Phylo.read("RaxMLout", 'newick')
#print(tree)

import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = [10, 10]

Phylo.draw(RaxMLtree, branch_labels=lambda c: c.branch_length)

Phylo.draw_ascii(RaxMLtree)

##Preparing CAFE

RaxML/Fasttree r8s -> for cafe

https://groups.google.com/forum/#!searchin/hahnlabcafe/raxml|sort:date/hahnlabcafe/kEfPXEx1CN8/MmtW8ihBCQAJ

