# Mycoplasmopsis phylogeny

OrthoDB query: https://www.orthodb.org/?level=&species=&query=6927at2093



In [1]:
import os,sys,subprocess,glob,re,shutil
import json
import numpy as np
import pandas as pd
import pylab as plt
from importlib import reload
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
pd.options.display.max_rows = 200
pd.set_option('display.width', 1000)

In [2]:
%load_ext rpy2.ipython

In [3]:
# Read every record of the FASTA file into an in-memory list.
recs = [rec for rec in SeqIO.parse('mycoplasmopsis_S20.faa', format='fasta')]

In [62]:
# Convert OrthoDB-style records so that each one is identified by its species
# name only, keeping the first sequence seen for every organism.
# Each record description is expected to be "<id><JSON metadata>", where the
# JSON object carries an 'organism_name' key.
new = []
found = []      # organism names in first-seen order
seen = set()    # same names, for O(1) duplicate checks
for r in recs:
    x = r.description
    # Split on the first occurrence of the id only: an unbounded split would
    # cut the JSON short if the id string also appeared inside the metadata.
    data = json.loads(x.split(r.id, 1)[1])
    org = data['organism_name'].replace(' ','_')
    if org in seen:
        continue
    seen.add(org)
    found.append(org)
    # An explicit empty description stops Biopython from appending its
    # "<unknown description>" placeholder to the FASTA header line.
    new.append(SeqRecord(id=org,seq=r.seq,description=''))
SeqIO.write(new,'mycoplasmopsis_S20_org.faa','fasta')

93

In [130]:
import toytree

# Load the tree, re-root it on Mycoplasma_penetrans and draw it with
# aligned tip labels.
tre = toytree.tree('mycoplasmopsis_S20.tree').root('Mycoplasma_penetrans')
canvas, axes, mark = tre.draw(
    layout='r',
    width=900,
    tip_labels_align=True,
)
# Optional PDF export, left disabled:
#import toyplot.pdf
#toyplot.pdf.render(canvas, "tree-plot.pdf")

In [6]:
%%R
library(ape)
source('drawtrees.R')

In [None]:
%%R -w 1200 -h 900
#meta <- read.table('s20_orthodb.txt',sep='\t',header=TRUE,row.names=1)

# Read the tree file, re-root it on Mycoplasma_penetrans and draw it as a
# phylogram with aligned tip labels.
tree <- read.tree("mycoplasmopsis_S20.tree")
tree <- root(tree, outgroup = "Mycoplasma_penetrans")
plot(tree, type = "phylogram", align.tip.label = TRUE, cex = 0.8)

## kSNP

In [57]:
#download files: https://www.ncbi.nlm.nih.gov/datasets/genome/?taxon=2767358&reference_only=true
#move all files to one folder
# Collect every .fna file located directly inside mycoplasmopsis_genomes/.
files=glob.glob('mycoplasmopsis_genomes/*.fna')
# One-off renaming step, left disabled. It would move each file to a name made
# of the first 13 characters of its basename plus '.fna'; the last line is an
# alternative that removes every '.1' substring from the path.
#for f in files:   
    #shutil.move(f,os.path.join('mycoplasmopsis_genomes',os.path.basename(f)[:13]+'.fna'))
    #print (f,os.path.basename(f)[:13]+'.fna')
    #shutil.move(f,f.replace('.1',''))

### make kSNP4 input file
/local/kSNP4.1/MakeKSNP4infile -indir mycoplasmopsis_genomes/ -outfile ksnp_mycoplasmopsis.txt
### kchooser
/local/kSNP4.1/Kchooser4 -in ksnp_mycoplasmopsis.txt 
### run ksnp4
/local/kSNP4.1/kSNP4 -core -k 19 -outdir ksnp_mycoplasmopsis -in ksnp_mycoplasmopsis.txt

In [123]:
# Draw the kSNP parsimony tree, labelling tips with organism names taken from
# the summary table.
tre2 = toytree.tree('ksnp_mycoplasmopsis/tree.parsimony.tre')
tre2 = tre2.root('GCF_002272945')
# Column 5 (0-based) of the table becomes the index and is matched against the
# tree's tip labels -- presumably the assembly accession; verify.
df = pd.read_csv('data_summary.tsv',sep='\t',index_col=5)
idx = tre2.get_tip_labels()
# reindex() already returns the rows in tip order (with all-NaN rows for tips
# absent from the table), so no additional .loc[idx] lookup is needed.
df = df.reindex(index=idx)
# Fall back to the raw tip label where the table has no organism name instead
# of passing NaN on as a tip label.
tiplabels = [tip if pd.isna(name) else name
             for tip, name in zip(idx, df['Organism Scientific Name'])]

canvas, axes, mark = tre2.draw(tip_labels_align=True, layout='r', tip_labels=tiplabels, width=900)
#toyplot.pdf.render(canvas, "tree-plot2.pdf")

In [129]:
# Organism names from df with spaces replaced by underscores.
found = df['Organism Scientific Name'].str.replace(' ','_').tolist()

# Hand-picked subset of organism names.
names = [
    'Mycoplasmopsis_californica',
    'Mycoplasmopsis_opalescens_ATCC_27921',
    'Mycoplasmopsis_agalactiae',
    'Mycoplasmopsis_bovis',
]

#tre3 = tre.drop_tips(tips)


['Mycoplasmopsis_glycophila',
 'Mycoplasmopsis_gallopavonis',
 'Mycoplasmopsis_gallinacea',
 'Mycoplasmopsis_pullorum',
 'Mycoplasmopsis_anatis',
 'Mycoplasmopsis_verecunda',
 'Mycoplasmopsis_citelli',
 'Mycoplasmopsis_cricetuli_ATCC_35279',
 'Mycoplasmopsis_columboralis',
 'Mycoplasmopsis_sturni_DSM_22021',
 'Mycoplasmopsis_synoviae',
 'Mycoplasmopsis_edwardii',
 'Mycoplasmopsis_canis',
 'Mycoplasmopsis_cynos_C142',
 'Mycoplasmopsis_mustelae',
 'Mycoplasmopsis_felis',
 'Mycoplasmopsis_bovirhinis',
 'Mycoplasmopsis_alligatoris_A21JP2',
 'Mycoplasmopsis_mucosicanis',
 'Mycoplasmopsis_bovigenitalium',
 'Mycoplasmopsis_phocirhinis',
 'Mycoplasmopsis_californica',
 'Mycoplasmopsis_opalescens_ATCC_27921',
 'Mycoplasmopsis_agalactiae',
 'Mycoplasmopsis_bovis',
 'Mycoplasmopsis_primatum_ATCC_25948',
 'Mycoplasmopsis_fermentans_M64',
 'Mycoplasmopsis_caviae',
 'Mycoplasmopsis_lipofaciens_ATCC_35015',
 'Mycoplasmopsis_meleagridis',
 'Mycoplasmopsis_columbinasalis',
 'Mycoplasmopsis_iners_ATCC_1

In [97]:
%%R
library(dplyr)
library(phytools)
#' Plot a phylogenetic tree, optionally annotating its tips from a table.
#'
#' @param tree Tree object with a `tip.label` element that `plot()` can draw.
#' @param samples Data frame of per-tip metadata, indexed by row name with the
#'   tip labels of `tree`.
#' @param type Tree layout, forwarded to `plot()`.
#' @param title Main title drawn above the tree.
#' @param colorcol Name of the column in `samples` used to colour the tips
#'   (one-colour pies plus a legend); `NULL` disables colouring.
#' @param tiplabelcol Name of the column in `samples` holding alternative tip
#'   labels; `NULL` skips that step.
#' @param showtip Whether the tip labels are drawn.
#' @param cmap Name of the RColorBrewer palette used for `colorcol`.
#' @return Called for its plotting side effects; the value is whatever the
#'   final `if` evaluates to.
plot_tree <- function(tree,samples,type='phylogram',title='',colorcol=NULL,
                      tiplabelcol=NULL,showtip=TRUE,
                      cmap="Set1") {

    # Marker size shrinks as the number of tips grows. Count the tips on the
    # tree itself so the value is defined even when colorcol is NULL.
    ntips <- length(tree$tip.label)
    cex <- .3/ntips*100

    # The tree must be on the device before tiplabels()/legend() can add to it.
    plot(tree,type=type,cex=.8,label.offset=.2, edge.width=.6,show.tip.label=showtip)
    title(title,cex.main= 2)

    if (!is.null(colorcol)){
        # as.character() so that "Other" can be assigned even when the column
        # was read as a factor.
        labels <- as.character(samples[tree$tip.label,][[colorcol]])
        labels[is.na(labels)] <- "Other"
        leglabels <- levels(as.factor(labels))
        n <- length(leglabels)
        # brewer.pal() warns for n < 3, so request at least 3 colours and keep
        # only the first n of them.
        colors <- RColorBrewer::brewer.pal(n = max(n, 3), name = cmap)
        cols <- setNames(colors[seq_len(n)], leglabels)
        tiplabels(pie=to.matrix(labels, leglabels),cex=cex,piecol=cols)

        legend("topright", legend=names(cols), pch=22, pt.bg=cols, pt.cex=2.0, cex=1.2,
             bty="n",ncol=1,x.intersp=.3)
    }

    if (!is.null(tiplabelcol)){
        tl <- samples[tree$tip.label,][[tiplabelcol]]
        print(tl)
        #tiplabels(tl,type=2,cex=.8,size=1)
        # NOTE(review): sub.taxa.label() is not defined in this block and its
        # result is not used for drawing here -- confirm what it is meant to do.
        sub.taxa.label(tree, tl)
    }
    #add.scale.bar(x=100,lwd=2, cex=1)
}

In [None]:
%%R -w 1000 -h 600
tree <- read.tree('ksnp_mycoplasmopsis/tree.parsimony.tre')
# Read the summary table so that it can be indexed by tree tip label:
#  - header/check.names keep the original column titles (including ones with
#    spaces such as 'Organism Scientific Name') available by name;
#  - row.names=6 is the 1-based equivalent of the index_col=5 (0-based) used
#    by the Python cell that reads this same file -- verify against the file;
#  - quote/comment.char stop apostrophes and '#' inside fields from breaking
#    the tab-separated parsing.
meta <- read.table('data_summary.tsv',sep='\t',header=TRUE,row.names=6,
                   check.names=FALSE,quote="\"",comment.char="",
                   stringsAsFactors=FALSE)
options(repr.plot.width=15, repr.plot.height=12)
#plot(tree,type='p',align.tip.label=TRUE,cex=.8)
#plot_tree(tree, meta,tiplabelcol='Organism Scientific Name')

## fastANI method

In [93]:
# Write the path of every .fna file under mycoplasmopsis_genomes/ to
# query1.txt, one path per line.
# recursive=True lets '**' match zero or more directory levels, so files placed
# directly in the folder are found as well as those in sub-folders; without it
# '**' behaves like '*' and only first-level sub-folders are searched.
# Sorting keeps the list order reproducible between runs.
files=sorted(glob.glob('mycoplasmopsis_genomes/**/*.fna',recursive=True))
with open('query1.txt','w') as f:
    for l in files:
        # write() emits the line in one call; writelines() on a str iterates
        # over its characters.
        f.write(l+'\n')

In [None]:
# Run fastANI on the query list against the reference list, writing the
# results to fastani.out with 16 threads and the --matrix option enabled.
# The command is passed as an argument list so that no shell is involved.
# NOTE(review): the visible cells only write query1.txt; confirm query2.txt
# exists, or pass query1.txt as the reference list for an all-vs-all run.
cmd = ['fastANI', '--ql', 'query1.txt', '--rl', 'query2.txt',
       '-o', 'fastani.out', '-t', '16', '--matrix']
subprocess.check_output(cmd)