# Mycoplasmopsis phylogeny

* https://lpsn.dsmz.de/genus/mycoplasmopsis

In [2]:
import os,sys,subprocess,glob,re,shutil
import json
import numpy as np
import pandas as pd
import pylab as plt
from importlib import reload
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
# Notebook-wide pandas display settings: show up to 200 rows and allow
# 1000-character-wide output before wrapping.
pd.set_option('display.max_rows', 200)
pd.options.display.width = 1000

In [2]:
# Load the rpy2 IPython extension so that the %%R cells in this notebook can run.
%load_ext rpy2.ipython

In [3]:
#recs = list(SeqIO.parse('mycoplasmopsis_S20.faa',format='fasta'))
# Read every record of the FASTA file into a list.
# NOTE(review): the file name on this line had been truncated to 'uni, leaving an
# unterminated string literal (SyntaxError). The name below is inferred from the
# 'uniref_s20_mycoplasmopsis.*' files read elsewhere in this notebook -- confirm it.
# The header-conversion cell that follows parses JSON out of each description, so
# switch back to the commented-out file above if that cell is to be re-run.
recs = list(SeqIO.parse('uniref_s20_mycoplasmopsis.faa', format='fasta'))

In [62]:
#convert orthodb format to get species name only
# Each description is expected to be "<record id> <JSON metadata>". Keep the first
# sequence seen for every organism and re-label it with the organism name
# (spaces replaced by underscores), then write the result as FASTA.
new = []
found = set()  # organism names already kept; a set gives constant-time lookups
for r in recs:
    # Split on the first occurrence of the id only, so an id that re-appears inside
    # the JSON metadata cannot cut the payload short.
    data = json.loads(r.description.split(r.id, 1)[1])
    org = data['organism_name'].replace(' ','_')
    if org in found:
        continue
    found.add(org)
    # An explicit empty description stops Biopython from appending its
    # "<unknown description>" placeholder to the FASTA header line.
    new.append(SeqRecord(id=org, seq=r.seq, description=''))
# Last expression of the cell: SeqIO.write returns the number of records written.
SeqIO.write(new,'mycoplasmopsis_S20_org.faa','fasta')

93

In [6]:
import toytree
# Draw the tree stored in uniref_s20_mycoplasmopsis.tree, labelling each tip with
# the 'Organisms' value from the accompanying table.
tre = toytree.tree('uniref_s20_mycoplasmopsis.tree')
#tre = tre.root('Mycoplasma_penetrans')

# The table is indexed by its first column; tip names are looked up in that index.
df = pd.read_csv('uniref_s20_mycoplasmopsis.tsv',sep='\t',index_col=0)
idx = tre.get_tip_labels()
# reindex() already yields exactly one row per tip, in tip order (NaN rows for tips
# absent from the table). The former extra .loc[idx] pass added nothing and would
# multiply rows if a tip label occurred more than once.
df = df.reindex(index=idx)
# Tips with no organism entry keep their own name instead of being drawn as "nan".
tiplabels = [org if pd.notna(org) else tip for tip, org in zip(idx, df['Organisms'])]
canvas, axes, mark = tre.draw(tip_labels_align=True, layout='r', tip_labels=tiplabels, width=900)

#import toyplot.pdf
#toyplot.pdf.render(canvas, "tree-plot.pdf")

In [6]:
%%R
# Attach the ape package for the R session behind the %%R cells.
library(ape)
# Run the local script drawtrees.R in the R session.
# NOTE(review): presumably tree-drawing helpers; nothing from it is called in the visible cells -- confirm it is still needed.
source('drawtrees.R')

In [None]:
%%R -w 1200 -h 900

# Read the Newick tree, re-root it on the Mycoplasma_penetrans tip and draw it as a
# phylogram with aligned tip labels at 80% text size.
tree <- root(read.tree('mycoplasmopsis_S20.tree'), outgroup='Mycoplasma_penetrans')
plot(tree, type='phylogram', align.tip.label=TRUE, cex=0.8)

## kSNP

In [57]:
# Download the genome files from NCBI Datasets:
# https://www.ncbi.nlm.nih.gov/datasets/genome/?taxon=2767358&reference_only=true
# then move all of them into one folder; the glob below looks for *.fna directly
# inside mycoplasmopsis_genomes/.
files=glob.glob('mycoplasmopsis_genomes/*.fna')
# Disabled one-off renaming, kept for reference: it either shortened each file name
# to its first 13 characters plus '.fna', or stripped '.1' from the path.
#for f in files:   
    #shutil.move(f,os.path.join('mycoplasmopsis_genomes',os.path.basename(f)[:13]+'.fna'))
    #print (f,os.path.basename(f)[:13]+'.fna')
    #shutil.move(f,f.replace('.1',''))

### make ksnp file
```bash
/local/kSNP4.1/MakeKSNP4infile -indir mycoplasmopsis_genomes/ -outfile ksnp_mycoplasmopsis.txt
```
### kchooser
```bash
/local/kSNP4.1/Kchooser4 -in ksnp_mycoplasmopsis.txt
```
### run ksnp4
```bash
/local/kSNP4.1/kSNP4 -core -k 19 -outdir ksnp_mycoplasmopsis -in ksnp_mycoplasmopsis.txt
```

In [123]:
# Draw the kSNP parsimony tree, rooted on the tip GCF_002272945 and labelled with
# the 'Organism Scientific Name' value from data_summary.tsv.
tre2 = toytree.tree('ksnp_mycoplasmopsis/tree.parsimony.tre')
tre2 = tre2.root('GCF_002272945')
# Column 5 of the table becomes the index that tip names are looked up in.
# NOTE(review): presumably the assembly accession column -- confirm its values match
# the tip names exactly (e.g. no trailing ".1" version suffix).
df = pd.read_csv('data_summary.tsv',sep='\t',index_col=5)
idx = tre2.get_tip_labels()
# reindex() already yields exactly one row per tip, in tip order (NaN rows for tips
# absent from the table). The former extra .loc[idx] pass added nothing and would
# multiply rows if a tip label occurred more than once.
df = df.reindex(index=idx)
# Tips with no matching row keep their own name instead of being drawn as "nan".
tiplabels = [name if pd.notna(name) else tip
             for tip, name in zip(idx, df['Organism Scientific Name'])]

canvas, axes, mark = tre2.draw(tip_labels_align=True, layout='r', tip_labels=tiplabels, width=900)
#toyplot.pdf.render(canvas, "tree-plot2.pdf")

## fastANI method

In [93]:
# Write the path of every genome FASTA to query1.txt, one per line.
# recursive=True makes ** match zero or more directories, so genomes are found both
# directly inside mycoplasmopsis_genomes/ and inside sub-folders. Without it ** acts
# like a single * and only files exactly one folder down are matched, which yields
# an empty list once the genomes have been moved into one folder.
# Sorting keeps the list order stable between runs.
files = sorted(glob.glob('mycoplasmopsis_genomes/**/*.fna', recursive=True))
with open('query1.txt','w') as f:
    f.writelines(path + '\n' for path in files)

In [None]:
# Run fastANI with the genomes listed in query1.txt as queries and those listed in
# query2.txt as references, using 16 threads; results go to fastani.out and --matrix
# additionally requests the matrix output.
# NOTE(review): no visible cell writes query2.txt -- confirm it exists, or pass
# query1.txt as the reference list as well.
# The command is given as an argument list and run without a shell, so nothing is
# re-parsed by the shell and a missing fastANI binary raises FileNotFoundError.
cmd = ['fastANI', '--ql', 'query1.txt', '--rl', 'query2.txt',
       '-o', 'fastani.out', '-t', '16', '--matrix']
subprocess.check_output(cmd)