# Mycoplasma bovis analysis

Links

* https://www.ncbi.nlm.nih.gov/genome/browse/#!/prokaryotes/1150/

Bioprojects:

* Genomic epidemiology of Mycoplasma bovis in France: SRP386998
* Genome-Wide Identification of Variants Associated with Antimicrobial Resistance in Mycoplasma bovis: SRP349146
* Sanger Full_genome_sequencing_of_Mycoplasma_species_isolates_from_ruminants: PRJEB3408

Refs:

* [Genomics-based epidemiology of bovine Mycoplasma bovis strains in Israel](https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-020-6460-0#Sec13)
* [Population Genomic Analysis of Mycoplasma bovis Elucidates Geographical Variations and Genes associated with Host-Types](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7650767/)

In [315]:
import os,sys,subprocess,glob
import numpy as np
import pandas as pd
import pylab as plt
import seaborn as sns
from importlib import reload
from snipgenie import app, tools, wgmlst

In [285]:
# Sample table; the cells below read its `sample`, `filename1`, `filename2`
# and `bam_file` columns.
df = pd.read_csv('results/samples.csv')

## assembly

In [None]:
# Assemble a positional slice of the sample table, one sample at a time.
# The trailing 12 is passed through to wgmlst.spades (the echoed command in
# the cell output shows it as `spades -t 12`).
for _, row in df[12:14].iterrows():
    sample_name = row['sample']
    print(sample_name)
    workdir = os.path.join('assembly', sample_name)
    fasta_out = 'assembly/%s.fa' % sample_name
    wgmlst.spades(row.filename1, row.filename2, workdir, fasta_out, 12)

240
C06786950603283112
spades -t 12 --pe1-1 /storage/catherine/data/C06786950603283112_S8_L001-4_R1_001.fastq.gz --pe1-2 /storage/catherine/data/C06786950603283112_S8_L001-4_R2_001.fastq.gz --careful -o assembly/C06786950603283112


## annotation

In [None]:
# Build and print one prokka command per assembled FASTA file.
# Nothing is executed here: the subprocess call is left commented out.
out = 'annot'
files = glob.glob('assembly/*.fa')
a = list(map(os.path.basename, files))
print(files)

for f in files:
    # sample name = FASTA file name without directory or extension
    n, _ext = os.path.splitext(os.path.basename(f))
    cmd = f'prokka --outdir {out}/{n} {f} --prefix {n}'
    print(cmd)
    # To actually run prokka, skipping samples whose output folder exists:
    #if not os.path.exists('%s/%s' %(out,n)):
        #subprocess.check_output(cmd, shell=True)

## coverage

In [None]:
reload(tools)
n = 8
# Allocate one panel per sampled row. The loop below can draw up to n panels,
# so a grid of n-1 axes raises IndexError whenever every sampled row has a
# BAM file. squeeze=False keeps the result a 2-D array even for n == 1.
fig, ax = plt.subplots(n, 1, figsize=(25, 12), squeeze=False)
axs = ax.ravel()
i = 0
for idx, r in df.sample(n).iterrows():
    # Rows without a BAM path cannot be plotted; they do not consume a panel.
    if pd.isnull(r.bam_file):
        continue
    # Depth restricted to start=180000 / end=300000, then averaged with
    # groupby_interval(clip=500, interval=1000).
    d = tools.samtools_depth(r.bam_file, start=180000, end=300000)
    x = groupby_interval(d, 500, 1000)
    ax = axs[i]
    x.plot(kind='area', ax=ax)
    label = r['sample']
    # Sample name in axes-relative coordinates (top-left of the panel).
    ax.text(.05, .8, label, color='blue', transform=ax.transAxes, fontsize=12)
    ax.set_xticklabels([])
    i += 1
# Hide panels left unused because some sampled rows were skipped.
for spare in axs[i:]:
    spare.set_visible(False)
sns.despine()
plt.tight_layout()

In [234]:
def groupby_interval(df, clip=2, interval=100):
    """Average depth over fixed-width position windows.

    Used for samtools depth data.

    Args:
        df: DataFrame with a `pos` column and a numeric `depth` column.
        clip: upper bound applied to each window's mean depth
            (the lower bound is 0).
        interval: window width, in the same units as `pos`.

    Returns:
        Series of clipped mean depths indexed by window start position.
        Windows that contain no rows, or whose clipped mean is 0, are
        omitted. An empty input yields an empty Series.
    """
    if df.empty:
        return pd.Series(dtype=float, name='depth')

    start = df.pos.min()
    end = df.pos.max()
    # Push the last edge strictly past `end`: with half-open windows
    # [left, right) an edge list that stops at or before `end` silently
    # drops the final partial window, including the maximum position.
    edges = np.arange(start, end + interval + 1, interval)
    windows = pd.cut(df.pos, bins=edges, labels=edges[:-1], right=False)
    # observed=False keeps empty windows as NaN; they are dropped below.
    means = df.groupby(windows, observed=False).mean(numeric_only=True)['depth']
    means = means.clip(0, clip)
    # Zero-depth windows are treated as missing rather than plotted as 0.
    means = means.replace(0, np.nan)
    return means.dropna()

In [None]:
# Windowed depth profile for a single mapped sample; the bare `data`
# expression on the last line displays the result in the notebook.
d = tools.samtools_depth('results/mapped/222A.bam')
data = groupby_interval(d, clip=500, interval=1000).dropna()
data

In [None]:
from pycirclize import Circos
from pycirclize.parser import Genbank
from pycirclize.utils import load_prokaryote_example_file
import numpy as np
from matplotlib.patches import Patch

# Reference annotation: its name and size define the single circos sector.
gbk_file = 'NC_014760.gb'
gbk = Genbank(gbk_file)

# The plot spans 0-320 degrees only.
# NOTE(review): the remaining gap presumably leaves room for the sample
# labels written by circos.text below - confirm against the rendered figure.
circos = Circos(sectors={gbk.name: gbk.range_size}, start=0, end=320)
sector = circos.get_sector(gbk.name)

# Outer ring: labelled major ticks (in Mb) plus unlabelled minor ticks.
major_ticks_interval = 200000
minor_ticks_interval = 50000
outer_track = sector.add_track((98, 100))
outer_track.axis(fc="lightgrey")
outer_track.xticks_by_interval(
    major_ticks_interval,
    label_formatter=lambda v: f"{v/ 10 ** 6:.1f} Mb",
)
outer_track.xticks_by_interval(
    minor_ticks_interval,
    tick_length=1,
    show_label=False,
)

# CDS features, one ring per strand: forward in red, reverse in blue.
f_cds_track = sector.add_track((90, 97), r_pad_ratio=0.1)
forward_cds = gbk.extract_features("CDS", target_strand=1)
f_cds_track.genomic_features(forward_cds, fc="red")
r_cds_track = sector.add_track((83, 90), r_pad_ratio=0.1)
reverse_cds = gbk.extract_features("CDS", target_strand=-1)
r_cds_track.genomic_features(reverse_cds, fc="blue")

# One depth ring per randomly sampled row, stacked inwards from radius 70
# in steps of h.
n = 8
i = 70
h = 6
text_common_kws = dict(ha="right", va="center", size=8)
for _, row in df.sample(n).iterrows():
    name = row['sample']
    if pd.isnull(row.bam_file):
        continue
    d = tools.samtools_depth(row.bam_file)
    data = groupby_interval(d, 500, 2000)
    track = sector.add_track((i, i + h), r_pad_ratio=0.1)
    track.axis()
    track.fill_between(data.index, data.values, ec="black", lw=.2)
    # Label radius: 2 units above the inner edge of the ring just drawn.
    circos.text(name, r=i + 2, color="black", **text_common_kws)
    i -= h

fig = circos.plotfig(160)

In [None]:
from pycirclize import Circos
from io import StringIO
from Bio import Phylo
import numpy as np

# Fixed seed so the random heatmap values below are reproducible.
np.random.seed(0)

tree = Phylo.read('results/tree.newick', "newick")
leaf_num = tree.count_terminals()

# Single sector sized by the number of leaves, drawn from -350 to 0 degrees.
circos = Circos(sectors={"Tree": leaf_num}, start=-350, end=0)
sector = circos.sectors[0]

# Tree ring, using the branch lengths stored in the newick file.
tree_track = sector.add_track((30, 80))
tree_track.tree(
    tree,
    use_branch_length=True,
    leaf_label_size=10,
    leaf_label_margin=2,
)

# Heatmap ring: one row per label, one column per leaf. The values are
# random integers, not measurements from the samples.
row_labels = list("ABCDE")
n_rows = len(row_labels)
heatmap_track = sector.add_track((100, 120))
matrix_data = np.random.randint(0, 100, (n_rows, leaf_num))
heatmap_track.heatmap(matrix_data, cmap="viridis")
# Tick positions 0.5, 1.5, ... sit halfway between integer row boundaries.
row_centres = [k + 0.5 for k in range(n_rows)]
heatmap_track.yticks(row_centres, row_labels, vmax=n_rows, tick_length=0)

fig = circos.plotfig()


## AMR snps

## kraken