In [3]:
import pandas as pd
import seaborn as sb
import matplotlib.pylab as plt
import toyplot
import numpy as np
from PIL import ImageFont

import toytree
import toyplot
import numpy as np

# Load the phylogeny that is drawn in the composite figure further down.
# The path suggests an IQ-TREE run on concatenated ribosomal proteins -- TODO confirm.
tre = toytree.tree('tree-bacteroidetes-mags-ribo-iqtree/concatenated-proteins.fa.treefile')

In [4]:
# Gene annotation table for the susC/susD search. Despite the .csv extension
# the file is parsed as tab-separated. ctgene() reads its 'Genome Name' and
# 'Gene Product Name' columns.
df = pd.read_csv('Data/bacteroidetes_susCD.csv', sep='\t')

In [5]:
def ctgene(df, genelist):
    """Count, per genome, the annotated genes whose product name mentions each gene.

    Matching is a case-insensitive substring test of every name in
    ``genelist`` against the ``'Gene Product Name'`` column, so a single row
    can count towards several genes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'Genome Name'`` and ``'Gene Product Name'``.
        Rows with a missing product name never match; rows with a missing
        genome name are ignored.
    genelist : iterable of str
        Gene names to search for.

    Returns
    -------
    neat : pandas.DataFrame
        One row per genome (sorted by name, default integer index) with a
        ``'Genome Name'`` column followed by one count column per gene.
    tidy : dict
        Long-format data as three parallel lists under the keys
        ``'Genome Name'``, ``'Gene'`` and ``'Count'`` (one entry per
        gene/genome pair), ready for ``pandas.DataFrame(tidy)`` and seaborn.
    """
    # Sort the genomes so the row order -- and therefore the integer index of
    # ``neat`` -- is reproducible. Iterating a bare set of strings yields a
    # different order from one interpreter session to the next.
    genomes = sorted(set(df['Genome Name'].dropna()), key=str)

    # A missing product name (NaN) is a float and has no .lower(); blank it so
    # it simply never matches.
    products = df['Gene Product Name'].fillna('').astype(str).str.lower()

    temp = {}
    for gene in dict.fromkeys(genelist):  # de-duplicate, keep the caller's order
        hits = products.str.contains(gene.lower(), regex=False)
        per_genome = df.loc[hits.to_numpy(), 'Genome Name'].value_counts()
        temp[gene] = {genome: int(per_genome.get(genome, 0)) for genome in genomes}

    # Wide format: easy to read but bad for plotting.
    neat = pd.DataFrame(temp, index=genomes)
    neat = neat.reset_index().rename(columns={'index': 'Genome Name'})

    # Reformat to tidy (long) format for seaborn plotting.
    tidy = {'Genome Name': [], 'Gene': [], 'Count': []}
    for gene, genome_counts in temp.items():
        for genome, count in genome_counts.items():
            tidy['Gene'].append(gene)
            tidy['Genome Name'].append(genome)
            tidy['Count'].append(count)
    return neat, tidy

In [6]:
# Count susC / susD annotations per genome and add a short "MAG n" label
# (the text after 'SB-' in the genome name) to each row.
counts = ctgene(df, ['susC', 'susD'])
splits = [i.split('SB-')[1] for i in counts[0]['Genome Name']]
counts[0]['Shortname'] = splits

# MAG 3 and MAG 55 are listed among the tree labels below but are added here
# by hand with zero counts -- presumably they have no susC/susD hits in the
# input table; TODO confirm.
manualrows = pd.DataFrame({'Genome Name': ['MAG 3', 'MAG 55'], 'susC': [0,0], 'susD': [0,0], 'Shortname': ['MAG 3','MAG 55']})

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and yields the same frame.
counts_edited = pd.concat([counts[0], manualrows], ignore_index=True)

# Desired row order for plotting (reversed list of MAG labels).
tree_abbs = ['MAG 5', 'MAG 69', 'MAG 4', 'MAG 2', 'MAG 1', 'MAG 45',
             'MAG 83', 'MAG 6', 'MAG 56', 'MAG 14', 'MAG 84', 'MAG 15',
             'MAG 3', 'MAG 39', 'MAG 42', 'MAG 55', 'MAG 46', 'MAG 11', 'MAG 62']
tree_abbs.reverse()

# Index label of the first row carrying each short name, in tree order.
# An IndexError here means a label in tree_abbs has no row in counts_edited.
reindex_list = [
    counts_edited.index[counts_edited['Shortname'] == abb][0]
    for abb in tree_abbs
]

counts_edited = counts_edited.reindex(reindex_list)
counts_edited

Unnamed: 0,Genome Name,susC,susD,Shortname
8,Saprospiraceae bacterium SB-MAG 62,11,6,MAG 62
11,Saprospiraceae bacterium SB-MAG 11,18,6,MAG 11
10,Phaeodactylibacter sp. SB-MAG 46,16,10,MAG 46
19,MAG 55,0,0,MAG 55
6,Flavobacteriales bacterium SB-MAG 42,2,2,MAG 42
2,Flavobacteriales bacterium SB-MAG 39,1,2,MAG 39
18,MAG 3,0,0,MAG 3
16,Muricauda sp. SB-MAG 15,9,5,MAG 15
4,Bacteroidia bacterium SB-MAG 84,3,4,MAG 84
14,Bacteroidia bacterium SB-MAG 14,41,18,MAG 14


In [5]:
def grouped_bars(axes, data, group_names, group_width=None, along='x'):
    """Draw one cluster of side-by-side bars per row of ``data``.

    Parameters
    ----------
    axes
        Toyplot cartesian axes to draw on. Its y-axis tick locator is replaced
        with an explicit locator labelled by ``group_names``.
    data : numpy.ndarray
        2-D array; each row is a group and each column a series.
    group_names
        Labels handed to ``toyplot.locator.Explicit``, one per group.
    group_width : float, optional
        Total width of a group in axis units. Defaults to
        ``1 - 1 / (n_series + 1)``, which leaves a gap between groups.
    along : str
        Accepted for API compatibility but currently unused: bars are always
        drawn with ``along='y'`` and labels always go on the y axis.

    Returns
    -------
    list
        The bar marks, one per series (column of ``data``).
    """
    n_groups, n_series = data.shape[0], data.shape[1]

    if group_width is None:
        group_width = 1 - 1.0 / (n_series + 1)

    # Each group is centred on its integer position 0..n_groups-1.
    group_start = np.arange(n_groups, dtype="float") - (group_width / 2.0)
    width_per_bar = group_width / n_series

    axes.y.ticks.locator = toyplot.locator.Explicit(labels=group_names)

    return [
        axes.bars(
            group_start + (position * width_per_bar),
            group_start + ((position + 1) * width_per_bar),
            values,
            opacity=0.5,
            along='y',
        )
        for position, values in enumerate(data.T)
    ]

In [6]:
# Stand-alone horizontal grouped bar chart of susD / susC counts per MAG.
x, y = 600, 500
data = np.asarray(counts_edited[['susD','susC']].values.tolist())

canvas = toyplot.Canvas(width=x, height=y)
axes = canvas.cartesian(
    bounds=(x/2.5, -x/6, x/12, -50),
    label="Genes annotated as susD or susC in Shark Bay MAGs",
    xlabel='count',
)
marks = grouped_bars(axes, data, group_names=counts_edited['Shortname'], along='y')
axes.y.ticks.labels.angle = -90

# Legend swatches use the first two palette colours: data column 0 is susD
# (m1) and column 1 is susC (m2).
colors = toyplot.color.Palette()
m1, m2 = (
    toyplot.marker.create(
        shape="s",
        size=15,
        mstyle={'fill': colors.css(palette_index), 'opacity': 0.5, 'stroke-opacity': 0},
    )
    for palette_index in (0, 1)
)
canvas.legend([('susC', m2), ('susD', m1)], corner=("right", 0, 100, 50));

In [14]:
# Composite figure: tree on the left (ax0), gene-count bars on the right (ax1).
canvas = toyplot.Canvas(width=600, height=500, style={'background-color':'white'})

# Full-canvas axes used only to carry the figure title; both of its axis
# lines are hidden. bounds are (xmin, xmax, ymin, ymax).
megax = canvas.cartesian(
    bounds=(0, 600, 0, 500),
    label='Predicted susC/D genes in Shark Bay Bacteroidetes MAGs',
)
megax.y.show = False
megax.x.show = False
megax.label.show = True

# The two panels share the same vertical extent and y range.
ax0 = canvas.cartesian(
    bounds=(20, 299, 20, 400), padding=15, ymin=0, ymax=20,
)
ax1 = canvas.cartesian(
    bounds=(301, 580, 20, 400), padding=15, ymin=0, ymax=20, xlabel='gene count',
)
ax0.show = False
# The four rectangles that used to be drawn here went onto `axes`, a variable
# left over from the earlier stand-alone bar-chart cell (a different canvas),
# and r1..r4 were immediately reassigned by the ax0 rectangles that follow.
# They had no effect on this figure and made the cell depend on hidden kernel
# state, so they are removed.

# Tip labels for the tree.
simple_labels = ['MAG 5', 'MAG 69', 'MAG 4', 'MAG 2', 'MAG 1', 'MAG 45',
                 'MAG 83', 'MAG 6', 'MAG 56', 'MAG 14', 'MAG 84', 'MAG 15',
                 'MAG 3', 'MAG 39', 'MAG 42', 'MAG 55', 'MAG 46', 'MAG 11', 'MAG 62']

simple_labels.reverse()

# Draw the phylogeny into the left-hand panel.
tre.draw(
    tip_labels_align=True,
    tip_labels = simple_labels,
    axes = ax0
)

# Shade four bands on the tree panel, one toytree colour per band. Every band
# spans -1..0.32 on the first axis; only the extent on the second axis changes.
# The same four colours are used for the class legend below.
r1, r2, r3, r4 = (
    ax0.rectangle(
        -1, 0.32, start, stop,
        opacity=0.15,
        color=toytree.colors[shade],
    )
    for shade, (start, stop) in enumerate(
        [(-0.5, 3.5), (3.5, 7.5), (7.5, 9.5), (9.5, 18.5)]
    )
)



# One square swatch per class, in the same toytree colours as the shaded
# bands on the tree panel.
markers = [
    toyplot.marker.create(
        shape='s',
        mstyle={'fill': toytree.colors[shade], 'stroke-opacity': 0, 'opacity': 0.5},
    )
    for shade in range(4)
]

row1 = [('Saprospiria', markers[0]),
        ('Flavobacteriia', markers[1])]
row2 = [('Bacteroidia', markers[2]),
        ('Cytophagia', markers[3])]

font = ImageFont.load_default()


def _label_width(text):
    """Return the rendered width of ``text`` in the default PIL font."""
    # ImageFont.getsize() was removed in Pillow 10; getlength() is its
    # replacement. Fall back to getsize() for older Pillow releases.
    if hasattr(font, 'getlength'):
        return font.getlength(text)
    return font.getsize(text)[0]


# Lay the entries out left to right, one single-entry legend each, advancing
# by a fixed 100 px plus 1.2x the label's text width. Each entry is a
# (label, marker) tuple, so the width is measured on the label string only.
for legend_y, legend_row in ((410, row1), (430, row2)):
    next_x = 0
    for legend in legend_row:
        canvas.legend([
            legend
        ], rect=(next_x, legend_y, 150, 30))
        next_x = next_x + 100 + (1.2 * _label_width(legend[0]))

# Right-hand panel: grouped susD / susC bars. The y axis is hidden on this
# panel.
x, y = 600, 500
data = np.asarray(counts_edited[['susD','susC']].values.tolist())
marks = grouped_bars(ax1, data, group_names=counts_edited['Shortname'], along='y')
ax1.y.ticks.labels.show = False
ax1.y.show = False

# Legend swatches use the first two palette colours: data column 0 is susD
# (m1) and column 1 is susC (m2).
colors = toyplot.color.Palette()
m1, m2 = (
    toyplot.marker.create(
        shape="s",
        size=15,
        mstyle={'fill': colors.css(palette_index), 'opacity': 0.5, 'stroke-opacity': 0},
    )
    for palette_index in (0, 1)
)
canvas.legend([('susC', m2), ('susD', m1)], corner=("top-right", 50, 40, 35));



In [15]:
# Write the composite figure (the current `canvas`) to disk as a PNG.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top.
import toyplot.png
toyplot.png.render(canvas, "susCD.png")

In [30]:
# Sulfatase annotation table (passed to ctgene below, which reads its
# 'Genome Name' and 'Gene Product Name' columns) and the per-genome taxonomy
# table (its 'Genome Name / Sample Name' and 'Class' columns are used below).
# Both are read with the default comma separator, unlike the susC/D table.
df_sulfatases = pd.read_csv('Data/bacteroidetes-sulfatases.csv')
taxondf = pd.read_csv('Data/bacteroidetes-MAGs-taxontable.csv')

In [100]:
# Count annotations whose product name contains 'sulfatase', per genome.
sulfatase_counts = ctgene(df_sulfatases, ['sulfatase'])
# NOTE(review): this removes the row whose integer index label is 6, in place.
# Which genome that is depends entirely on the row order ctgene() returns, and
# nothing here records which genome is meant or why it is excluded. Consider
# dropping by 'Genome Name' instead -- TODO confirm the intended genome.
sulfatase_counts[0].drop(index=6, inplace= True)

In [101]:
# Short "MAG n" label = the text after 'SB-' in each genome name. A name
# without 'SB-' raises IndexError here.
# NOTE(review): `splits` reuses the name assigned in the susC/D cell above and
# is read again when sulfatase_table is built.
splits = [i.split('SB-')[1] for i in sulfatase_counts[0]['Genome Name']]
# Adds the column to the wide counts frame in place.
sulfatase_counts[0]['Shortname'] = splits
sulfatase_counts[0]

Unnamed: 0,Genome Name,sulfatase,Shortname
0,Aureispira sp. SB-MAG 55,1,MAG 55
1,Bacteroidia bacterium SB-MAG 14,10,MAG 14
2,Bacteroidia bacterium SB-MAG 84,1,MAG 84
3,Cyclobacteriaceae bacterium SB-MAG 2,1,MAG 2
4,Cyclobacteriaceae bacterium SB-MAG 56,22,MAG 56
5,Cyclobacteriaceae bacterium SB-MAG 6,3,MAG 6
7,Cyclobacteriaceae bacterium SB-MAG 83,15,MAG 83
8,Cytophagales bacterium SB-MAG 45,20,MAG 45
9,Flavobacteriales bacterium SB-MAG 42,1,MAG 42
10,Fulvivirga sp. SB-MAG 5,2,MAG 5


In [104]:
# For every genome in the sulfatase table, look up its taxonomic class (from
# taxondf, matched on the full genome name) and its susC / susD counts (from
# counts_edited, matched on the short MAG label). The first match is used; a
# genome with no match raises IndexError.
classes, susC, susD = [], [], []
for i, row in sulfatase_counts[0].iterrows():
    taxon_rows = taxondf[taxondf['Genome Name / Sample Name'] == row['Genome Name']]
    classes.append(taxon_rows['Class'].tolist()[0])

    susC_rows = counts_edited[counts_edited['Shortname'] == row['Shortname']]
    susC.append(susC_rows['susC'].tolist()[0])

    susD_rows = counts_edited[counts_edited['Shortname'] == row['Shortname']]
    susD.append(susD_rows['susD'].tolist()[0])



In [105]:
# Column data for the summary table, one entry per row of sulfatase_counts[0].
# The MAG labels are read from the frame's own 'Shortname' column rather than
# the global `splits`, which is also assigned (with different content) by the
# susC/D cell and would be stale if cells are re-run out of order.
sulfatase_table = {
    'MAG': sulfatase_counts[0]['Shortname'].tolist(),
    'Class': classes,
    'susC': susC,
    'susD': susD,
    'Sulfatases': sulfatase_counts[0]['sulfatase'].tolist(),
}

In [148]:
# Rebinds sulfatase_table from the dict of columns to a DataFrame sorted by
# sulfatase count, highest first.
sulfatase_table = pd.DataFrame(sulfatase_table).sort_values('Sulfatases', ascending=False)
# Total number of sulfatase annotations across the listed MAGs.
print(sum(sulfatase_table['Sulfatases']))

88


In [147]:
# Render the summary DataFrame as a toyplot table on a fresh canvas.
# NOTE(review): nothing in this cell references toyplot.data directly -- the
# import may be unnecessary; confirm before removing.
import toyplot.data
canvas = toyplot.Canvas(width=500, height=400)
table = canvas.table(sulfatase_table)
# Fixed width for the columns at positions 0 and 3.
table.cells.column[[0,3]].width = 70
# Left-align the columns at positions 0 and 1.
table.cells.column[0,1].align = 'left'