In [26]:
import pandas as pd
from bokeh.charts import Scatter, show, output_file, ColumnDataSource
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.models import HoverTool, BoxSelectTool
output_notebook()

In [27]:
### Dictionary of the number of A+T in each Amino Acid's Codon. 
pctat_dict = {
    'Alanine': (2/12),
    'Arginine': (5/18),
    'Asparagine': (5/6),
    'Aspartic acid': (3/6),
    'Cysteine': (3/6),
    'Glutamic acid': (3/6),
    'Glutamine': (3/6),
    'Glycine': (2/12),
    'Histidine': (3/6),
    'Isoleucine': (8/9),
    'Leucine': (6/12),
    'Lycine': (5/6),
    'Methionine': (2/3),
    'Phenylalanine': (5/6),
    'Proline': (2/12),
    'Serine': (9/18),
    'Threonine': (6/12),
    'Tryptophane': (1/3),
    'Tyrosine': (5/6),
    'Valine': (6/12)}

aa = ['Alanine', 'Arginine', 'Asparagine', 'Aspartic acid', 'Cysteine', 'Glutamic acid', 'Glutamine',  'Glycine',
      'Histidine', 'Isoleucine', 'Leucine', 'Lycine', 'Methionine', 'Phenylalanine', 'Proline', 'Serine','Threonine',  
      'Tryptophane', 'Tyrosine', 'Valine']
mw = [89.094, 174.203, 132.119, 133.104, 121.154, 147.131, 146.146, 75.067, 155.156, 131.175, 131.175, 146.189, 149.208,
      165.192, 115.132, 105.093, 119.119, 204.228, 181.191, 117.148]
pct_at = [pctat_dict['Alanine'], pctat_dict['Arginine'], pctat_dict['Asparagine'], pctat_dict['Aspartic acid'], pctat_dict['Cysteine'],
       pctat_dict['Glutamic acid'], pctat_dict['Glutamine'], pctat_dict['Glycine'], pctat_dict['Histidine'], pctat_dict['Isoleucine'],
       pctat_dict['Leucine'], pctat_dict['Lycine'], pctat_dict['Methionine'], pctat_dict['Phenylalanine'], pctat_dict['Proline'],
       pctat_dict['Serine'], pctat_dict['Threonine'], pctat_dict['Tryptophane'], pctat_dict['Tyrosine'], pctat_dict['Valine']]
biochem = ['nonpolar', 'basic', 'polar', 'acidic', 'nonpolar', 'acidic', 'polar',  'nonpolar',
      'basic', 'nonpolar', 'nonpolar', 'basic', 'nonpolar', 'nonpolar', 'nonpolar', 'polar','polar',  
      'nonpolar', 'polar', 'nonpolar']
biochem_col = ['blue', 'green', 'orange', 'red', 'blue', 'red', 'orange',  'blue',
      'green', 'blue', 'blue', 'green', 'blue', 'blue', 'blue', 'orange','orange',  
      'blue', 'orange', 'blue']

### Convert Lists to Pandas Dataframe
aa_dict = {
    'Amino_acid': aa,
    'Molecular_weight': mw,
    'Biochem_property': biochem,
    'Percent_AT': pct_at}

AA = pd.DataFrame(aa_dict)

row_labels = ['Ala','Arg','Asn','Asp','Cys','Glu','Gln','Gly','His','Ile','Leu','Lys','Met','Phe','Pro','Ser','Thr','Trp','Tyr','Val']
AA.index = row_labels

print(AA)

        Amino_acid Biochem_property  Molecular_weight  Percent_AT
Ala        Alanine         nonpolar            89.094    0.166667
Arg       Arginine            basic           174.203    0.277778
Asn     Asparagine            polar           132.119    0.833333
Asp  Aspartic acid           acidic           133.104    0.500000
Cys       Cysteine         nonpolar           121.154    0.500000
Glu  Glutamic acid           acidic           147.131    0.500000
Gln      Glutamine            polar           146.146    0.500000
Gly        Glycine         nonpolar            75.067    0.166667
His      Histidine            basic           155.156    0.500000
Ile     Isoleucine         nonpolar           131.175    0.888889
Leu        Leucine         nonpolar           131.175    0.500000
Lys         Lycine            basic           146.189    0.833333
Met     Methionine         nonpolar           149.208    0.666667
Phe  Phenylalanine         nonpolar           165.192    0.833333
Pro       

# High-level Bokeh plotting

In [28]:
p = Scatter(AA, x='Percent_AT', y='Molecular_weight', color="Biochem_property", legend="top_left",
            xlabel="Percent AT", ylabel="Molecular Weight")
show(p)

### But it doesn't tell you which points correspond to which Amino Acid
### (At least it gets pretty messy if you try)

# Low-level Bokeh plotting
### combining figure.circle with hovertool

In [29]:
def scatter_with_hover(df, x, y,
                       fig=None, hov_cols=None, name=None,
                       fig_width=500, fig_height=500, **kwargs):

    if fig is None:
        fig = figure(width=fig_width, height=fig_height, tools=['box_zoom', 'reset', 'save'])

    source = ColumnDataSource(data=df)

    if name is None:
        name = 'main'

    fig.circle(df[x], df[y], source=source, name=name, **kwargs)

    hover = HoverTool(names=[name])

    if hov_cols is None:
        hover.tooltips = [(c, '@' + c) for c in df.columns]
    else:
        hover.tooltips = [(c, '@' + c) for c in hov_cols]

#     hover.tooltips.append(('index', '$index'))

    fig.add_tools(hover)

    return fig

In [31]:
fig = figure(x_axis_label="Percent AT", y_axis_label="Molecular Weight (g/mol)", title='Percent AT vs Amino Acid MW')

p = scatter_with_hover(df = AA, x = 'Percent_AT', y = 'Molecular_weight', fig = fig, 
                         hov_cols = ['Amino_acid','Molecular_weight','Percent_AT', 'Biochem_property'], color = biochem_col, size = 10)

output_file("scatter.html")

show(p)

Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)


## Now you can see that with the exceptions of Tryptophane and Arginine, there is a positive correlation of percent AT in each Amino Acid's codon and their respective molecular weights.
## This isn't something new, but it is often forgotten and overlooked.