In [None]:
from sklearn.datasets import load_iris
iris = load_iris()

In [None]:
X = iris['data']
y = iris['target']
names = iris['target_names']

In [None]:
import panel as pn
import pandas as pd
import hvplot.pandas 
import holoviews as hv 
import janitor

from panel.interact import fixed
hv.extension("bokeh")

%load_ext autoreload
%autoreload 2
%matplotlib inline

## Iris Tab

In [4]:
mapping = {i:v for i, v in enumerate(iris['target_names'])}
df = pd.DataFrame(X)
df.columns = iris['feature_names']
df['flower_type'] = iris['target']
df['flower_type'] = df['flower_type'].apply(lambda x: mapping.get(x))

In [5]:
def pairplot(df, x, y):
    return df.hvplot.scatter(x=x, y=y, c="flower_type").opts(width=600, height=400)

x = pn.widgets.Select(options=iris['feature_names'])
y = pn.widgets.Select(options=iris['feature_names'])

scatter = pn.interact(pairplot, df=fixed(df), x=iris['feature_names'], y=iris['feature_names'])

In [6]:
scatter_txt = pn.pane.Markdown("""
# Iris Dataset

Pick the x- and y-axes from the dropdown menus to explore how the three flower types differ from one another.
""")

scatter_tab = pn.Column(scatter_txt, scatter)

## HIV Tab

In [7]:
intro_hiv = pn.pane.Markdown("""
# HIV Resistance Prediction

This shows how to write an app that accepts a file input
and returns a model prediction.
""")

In [25]:
from dask.distributed import LocalCluster, Client

cluster = LocalCluster()
client = Client(cluster)

Port 8787 is already in use. 
Perhaps you already have a cluster running?
Hosting the diagnostics dashboard on a random port instead.
distributed.scheduler - INFO - Receive client connection: Client-cbaa4df0-b331-11e9-9665-b0c090a5d351
distributed.core - INFO - Starting established connection


In [18]:
import numpy as np

molecular_weights = {
    'A':89.0935,
    'R':174.2017,
    'N':132.1184,
    'D':133.1032,
    'C':121.1590,
    'E':147.1299,
    'Q':146.1451,
    'G':75.0669,
    'H':155.1552,
    'I':131.1736,
    'L':131.1736,
    'K':146.1882,
    'M':149.2124,
    'F':165.1900,
    'P':115.1310,
    'S':105.0930,
    'T':119.1197,
    'W':204.2262,
    'Y':181.1894,
    'V':117.1469,
    'X':100.00
}


def featurize_sequence_(x):
    """x: a string in a pandas DataFrame cell"""
    feats = np.zeros(len(x))
    for i, letter in enumerate(x):
        feats[i] = molecular_weights[letter]
    return feats

In [53]:
drugs = ['ATV', 'DRV', 'FPV', 'IDV', 'LPV', 'NFV', 'SQV', 'TPV']

data = (
    pd.read_csv("data/hiv-protease-data-expanded.csv", index_col=0)
    .query("weight == 1.0")
    .transform_column("sequence", lambda x: len(x), "seq_length")
    .query("seq_length == 99")
    .transform_column("sequence", featurize_sequence_, "features")
    .transform_columns(drugs, np.log10)
)
data.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df[dest_column_name] = df[column_name].apply(function)


Unnamed: 0,ATV,DRV,FPV,IDV,LPV,NFV,SQV,SeqID,TPV,seqid,sequence,sequence_object,weight,seq_length,features
6,1.50515,,0.477121,1.544068,1.50515,1.462398,2.214844,4426,,4426-0,PQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKM...,ID: 4426-0\nName: <unknown name>\nDescription:...,1.0,99,"[115.131, 146.1451, 131.1736, 119.1197, 131.17..."
7,,,0.176091,0.0,,0.342423,0.041393,4432,,4432-0,PQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKM...,ID: 4432-0\nName: <unknown name>\nDescription:...,1.0,99,"[115.131, 146.1451, 131.1736, 119.1197, 131.17..."
14,,,0.491362,0.939519,,1.50515,1.227887,4664,,4664-0,PQITLWQRPIVTIKVGGQLIEALLDTGADDTVLEEINLPGRWKPKM...,ID: 4664-0\nName: <unknown name>\nDescription:...,1.0,99,"[115.131, 146.1451, 131.1736, 119.1197, 131.17..."


In [54]:
features = pd.DataFrame(np.vstack(data['features'])).set_index(data.index)

In [102]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score


def fit_model(data, features, target):
    import janitor
    model = RandomForestRegressor(n_estimators=300)
    
    resistance_data = features.join(data[target]).dropna()
    X, y = resistance_data.get_features_targets(target_column_names=target)
    
    model.fit(X, y)
    return model


def cross_validate(data, features, target):
    import janitor
    model = RandomForestRegressor(n_estimators=500)
    
    resistance_data = features.join(data[target]).dropna()
    X, y = resistance_data.get_features_targets(target_column_names=target)
    
    return -cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=5)


def predict(model, sequence):
    """
    :param model: sklearn model
    :param sequence: A string, should be 99 characters long.
    """
    assert len(sequence) == 99
    assert set(sequence) == set(molecular_weights.keys())
    
    seqfeat = featurize_sequence_(sequence)
    return model.predict(seqfeat)
    
    

dataf = client.scatter(data)
featuresf = client.scatter(features)


models = dict()
scores = dict()


for drug in drugs:
    models[drug] = client.submit(fit_model, dataf, featuresf, drug)
    scores[drug] = client.submit(cross_validate, dataf, featuresf, drug)



In [114]:
# client.gather(models)

In [115]:
# client.gather(scores)

In [112]:
model_performance = pd.DataFrame(client.gather(scores))
perfplot = model_performance.hvplot.box().opts(xlabel="drug", ylabel="mse")

perfplot

In [136]:
sequence = pn.widgets.TextInput(name="Protein Sequence")

seqlength = pn.widgets.StaticText(value=len(sequence.value))

def callback(target, event):
    return len(target.value)

seqlength.link(sequence, callbacks=[callback])
pn.Row(sequence, seqlength)

ValueError: <function callback at 0x7f664a52e510> parameter was not found in list of parameters of class StaticText

In [126]:
fasta_file.save("hello.png")

TypeError: a bytes-like object is required, not 'NoneType'

## Intro Tab

In [None]:
intro_txt = pn.pane.Markdown("""
# Minimal Panel Example

This is a minimal Panel example that shows you how to serve and deploy a Panel app.

Panel is a dashboarding toolkit that works inside and outside of Jupyter notebooks.
You can prototype your dashboard visualizations inside a Jupyter notebook, 
and then choose how you want to serve it:

- As a standalone `.py` file
- Served using a Jupyter notebook

Click on the next tab to see a plot generated using Panel and hvPlot.

The source code for this project can be found [here](https://github.com/ericmjl/minimal-panel-app).
""")

## Interface

In [None]:
tabs = dict()
tabs['Introduction'] = intro_txt
tabs['1. Iris'] = scatter_tab

def dict2tuple(d):
    return tuple(zip(*zip(*d.items())))

dict2tuple(tabs)
app = pn.Tabs(*dict2tuple(tabs))
app.servable()