In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
# Load the tab-separated input file into a DataFrame; later cells read the
# `rate` and `rateC` columns from it.
data = pd.read_csv("./asm_data_for_ml.txt", sep='\t')

In [7]:
def mask(cola, colb, color=None, p1=99.9, p2=99.9):
    """Trim the upper tails of two aligned columns.

    An entry is kept only when its ``cola`` value is strictly below the
    ``p1``-th percentile of ``cola`` AND its ``colb`` value is strictly below
    the ``p2``-th percentile of ``colb``.

    Parameters
    ----------
    cola, colb : array-like supporting ``<`` and boolean indexing
        The two columns to filter jointly.
    color : array-like, optional
        Extra per-entry values filtered with the same selection.
    p1, p2 : float
        Percentile cut-offs (0-100) for ``cola`` and ``colb`` respectively.

    Returns
    -------
    tuple
        ``(cola_kept, colb_kept, color_kept)``; ``color_kept`` is ``None``
        when no ``color`` was supplied.
    """
    cutoff_a = np.percentile(cola, p1)
    cutoff_b = np.percentile(colb, p2)
    keep = (cola < cutoff_a) & (colb < cutoff_b)
    kept_color = color[keep] if color is not None else None
    return cola[keep], colb[keep], kept_color

    

def myhexplot(cola, colb, p1=99.9, p2=99.9, ax=None):
    """Draw a log-scaled hexbin of ``colb`` against ``cola`` after trimming outliers.

    Parameters
    ----------
    cola, colb : array-like
        x and y values. Entries at or above the ``p1``/``p2`` percentile of
        the respective column are removed with ``mask`` before plotting.
    p1, p2 : float
        Percentile cut-offs forwarded to ``mask``.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure is created when omitted.
    """
    if ax is None:
        _fig, ax = plt.subplots()
    cola, colb, _ = mask(cola, colb, p1=p1, p2=p2)
    ax.hexbin(cola, colb, cmap='viridis', bins='log')
    # Label each axis from its own input. Checking only `cola` for a `name`
    # attribute and then reading `colb.name` fails when just one of the two
    # inputs is a named object (e.g. a Series paired with a plain ndarray).
    xlabel = getattr(cola, "name", None)
    ylabel = getattr(colb, "name", None)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

In [17]:
from matplotlib.widgets import Slider
from matplotlib.colors import ListedColormap

def timeline_plot(cola, colb, probabilities=None, window=100):
    """Interactive view that scrolls a window of consecutive samples over a hexbin.

    The upper axes shows a log-scaled hexbin of all ``(cola, colb)`` points
    with a scatter of a contiguous run of samples drawn on top. The lower
    axes hosts a slider that selects where that run starts and, when
    ``probabilities`` is given, a rolling mean (50 samples) of each
    probability column.

    Parameters
    ----------
    cola, colb : array-like
        x and y values, in sample order; converted with ``np.array``.
    probabilities : array-like of shape (n_samples, n_classes), optional
        Per-sample class probabilities. Each dot is colored by the index of
        its largest column. Only the first four classes get a distinct
        color and trace.
    window : int, default 100
        Number of consecutive samples shown after the slider is moved. The
        initial view shows the first ``5 * window`` samples.
    """
    cola = np.array(cola)
    colb = np.array(colb)
    fig, axes = plt.subplots(2, 1, figsize=(10, 10))
    ax = axes[0]

    ax.hexbin(cola, colb, cmap='viridis', bins='log')

    # One palette shared by the dots and the probability traces.
    class_colors = ['r', 'g', 'b', 'c']
    cm = ListedColormap(class_colors)

    if probabilities is not None:
        probabilities = np.asarray(probabilities)
        preds = np.argmax(probabilities, axis=1)
    else:
        # No class information: put every dot in class 0 (first palette color).
        preds = np.zeros(len(cola), dtype=int)

    # Fix the color scale to the palette size. Without vmin/vmax the scale is
    # taken from whichever classes happen to appear in the first window, so
    # dot colors would not reliably match the trace colors below.
    initial = slice(0, 5 * window)
    dots = ax.scatter(cola[initial], colb[initial], c=preds[initial],
                      alpha=1, s=20, cmap=cm,
                      vmin=0, vmax=len(class_colors) - 1)

    sfreq = Slider(axes[1], 'Start', 0, len(cola), valinit=0, facecolor=None, alpha=.1)
    # Matplotlib only keeps widgets responsive while something references
    # them; tie the slider's lifetime to the figure it belongs to.
    fig._timeline_slider = sfreq

    if probabilities is not None:
        # Use the argument, not a notebook-level global, so the function
        # works on a fresh kernel and always plots the data it was given.
        for prob, color in zip(probabilities.T, class_colors):
            axes[1].plot(pd.Series(prob).rolling(window=50).mean(),
                         color=color, alpha=.6, linewidth=3)

    def update(val):
        start = int(sfreq.val)
        stop = start + window
        # Slicing clamps at the end of the data, so dragging the slider to
        # the far right shows a shorter (possibly empty) run instead of
        # raising IndexError.
        dots.set_offsets(np.c_[cola[start:stop], colb[start:stop]])
        dots.set_array(preds[start:stop])
        fig.canvas.draw_idle()
    sfreq.on_changed(update)

    ax.set_xlim(cola.min(), cola.max())
    ax.set_ylim(colb.min(), colb.max())
    plt.show()

In [18]:

# Features: the ratio rateC / rate and rate itself. `mask` drops entries at or
# above the 99th percentile of the ratio or the 99.99th percentile of rate.
x, y, _ = mask(data.rateC / data.rate, data.rate, p1=99, p2=99.99)
# Stack the two filtered columns side by side; X[:, 0] is the ratio, X[:, 1] the rate.
X = np.c_[x, y]


In [19]:
# Interactive view without class probabilities: every dot gets the same color.
timeline_plot(X[:, 0], X[:, 1])

<IPython.core.display.Javascript object>

In [6]:
from sklearn.mixture import GaussianMixture
# Fit a 3-component Gaussian mixture to X with 5 initializations (n_init=5).
# random_state pins the otherwise random initialization, so re-running the
# cell reproduces the same fit and the same component numbering (and
# therefore the same colors in the plots below).
gmm = GaussianMixture(n_components=3, n_init=5, random_state=0).fit(X)
# Per-sample membership probabilities, one column per mixture component.
probs = gmm.predict_proba(X)

In [20]:
# Static overview: every sample colored by the index of its largest column in `probs`.
plt.figure()
plt.scatter(X[:,0], X[:, 1], c=np.argmax(probs, axis=1))

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7faa51c7fdd8>

In [23]:
# Interactive view with `probs` supplied: dots are colored by their argmax
# class and the lower axes shows rolling-mean probability traces.
timeline_plot(X[:, 0], X[:, 1], probs)

<IPython.core.display.Javascript object>

In [24]:
from sklearn.mixture import GaussianMixture
# Same analysis with 4 mixture components. random_state pins the otherwise
# random initialization so the fit and the component numbering (and hence
# the colors) are reproducible across runs.
gmm4 = GaussianMixture(n_components=4, n_init=5, random_state=0).fit(X)
# NOTE: this rebinds `probs`; from here on it holds the 4-component
# probabilities, not the 3-component ones.
probs = gmm4.predict_proba(X)
plt.figure()
plt.scatter(X[:,0], X[:, 1], c=np.argmax(probs, axis=1))

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7faa5173e3c8>

In [25]:
# Interactive view using whatever `probs` currently holds (the 4-component
# probabilities when the cells are run top to bottom).
timeline_plot(X[:, 0], X[:, 1], probs)

<IPython.core.display.Javascript object>