In [None]:
import deepwaveform as dwf
import numpy as np
import torch
from sklearn.cluster import KMeans

In [None]:
# Input strips to annotate; each one gets a sibling "<name>_annotated.csv".
targets = ["../data/strip_137.csv", "../data/strip_139.csv"]

# Restore the trained networks and switch them to inference mode.
# map_location="cpu" lets checkpoints that were saved from a GPU load on a
# CPU-only machine; load_state_dict() copies the values into the freshly
# constructed modules either way.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from
# a trusted source.
model_conv = dwf.ConvNet(output_dimension=2)
model_conv.load_state_dict(
    torch.load("trained_models/classifier.pt", map_location="cpu")
)
model_conv.eval()

model_ae = dwf.AutoEncoder(hidden=12)
model_ae.load_state_dict(
    torch.load("trained_models/autoencoder.pt", map_location="cpu")
)
model_ae.eval()

# Rebuild the k-means predictor from previously exported cluster centres
# instead of re-fitting it. The number of clusters is taken from the file so
# the estimator cannot disagree with the centres it is given.
cluster_centers = np.genfromtxt('trained_models/cluster_centers.csv', delimiter=';')
kmeans = KMeans(n_clusters=len(cluster_centers))
# NOTE(review): predict() on an estimator that was never fit() relies on
# scikit-learn internals; newer releases may expect attributes that only
# fit() sets -- confirm against the installed version.
kmeans.cluster_centers_ = cluster_centers

CS = 10**5  # number of rows requested per chunk from dwf.load_dataset

# Column groups removed before writing the annotated file. Built once, under
# their own names, so they cannot be confused with the chunk counter below.
# NOTE(review): "0".."295" are presumably the raw waveform samples -- confirm.
raw_cols = [str(col) for col in range(296)]
hidden_cols = ["hidden_%d" % col for col in range(12)]
reconstr_cols = ["reconstr_%d" % col for col in range(64)]

for target in targets:
    print("annotating %s" % target)
    out_path = target[:-4] + "_annotated.csv"  # strip the ".csv" suffix
    n_annotated = 0

    for chunk_no, chunk in enumerate(dwf.load_dataset(target, chunksize=CS)):
        # Both calls are relied on to add their columns to `chunk` in place
        # (their return values are not used).
        model_conv.annotate_dataframe(chunk,
                                      class_label_mapping=["Land", "Water"],
                                      predicted_column="Predicted")
        model_ae.annotate_dataframe(chunk)

        # Assign every waveform to a cluster based on its hidden_* columns.
        mat_hidden = dwf.waveform2matrix(chunk, wv_cols=hidden_cols)
        chunk["Cluster"] = kmeans.predict(mat_hidden)

        # Keep only the remaining columns plus the new annotations.
        annotated = chunk.drop(columns=raw_cols + hidden_cols + reconstr_cols)

        # The first chunk truncates the output file and writes the header;
        # later chunks append. Re-running the cell therefore regenerates the
        # file instead of appending duplicate rows and a second header.
        is_first = chunk_no == 0
        annotated.to_csv(out_path, header=is_first, sep=";",
                         mode="w" if is_first else "a")

        # Count actual rows so a short final chunk is not over-reported.
        n_annotated += len(chunk)
        print("annotated %d waveforms..." % n_annotated)
    print()

In [None]:
# Take the first item yielded by the loader for one annotated strip.
# NOTE(review): samplesize presumably limits this to a 100000-row sample -- confirm.
annotated_loader = dwf.load_dataset("../data/strip_139_annotated.csv", samplesize=100000)
df = next(annotated_loader)

In [None]:
# Plot the sampled frame using its "Cluster" column as the target column;
# the second positional argument is left as None.
pcl_options = {"targetcol": "Cluster", "use_plotly": True, "inv_z": False}
dwf.plot_pcl(df, None, **pcl_options)