# Cluster some Data
## do k-means clustering (currently with scikit-learn's KMeans; the Keras / TensorFlow version is still a stub)
### this example uses the band-limited RMS of some seismometers
* http://learningtensorflow.com/lesson6/
* https://codesachin.wordpress.com/2015/11/14/k-means-clustering-with-tensorflow/
* http://napitupulu-jon.appspot.com/posts/kmeans-ud120.html
* https://www.datascience.com/blog/introduction-to-k-means-clustering-algorithm-learn-data-science-tutorials

In [None]:
%matplotlib inline

from __future__ import division

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import scipy.constants as scc
from scipy.io import loadmat
import scipy.signal as sig
import sys
from timeit import default_timer as timer

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation
from sklearn.cluster import KMeans


# Palette of non-awful colors for plot traces.
# Each entry is an (R, G, B, alpha) tuple; the RGB triples are listed below
# and the shared opacity of 0.9 is appended to every one of them.
cList = list(
    rgb + (0.9,)
    for rgb in (
        (0.1, 0.1, 1.0),   # blue
        (1.0, 0.1, 0.1),   # red
        (0, 0.7, 0),       # green
        (1.0, 0, 0.9),     # magenta
        (0.8, 0.8, 0),     # dark yellow
        (0, 0.6, 0.9),     # sky blue
        (1, 0.5, 0),       # orange
        (0.5, 0.5, 0.5),   # gray
        (0.4, 0, 0.5),     # purple
        (0, 0, 0),         # black
        (0.5, 0.3, 0),     # brown
        (0, 0.3, 0),       # dark green
    )
)

# Now alter my matplotlib parameters.
# The old 'axes.color_cycle' key has been removed from matplotlib's rcParams
# (updating it raises KeyError on current releases); its replacement,
# 'axes.prop_cycle', takes a Cycler, built here from the cList palette.
mpl.rcParams.update({'axes.prop_cycle': mpl.cycler(color=cList),
                     'axes.grid': True,
                     'font.family': 'serif',
                     'font.size': 12,
                     #'font.serif': 'Palatino Linotype',
                     'grid.color': 'k',
                     'grid.linestyle': '-',
                     'grid.alpha': 0.2,
                     'grid.linewidth': 1,
                     'legend.borderpad': 0.2,
                     'legend.fancybox': True,
                     'legend.fontsize': 8,
                     'legend.framealpha': 0.7,
                     'legend.handletextpad': 0.1,
                     'legend.labelspacing': 0.2,
                     'legend.loc': 'best',
                     'lines.linewidth': 1.5,
                     'savefig.bbox': 'tight',
                     'savefig.pad_inches': 0.02,
                     'text.usetex': False,
                     'text.latex.preamble': r'\usepackage{txfonts}'
                     })
# Saved figures are rendered at 200 dpi; new figures default to 5x5 inches.
mpl.rc("savefig", dpi=200)
mpl.rc("figure", figsize=(5, 5))

# Interferometer label; presumably matches the 'H1' prefix of the data file
# loaded below -- it is not referenced by any other visible cell.
ifo = 'H1'

In [None]:
# Load the MATLAB file and orient the BLRMS array so that rows are time
# samples and columns are channels (see the npts/nchans unpacking below).
data = loadmat('Data/H1_SeismicBLRMS_March.mat')
blrms = data['data'].T
#channels = data['chans']
npts, nchans = blrms.shape

# Time stamp for every sample: 0, 60, 120, ... (one value per row of blrms).
tt = np.arange(0, npts * 60, 60)

In [None]:
# Create the op that initializes all TensorFlow variables (TF 1.x style API).
# NOTE(review): `model` is never passed to session.run() below, so the
# initializer is built but not executed in this cell.
model = tf.global_variables_initializer()

# Evaluate two graph objects in a session and keep the results.
# NOTE(review): `samples` and `centroids` are not defined in any cell visible
# here; unless they are created elsewhere, this cell raises NameError.
# Presumably they come from the k-means tutorial linked at the top -- confirm.
with tf.Session() as session:
    sample_values = session.run(samples)
    centroid_values = session.run(centroids)

### Plot the BLRMS minute trend of the seismic data

In [None]:
# Overlay the first few BLRMS channels against time, with a log-scaled y axis.
plt.figure(figsize=(10, 4))
tdays = tt / 60 / 60 / 24  # convert the time stamps to days for the x axis

# Plot at most 6 traces, but never more than the data has columns.
ntraces = min(6, blrms.shape[1])

# `mpl.cm.spectral` was removed from matplotlib; `nipy_spectral` is the same
# colormap under its current name.
cmap = mpl.cm.nipy_spectral
for zz in range(ntraces):
    # Spread the traces evenly over the 256 colormap entries. Integer
    # arithmetic keeps the index independent of the division semantics.
    plt.semilogy(tdays, blrms[:, zz], alpha=0.75,
                 c=cmap((256 * zz) // ntraces))

plt.xlabel('Time [days]')
plt.show()

In [None]:
# Assign every BLRMS sample to one of 10 k-means clusters and report how
# long the fit took.
random_state = 170  # fixed seed passed to KMeans
tic = timer()
clusterer = KMeans(n_clusters=10, random_state=random_state)
y_pred = clusterer.fit_predict(blrms)
tdays = tt / 60 / 60 / 24
elapsed = round(timer() - tic, 2)
print(str(elapsed) + " seconds elapsed...")


In [None]:
# Two stacked scatter panels of BLRMS versus time, colored by cluster label.
plt.figure(figsize=(12, 6))

# One row per panel: (subplot position, blrms column, y-axis limits)
panels = (
    (211, 0, (10, 1e4)),
    (212, 1, (1e2, 5e3)),
)
for position, column, ylimits in panels:
    plt.subplot(position)
    plt.scatter(tdays, blrms[:, column], c=y_pred, alpha=0.5, s=4)
    plt.yscale('log')
    plt.ylim(*ylimits)
    plt.ylabel('Velocity [microns/sec]')

# Only the bottom panel (the current axes once the loop ends) gets an x label.
plt.xlabel('Time [days]')

plt.show()


In [None]:
# Keep a handle on the raw array as stored in the file (blrms is its transpose).
x = data['data']

In [None]:
# Show which variables the loaded .mat file contains.
list(data.keys())

In [None]:
# Inspect the 'chans' entry; presumably the channel names for the columns
# of blrms -- confirm against the file.
data['chans']

In [None]:
# Check the shape of the cluster-label array returned by fit_predict.
y_pred.shape