In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn import decomposition
from mpl_toolkits.mplot3d import Axes3D
import scipy.stats as stats
from sklearn import preprocessing
import os
import pickle

from ipywidgets import widgets
%matplotlib notebook

In [None]:
# search for csv location

found = []

def find(name,path):
    path = os.getcwd() + path
    file = name

    for root, dir, files in os.walk(path):
        for item in files:
            if file == item:
                return os.path.join(root, file)
        return None
data = ['00', '01', '02', '03', '05', '22', '23', '24', '25']
for d in data:
    found.append(find('data_' + d + '.csv.gz', '/irc-sphere-sleep-56db93f64661/sphere-sensor-data'))
    found.append(find('data_' + d + '.csv', '/irc-sphere-sleep-56db93f64661/sphere-sensor-data'))
found

In [None]:
# load file

df = []
for path in found:
    if path is not None:
        if 'gz' in path:
            fileName = path
            df.append(pd.read_csv(fileName, index_col='datetime', compression='gzip'))
        else: 
            fileName = path
            df.append(pd.read_csv(fileName, index_col='datetime'))

In [None]:
def getFeatures(df):

    # arm angle calculation using accelerometer data
    df['angle'] = 180 / np.pi * np.arctan(df['wearable-xl1-z'] / np.sqrt(np.square(df['wearable-xl1-x']) + np.square(df['wearable-xl1-z'])))

    # get only interesting columns
    df = df[['angle', 'wearable-mag-xl1']]

    # remove NaN values
    df = df.dropna()
    # convert index to DatetimeIndex
    df.index = pd.DatetimeIndex(df.index)


    # resample data within interval given in resampleInterval ('1S' = 1 second intervals)
    resampleInterval = '1S'
    minimum = df.resample(resampleInterval).min()
    maximum = df.resample(resampleInterval).max()
    mean = df.resample(resampleInterval).mean()
    std = df.resample(resampleInterval).std()
    summ = df.resample(resampleInterval).sum()
    skew = df.resample(resampleInterval).apply(lambda array : stats.skew(array))
    kurtosis = df.resample(resampleInterval).apply(lambda array: stats.kurtosis(array, fisher=True))

    # rename column labels in DataFrame
    minimum.rename(columns={'angle': 'angle min'}, inplace=True)
    minimum.rename(columns={'wearable-mag-xl1': 'magnitude min'}, inplace=True)
    
    maximum.rename(columns={'angle': 'angle max'}, inplace=True)
    maximum.rename(columns={'wearable-mag-xl1': 'magnitude max'}, inplace=True)
    
    mean.rename(columns={'angle': 'angle mean'}, inplace=True)
    mean.rename(columns={'wearable-mag-xl1': 'magnitude mean'}, inplace=True)
    
    std.rename(columns={'angle': 'angle std'}, inplace=True)
    std.rename(columns={'wearable-mag-xl1': 'magnutude std'}, inplace=True)
    
    summ.rename(columns={'angle': 'angle sum'}, inplace=True)
    summ.rename(columns={'wearable-mag-xl1': 'magnitude sum'}, inplace=True)
    
    skew.rename(columns={'angle': 'angle skew'}, inplace=True)
    skew.rename(columns={'wearable-mag-xl1': 'magnitude skew'}, inplace=True)
    
    kurtosis.rename(columns={'angle': 'angle kurtosis'}, inplace=True)
    kurtosis.rename(columns={'wearable-mag-xl1': 'magnitude kurtosis'}, inplace=True)
    
    # collect DataFrames
    features = [minimum, maximum, mean, std, summ, skew, kurtosis]
    features = pd.concat(features, axis=1)

    
    # for some weird reason new nans appear. 
    # need to drop them to have same shape on what to plot
    features = features.dropna()
    std = std.dropna()
    
    return [features, std, df]

In [None]:
# load features from data over all nights
allFeatures = []
allStd = []
for d in df:
    [feature, std, dataf] = getFeatures(d)
    allFeatures.append(feature)
    allStd.append(std)

In [None]:
# normalise the values between a 0 1 range
normalisedFeatures = []
for features in allFeatures:
    features_array = features.values #return a numpy array
    min_max_scalar = preprocessing.MinMaxScaler()
    normalise = min_max_scalar.fit_transform(features_array)
    normalised_features = pd.DataFrame(normalise)

    # rename index and columns
    normalised_features.columns = features.columns
    normalised_features.index = features.index

    features = normalised_features.copy(deep=True)
    normalisedFeatures.append(features)
normalisedFeatures[0]

In [None]:
pickle.dump(normalisedFeatures, open('allFeatures.pkl','wb'))

In [None]:
# save all data to pkl
data = pd.concat(normalisedFeatures, axis=0)
data.to_pickle('normalisedDays.pkl')


# Can start from here to load data

In [3]:
# load pickled data
data = pd.read_pickle('normalisedDays.pkl')
normalisedFeatures = pickle.load('allFeatures.pkl')

TypeError: file must have 'read' and 'readline' attributes

In [None]:
# cluster data
label = KMeans(n_clusters=5, random_state=10).fit_predict(data)

In [None]:
# plot PCA of clusters
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(1, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)

plt.cla()
pca = decomposition.PCA(n_components=3)
pca.fit(data)
X = pca.transform(data)

# only needed for non int labels (I think)
# y = np.choose(label, [1, 2, 0]).astype(np.float)

ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=label, cmap=plt.cm.spectral) # alt. cmap='spring'

ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
plt.show()

In [None]:
# scatter of labelled data
plt.scatter(data.index, data['angle std'], c=label)
plt.colorbar()

# Train model and export features

In [None]:
# fit model to data
trainModel = KMeans(n_clusters=5, random_state=10).fit(data)

# this is the model that should be used to predict sleep/not

# TODO: drop doesnt work!

In [None]:
# collect features for the sleep data of each day
sleepFeatures = []
for feature in normalisedFeatures:
    label = trainModel.predict(feature)
    
    # merge clusters
    label[label == 0] = 0
    label[label == 1] = 0
    label[label == 2] = 0
    label[label == 3] = 0
    label[label == 4] = 1
    
    # drop awake data
    for i,n in enumerate(label):
        if(n == 1):
            feature.drop(feature.index[i])
    feat = np.concatenate((feature.mean(), feature.std(), feature.skew(), feature.kurt()), axis=0)
    sleepFeatures.append(feat)
    
    normalisedFeatures['label'] = label

In [None]:
# export sleep features
sleepFeatures=np.asarray(sleepFeatures)
np.save('sleepFeatures.npy', sleepFeatures, allow_pickle=True)

# TODO: map label to timestamp