In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering
from sklearn import decomposition
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
import os
import pickle

%matplotlib notebook

In [2]:
# load sleep features
allFeatures = np.load('sleepFeatures.npy')

In [3]:
# normalise the values between a 0 1 range
normalisedFeatures = []
for features in allFeatures:
    min_max_scalar = preprocessing.MinMaxScaler()
    normalise = min_max_scalar.fit_transform(features)
    normalisedFeatures.append(normalise)



In [4]:
# search for csv location

found = []

def find(name,path):
    path = os.getcwd() + path
    file = name

    for root, dir, files in os.walk(path):
        for item in files:
            if file == item:
                return os.path.join(root, file)
        return None

In [5]:
# load sleep diaries

path = find('Sleep Data Research - ASD Morning.csv','/irc-sphere-sleep-56db93f64661/Sphere-csv-data/')

if path is not None:
    if 'gz' in path:
        fileName = path
        data = pd.read_csv(path, skiprows=1, index_col='Participant', compression='gzip')
    else: 
        data = pd.read_csv(path, skiprows=1, index_col='Participant')
data

Unnamed: 0_level_0,Date,Time in Bed,Time to Sleep,Time to fall asleep (minutes),Wake up time,Time to get up,Quality of Sleep,How Refreshed,Normal Night,Comments
Participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,01/06,23:00,23:30,5,07:00,07:00,Poor,Somewhat,Yes,-
1,02/06,23:30,00:00,5,06:30,06:30,Fair,Somewhat,Yes,-
1,03/06,23:00,23:30,5,07:00,07:00,Fair,Somewhat,Yes,-
14,20/04,23:00,23:10,60,08:20,08:25,Fair,Refreshed,No,woken by very noisy birds in the morning
14,21/04,22:30,23:00,60,07:55,08:00,Fair,Refreshed,No,Car alarm went off in the night
14,22/04,23:30,00:30,30,08:50,08:50,Good,Refreshed,Yes,
14,23/04,23:30,01:30,20,08:40,10:00,Very Good,Refreshed,Yes,
14,24/04,23:00,01:30,15,10:00,10:15,Very Good,Refreshed,Yes,
14,25/04,23:00,01:30,60,08:20,08:20,Good,Refreshed,Yes,
14,26/07,23:30,23:30,15,08:00,08:00,Very Good,Somewhat,Yes,


In [6]:
# convert 'Quality of Sleep' to numeric labels
allLabels = []
for d in data['Quality of Sleep']:
    if(d == 'Poor'):
        allLabels.append(0)
    elif(d == 'Fair'):
        allLabels.append(1)
    elif(d == 'Good'):
        allLabels.append(2)
    else:
        allLabels.append(3)

# get only labels for trainingData
labels = []
for i in range(len(normalisedFeatures)):
    labels.append(allLabels[i])

In [7]:
# fit data to labels
knn = KNeighborsClassifier() # amount of nearest neighbors  =1
knn.fit(normalisedFeatures, labels)

spectral = SpectralClustering()
spectral.fit(normalisedFeatures, labels)

SpectralClustering(affinity='rbf', assign_labels='kmeans', coef0=1, degree=3,
          eigen_solver=None, eigen_tol=0.0, gamma=1.0, kernel_params=None,
          n_clusters=8, n_init=10, n_jobs=1, n_neighbors=10,
          random_state=None)

In [14]:
# make PCA (dimensionality reduction) to 3D and plot
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(1, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)

plt.cla()
pca = decomposition.PCA(n_components=3)
pca.fit(normalisedFeatures)
X = pca.transform(normalisedFeatures)

# only needed for non int labels (I think)
# y = np.choose(label, [1, 2, 0]).astype(np.float)

plott = ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=labels, cmap=plt.cm.spectral) # alt. cmap='spring'

ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
fig.colorbar(plott, ax=ax)
plt.show()

<IPython.core.display.Javascript object>