# Sleep Detection Module - SAATHI

## Data Preparation

In [None]:
!pip install gdown
!pip install numpy==1.16.1
!pip install pandas==0.25.1

In [None]:
import os
import numpy as np
import glob
import pandas as pd
from scipy import stats
import random

In [None]:
# Seed Python's built-in `random` module for reproducibility.
# NOTE(review): this only affects the stdlib `random` generator; NumPy's RNG
# is not seeded here — confirm whether that is needed.
random.seed(272)

In [None]:
# Download the data from Google Drive by file id using gdown
# (presumably the ICHI14_dataset.zip archive that the next cell unzips — verify).
!gdown --id 19yYNrxwpBOm6Eg76yUpcdmudqyKfOEY6 

In [None]:
# Extract ICHI14_dataset.zip into the ./ICHI14_dataset directory
!unzip ICHI14_dataset.zip -d './ICHI14_dataset'

In [None]:
# Remove the stray file from the data directory: every file left in that
# directory is later loaded as a recording.
# The existence check keeps this cell safe to re-run once the file is gone.
stray_file = './ICHI14_dataset/data/pat_inf.npy'
if os.path.exists(stray_file):
  os.remove(stray_file)

In [None]:
# Collect the recording files (one per subject, judging by `total_subjects`).
# The directory must match the unzip target './ICHI14_dataset' (underscore,
# not a space), otherwise the pattern matches nothing.
# glob returns paths in arbitrary, OS-dependent order, so sort them to keep
# the train/test split identical between runs and machines.
filepaths = sorted(glob.glob('./ICHI14_dataset/data/*'))
total_subjects = len(filepaths)
# Split by file: the first 80% are used for training, the rest for testing
train_num = int(0.8*total_subjects)

In [None]:
# Creating training split
# Load the first `train_num` files and stack them row-wise. The frames are
# collected in a list and concatenated once: DataFrame.append re-copies the
# accumulated frame on every iteration and no longer exists in pandas >= 2.0.
columns = ['timestamp','d','x','y','z','l','gt']
train_frames = [pd.DataFrame(np.load(path), columns=columns) for path in filepaths[:train_num]]
if train_frames:
  df_train = pd.concat(train_frames)
else:
  # Nothing to load: keep an empty frame with the expected columns
  df_train = pd.DataFrame(columns=columns)
# Keep only the x, y, z and gt columns
df_train = df_train.drop(columns = ['d','timestamp','l'])

In [None]:
# Creating test split
# Load the files from index `train_num` onwards and stack them row-wise. The
# frames are collected in a list and concatenated once: DataFrame.append
# re-copies the accumulated frame on every iteration and no longer exists in
# pandas >= 2.0.
columns = ['timestamp','d','x','y','z','l','gt']
test_frames = [pd.DataFrame(np.load(path), columns=columns) for path in filepaths[train_num:total_subjects]]
if test_frames:
  df_test = pd.concat(test_frames)
else:
  # Nothing to load: keep an empty frame with the expected columns
  df_test = pd.DataFrame(columns=columns)
# Keep only the x, y, z and gt columns
df_test = df_test.drop(columns = ['d','timestamp','l'])


In [None]:
#Function for windowing for time series data
def windowing(dataset,window_size = 100):
  """Cut a sample-by-sample array into fixed-length, non-overlapping windows.

  Args:
    dataset: 2-D array. Every column except the last is a feature; the last
      column is the per-row label.
    window_size: number of consecutive rows that form one window.

  Returns:
    A tuple ``(feature, label)``. ``feature`` has one row per window holding
    that window's feature values flattened in row-major order
    (``window_size * n_features`` columns). ``label`` is an int array with
    the most frequent label of each window (the smallest one on ties).
    Trailing rows that do not fill a complete window are dropped.
  """
  n_features = dataset.shape[1] - 1
  n_windows = dataset.shape[0] // window_size
  # Slice with an explicit end index: a `[:-remainder]` slice would return an
  # empty array whenever the row count is an exact multiple of window_size.
  usable_rows = n_windows * window_size
  feature = dataset[:usable_rows, :-1]
  label = dataset[:usable_rows, -1].reshape(n_windows, window_size)

  label_f = np.zeros(n_windows)
  for idx, window_labels in enumerate(label):
    # np.unique returns sorted values and argmax picks the first maximum,
    # so a tie resolves to the smallest label.
    values, counts = np.unique(window_labels, return_counts=True)
    label_f[idx] = int(values[np.argmax(counts)])

  feature = feature.reshape(n_windows, window_size * n_features)
  return feature, label_f.astype(int)


In [None]:
#Function for preparing data
def prepare_data(dataset):
  """Turn raw x/y/z samples with `gt` labels into windowed features and labels.

  Adds `pitch` and `roll` angles computed from x, y and z, drops the rows
  whose `gt` is 0, recodes `gt` into a `labels` column and hands the result
  to `windowing`. The caller's DataFrame is left unmodified.

  Args:
    dataset: DataFrame with numeric columns `x`, `y`, `z` and `gt`.

  Returns:
    The `(features, labels)` pair produced by `windowing` for the columns
    [x, y, z, pitch, roll, labels].
  """
  x = np.asarray(dataset['x'], dtype=float)
  y = np.asarray(dataset['y'], dtype=float)
  z = np.asarray(dataset['z'], dtype=float)
  # Computed on whole columns at once instead of one Python call per row.
  # The 0.0001**2 term keeps the square root strictly positive; the 0.001
  # offset avoids dividing by zero when z == 0.
  pitch = np.arctan(-x / np.sqrt(y ** 2 + z ** 2 + 0.0001 ** 2))
  roll = np.arctan(y / (z + 0.001))
  # assign() returns a new frame, so the input is never written to
  prepared = dataset.assign(pitch=pitch, roll=roll)
  # Removing datapoints for which the label was 0 (presumably "unlabelled" — verify)
  prepared = prepared[prepared['gt'] != 0]
  # Recode gt into three classes: 1-3 -> 0, 5 -> 1, 6-7 -> 2.
  # NOTE(review): gt == 4 has no entry and would become NaN — confirm that
  # value never occurs in the data.
  mapping = {1:0, 2:0, 3:0, 5:1, 6:2, 7:2}
  prepared = prepared.assign(labels=prepared['gt'].map(mapping))
  prepared = prepared.drop(columns = ['gt'])
  return windowing(np.array(prepared))

In [None]:
# Run the preparation pipeline on each split (train first, then test) and
# unpack the resulting (features, labels) pairs
train_windows = prepare_data(df_train)
test_windows = prepare_data(df_test)
X_train, Y_train = train_windows
X_test, Y_test = test_windows


In [None]:
# Write each split to its own archive (test first, then train), storing the
# features under 'inputs' and the labels under 'targets'
for archive_name, features, labels in (
    ('ICHI14_test', X_test, Y_test),
    ('ICHI14_train', X_train, Y_train),
):
  np.savez(archive_name, inputs=features, targets=labels)

## Training

In [None]:
from sklearn import preprocessing
from math import sqrt
import pandas as pd
import numpy as np
import glob
import os
import time
from xgboost import XGBClassifier
import pickle
import random
from sklearn.decomposition import PCA   

In [None]:
#Loading data
# Read the 'inputs' and 'targets' arrays of both splits. The context manager
# closes each archive after reading. Builtin float/int are used because the
# np.float/np.int aliases (plain aliases of the builtins) are gone from
# recent NumPy releases.
with np.load("ICHI14_train.npz") as npz:
  train_raw = npz["inputs"].astype(float)
  train_targets = npz["targets"].astype(int)

with np.load("ICHI14_test.npz") as npz:
  test_raw = npz["inputs"].astype(float)
  test_targets = npz["targets"].astype(int)

# Standardise both splits with the mean/std estimated on the training split
# only; scaling the test split with its own statistics would put it on a
# different scale from the data the model is fitted on.
scaler = preprocessing.StandardScaler().fit(train_raw)
train_inputs = scaler.transform(train_raw)
test_inputs = scaler.transform(test_raw)

In [None]:
# Fit a PCA (default settings) on the training features only, then project
# the training and test features with that same fitted transform
pca = PCA().fit(train_inputs)
train_inputs, test_inputs = [pca.transform(split) for split in (train_inputs, test_inputs)]

In [None]:
#Training using XG Boost algorithm
# NOTE(review): `scale_pos_weight` and the "error" metric are binary
# classification settings, while the label mapping in the data preparation
# step produces three classes, and no eval_set is passed to fit() — confirm
# these settings are intended.
model = XGBClassifier(n_estimators=1000, random_state=272,max_depth = 7, tree_method = "gpu_hist", scale_pos_weight=99)
eval_metric = ["error"]
model.fit(train_inputs, train_targets, eval_metric=eval_metric, verbose=False)

#Saving model
file_name = "xgb_model.pkl"
# The context manager flushes and closes the file handle after the dump
with open(file_name, "wb") as model_file:
  pickle.dump(model, model_file)

## Evaluation

In [None]:
from sklearn.metrics import make_scorer, accuracy_score,precision_recall_fscore_support

In [None]:
# Predict on the test split, then report accuracy followed by the
# 'weighted'-average precision, recall and F1
pred_test = model.predict(test_inputs)
test_accuracy = accuracy_score(test_targets, pred_test)
print('Test Accuracy: ', test_accuracy)
metrics_m = precision_recall_fscore_support(test_targets, pred_test, average='weighted')
# zip stops after the three labelled scores, ignoring the trailing entry
for metric_label, metric_value in zip(('Test Precision:', 'Test Recall:', 'Test F1:'), metrics_m):
  print(metric_label, metric_value)
