# Import libraries

In [1]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import pandas as pd
import numpy as np
from IPython.display import SVG, display, Markdown
from datetime import datetime
from sklearn import preprocessing
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
from keras.utils import np_utils
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score
import pickle

Using TensorFlow backend.


# Load training dataset

In [2]:
# Read the Parkinson's CSV file into a DataFrame; the path is relative to
# the notebook's working directory.
dataset_path = 'dataset/2. oxford/parkinsons.data'
train_dataframe = pd.read_csv(filepath_or_buffer=dataset_path)

# Visualize some training data

In [3]:
# Show the first five rows as a quick sanity check of the loaded columns.
train_dataframe.head(n=5)

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


# Prepare inputs

In [4]:
# Build the feature matrix: every column except the string `name` column
# and the `status` column (which is used as the label in the next cell).
x = train_dataframe.drop(['name', 'status'], axis=1)
# `DataFrame.as_matrix()` was deprecated and then removed in pandas 1.0;
# `.values` returns the same NumPy array and works on old and new pandas.
x = x.values
print(x.shape)

(195, 22)


# Prepare labels

In [5]:
# Extract the `status` column as the label vector.
y = train_dataframe['status']
# `Series.as_matrix()` was deprecated and then removed in pandas 1.0;
# `.values` yields the same 1-D NumPy array on old and new pandas.
y = y.values
print(y.shape)
# One-hot encode the labels: (n_samples,) -> (n_samples, n_classes).
y = np_utils.to_categorical(y)
print(y.shape)

(195,)
(195, 2)


# Normalize data

In [6]:
# Min-max scale every feature column into [0, 1].
# NOTE(review): the min/max are computed over the full dataset, before the
# train/test split made later in this notebook, so test rows influence the
# scaling statistics -- confirm this is acceptable for the evaluation.
mins = np.min(x, axis=0)
maxs = np.max(x, axis=0)
rng = maxs - mins
# A constant column has zero range; dividing by it would produce NaN (0/0).
# Substitute 1.0 there so such a column maps to a finite value (1.0).
safe_rng = np.where(rng == 0, 1.0, rng)
# Equivalent to (x - mins) / rng, written as 1 - (maxs - x) / rng.
x = 1.0 - ((maxs - x) / safe_rng)
pd.DataFrame(x).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,0.184308,0.112592,0.054815,0.19568,0.249012,0.145472,0.247588,0.145288,0.312215,0.280197,...,0.172448,0.332584,0.068307,0.511745,0.369155,0.960148,0.569875,0.585765,0.390661,0.49731
1,0.198327,0.09493,0.278323,0.25413,0.288538,0.191233,0.323687,0.191042,0.472887,0.444536,...,0.279424,0.516048,0.059331,0.432577,0.47083,0.977024,0.703277,0.741337,0.473145,0.671326
2,0.165039,0.059128,0.265288,0.280178,0.328063,0.229287,0.369239,0.229411,0.390634,0.326212,...,0.219848,0.443317,0.039596,0.49622,0.404416,1.0,0.636745,0.686371,0.408819,0.596682
3,0.165004,0.072927,0.2642,0.263342,0.328063,0.209056,0.324759,0.208862,0.414278,0.354971,...,0.233785,0.475478,0.040997,0.495936,0.416255,0.975885,0.695627,0.738089,0.436977,0.671949
4,0.16115,0.080909,0.260107,0.354511,0.407115,0.282755,0.437299,0.28287,0.499452,0.410025,...,0.286852,0.584542,0.054174,0.455499,0.375159,0.992813,0.762472,0.513798,0.404336,0.757611


# Shuffle data

In [7]:
# Shuffle features and labels in unison. The seed is fixed (same value as
# the split's random_state) so that a fresh Restart & Run All reproduces
# exactly the same ordering, and therefore the same saved train/test files.
x, y = shuffle(x, y, random_state=1)
pd.DataFrame(y).head()

Unnamed: 0,0,1
0,1.0,0.0
1,1.0,0.0
2,0.0,1.0
3,0.0,1.0
4,1.0,0.0


# Split data

In [8]:
# Draw a single stratified 70/30 train/test partition with a fixed seed.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=1)
# n_splits=1, so the generator yields exactly one (train, test) index pair.
train_index, test_index = next(sss.split(x, y))
X_train, Y_train = x[train_index], y[train_index]
X_test, Y_test = x[test_index], y[test_index]

# Report the shape of each partition.
for label, part in (('x_train: ', X_train), ('y_train: ', Y_train),
                    ('x_test: ', X_test), ('y_test: ', Y_test)):
    print(label + str(part.shape))

x_train: (136, 22)
y_train: (136, 2)
x_test: (59, 22)
y_test: (59, 2)


# Save train/test splits

In [9]:
# Folder path where the pickled train/test splits are written.
# The directory must already exist; open() does not create it.
root_directory = 'dataset/8. oxford-normalized-30/'
for file_name, split in (('x_train.p', X_train), ('x_test.p', X_test),
                         ('y_train.p', Y_train), ('y_test.p', Y_test)):
    # Use a context manager so each file handle is flushed and closed
    # deterministically instead of being left open until garbage collection.
    with open(root_directory + file_name, "wb") as handle:
        pickle.dump(split, handle)

# The End