In [50]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [60]:
# Read the labeled CSV, using its first column as the row index.
df = pd.read_csv(
    filepath_or_buffer="data/labeled_test.csv",
    index_col=0,
)
# Show the first rows as this cell's output.
df.head()

Unnamed: 0_level_0,Jitter (local),Jitter (local absolute),Jitter (rap),Jitter (ppq5),Jitter (ddp),Shimmer (local),Shimmer (local dB),Shimmer (apq3),Shimmer (apq5),Shimmer (apq11),...,Minimum pitch,Maximum pitch,Number of periods,Mean period,Standard deviation of period,Fraction of locally unvoiced frames,Number of voice breaks,Degree of voice breaks,UNKNOWN,Class
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.135,7e-06,0.067,0.078,0.202,2.033,0.178,1.074,1.336,1.576,...,184.502,187.88,183,182,0.005368,2.5e-05,0,0,0,1
1,0.143,7e-06,0.073,0.081,0.219,1.236,0.107,0.612,0.904,0.894,...,198.665,202.214,194,193,0.004988,2e-05,0,0,0,1
1,0.162,8e-06,0.087,0.089,0.26,1.338,0.117,0.63,0.948,1.017,...,197.22,206.06,198,197,0.00494,4.6e-05,0,0,0,1
1,0.14,7e-06,0.075,0.089,0.224,1.086,0.094,0.556,0.747,0.823,...,202.324,206.182,200,199,0.0049,2.3e-05,0,0,0,1
1,0.15,7e-06,0.08,0.097,0.24,1.049,0.091,0.533,0.698,0.777,...,205.407,209.927,204,203,0.00482,2.2e-05,0,0,0,1


In [52]:
# features 1-5: Jitter (local),Jitter (local, absolute),Jitter (rap),Jitter (ppq5),Jitter (ddp), 
# features 6-11: Shimmer (local),Shimmer (local, dB),Shimmer (apq3),Shimmer (apq5), Shimmer (apq11),Shimmer (dda), 
# features 12-14: AC,NTH,HTN, 
# features 15-19: Median pitch,Mean pitch,Standard deviation,Minimum pitch,Maximum pitch, 
# features 20-23: Number of pulses,Number of periods,Mean period,Standard deviation of period,
# features 24-26: Fraction of locally unvoiced frames,Number of voice breaks,Degree of voice breaks 
# column 28: UPDRS 
# column 29: class information 

# Each subject has 26 voice samples including sustained vowels, numbers, words and short 
# sentences. The voice samples in the training data file are given in the 
# following order: 

# sample# - corresponding voice samples 
# 1: sustained vowel (aaa) 
# 2: sustained vowel (ooo) 
# 3: sustained vowel (uuu) 
# 4-13: numbers from 1 to 10 
# 14-17: short sentences 
# 18-26: words 

# print(df.columns.values)
# print(df.get_value(1, "Class"))

# Split the table into model inputs and the regression target.
data = df.values
# Keep every column except the two trailing label columns. The earlier slice
# `0:-3` also dropped the last feature column ("Degree of voice breaks" in the
# header printed by df.head()), so the network never saw that feature.
X_train = data[:, :-2]
# Second-to-last column (header "UNKNOWN"), used here as the UPDRS score.
# NOTE(review): confirm that this column really holds UPDRS values.
y_train = data[:, -2]  # UPDRS

# Number of input columns; the model builders size their input layer from it.
NUM_FEATURES = X_train.shape[1]


In [53]:
def create_model():
    """Build and compile the baseline regression network.

    The network takes NUM_FEATURES inputs, has one ReLU hidden layer with
    NUM_FEATURES units, and ends in a single output unit with no activation
    specified. It is compiled with mean-squared-error loss and the Adam
    optimizer.

    Returns:
        The compiled Sequential model.
    """
    hidden = Dense(
        NUM_FEATURES,
        input_dim=NUM_FEATURES,
        kernel_initializer='normal',
        activation='relu',
    )
    # One output unit: the predicted UPDRS value.
    output = Dense(1, kernel_initializer='normal')

    net = Sequential()
    for layer in (hidden, output):
        net.add(layer)
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net

In [54]:
# Evaluate the baseline network (create_model) with 10-fold cross-validation.
seed = 7
np.random.seed(seed)  # seed NumPy's global RNG before any model is built

# The scaler lives inside the pipeline so that it is fit together with the
# regressor on each training split.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=create_model, epochs=50, batch_size=5, verbose=1)),
]
pipeline = Pipeline(estimators)

# No shuffling is requested, so `random_state` is omitted: it has no effect on
# an unshuffled KFold, and newer scikit-learn releases reject the combination.
kfold = KFold(n_splits=10)

# Score the whole pipeline. The original call passed the undefined name
# `estimator`, which raises NameError.
results = cross_val_score(pipeline, X_train, y_train, cv=kfold)
# NOTE(review): the values come from the regressor's own score method; check
# their sign convention before reading them as a plain MSE.
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

KeyboardInterrupt: 

In [None]:
# define the model
def larger_model():
    """Build and compile the deeper regression network.

    The network takes NUM_FEATURES inputs and stacks a ReLU layer with
    NUM_FEATURES units, a ReLU layer with 10 units, and a single output unit
    with no activation specified. It is compiled with mean-squared-error loss
    and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = (
        Dense(
            NUM_FEATURES,
            input_dim=NUM_FEATURES,
            kernel_initializer='normal',
            activation='relu',
        ),
        Dense(10, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal'),
    )

    net = Sequential()
    for layer in layers:
        net.add(layer)
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net

In [None]:
# Evaluate the deeper network (larger_model) with 10-fold cross-validation.
seed = 7
np.random.seed(seed)  # seed NumPy's global RNG before any model is built

# The scaler lives inside the pipeline so that it is fit together with the
# regressor on each training split.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=larger_model, epochs=50, batch_size=5, verbose=1)),
]
pipeline = Pipeline(estimators)

# No shuffling is requested, so `random_state` is omitted: it has no effect on
# an unshuffled KFold, and newer scikit-learn releases reject the combination.
kfold = KFold(n_splits=10)

# Score the whole pipeline. The original call passed the undefined name
# `estimator`, which raises NameError.
results = cross_val_score(pipeline, X_train, y_train, cv=kfold)
# NOTE(review): the values come from the regressor's own score method; check
# their sign convention before reading them as a plain MSE.
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))


In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation

# Purely linear two-layer network fit directly on X_train (no scaling step and
# no cross-validation in this cell).
# Uses the same Keras 2 keyword names as the model builders earlier in the
# notebook: `kernel_initializer` (formerly `init`) and `epochs` (formerly
# `nb_epoch`).
model = Sequential()
model.add(Dense(NUM_FEATURES, input_dim=NUM_FEATURES, kernel_initializer='uniform', activation='linear'))
model.add(Dense(1, kernel_initializer='uniform', activation='linear'))
model.compile(loss='mse', optimizer='rmsprop')

# NOTE(review): batch_size=26 presumably matches the 26 voice samples recorded
# per subject -- confirm that the rows are grouped by subject.
model.fit(X_train, y_train, epochs=1000, batch_size=26, verbose=1)
# model.fit(X_train, y_train, epochs=1, batch_size=26, verbose=1)
# score = model.evaluate(X_test, y_test, batch_size=16)