Qifanc@usc.edu
Qifan Chen
9166932624

In [119]:
import numpy as np
import pandas as pd
from metric_learn import MLKR
from scipy.spatial.distance import cdist
from scipy.special import logsumexp
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

(a) Choose 70% of the data randomly as the training set.

In [93]:
# Read the UPDRS data file and discard the 'subject#' column.
data_path = "../data/parkinsons_updrs.data"
df = pd.read_csv(data_path).drop(columns=['subject#'])

# The two UPDRS scores are the regression targets; every other column
# is used as a feature.
target_cols = ['motor_UPDRS', 'total_UPDRS']
y = df[target_cols]
X = df.drop(columns=target_cols)

# 70% of the rows go to training; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=1)

# Show the shapes of the four pieces as the cell output.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((4112, 19), (1763, 19), (4112, 2), (1763, 2))

In [94]:
# Fit the scaler on the training features only, then apply the same
# transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_sd, X_test_sd = (scaler.transform(part) for part in (X_train, X_test))

# Convert the targets to plain ndarrays so that a single output can be
# selected later with y[:, k].
y_train, y_test = np.array(y_train), np.array(y_test)

(b) Use metric learning with Gaussian kernels to estimate each of the outputs motor UPDRS and total UPDRS from the features. 

In [57]:
class MLKR_P(MLKR):
    '''Subclass of MLKR that adds a predict() function.

    After fitting, the transformed training inputs and the training
    targets are stored on the instance.  predict() transforms new inputs
    the same way and returns, for each of them, a weighted average of the
    stored targets with weights proportional to
    exp(-squared Euclidean distance) in the transformed space.
    '''
    def __init__(self, n_components=None, init='auto',verbose=False,
               max_iter=1000, random_state=None):
        super().__init__(n_components=n_components, init=init,
                         verbose=verbose, max_iter=max_iter,
                         random_state=random_state)
        # Populated by fit(); None until the estimator has been fitted.
        self.X_transform = None
        self.y_ = None

    def fit(self, X, y):
        '''Fit the parent MLKR model and remember the training set.

        Parameters
        ----------
        X : training inputs, forwarded unchanged to MLKR.fit().
        y : training targets, forwarded to MLKR.fit() and kept for
            predict().

        Returns
        -------
        self
        '''
        super().fit(X, y)
        # Cache the transformed training inputs so predict() does not
        # have to recompute them on every call.
        self.X_transform = super().transform(X)
        self.y_ = y

        return self

    def predict(self, X):
        '''Predict targets for X by kernel-weighted averaging.

        Parameters
        ----------
        X : inputs to predict for, passed through the parent's
            transform().

        Returns
        -------
        yhat : one prediction per row of X.
        '''
        X_embedded = super().transform(X)
        dist = self.get_dist(X_embedded)
        # Row-wise softmax of -dist, computed through logsumexp so that
        # large distances do not underflow the normalising sum.
        softmax = np.exp(- dist - logsumexp(- dist, axis=1)[:, np.newaxis])
        yhat = softmax.dot(self.y_)

        return yhat

    def get_dist(self, X_embedded):
        '''Squared Euclidean distances to the stored training points.

        Parameters
        ----------
        X_embedded : array with one transformed sample per row.

        Returns
        -------
        dist : array of shape (n_pre_samples, n_train_samples) where
            dist[i, j] is the squared distance between X_embedded[i] and
            self.X_transform[j].
        '''
        # One vectorised call instead of a Python loop over the rows.
        return cdist(X_embedded, self.X_transform, metric='sqeuclidean')


In [43]:
# NOTE: this search took over 30 minutes when it was originally run and
# was deliberately not re-run afterwards.
p = X_train.shape[1]
M = [5, 10, 15, p]  # candidate values for n_components
cv = KFold(n_splits=5, shuffle=True, random_state=1)
score0, score1 = [], []
for m in M:
    mlkr = MLKR_P(n_components=m, init='pca')
    # Column 0 of y_train feeds score0 and column 1 feeds score1.
    for col, scores in enumerate((score0, score1)):
        cvs = cross_val_score(mlkr, X_train_sd, y_train[:, col], cv=cv,
                              scoring='neg_mean_squared_error', n_jobs=-1)
        scores.append(cvs.mean())

# Scores are negated MSE, so the largest mean score marks the best candidate.
max_n0 = M[np.argmax(score0)]
max_n1 = M[np.argmax(score1)]

In [53]:
# Values of max_n0 and max_n1, written out by hand so that the slow
# search does not have to be executed again.
best_components = {
    "Best number of components form for motor_UPDRS is": 5,
    "Best number of components form for total_UPDRS is": 15,
}
for message, n_best in best_components.items():
    print(message, n_best)

Best number of components form for motor_UPDRS is 5
Best number of components form for total_UPDRS is 15


In [120]:
# motor_UPDRS (column 0 of the targets)
# NOTE: this cell took about 15 minutes when it was run.
print("Results for motor_UPDRS:")
mlkr0_final = MLKR_P(n_components=5, init='pca').fit(X_train_sd,
                                                     y_train[:, 0])

# Predict on both splits first, then report the two scores.
y_train_pred0 = mlkr0_final.predict(X_train_sd)
y_test_pred0 = mlkr0_final.predict(X_test_sd)
r2_train0 = r2_score(y_train[:, 0], y_train_pred0)
r2_test0 = r2_score(y_test[:, 0], y_test_pred0)
print("Training R2 :", r2_train0)
print("Test R2 :", r2_test0)

Results for motor_UPDRS:
Training R2 : 0.9998441778740059
Test R2 : 0.9766675987194592


In [121]:
# total_UPDRS (column 1 of the targets)
# NOTE: this cell took about 15 minutes when it was run.
print("Results for total_UPDRS:")
mlkr1_final = MLKR_P(n_components=15, init='pca').fit(X_train_sd,
                                                      y_train[:, 1])

# Predict on both splits first, then report the two scores.
y_train_pred1 = mlkr1_final.predict(X_train_sd)
y_test_pred1 = mlkr1_final.predict(X_test_sd)
r2_train1 = r2_score(y_train[:, 1], y_train_pred1)
r2_test1 = r2_score(y_test[:, 1], y_test_pred1)
print("Training R2 :", r2_train1)
print("Test R2 :", r2_test1)

Results for total_UPDRS:
Training R2 : 0.999999994234005
Test R2 : 0.9797613186757309


(c) Use sklearn’s neural network implementation to train a neural network with two outputs that predicts motor UPDRS and total UPDRS.

In [97]:
# Design parameters of the two-output network, collected in one place.
mlp_params = dict(
    hidden_layer_sizes=100,
    activation='relu',
    solver='adam',
    batch_size=200,
    alpha=0.0001,
    learning_rate='constant',
    learning_rate_init=0.001,
    random_state=2,
    max_iter=2000,
    verbose=False,
    n_iter_no_change=15,
    early_stopping=False,
    tol=1e-5,
)
regr = MLPRegressor(**mlp_params)
# y_train has two columns, so both outputs are fitted jointly.
regr.fit(X_train_sd, y_train);

In [98]:
# motor_UPDRS is output column 0 of the fitted network.
print("Results for motor_UPDRS:")
y_train_pred0, y_test_pred0 = (
    regr.predict(features)[:, 0] for features in (X_train_sd, X_test_sd)
)
r2_train0 = r2_score(y_train[:, 0], y_train_pred0)
r2_test0 = r2_score(y_test[:, 0], y_test_pred0)
print("Training R2 :", r2_train0)
print("Test R2 :", r2_test0)

Results for motor_UPDRS:
Training R2 : 0.7953321040706718
Test R2 : 0.6976605014174815


In [99]:
# total_UPDRS is output column 1 of the fitted network.
print("Results for total_UPDRS:")
y_train_pred1, y_test_pred1 = (
    regr.predict(features)[:, 1] for features in (X_train_sd, X_test_sd)
)
r2_train1 = r2_score(y_train[:, 1], y_train_pred1)
r2_test1 = r2_score(y_test[:, 1], y_test_pred1)
print("Training R2 :", r2_train1)
print("Test R2 :", r2_test1)

Results for total_UPDRS:
Training R2 : 0.8052774399767753
Test R2 : 0.6999590238328346


(d) Use the design parameters that you chose in the first part and train a neural network, but this time set early_stopping=True.

In [116]:
# Same design parameters as the previous network, but with early stopping
# enabled and 10% of the training data held out for validation.
mlp_es_params = dict(
    hidden_layer_sizes=100,
    activation='relu',
    solver='adam',
    batch_size=200,
    alpha=0.0001,
    learning_rate='constant',
    learning_rate_init=0.001,
    random_state=2,
    max_iter=2000,
    verbose=False,
    n_iter_no_change=15,
    early_stopping=True,
    validation_fraction=0.1,
    tol=1e-5,
)
regr = MLPRegressor(**mlp_es_params)
regr.fit(X_train_sd, y_train);

In [117]:
# motor_UPDRS is output column 0 of the early-stopped network.
print("Results for motor_UPDRS:")
y_train_pred0, y_test_pred0 = (
    regr.predict(features)[:, 0] for features in (X_train_sd, X_test_sd)
)
r2_train0 = r2_score(y_train[:, 0], y_train_pred0)
r2_test0 = r2_score(y_test[:, 0], y_test_pred0)
print("Training R2 :", r2_train0)
print("Test R2 :", r2_test0)

Results for motor_UPDRS:
Training R2 : 0.6925977384852824
Test R2 : 0.6411714212494166


In [118]:
# total_UPDRS is output column 1 of the early-stopped network.
print("Results for total_UPDRS:")
y_train_pred1, y_test_pred1 = (
    regr.predict(features)[:, 1] for features in (X_train_sd, X_test_sd)
)
r2_train1 = r2_score(y_train[:, 1], y_train_pred1)
r2_test1 = r2_score(y_test[:, 1], y_test_pred1)
print("Training R2 :", r2_train1)
print("Test R2 :", r2_test1)

Results for total_UPDRS:
Training R2 : 0.6816609468761714
Test R2 : 0.6190416660988958
