In [1]:
import os
import numpy 
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import *
from keras.layers import *
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import *
from sklearn.linear_model import LinearRegression

Using TensorFlow backend.


In [2]:
class PreprocessedData():
    def __init__(self):
        self.tappy_data = self.create_tappy_dict()
        self.patient_data = self.create_patient_dict()
    def get_tappy_data(self):
        return self.tappy_data
    
    def get_patient_data(self):
        return self.patient_data
        
    # Create a dictionry of all patient data
    def create_patient_dict(self):
        archived_users = os.listdir('../Archived users')
        all_patients_demographic_data = {}
        for i in archived_users:
            all_patients_demographic_data[i[5:15]] = self.clean_demographic_data(i)
        
        return all_patients_demographic_data

    # Create a dictionary of all tappy data
    def create_tappy_dict(self):
        all_patient_tappy_data = {}
        tappy_data = os.listdir('../Tappy Data')
        for i in tappy_data:
            all_patient_tappy_data[tuple([i[0:10],i[12:16]])] = self.clean_tappy_data(i)
        
        return all_patient_tappy_data
    def clean_demographic_data(self, file_name):
        first= open('../Archived users/' + file_name, 'r')
        lines = first.readlines()
        dictionary = {}
        for i in lines:
            value = i[i.index(':')+1:].strip(' ')
            key = i[:i.index(':')+1].strip(' ')
            if value[0] == '\n' or value[0] == '-':
                dictionary[key] = None
            else:
                dictionary[key] = value.strip('\n')
        return dictionary
    def clean_tappy_data(self, file_name):
        second= open('../Tappy Data/' + file_name)
        lines = second.readlines()
        dictionary = {}
        mean_flight_time = 0
        mean_latency_time = 0
        mean_hold_time = 0
        num_data_points = 0
        for i in lines:
            vals = i.split('\t')
            
            if (len(vals[4]) == 6 and len(vals[6]) == 6 and len(vals[7]) == 6):
                mean_flight_time += float(vals[4])
                mean_latency_time += float(vals[6])
                mean_hold_time += float(vals[7])
                num_data_points += 1
            
            
        if (num_data_points > 0):
            dictionary['mean_flight_time'] = mean_flight_time/num_data_points
            dictionary['mean_latency_time'] = mean_latency_time/num_data_points
            dictionary['mean_hold_time'] = mean_hold_time/num_data_points
        return dictionary

In [3]:
class CreateLinearRegressionModels():
    def __init__(self):
        self.processed_data = PreprocessedData()
        self.patient_data = self.processed_data.get_patient_data()
        self.tappy_data = self.processed_data.get_tappy_data()
    
    def create_single_regressions(self):
    # Create all possible combinations of single regressions
    # Find the most accurate of all of them using mean squared error
        x1 = []
        x2 = []
        x3 = []
        y1= []
        y2 = []
        y3 = []
        
        for i in self.tappy_data:
            if i[0] in self.patient_data and 'mean_flight_time' in self.tappy_data[i]: 
                if self.patient_data[i[0]]['Parkinsons:'] == 'True':
                    y1.append(1)
                else:
                    y1.append(0)
                
                x1.append(self.tappy_data[i]['mean_flight_time'])
            
            if i[0] in self.patient_data and 'mean_latency_time' in self.tappy_data[i]: 
                if self.patient_data[i[0]]['Parkinsons:'] == 'True':
                    y2.append(1)
                else:
                    y2.append(0)
                x2.append(self.tappy_data[i]['mean_latency_time'])
            
            if i[0] in self.patient_data and 'mean_hold_time' in self.tappy_data[i]: 
                if self.patient_data[i[0]]['Parkinsons:'] == 'True':
                    y3.append(1)
                else:
                    y3.append(0)
                x3.append(self.tappy_data[i]['mean_hold_time'])
        self.perform_singular_linear_regression([x1,y1])
        self.perform_singular_linear_regression([x2,y2])
        self.perform_singular_linear_regression([x3,y3])
    
    def perform_singular_linear_regression(self, input_output):
        
        x = numpy.array(input_output[0]).reshape((-1,1))
        y = numpy.array(input_output[1])
        model = LinearRegression().fit(x,y)
        print(model.score(x,y))
        return model
    
    def perform_multiple_linear_regression(self, input_output):
        # Perform multilple linearn regressions
        # Given a 2D arrat x and an output y (using 30% traingin data)
        x = numpy.array(input_output[0])
        y = numpy.array(input_output[1])
        model = LinearRegression().fit(x,y)
        print(model.score(x,y))
        return model
       
    def find_coefficient_of_determination(self, model, x, y):
        # Find the mean squared error of a model given model attributes
        # assume 30% testing data
        print(model.score(x,y))
        return model.score(x,y)
    
    def create_multiple_regressions(self):
        # Create all possible multiple regression models
        # Used mean squared error to find the most effective
        x1_x2 = []
        x2_x3 = []
        x1_x3 = []
        x1_x2_x3 = []

        y1_y2 = []
        y2_y3 = []
        y1_y3 = []
        y1_y2_y3 = []

    
        for i in self.tappy_data:
            if i[0] in self.patient_data and 'mean_flight_time' in self.tappy_data[i] and 'mean_latency_time' in self.tappy_data[i]: 
                if self.patient_data[i[0]]['Parkinsons:'] == 'True':
                    y1_y2.append(1)
                else:
                    y1_y2.append(0)
                
                x1_x2.append([self.tappy_data[i]['mean_flight_time'],self.tappy_data[i]['mean_latency_time']])
            
            
            if i[0] in self.patient_data and 'mean_flight_time' in self.tappy_data[i] and 'mean_hold_time' in self.tappy_data[i]: 
                if self.patient_data[i[0]]['Parkinsons:'] == 'True':
                    y2_y3.append(1)
                else:
                    y2_y3.append(0)
                x2_x3.append([self.tappy_data[i]['mean_flight_time'],self.tappy_data[i]['mean_hold_time']])
        
            if i[0] in self.patient_data and 'mean_latency_time' in self.tappy_data[i] and 'mean_hold_time' in self.tappy_data[i]: 
                if self.patient_data[i[0]]['Parkinsons:'] == 'True':
                    y1_y3.append(1)
                else:
                    y1_y3.append(0)
                x1_x3.append([self.tappy_data[i]['mean_latency_time'],self.tappy_data[i]['mean_hold_time']])
            
            if i[0] in self.patient_data and 'mean_latency_time' in self.tappy_data[i] and 'mean_hold_time' in self.tappy_data[i] and 'mean_flight_time' in self.tappy_data[i]: 
                if self.patient_data[i[0]]['Parkinsons:'] == 'True':
                    y1_y2_y3.append(1)
                else:
                    y1_y2_y3.append(0)
                x1_x2_x3.append([self.tappy_data[i]['mean_latency_time'],self.tappy_data[i]['mean_hold_time'], self.tappy_data[i]['mean_flight_time']])
                
                
                
        self.perform_multiple_linear_regression([x1_x2,y1_y2])
        self.perform_multiple_linear_regression([x2_x3,y2_y3])
        self.perform_multiple_linear_regression([x1_x3,y1_y3])
        self.perform_multiple_linear_regression([x1_x2_x3,y1_y2_y3])

In [22]:
    class CreateKerasClassifier():
        def __init__(self):
            self.processed_data = PreprocessedData()
            self.patient_data = self.processed_data.get_patient_data()
            self.tappy_data = self.processed_data.get_tappy_data()


        def model_with_deep_learning(self):
            from sklearn.preprocessing import StandardScaler


            x1_x2_x3 = []
            y1_y2_y3 = []
            for i in self.tappy_data:
                if i[0] in self.patient_data and 'mean_latency_time' in self.tappy_data[i] and 'mean_hold_time' in self.tappy_data[i] and 'mean_flight_time' in self.tappy_data[i]: 
                    if self.patient_data[i[0]]['Parkinsons:'] == 'True':
                        y1_y2_y3.append(1)
                    else:
                        y1_y2_y3.append(0)
                    x1_x2_x3.append([self.tappy_data[i]['mean_latency_time'],self.tappy_data[i]['mean_hold_time'], self.tappy_data[i]['mean_flight_time']])

            X = numpy.array(x1_x2_x3)
            Y = numpy.array(y1_y2_y3)

 
            #Model is defined to active when called by Keras
            def create_model():
                # create model
                keras_model = Sequential()
                
                #input_dim is set to the number of input params in this case 3
                # Using normal  kernel init and relu activation
                keras_model.add(Dense(8, input_dim = 3, kernel_initializer='normal', activation='relu'))
                keras_model.add(Dense(4, input_dim = 3, kernel_initializer='normal', activation='relu'))
                keras_model.add(Dense(1, activation='sigmoid'))

                #Configure the learning rate of the model
                adam = Adam(lr = 0.01)
                keras_model.compile(loss = 'binary_crossentropy', optimizer = adam, metrics = ['accuracy'])

                return keras_model
                

            
            model = create_model()
            
            #Get summarization of the model
            model_summary = model.summary()
            
            seed= 6
            
            #Generate random seed
            np.random.seed(seed)

            
            #Create KerasClassifier 
            model = KerasClassifier(build_fn= create_model, verbose=0)

            #Define grid search parameter
            batch_size= [10,20,40]

            epochs = [10,50,100]

            #need a dictionary of the grid search

            param_grid= dict(batch_size=batch_size, epochs=epochs)

            #build and fitGridSearch

            grid= GridSearchCV(estimator = model, param_grid= param_grid, 
                              cv= KFold(random_state=seed), verbose = 10)
            
            scaler=StandardScaler().fit(X)

            X_standardized= scaler.transform(X)

            grid_results= grid.fit(X_standardized, Y)


            #Summarize results of the network
            
            print(grid_results.best_score_)
            print(grid_results.best_params_)

            return [grid_results.best_score_,grid_results.best_params_]
                

In [23]:
models = CreateKerasClassifier().model_with_deep_learning()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 8)                 32        
_________________________________________________________________
dense_29 (Dense)             (None, 4)                 36        
_________________________________________________________________
dense_30 (Dense)             (None, 1)                 5         
Total params: 73
Trainable params: 73
Non-trainable params: 0
_________________________________________________________________
None
Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] batch_size=10, epochs=10 ........................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.



[CV]  batch_size=10, epochs=10, score=0.7526881694793701, total=   3.0s
[CV] batch_size=10, epochs=10 ........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.0s remaining:    0.0s


[CV]  batch_size=10, epochs=10, score=0.7903226017951965, total=   2.5s
[CV] batch_size=10, epochs=10 ........................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    5.5s remaining:    0.0s


[CV]  batch_size=10, epochs=10, score=0.7526881694793701, total=   2.7s
[CV] batch_size=10, epochs=50 ........................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    8.3s remaining:    0.0s


[CV]  batch_size=10, epochs=50, score=0.7526881694793701, total=   5.1s
[CV] batch_size=10, epochs=50 ........................................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   13.4s remaining:    0.0s


[CV]  batch_size=10, epochs=50, score=0.7903226017951965, total=   5.5s
[CV] batch_size=10, epochs=50 ........................................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   18.9s remaining:    0.0s


[CV]  batch_size=10, epochs=50, score=0.7526881694793701, total=   6.1s
[CV] batch_size=10, epochs=100 .......................................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   25.0s remaining:    0.0s


[CV]  batch_size=10, epochs=100, score=0.7526881694793701, total=   9.1s
[CV] batch_size=10, epochs=100 .......................................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   34.2s remaining:    0.0s


[CV]  batch_size=10, epochs=100, score=0.7903226017951965, total=   9.1s
[CV] batch_size=10, epochs=100 .......................................


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   43.3s remaining:    0.0s


[CV]  batch_size=10, epochs=100, score=0.7526881694793701, total=   9.1s
[CV] batch_size=20, epochs=10 ........................................


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:   52.4s remaining:    0.0s


[CV]  batch_size=20, epochs=10, score=0.7526881694793701, total=   2.6s
[CV] batch_size=20, epochs=10 ........................................
[CV]  batch_size=20, epochs=10, score=0.7903226017951965, total=   2.8s
[CV] batch_size=20, epochs=10 ........................................
[CV]  batch_size=20, epochs=10, score=0.7526881694793701, total=   3.1s
[CV] batch_size=20, epochs=50 ........................................
[CV]  batch_size=20, epochs=50, score=0.7526881694793701, total=   4.5s
[CV] batch_size=20, epochs=50 ........................................
[CV]  batch_size=20, epochs=50, score=0.7903226017951965, total=   4.8s
[CV] batch_size=20, epochs=50 ........................................
[CV]  batch_size=20, epochs=50, score=0.7526881694793701, total=   4.7s
[CV] batch_size=20, epochs=100 .......................................
[CV]  batch_size=20, epochs=100, score=0.7526881694793701, total=   6.8s
[CV] batch_size=20, epochs=100 ......................................

[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  2.4min finished


Best: 0.7652329802513123, using {'batch_size': 10, 'epochs': 10}
0.7652329802513123 0.01774104153108467 {'batch_size': 10, 'epochs': 10}
0.7652329802513123 0.01774104153108467 {'batch_size': 10, 'epochs': 50}
0.7652329802513123 0.01774104153108467 {'batch_size': 10, 'epochs': 100}
0.7652329802513123 0.01774104153108467 {'batch_size': 20, 'epochs': 10}
0.7652329802513123 0.01774104153108467 {'batch_size': 20, 'epochs': 50}
0.7652329802513123 0.01774104153108467 {'batch_size': 20, 'epochs': 100}
0.7652329802513123 0.01774104153108467 {'batch_size': 40, 'epochs': 10}
0.7652329802513123 0.01774104153108467 {'batch_size': 40, 'epochs': 50}
0.7652329802513123 0.01774104153108467 {'batch_size': 40, 'epochs': 100}
