In [1]:
# Ignore warnings :
import warnings
warnings.filterwarnings('ignore')


# Handle table-like data and matrices :
import numpy as np
import pandas as pd
import math 
import itertools

# Modelling Helpers :
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.feature_selection import RFECV
from sklearn.model_selection import GridSearchCV , KFold , cross_val_score


from sklearn.preprocessing import MinMaxScaler


# Evaluation metrics :

# Regression
from sklearn.metrics import mean_squared_log_error,mean_squared_error, r2_score,mean_absolute_error 

# Classification
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score


# Deep Learning Libraries
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam,SGD,Adagrad,Adadelta,RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, LearningRateScheduler
from keras.utils import to_categorical


# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns


# Configure visualisations
# Render matplotlib figures inline, below the cell that produces them.
%matplotlib inline
# Two matplotlib style sheets are applied back to back.
# NOTE(review): presumably the later 'fivethirtyeight' wins wherever both
# sheets define the same setting — confirm which look is actually intended.
mpl.style.use( 'ggplot' )
plt.style.use('fivethirtyeight')
# The seaborn theme is applied last.
# NOTE(review): it likely overrides parts of the style sheets above — confirm.
sns.set(context="notebook", palette="dark", style = 'whitegrid' , color_codes=True)

In [2]:
%cd Z:\PPMI_Data\Excels\CollaborativeFiltering
df = pd.read_csv('Feats45_unCateg_APPRDX.csv')
df.shape
df.head(2)

Z:\PPMI_Data\Excels\CollaborativeFiltering


Unnamed: 0,PATNO,Patient_ID,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,3002,1,1,0,12.16,7,1,1,1,1,...,57,1,0,45,59,47.5,110,204,17,1
1,3010,2,1,1,7.76,6,1,1,0,0,...,36,1,3,40,39,52.0,27,52,9,0


In [3]:
# Drop the PATNO column; the remaining columns are kept as df1
df1 = df.drop(columns=['PATNO'])
df1.head(2)

Unnamed: 0,Patient_ID,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,1,1,0,12.16,7,1,1,1,1,0,...,57,1,0,45,59,47.5,110,204,17,1
1,2,1,1,7.76,6,1,1,0,0,0,...,36,1,3,40,39,52.0,27,52,9,0


In [4]:
# Load the record from NonMotor_Empty.csv that will be filled in interactively
df2 = pd.read_csv('NonMotor_Empty.csv')
# Only the last expression of a cell is displayed, so a bare `df2.shape` here
# would be evaluated and thrown away; print it explicitly instead.
print(df2.shape)
df2.head(2)

Unnamed: 0,Patient_ID,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,999,,,,,,,,,,...,,,,,,,,,,


In [5]:
# df3 is df2 without its Patient_ID column
df3 = df2.drop(columns=['Patient_ID'])
df3.head(2)

Unnamed: 0,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,Education,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,,,,,,,,,,,...,,,,,,,,,,


In [6]:
# Function to update values in the dataframe based on user inputs
def update_values(df3):
    """Prompt for three feature/value pairs and write them into ``df3``.

    The user is asked three times for a column name of ``df3`` and a numeric
    value; every row of the named column is then set to that value. ``df3``
    is modified in place and also returned, so ``df3 = update_values(df3)``
    keeps working.

    Parameters
    ----------
    df3 : pandas.DataFrame
        Frame whose columns are the features that may be updated.

    Returns
    -------
    pandas.DataFrame
        The same ``df3`` object, after the updates.

    Raises
    ------
    KeyError
        If an entered feature name is not a column of ``df3``.
    ValueError
        If an entered value cannot be parsed as a number.
    """
    def _parse_number(text):
        # Whole numbers stay int (as before); decimals are accepted as float.
        try:
            return int(text)
        except ValueError:
            return float(text)

    # Inputs from the user (same prompts, same order)
    answers = []
    for ordinal in ("first", "second", "third"):
        column = input(f"Enter the name of the {ordinal} Feature: ")
        raw_value = input(f"Enter the value to update for the {ordinal} Feature: ")
        answers.append((column, raw_value))

    # Validate and convert everything first, so a bad entry cannot leave the
    # dataframe half-updated.
    updates = []
    for column, raw_value in answers:
        if column not in df3.columns:
            raise KeyError(column)
        updates.append((column, _parse_number(raw_value)))

    # Update the values in the dataframe.
    # The rows to write must not be chosen from some other, notebook-level
    # frame: a mask built that way skips the update whenever that frame
    # already holds the entered value. Assign to every row of the argument.
    for column, value in updates:
        df3.loc[:, column] = value

    return df3

# Run the interactive prompts; the frame returned by update_values is bound back to df3
df3 = update_values(df3)

# Show the first rows of the updated dataframe
df3.head()

Enter the name of the first Feature: UPSIT
Enter the value to update for the first Feature: 22
Enter the name of the second Feature: Clock
Enter the value to update for the second Feature: 5
Enter the name of the third Feature: Benton
Enter the value to update for the third Feature: 5


Unnamed: 0,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,Education,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,,,5.0,5.0,,,,,,,...,,,,,,,,,22.0,


In [7]:
# Put Patient_ID (taken from df2) back in as the leftmost column of df3
df3.insert(loc=0, column='Patient_ID', value=df2['Patient_ID'])
df3.head()

Unnamed: 0,Patient_ID,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,999,,,5.0,5.0,,,,,,...,,,,,,,,,22.0,


In [8]:
# Stack the original records (df1) on top of the user-entered record (df3).
# reset_index() renumbers the rows and keeps the old labels in an 'index' column.
merged_df = pd.concat([df1, df3], axis=0).reset_index()
merged_df.tail()

Unnamed: 0,index,Patient_ID,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
246,246,247,0.0,0.0,14.77,6.0,0.0,1.0,3.0,0.0,...,47.0,0.0,0.0,46.0,44.0,56.25,27.0,75.0,36.0,0.0
247,247,248,0.0,0.0,14.18,6.0,0.0,1.0,0.0,0.0,...,58.0,1.0,3.0,47.0,40.0,58.75,28.0,48.0,38.0,1.0
248,248,249,0.0,0.0,11.06,7.0,0.0,1.0,0.0,0.0,...,49.0,2.0,1.0,45.0,46.0,45.830002,36.0,106.0,32.0,1.0
249,249,250,0.0,0.0,11.06,7.0,0.0,1.0,0.0,1.0,...,57.0,0.0,3.0,44.0,45.0,50.0,30.0,80.0,38.0,0.0
250,0,999,,,5.0,5.0,,,,,...,,,,,,,,,22.0,


In [9]:
# Remove the 'index' and 'Patient_ID' columns so only feature columns remain
merged_df1 = merged_df.drop(columns=['index', 'Patient_ID'])
merged_df1.head(2)


Unnamed: 0,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,Education,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,1.0,0.0,12.16,7.0,1.0,1.0,1.0,1.0,0.0,16.0,...,57.0,1.0,0.0,45.0,59.0,47.5,110.0,204.0,17.0,1.0
1,1.0,1.0,7.76,6.0,1.0,1.0,0.0,0.0,0.0,16.0,...,36.0,1.0,3.0,40.0,39.0,52.0,27.0,52.0,9.0,0.0


In [10]:
# Rescale each column independently onto the range [1, 5]
scaler = MinMaxScaler(feature_range=(1, 5))
df_scaled = pd.DataFrame(
    scaler.fit_transform(merged_df1),
    columns=merged_df1.columns,
)
df_scaled.head(2)

Unnamed: 0,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,Education,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,2.333333,1.0,3.516696,5.0,3.0,1.0,2.333333,2.0,1.0,2.777778,...,3.285714,3.0,1.0,3.434783,5.0,2.833333,3.832117,3.686131,2.333333,2.0
1,2.333333,3.0,1.970123,4.333333,3.0,1.0,1.0,1.0,1.0,2.777778,...,1.785714,3.0,4.0,2.565217,3.0,3.133333,1.408759,1.467153,1.363636,1.0


In [11]:
# Re-attach the patient identifiers from merged_df as the first column.
# The insert is guarded (and no longer passes allow_duplicates=True) so that
# re-running this cell cannot silently add a second 'Patient_ID' column.
if 'Patient_ID' not in df_scaled.columns:
    df_scaled.insert(0, 'Patient_ID', merged_df['Patient_ID'])
df_scaled.head(3)

Unnamed: 0,Patient_ID,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
0,1,2.333333,1.0,3.516696,5.0,3.0,1.0,2.333333,2.0,1.0,...,3.285714,3.0,1.0,3.434783,5.0,2.833333,3.832117,3.686131,2.333333,2.0
1,2,2.333333,3.0,1.970123,4.333333,3.0,1.0,1.0,1.0,1.0,...,1.785714,3.0,4.0,2.565217,3.0,3.133333,1.408759,1.467153,1.363636,1.0
2,3,1.0,1.0,3.984183,5.0,3.0,5.0,1.0,1.0,1.0,...,2.428571,3.0,3.0,2.913043,3.3,2.555333,3.248175,4.386861,3.424242,1.0


In [12]:
# Inspect the end of the scaled frame; the record taken from df3 was concatenated last
df_scaled.tail(3)

Unnamed: 0,Patient_ID,Anxiety,Apathy,Benton,Clock,Cognition,COGSTATE,Constipate,Depress,DopaDefic,...,Semantic,SleepDay,SleepNight,STAIA,STAIS,Symbol_Digit,Trail_Making_A,Trail_Making_B,UPSIT,Urine
248,249,1.0,1.0,3.130053,5.0,1.0,1.0,1.0,1.0,1.0,...,2.714286,5.0,2.0,3.434783,3.7,2.722,1.671533,2.255474,4.151515,2.0
249,250,1.0,1.0,3.130053,5.0,1.0,1.0,1.0,2.0,1.0,...,3.285714,1.0,4.0,3.26087,3.6,3.0,1.49635,1.875912,4.878788,1.0
250,999,,,1.0,3.666667,,,,,,...,,,,,,,,,2.939394,


In [13]:
# Reshape wide -> long: one row per (Patient_ID, Feature) pair with its Value
df_melted = pd.melt(
    df_scaled,
    id_vars='Patient_ID',
    var_name='Feature',
    value_name='Value',
)
print(df_melted.shape)
df_melted.head(2)

(11295, 3)


Unnamed: 0,Patient_ID,Feature,Value
0,1,Anxiety,2.333333
1,2,Anxiety,2.333333


In [14]:
# Mean scaled value per feature, stored in a one-column frame named 'mean Value'
df_n_ratings = (
    df_melted.groupby('Feature')['Value']
    .mean()
    .to_frame(name='mean Value')
)
# Show the ten features with the highest mean
df_n_ratings.sort_values(by='mean Value', ascending=False).head(10)


Unnamed: 0_level_0,mean Value
Feature,Unnamed: 1_level_1
Clock,4.498008
Montreal_Cognitive,4.281231
Hopkins_Recog,4.029463
Benton,3.666662
Hopkins,3.514182
STAIS,3.4808
UPSIT,3.467705
STAIA,3.299826
LetterNumber,3.061867
Modif_Boston,2.919692
