In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from tsfresh import extract_features

# Load both pressure tables; the unnamed leading CSV column is relabelled
# 'patient_id' in each of them.
first_column_label = {'Unnamed: 0': 'patient_id'}

carotid_df = pd.read_csv('data/carotid_pressure.csv').rename(columns=first_column_label)
illiac_df = pd.read_csv('data/illiac_pressure.csv').rename(columns=first_column_label)


def _drop_sparse_rows(frame, max_nans=100):
    """Return ``frame`` without the rows that hold more than ``max_nans`` NaNs.

    The per-row NaN count is computed in a single vectorised pass and applied
    as a boolean mask. Surviving rows keep their original index labels, and
    the result does not rely on the index being a default 0..n-1 range (the
    previous loop dropped rows by a positional counter used as a label).
    """
    nans_per_row = frame.isna().sum(axis=1)
    return frame.loc[nans_per_row <= max_nans]


# Remove recordings with too many missing samples from each table and report
# the row counts before and after.
print("Rows in carotid_df before removing rows with >100Nans:", len(carotid_df))
carotid_df = _drop_sparse_rows(carotid_df)
print("Rows in carotid_df after removing rows with >100Nans:", len(carotid_df))
print(carotid_df)

print("Rows in illiac_df before removing rows with >100Nans:", len(illiac_df))
illiac_df = _drop_sparse_rows(illiac_df)
print("Rows in illiac_df after removing rows with >100Nans:", len(illiac_df))
print(illiac_df)

# Restrict each table to the row labels that are still present in the other one.
carotid_has_match = carotid_df.index.isin(illiac_df.index)
illiac_has_match = illiac_df.index.isin(carotid_df.index)
carotid_df_2 = carotid_df.loc[carotid_has_match]
illiac_df_2 = illiac_df.loc[illiac_has_match]


# Report the size and the contents of both aligned tables.
for caption, aligned in (("New length carotid df 2", carotid_df_2),
                         ("New length illiac df 2", illiac_df_2)):
    print(caption, len(aligned))
    print(aligned)

# Separate the label from the features.
# The label is read from the carotid table only.
y = carotid_df_2.loc[:, ['target']].copy()

# Carotid features: remove the leading column first, then the label column.
carotid_without_first = carotid_df_2.drop(columns=carotid_df_2.columns[0])
X_carotid = carotid_without_first.drop(columns='target')

# Illiac features: remove the label column first, then whichever column leads
# the remaining frame.
illiac_without_target = illiac_df_2.drop(columns='target')
X_illiac = illiac_without_target.drop(columns=illiac_without_target.columns[0])


#Cleanup
def _fill_and_smooth(signals):
    """Fill the gaps in every recording and smooth it with a Gaussian kernel.

    ``signals`` holds one recording per row and one time sample per column.
    Returns a new DataFrame with the same index and columns.
    """
    # axis=1 interpolates along each row, i.e. between neighbouring time
    # samples of the same recording. The pandas default (axis=0) would fill a
    # gap from the rows above and below it, which belong to other patients.
    filled = signals.interpolate(method="linear", axis=1, limit_direction="both")

    # Kernel width = mean of the per-column standard deviations.
    # NOTE(review): this turns an amplitude spread into a width measured in
    # samples and is computed before the train/test split; kept as in the
    # original pipeline, but a fixed, tuned sigma may be more appropriate.
    sigma_avg = np.mean(filled.std())

    # Filtering the 2-D array along axis=1 smooths every row independently,
    # which is what the former row-by-row loop did.
    smoothed = scipy.ndimage.gaussian_filter1d(
        filled.to_numpy(dtype=float), sigma_avg, axis=1
    )
    return pd.DataFrame(smoothed, index=filled.index, columns=filled.columns)


X_carotid = _fill_and_smooth(X_carotid)
X_illiac = _fill_and_smooth(X_illiac)

    
# Hold out 10% of the rows for testing.
# A single seeded call partitions the carotid features, the illiac features
# and the labels with the same row selection.
(X_train_carotid, X_test_carotid,
 X_train_illiac, X_test_illiac,
 y_train, y_test) = train_test_split(
    X_carotid, X_illiac, y, test_size=0.10, random_state=0
)


#Scale the data
# Each site gets its own scaler, fitted on the training rows only. The test
# rows are transformed with the training statistics; refitting on the test set
# would scale it with its own mean/variance, so train and test features would
# no longer be on the same scale.
scaler_carotid = StandardScaler()
X_train_carotid = scaler_carotid.fit_transform(X_train_carotid)
X_test_carotid = scaler_carotid.transform(X_test_carotid)

scaler_illiac = StandardScaler()
X_train_illiac = scaler_illiac.fit_transform(X_train_illiac)
X_test_illiac = scaler_illiac.transform(X_test_illiac)


# Combine Carotid and Illiac inputs. This might also be tricky...be creative here!
# Only the carotid features feed the model at present; the stacked two-site
# variant is kept below, disabled.
X_train, X_test = X_train_carotid, X_test_carotid
#X_train = np.hstack((X_train_carotid, X_train_illiac))
#X_test = np.hstack((X_test_carotid, X_test_illiac))

# Flatten the label containers to 1-D arrays.
y_train = np.asarray(y_train).ravel()
y_test = np.asarray(y_test).ravel()
        
# Train a random forest classifier on the training set.
# The forest is seeded like the train/test split above so that repeated runs
# of the notebook report the same predictions and accuracy.
classifier = RandomForestClassifier(random_state=0)
classifier.fit(X_train, y_train)


# Predict the labels of the held-out test set.
y_pred = classifier.predict(X_test)


# Confusion matrix of true vs. predicted labels (kept in `cm`, not printed).
cm = confusion_matrix(y_test, y_pred)

# Fraction of test rows classified correctly.
stats = accuracy_score(y_test, y_pred)
modelLearn = True


print(stats)



print(y_test)
print(y_pred)






#screen = pd.options.display
#screen.max_columns = 1000
#screen.max_rows = 3500
#print(carotid_df_2)
#print(illiac_df_2)



Rows in carotid_df before removing rows with >100Nans: 3494
Rows in carotid_df after removing rows with >100Nans: 3045
      patient_id  carotid_t_0  carotid_t_1  carotid_t_2  carotid_t_3  \
0              0    61.557625    58.796436    60.827158    64.123071   
2              2    74.936590    75.564313    75.399869    75.712397   
3              3    72.934124    75.031846    75.370799    75.422582   
4              4    62.530546    65.121261    71.311909    66.524650   
5              5    76.601938    81.123482    78.137922    79.719750   
...          ...          ...          ...          ...          ...   
3486        3486    67.385692    67.169153    64.760777    66.760339   
3487        3487    65.730373    65.835182    64.006698    66.837689   
3489        3489    73.893931    76.653160    76.126752    74.634942   
3490        3490    66.707998    68.634724    67.538312    68.542903   
3491        3491    76.340137    79.251945    76.495170    79.716762   

      carotid_t_