------
# **Dementia Patients -- Analysis and Prediction**
### ***Author : Akhilesh Vyas***
### ****Date : August, 2019****



# ***Result Plots***

- <a href='#00'>0. Setup </a>
    - <a href='#00.1'>0.1. Load libraries </a>
    - <a href='#00.2'>0.2. Define paths </a>

- <a href='#01'>1. Data Preparation </a>  
    - <a href='#01.1'>1.1. Read Data </a> 
    - <a href='#01.2'>1.2. Prepare data  </a>
    - <a href='#01.3'>1.3. Prepare target </a>
    - <a href='#01.4'>1.4. Removing Unwanted Features </a>
    
- <a href='#02'>2. Data Analysis</a> 
    - <a href='#02.1'>2.1. Feature </a> 
    - <a href='#02.2'>2.2. Target </a> 
    
- <a href='#03'>3. Data Preparation and Vector Transformation</a>

- <a href='#04'>4. Analysis and Imputing Missing Values </a>

- <a href='#05'>5. Feature Analysis</a> 
    - <a href='#05.1'>5.1. Correlation Matrix</a>
    - <a href='#05.2'>5.2. Feature and target </a>
    - <a href='#05.3'>5.3. Feature Selection Models </a>
    
- <a href='#06'>6. Machine Learning - Classification Model</a> 

# <a id='00'>0. Setup </a>

# <a id='00.1'>0.1 Load libraries </a>

Loading Libraries

In [None]:
import sys
sys.path.insert(1, '../preprocessing/')
import numpy as np
import pickle
import scipy.stats as spstats
import matplotlib.pyplot as plt
import seaborn as sns
import pandas_profiling
from sklearn.datasets.base import Bunch
#from data_transformation_cls import FeatureTransform
from ast import literal_eval
import plotly.figure_factory as ff
import plotly.offline as py
import plotly.graph_objects as go

import pandas as pd
# Show every column and every row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# Never truncate cell contents. None is the "no limit" value on current
# pandas; the legacy -1 sentinel is only used as a fallback for old releases
# whose option validator rejects None.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

from ordered_set import OrderedSet

from func_def import *

%matplotlib inline

# <a id='00.2'>0.2 Define paths </a>

In [None]:
# Base directory for the input and output files of this notebook.
# File names are appended with plain string concatenation (data_path + name),
# so the value has to keep its trailing slash.
# Earlier locations and the copy command, kept for reference:
# !cp -r ../../../datalcdem/data/optima/dementia_18July/data_notasked/ ../../../datalcdem/data/optima/dementia_18July/data_notasked_mmse_0_30/
#data_path = '../../../datalcdem/data/optima/dementia_03_2020/data_filled_wiiliam/'
#result_path = '../../../datalcdem/data/optima/dementia_03_2020/data_filled_wiiliam/results/'
#optima_path = '../../../datalcdem/data/optima/optima_excel/'

data_path = '../../data/'

In [None]:
# Read the per-patient table.
patient_df = pd.read_csv(data_path + 'patients.csv')
print(patient_df.dtypes)

# Parse every column whose name contains 'Date' into datetimes.
for col in patient_df.columns:
    if 'Date' in col:
        patient_df[col] = pd.to_datetime(patient_df[col])

# Keep the patient-level features used later and expose ageAtFirstEpisode as
# 'age'. rename() returns a new frame here; renaming a column selection with
# inplace=True can trigger pandas' SettingWithCopyWarning.
patient_df = patient_df[
    ['patient_id', 'gender', 'smoker', 'education', 'ageAtFirstEpisode', 'apoe']
].rename(columns={'ageAtFirstEpisode': 'age'})
patient_df.head(5)

# <a id='01'>1. Data Preparation </a> 

## <a id='01.1'>1.1. Read Data</a>

In [None]:
#Preparation Features from Raw data

# Extracting selected features from Raw data
def rename_columns(col_list):
    """Build a rename mapping from raw report headers to short column names.

    'GLOBAL_PATIENT_DB_ID' maps to 'patient_id'. For every other header the
    first known section prefix contained in it (checked in the order listed
    below) is removed, and all spaces are replaced by underscores.

    Parameters
    ----------
    col_list : iterable of str
        Raw column headers.

    Returns
    -------
    dict
        Maps each raw header to its new name.
    """
    section_prefixes = (
        'CAMDEX SCORES: ',
        'CAMDEX ADMINISTRATION 1-12: ',
        'DIAGNOSIS 334-351: ',
        'OPTIMA DIAGNOSES V 2010: ',
        'PM INFORMATION: ',
    )
    renamed = {}
    for header in col_list:
        if header == 'GLOBAL_PATIENT_DB_ID':
            renamed[header] = 'patient_id'
            continue
        # Only the first matching prefix is stripped; any other stays in place.
        prefix = next((p for p in section_prefixes if p in header), None)
        short = header if prefix is None else header.replace(prefix, '')
        renamed[header] = short.replace(' ', '_')
    return renamed


# Raw report columns to extract: patient key, episode date, MMSE, BMI and the
# diagnosis / severity fields.
columns_selected = [
    'GLOBAL_PATIENT_DB_ID',
    'EPISODE_DATE',
    'CAMDEX SCORES: MINI MENTAL SCORE',
    'CLINICAL BACKGROUND: BODY MASS INDEX',
    'DIAGNOSIS 334-351: ANXIETY/PHOBIC',
    'OPTIMA DIAGNOSES V 2010: CERBRO-VASCULAR DISEASE PRESENT',
    'DIAGNOSIS 334-351: DEPRESSIVE ILLNESS',
    'OPTIMA DIAGNOSES V 2010: DIAGNOSTIC CODE',
    'CAMDEX ADMINISTRATION 1-12: EST OF SEVERITY OF DEPRESSION',
    'CAMDEX ADMINISTRATION 1-12: EST SEVERITY OF DEMENTIA',
    'DIAGNOSIS 334-351: PRIMARY PSYCHIATRIC DIAGNOSES',
    'OPTIMA DIAGNOSES V 2010: PETERSEN MCI',
]

# Add the entries of features_all without duplicating columns.
# NOTE(review): features_all is not defined in this cell; presumably it comes
# from `func_def` -- confirm.
columns_selected = list(OrderedSet(columns_selected).union(OrderedSet(features_all)))

# TODO: think about other columns, e.g. dementia, social, sleeping habits.
df_datarequest = pd.read_excel(data_path + 'Optima_Data_Report_Cases_6511_filled.xlsx')
display(df_datarequest.head(1))
df_datarequest_features = df_datarequest[columns_selected]
display(df_datarequest_features.columns)

# Shorten the raw headers. rename() returns a new frame; renaming the column
# selection with inplace=True can trigger pandas' SettingWithCopyWarning.
columns_renamed = rename_columns(df_datarequest_features.columns.tolist())
df_datarequest_features = df_datarequest_features.rename(columns=columns_renamed)
patient_com_treat_fea_raw_df = df_datarequest_features  # TODO: needs to be changed
display(patient_com_treat_fea_raw_df.head(5))

# Attach the patient-level features to every episode row.
patient_df = patient_com_treat_fea_raw_df.merge(patient_df, how='inner', on=['patient_id'])

# Age at each episode = age at first episode + years elapsed since then.
# The earliest date per patient is taken with min() because the rows are only
# sorted further down, and the year length is a pandas Timedelta because
# np.timedelta64(1, 'D') * 365.25 is truncated by NumPy to 365 whole days.
first_episode = patient_df.groupby(by=['patient_id'])['EPISODE_DATE'].transform('min')
patient_df['age'] = patient_df['age'] + (
    (patient_df['EPISODE_DATE'] - first_episode) / pd.Timedelta(days=365.25)
)

# Save the merged table (written before sorting, row index omitted).
patient_df.to_csv(data_path + 'patient_com_treat_fea_filled_sel_col.csv', index=False)

# patient_com_treat_fea_raw_df = patient_com_treat_fea_raw_df.drop_duplicates(subset=['patient_id', 'EPISODE_DATE'])
patient_df.sort_values(by=['patient_id', 'EPISODE_DATE'], inplace=True)

display(patient_df.head(5))


In [None]:
# Summary statistics of every column.
display(patient_df.describe(include='all'))
# info() prints its report itself and returns None, so it is called directly
# instead of being wrapped in display(), which added a stray "None" output.
patient_df.info()

# Number of missing values in each row, counted in one vectorised pass
# instead of one iloc lookup per row.
tmp_l = patient_df.isnull().sum(axis=1).tolist()

# Distribution of the per-row missing-value counts.
plt.hist(tmp_l)
plt.show()

In [None]:
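# Find NaN and "not asked" values and replace them with suitable values.
# NOTE: this step is currently disabled -- the code below is wrapped in a string literal.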
'''print (patient_df.columns.tolist())
notasked_columns = ['ANXIETY/PHOBIC', 'CERBRO-VASCULAR_DISEASE_PRESENT', 'DEPRESSIVE_ILLNESS','EST_OF_SEVERITY_OF_DEPRESSION', 'EST_SEVERITY_OF_DEMENTIA', 
                    'PRIMARY_PSYCHIATRIC_DIAGNOSES']
print ('total nan values %: ', 100*patient_df.isna().sum().sum()/patient_df.size)
patient_df.loc[:, notasked_columns] =  patient_df.loc[:, notasked_columns].replace([9], [np.nan])
print ('total nan values % after considering notasked: ', 100*patient_df.isna().sum().sum()/patient_df.size)
display(patient_df.isna().sum())
notasked_columns.append('DIAGNOSTIC_CODE')
notasked_columns.append('education')
patient_df.loc[:, notasked_columns] = patient_df.groupby(by=['patient_id'])[notasked_columns].transform(lambda x: x.fillna(method='pad'))
patient_df.loc[:, ['CLINICAL_BACKGROUND:_BODY_MASS_INDEX']] = patient_df.groupby(by=['patient_id'])[['CLINICAL_BACKGROUND:_BODY_MASS_INDEX']].transform(lambda x: x.interpolate())
patient_df.loc[:, ['CLINICAL_BACKGROUND:_BODY_MASS_INDEX']] = patient_df.groupby(by=['patient_id'])[['CLINICAL_BACKGROUND:_BODY_MASS_INDEX']].transform(lambda x: x.fillna(method='pad'))
print ('total nan values % after filling : ', 100*patient_df.isna().sum().sum()/patient_df.size)
display(patient_df.isna().sum())'''

In [None]:
# Per-episode misdiagnosis labels.
misdiagnosed_df = pd.read_csv(data_path + 'misdiagnosed.csv')
display(misdiagnosed_df.head(5))

# Parse the episode dates so they can be used as a merge key.
misdiagnosed_df['EPISODE_DATE'] = pd.to_datetime(misdiagnosed_df['EPISODE_DATE'])

# Left join on (patient_id, EPISODE_DATE): every row of patient_df is kept and
# receives the two label columns where a matching label row exists.
merge_keys = ['patient_id', 'EPISODE_DATE']
label_table = misdiagnosed_df[merge_keys + ['Misdiagnosed', 'Misdiagnosed1']]
patient_df = patient_df.merge(label_table, on=merge_keys, how='left')
display(patient_df.head(5))

In [None]:
# Checkpoint the labelled table to disk; the row index is not written.
patient_df.to_csv(data_path+'patient_df.csv', index=False)

In [None]:
# Reload the checkpoint. read_csv is not asked to parse dates, so
# EPISODE_DATE is converted to datetimes right after loading.
patient_df = pd.read_csv(data_path + 'patient_df.csv').assign(
    EPISODE_DATE=lambda frame: pd.to_datetime(frame['EPISODE_DATE'])
)

In [None]:
# Years elapsed since each patient's first listed episode.
# The year length is a pandas Timedelta of 365.25 days: the former
# np.timedelta64(1, 'D') * 365.25 is truncated by NumPy to a 365-day
# timedelta, which silently dropped the quarter day.
patient_df['durations(years)'] = patient_df.groupby(by='patient_id')['EPISODE_DATE'].transform(
    lambda x: (x - x.iloc[0]) / pd.Timedelta(days=365.25)
)
# MMSE score of the same patient's previous row (NaN on each patient's first row).
patient_df['MINI_MENTAL_SCORE_PRE'] = patient_df.groupby(by='patient_id')['MINI_MENTAL_SCORE'].shift(1)

In [None]:
# Summary statistics of BMI, inspected here for out-of-range values.
patient_df[['CLINICAL_BACKGROUND:_BODY_MASS_INDEX']].describe() # Out of Range values

In [None]:
# Blank out BMI readings above 54 or below 8.
# A single .loc assignment replaces the chained df[col][mask] = ... form,
# which may write to a temporary copy and leave patient_df unchanged.
bmi_column = 'CLINICAL_BACKGROUND:_BODY_MASS_INDEX'
bmi_out_of_range = (patient_df[bmi_column] > 54) | (patient_df[bmi_column] < 8)
patient_df.loc[bmi_out_of_range, bmi_column] = np.nan

In [None]:
# Summary statistics of BMI again, after the out-of-range readings were set to NaN.
patient_df[['CLINICAL_BACKGROUND:_BODY_MASS_INDEX']].describe()

In [None]:
# drop unnecessary columns
# patient_df.drop(columns=['patient_id', 'EPISODE_DATE'], inplace=True)

In [None]:
# Keep only the rows that have a previous MMSE score, i.e. drop every row
# whose MINI_MENTAL_SCORE_PRE is missing.
has_previous_score = patient_df['MINI_MENTAL_SCORE_PRE'].notna()
patient_df = patient_df[has_previous_score]


In [None]:
# Distinct raw values of the categorical features and of both label columns.
tuple(
    patient_df[name].unique()
    for name in ('gender', 'smoker', 'education', 'apoe', 'Misdiagnosed1', 'Misdiagnosed')
)

In [None]:
# Encode the categorical features as numbers.
patient_df['smoker'] = patient_df['smoker'].replace(['smoker', 'no_smoker'], [1, 0])
patient_df['education'] = patient_df['education'].replace(['medium', 'higher', 'basic'], [1, 2, 0])
# Labels: NO -> 0, YES -> 1, UNKNOWN -> 2.
patient_df['Misdiagnosed1'] = patient_df['Misdiagnosed1'].replace(['NO', 'YES', 'UNKNOWN'], [0, 1, 2])
patient_df['Misdiagnosed'] = patient_df['Misdiagnosed'].replace(['NO', 'YES', 'UNKNOWN'], [0, 1, 2])
# One-hot encode gender and apoe. The dtype is pinned to uint8 because newer
# pandas releases default to bool dummies, which the numeric-dtype check that
# follows in this notebook (float64 / int64 / uint8) does not recognise.
patient_df = pd.get_dummies(patient_df, columns=['gender', 'apoe'], dtype='uint8')
# Two free-text entries found in the data are treated as missing values.
patient_df.replace(
    ['mixed mitral & Aortic Valve disease', 'Bilateral knee replacements'],
    [np.nan, np.nan],
    inplace=True,
)

In [None]:
# Column dtypes after encoding.
patient_df.dtypes

In [None]:
# Columns that are already float64 / int64 / uint8 / datetime64[ns] are left
# alone; every other column is printed and converted with to_numeric, turning
# values that cannot be parsed into NaN.
accepted_dtypes = ('float64', 'int64', 'uint8', 'datetime64[ns]')
for column_name, column_dtype in patient_df.dtypes.items():
    if any(column_dtype == accepted for accepted in accepted_dtypes):
        continue
    print(column_name)
    patient_df[column_name] = pd.to_numeric(patient_df[column_name], errors='coerce')

# Every remaining missing value is replaced by the sentinel -9.
patient_df = patient_df.fillna(-9)

In [None]:
# Keep the rows whose Misdiagnosed code is below 2 (0 = NO, 1 = YES; 2 = UNKNOWN is dropped).
# NOTE(review): rows whose label was missing were filled with -9 in the
# previous step and also satisfy "< 2" -- confirm that keeping them is intended.
patient_df = patient_df.loc[patient_df['Misdiagnosed'].lt(2)]

# Cast every integer-like or object column to float64.
float_casts = {
    name: 'float64'
    for name, dtype in patient_df.dtypes.items()
    if 'int' in str(dtype) or str(dtype) == 'object'
}
patient_df = patient_df.astype(float_casts)
patient_df.describe()

In [None]:
# Columns excluded from the model inputs: identifiers, both label columns,
# the MMSE target and PETERSEN_MCI.
excluded_columns = ['patient_id', 'EPISODE_DATE', 'Misdiagnosed1', 'MINI_MENTAL_SCORE', 'PETERSEN_MCI', 'Misdiagnosed']
patient_df_X = patient_df.drop(columns=excluded_columns)

# Targets: the two categorical labels and the real-valued MMSE score.
patient_df_y_cat, patient_df_y_cat_s, patient_df_y_real = (
    patient_df[target] for target in ('Misdiagnosed1', 'Misdiagnosed', 'MINI_MENTAL_SCORE')
)

In [None]:
# Shapes of the feature frame and the three target vectors. The identical
# print statement used to appear twice; it is now emitted once.
print(patient_df_X.shape, patient_df_y_cat.shape, patient_df_y_cat_s.shape, patient_df_y_real.shape)

In [None]:
# Rebuild the feature frame from its raw values, keeping column labels and index.
patient_df_X_fill_data = pd.DataFrame(
    patient_df_X.values, index=patient_df_X.index, columns=patient_df_X.columns
)

# Split on Misdiagnosed1: label-0 rows form the training set, label-1 rows the test set.
label0_rows = patient_df_y_cat == 0
label1_rows = patient_df_y_cat == 1
patient_df_X_train, patient_df_y_train = patient_df_X_fill_data[label0_rows], patient_df_y_real[label0_rows]
patient_df_X_test, patient_df_y_test = patient_df_X_fill_data[label1_rows], patient_df_y_real[label1_rows]

# Same split, driven by the Misdiagnosed label instead.
label0_rows_s = patient_df_y_cat_s == 0
label1_rows_s = patient_df_y_cat_s == 1
patient_df_X_s_train, patient_df_y_s_train = patient_df_X_fill_data[label0_rows_s], patient_df_y_real[label0_rows_s]
patient_df_X_s_test, patient_df_y_s_test = patient_df_X_fill_data[label1_rows_s], patient_df_y_real[label1_rows_s]


In [None]:
# Write the Misdiagnosed1-based split to CSV files, without the row index.
for split_part, file_name in (
    (patient_df_X_train, 'X_train.csv'),
    (patient_df_y_train, 'y_train.csv'),
    (patient_df_X_test, 'X_test.csv'),
    (patient_df_y_test, 'y_test.csv'),
):
    split_part.to_csv(data_path + file_name, index=False)

In [None]:
# Shapes of the Misdiagnosed1-based split, then of the Misdiagnosed-based split.
for split in (
    (patient_df_X_train, patient_df_y_train, patient_df_X_test, patient_df_y_test),
    (patient_df_X_s_train, patient_df_y_s_train, patient_df_X_s_test, patient_df_y_s_test),
):
    print(*[part.shape for part in split])

In [None]:
# NumPy views of the Misdiagnosed1-based split; targets become column vectors.
X_train = patient_df_X_train.values
y_train = patient_df_y_train.values.reshape(-1, 1)
X_test = patient_df_X_test.values
y_test = patient_df_y_test.values.reshape(-1, 1)

# The same for the Misdiagnosed-based split.
X_s_train = patient_df_X_s_train.values
y_s_train = patient_df_y_s_train.values.reshape(-1, 1)
X_s_test = patient_df_X_s_test.values
y_s_test = patient_df_y_s_test.values.reshape(-1, 1)

In [None]:
# Random forest classifier for the Misdiagnosed1 label, evaluated with 5-fold
# cross-validation on the original data and on three class-rebalanced versions.

from collections import Counter

from sklearn.ensemble import RandomForestClassifier
from sklearn import svm, datasets
from sklearn.model_selection import cross_val_score, cross_validate, cross_val_predict
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import ClusterCentroids
from imblearn.under_sampling import RandomUnderSampler


def _cross_validate_random_forest(X, y):
    """Print 5-fold CV scores and a classification report for a new forest.

    Parameters
    ----------
    X, y : feature matrix and label vector passed straight to scikit-learn.

    Returns
    -------
    tuple
        (clf, y_pred): the (unfitted) 100-tree classifier that was evaluated
        and the out-of-fold predictions from cross_val_predict.
    """
    clf = RandomForestClassifier(n_estimators=100)
    print(cross_validate(clf, X, y, scoring=['recall_macro', 'precision_macro', 'f1_macro', 'accuracy'], cv=5))
    y_pred = cross_val_predict(clf, X, y, cv=5)
    print(classification_report(y, y_pred, target_names=['NO', 'YES']))
    return clf, y_pred


# NOTE(review): the resamplers below are fitted on the full data set before the
# cross-validation split, so resampled rows can land in both the training and
# the validation folds; scores on the rebalanced sets are likely optimistic.

# 1) Original class distribution.
X, y = patient_df_X_fill_data, patient_df_y_cat
clf, y_pred = _cross_validate_random_forest(X, y)

# 2) SMOTE over-sampling. fit_resample is used like the other samplers in this
#    cell; fit_sample is the legacy name and is missing from current
#    imbalanced-learn releases.
smote = SMOTE(sampling_strategy='auto')
data_p_s, target_p_s = smote.fit_resample(patient_df_X_fill_data, patient_df_y_cat)
print(data_p_s.shape, target_p_s.shape)
X, y = data_p_s, target_p_s
clf, y_pred = _cross_validate_random_forest(X, y)

# 3) Cluster-centroid under-sampling.
cc = ClusterCentroids(random_state=0)
X_resampled, y_resampled = cc.fit_resample(patient_df_X_fill_data, patient_df_y_cat)
print(sorted(Counter(y_resampled).items()))
X, y = X_resampled, y_resampled
clf, y_pred = _cross_validate_random_forest(X, y)

# 4) Random under-sampling.
rus = RandomUnderSampler(random_state=0)
X, y = rus.fit_resample(patient_df_X_fill_data, patient_df_y_cat)
clf, y_pred = _cross_validate_random_forest(X, y)

In [None]:
# Alias the Misdiagnosed1-based arrays: "positive" is the label-0 set
# (X_train / y_train) and "negative" the label-1 set (X_test / y_test).
X_positive, y_positive, X_negative, y_negative = X_train, y_train, X_test, y_test

In [None]:
# Display the "positive" feature array.
X_positive

In [None]:
# Accumulators used by the repeated-evaluation cells that follow: a list of
# per-repetition reports and the pooled true / predicted label vectors.
cr_score_list = list()
y_true_5 = np.array([])
y_pred_5 = np.array([])
(y_true_5.shape, y_pred_5.shape)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


def find_misdiagonsed1(frame=None):
    """Label every row of a per-episode table as 'YES', 'NO' or 'UNKNOWN'.

    Rows are read once, in table order. For each patient, every row up to and
    including the first one with PETERSEN_MCI == 1 or AD_STATUS == 1 is
    'UNKNOWN'; that first flagged row supplies the patient's baseline
    MMSE_Predicted and durations(years). Every later row of the same patient
    is 'YES' when MMSE_Predicted exceeds the baseline by
      * more than 5 points within 1 year (inclusive),
      * more than 3 points between 1 and 2 years (both exclusive), or
      * more than 0 points after 2 years or more,
    and 'NO' otherwise.

    Parameters
    ----------
    frame : DataFrame, optional
        Table with the columns patient_id, PETERSEN_MCI, AD_STATUS,
        MMSE_Predicted and durations(years). Defaults to the notebook-level
        patient_df_misdiag_predmis.

    Returns
    -------
    list of str
        One label per row, in the row order of the table.
    """
    if frame is None:
        frame = patient_df_misdiag_predmis

    needed = ['patient_id', 'PETERSEN_MCI', 'AD_STATUS', 'MMSE_Predicted', 'durations(years)']
    baselines = {}  # patient_id -> (MMSE, duration) at the first flagged row
    l_misdiagno = []
    # One pass over the table with values unpacked by column, instead of
    # re-filtering the whole frame per patient and indexing rows by position.
    for pat_id, petersen_mci, ad_status, mmse, duration in frame[needed].itertuples(index=False, name=None):
        if pat_id not in baselines:
            if petersen_mci == 1.0 or ad_status == 1.0:
                baselines[pat_id] = (mmse, duration)
            l_misdiagno.append('UNKNOWN')
            continue
        base_mmse, base_duration = baselines[pat_id]
        gain = mmse - base_mmse
        elapsed = duration - base_duration
        improved = (
            (gain > 5.0 and elapsed <= 1.0)
            or (gain > 3.0 and 1.0 < elapsed < 2.0)
            or (gain > 0.0 and elapsed >= 2.0)
        )
        l_misdiagno.append('YES' if improved else 'NO')
    return l_misdiagno


# Loop-invariant inputs: the label file and the identifying columns of
# patient_df do not change between repetitions, so they are prepared once.
patient_df_misdiag = pd.read_csv(data_path+'misdiagnosed.csv')
patient_df_misdiag['EPISODE_DATE'] = pd.to_datetime(patient_df_misdiag['EPISODE_DATE'])
patient_df_tmp = patient_df[['patient_id', 'EPISODE_DATE', 'DIAGNOSTIC_CODE', 'smoker', 'gender_Male', 'age', 'durations(years)', 'MINI_MENTAL_SCORE_PRE', ]]

for i in range(5):
    # Hold out part of the "positive" rows and test on them together with all
    # "negative" rows.
    X_train, X_test_pos, y_train, y_test_pos = train_test_split(X_positive, y_positive, test_size=0.136)
    print (X_train.shape, X_test_pos.shape, y_train.shape, y_test_pos.shape)
    X_test, y_test = np.append(X_negative, X_test_pos, axis=0), np.append(y_negative, y_test_pos, axis=0)
    #X_test, y_test = X_negative, y_negative
    print (X_test.shape, y_test.shape)

    # Regress the MMSE score; y is passed as a 1-D vector, the shape
    # scikit-learn expects for a single target.
    regr = RandomForestRegressor(max_depth=2, random_state=0)
    regr.fit(X_train, y_train.ravel())
    #print(regr.feature_importances_)

    y_pred = regr.predict(X_test)
    #print(regr.predict(X_test))
    print (regr.score(X_test, y_test))
    print (regr.score(X_train, y_train))

    # Test features with the predicted MMSE appended as the last column.
    X_y_test = np.append(X_test, y_pred.reshape(-1,1), axis=1)
    print (X_test.shape, y_test.shape, X_y_test.shape)
    df_X_y_test = pd.DataFrame(data=X_y_test, columns=patient_df_X_fill_data.columns.tolist()+['MMSE_Predicted'])

    # Recover patient_id / EPISODE_DATE for the test rows by joining on all
    # shared feature columns.
    # NOTE(review): this is a join on float-valued columns; rows with identical
    # feature values would be matched more than once -- confirm this is acceptable.
    df_X_y_test_tmp = df_X_y_test[['smoker', 'gender_Male', 'DIAGNOSTIC_CODE', 'age', 'durations(years)', 'MINI_MENTAL_SCORE_PRE', 'MMSE_Predicted']]
    p_tmp = patient_df_tmp.merge(df_X_y_test_tmp)
    print (patient_df.shape, df_X_y_test_tmp.shape, p_tmp.shape)
    print (p_tmp.head(5))

    # Compare it with Predicted MMSE Scores and True MMSE values
    patient_df_misdiag_predmis = patient_df_misdiag.merge(p_tmp[['patient_id', 'EPISODE_DATE', 'MMSE_Predicted']], how='outer', on=['patient_id', 'EPISODE_DATE'])
    display(patient_df_misdiag_predmis.isna().sum())

    # Remember which rows carry a real prediction, then fall back to the
    # recorded MMSE score everywhere else.
    index_MMSE_Predicted = patient_df_misdiag_predmis['MMSE_Predicted'].notnull()
    patient_df_misdiag_predmis['MMSE_Predicted'] = patient_df_misdiag_predmis['MMSE_Predicted'].fillna(patient_df_misdiag_predmis['MINI_MENTAL_SCORE'])

    print (sum(patient_df_misdiag_predmis['MMSE_Predicted']!=patient_df_misdiag_predmis['MINI_MENTAL_SCORE']))

    # find Misdiagnosed: the labels are computed once and reused for both the
    # length print and the new column.
    predicted_labels = find_misdiagonsed1(patient_df_misdiag_predmis)
    print (len(predicted_labels))
    patient_df_misdiag_predmis['Misdiagnosed_Predicted'] = predicted_labels

    c2=patient_df_misdiag_predmis['Misdiagnosed1']!=patient_df_misdiag_predmis['Misdiagnosed_Predicted']
    # Score only the rows that received a model prediction.
    misdiagnosed1_true_pred= patient_df_misdiag_predmis[index_MMSE_Predicted][['Misdiagnosed1', 'Misdiagnosed_Predicted']].replace(['NO', 'YES'], [0,1])
    print(classification_report(misdiagnosed1_true_pred.Misdiagnosed1, misdiagnosed1_true_pred.Misdiagnosed_Predicted, target_names=['NO', 'YES']))
    y_true_5, y_pred_5 = np.append(y_true_5, misdiagnosed1_true_pred.Misdiagnosed1, axis=0), np.append(y_pred_5, misdiagnosed1_true_pred.Misdiagnosed_Predicted, axis=0)

print(y_true_5.shape, y_pred_5.shape)

In [None]:
# Metrics computed on the labels pooled over all repetitions.
df_all = pd.DataFrame(classification_report(y_true_5, y_pred_5, target_names=['NO', 'YES'], output_dict=True))
df_all = df_all.round(2)

# Split the pooled label vectors back into repetitions. This relies on every
# repetition having contributed exactly X_test.shape[0] rows.
n_range = int(y_true_5.shape[0]/X_test.shape[0])
y_shape = X_test.shape[0]
y_true_runs = y_true_5.reshape(n_range, y_shape)
y_pred_runs = y_pred_5.reshape(n_range, y_shape)

# One report per repetition. The list is rebuilt from scratch so that
# re-running the cell does not keep appending to earlier results.
cr_score_list = [
    classification_report(run_true, run_pred, target_names=['NO', 'YES'], output_dict=True)
    for run_true, run_pred in zip(y_true_runs, y_pred_runs)
]

print(cr_score_list)

# Stack exactly one frame per repetition. The earlier loop started from
# report 0 and then appended reports 0 .. n_range-2, so report 0 was counted
# twice and the last report never entered the average.
df_tot = pd.concat([pd.DataFrame(report) for report in cr_score_list], axis='rows')

# Average each metric row (precision, recall, ...) over the repetitions.
df_avg = df_tot.groupby(level=0, sort=False).mean().round(2)
acc, sup, acc1, sup1 = df_avg.loc['precision', 'accuracy'], df_avg.loc['support', 'macro avg'],\
df_all.loc['precision', 'accuracy'], df_all.loc['support', 'macro avg']

# Averaged and pooled metrics side by side; accuracy and support go into the headers.
pd.concat([df_avg.drop(columns='accuracy'), df_all.drop(columns='accuracy')], \
          keys= ['Average classification metrics (accuracy:{},  support:{})'.format(acc, sup),\
                 'Classification metrics (accuracy:{},  support:{})'.format(acc1, sup1)], axis=1)

In [None]:
# Confusion matrix of the labels pooled over all repetitions.
cm_all = confusion_matrix(y_true_5, y_pred_5)
print(cm_all)

# Split the pooled label vectors into equally sized repetitions of
# X_test.shape[0] rows each.
n_range = int(y_true_5.shape[0]/X_test.shape[0])
y_shape = X_test.shape[0]
true_by_run = y_true_5.reshape(n_range, y_shape)
pred_by_run = y_pred_5.reshape(n_range, y_shape)

# One confusion matrix per repetition.
cr_score_list = [
    confusion_matrix(true_by_run[run], pred_by_run[run]) for run in range(n_range)
]

print(cr_score_list)

# Element-wise average of the per-repetition matrices.
cr_score_np = np.array(cr_score_list)
cm_avg = cr_score_np.sum(axis=0) / len(cr_score_list)

print(cm_avg)