## Importing required libraries

In [406]:
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import RobustScaler,MinMaxScaler,StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import pickle
import numpy.ma as ma
import lightgbm as lgb
#from sklearn.datasets import make_classification
from sklearn.multioutput import MultiOutputClassifier
#from sklearn.utils import shuffle

## Reading the extracted the dataset

In [302]:
dataset_feb04=pd.read_csv("10k_Final_dataset.csv")
dataset_feb04=dataset_feb04.drop(["Unnamed: 0"],axis=1)
dataset_feb04=dataset_feb04.drop_duplicates()
dataset_feb04=dataset_feb04.reset_index()
dataset_feb04=dataset_feb04.drop(["index"],axis=1)
dataset_feb04

Unnamed: 0,patient_id,diagnosis_coded,concept_id,value_numeric,value_coded
0,16359994,1385083,1482681,0.883785,\N
1,16359994,1385083,1520177,74,\N
2,16359994,1385083,1520177,75,\N
3,16359994,1385083,1520177,77,\N
4,16359994,1385083,1520177,69,\N
...,...,...,...,...,...
6442641,19999784,1384995,1520694,32.1,\N
6442642,19999784,1384995,1520696,160,\N
6442643,19999784,1384995,1520694,31.6,\N
6442644,19999784,1384995,1520731,\N,1502522


## Taking only the diagnosis column and patient columns

In [303]:
dataset_feb13=dataset_feb04[["patient_id","diagnosis_coded"]]
dataset_feb13=dataset_feb13.drop_duplicates()
dataset_feb13=dataset_feb13.reset_index()
dataset_feb13=dataset_feb13.drop(["index"],axis=1)
dataset_feb13

Unnamed: 0,patient_id,diagnosis_coded
0,16359994,1385083
1,16360107,1385109
2,16360107,1385121
3,16360107,1474651
4,16360581,1384886
...,...,...
13016,19998497,1397715
13017,19999379,1397715
13018,19999379,1384870
13019,19999625,1385121


## Trying to see total how many patients and how many diagnoses

In [20]:
print("Number of patients in the dataset",len(dataset_feb13["patient_id"].unique()))
print("Number of diagnoses in the dataset",len(dataset_feb13["diagnosis_coded"].unique()))

Number of patients in the dataset 10000
Number of diagnoses in the dataset 10


## Trying to if there is any patients who have no diagnosis

In [21]:
dataset06_1=dataset_feb13[["patient_id","diagnosis_coded"]]
df_null=dataset06_1[dataset06_1['diagnosis_coded'].isnull()]
print(len(df_null["patient_id"].unique()),"number of patients have no diagnosis")

0 number of patients have no diagnosis


## Finding number of patients who have multiple diagnosis in this dataset

In [None]:
list_unique_patients=list(dataset_feb13["patient_id"].unique())
list_all_patients=list(dataset_feb13["patient_id"].values)
i=0
while len(list_unique_patients) > 0:
    patient_unique_number=len(list_unique_patients)
    print(patient_unique_number,"patients have at least",i+1,"diagnosis")
    uniquep=list_unique_patients
    allp=list_all_patients
    for k in range(len(uniquep)):
        #print(k)
        for j in range(len(allp)):
            if uniquep[k]==allp[j]:
                allp.remove(allp[j])
                break
    list_unique_patients=list(set(allp))
    list_all_patients=allp
    i=i+1        

# Creating columns for each concepts which have values in value_numeric column

### Preprocessing the dataset

In [28]:
dataset_feb13_1=dataset_feb04.drop(['value_coded'], axis=1) #we do not need value_coded column anymore
dataset_feb13_1["value_numeric"] = dataset_feb13_1["value_numeric"].str.replace(r'\\N','np.nan', regex=True) #replacing \N with 0
dataset_feb13_1=dataset_feb13_1.replace("np.nan",np.nan)
dataset_feb13_1["value_numeric"]=dataset_feb13_1['value_numeric'].astype(float) #converting the datatypes of value_numeric as float
dataset_feb13_1=dataset_feb13_1.dropna()
dataset_feb13_1=dataset_feb13_1.drop_duplicates()
dataset_feb13_1=dataset_feb13_1.reset_index(drop=True)
dataset_feb13_1

Unnamed: 0,patient_id,diagnosis_coded,concept_id,value_numeric
0,16359994,1385083,1482681,0.883785
1,16359994,1385083,1520177,74.000000
2,16359994,1385083,1520177,75.000000
3,16359994,1385083,1520177,77.000000
4,16359994,1385083,1520177,69.000000
...,...,...,...,...
6330662,19999784,1384995,1520694,31.500000
6330663,19999784,1384995,1520694,31.900000
6330664,19999784,1384995,1520694,32.100000
6330665,19999784,1384995,1520696,160.000000


### Getting value_numeric in different columns using get_dummies

In [29]:
dataset_feb13_1["concept_id"] = dataset_feb13_1["concept_id"].astype("category")
dataset_feb13_1['concept_id'].dtypes
dummies_1 = pd.get_dummies(dataset_feb13_1['concept_id'])
dummies_1=dummies_1.replace(0,np.nan)
dummies_1=dummies_1.astype(float)
dummies_1.values[dummies_1 == 1.0] = dataset_feb13_1['value_numeric']
dummies_1

Unnamed: 0,1482681,1520176,1520177,1520178,1520179,1520180,1520181,1520182,1520183,1520184,...,1529674,1529675,1529676,1529677,1529678,1529679,1529680,1529682,1529683,1529684
0,0.883785,,,,,,,,,,...,,,,,,,,,,
1,,,74.0,,,,,,,,...,,,,,,,,,,
2,,,75.0,,,,,,,,...,,,,,,,,,,
3,,,77.0,,,,,,,,...,,,,,,,,,,
4,,,69.0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6330662,,,,,,,,,,,...,,,,,,,,,,
6330663,,,,,,,,,,,...,,,,,,,,,,
6330664,,,,,,,,,,,...,,,,,,,,,,
6330665,,,,,,,,,,,...,,,,,,,,,,


### Taking the patient_id column only to merge with dummies dataframe

In [31]:
data_id_only=dataset_feb13_1[['patient_id']]
dataframe_numeric=pd.concat([data_id_only,dummies_1],axis=1)
dataframe_numeric=dataframe_numeric.dropna(axis=1,how='all')

### Now if there are multiple rows for one patient, we are squeezing the dataframe to reduce the total number of rows(and making it equal to the number of patients) by taking the mean value of the column values for individual patient

In [33]:
squeezed_numeric= dataframe_numeric.groupby('patient_id').agg('mean')
squeezed_numeric=squeezed_numeric.reset_index()
squeezed_numeric

Unnamed: 0,patient_id,1482681,1520176,1520177,1520178,1520179,1520180,1520181,1520182,1520183,...,1529674,1529675,1529676,1529677,1529678,1529679,1529680,1529682,1529683,1529684
0,16359994,0.883785,1.550000,78.500000,117.0,129.0,,,,16.100000,...,79.40,174.70,,,,,,,,
1,16360107,2.967860,2.200000,92.000000,38.4,,2454.0,,7.266667,15.500000,...,84.85,186.65,69.5,176.5,,,,,,
2,16360581,,,96.000000,,,,,,11.500000,...,,,,,,,,,,
3,16360590,2.304110,,84.500000,,,,,,11.000000,...,79.30,174.50,65.0,165.0,,,,,,
4,16360714,,,90.000000,,,,,,11.857143,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,19998350,,2.100000,81.666667,,32.0,,,,13.333333,...,,,,,,,,,,
9996,19998497,,1.500000,91.100000,24.0,199.5,,,,15.600000,...,,,,,,,,,,
9997,19999379,,,92.000000,,,,,,13.000000,...,,,,,,,,,,
9998,19999625,0.960741,0.100000,85.666667,65.0,,,,,11.833333,...,50.50,111.10,,,,,,,,


# Creating columns for each concepts which have values in value_coded column

### Preprocessing the dataset

In [115]:
dataset_feb13_2=dataset_feb04.drop(['value_numeric'], axis=1) #we do not need value_numeric column anymore
dataset_feb13_2["value_coded"] = dataset_feb13_2["value_coded"].str.replace(r'\\N','np.nan', regex=True) #replacing \N with 0
dataset_feb13_2=dataset_feb13_2.replace("np.nan",np.nan)
#dataset_feb13_2["value_coded"]=dataset_feb13_2['value_coded'].astype(float) #converting the datatypes of value_numeric as float
dataset_feb13_2=dataset_feb13_2.dropna()
dataset_feb13_2=dataset_feb13_2.drop_duplicates()
dataset_feb13_2=dataset_feb13_2.reset_index(drop=True)
dataset_feb13_2

Unnamed: 0,patient_id,diagnosis_coded,concept_id,value_coded
0,16359994,1385083,1520778,1502470
1,16360107,1385109,1520261,1502535
2,16360107,1385109,1520288,1502467
3,16360107,1385109,1520288,1502548
4,16360107,1385109,1520333,1502519
...,...,...,...,...
80737,19998497,1397715,1520417,1502499
80738,19999625,1385121,1520417,1502499
80739,19999625,1385121,1520778,1502470
80740,19999784,1384995,1520731,1502522


### Getting value_numeric in different columns using get_dummies

In [None]:
dataset_feb13_2["concept_id"] = dataset_feb13_2["concept_id"].astype("category")
dataset_feb13_2['concept_id'].dtypes
dummies_2 = pd.get_dummies(dataset_feb13_2['concept_id'])
dummies_2=dummies_2.replace(0,np.nan)
dummies_2=dummies_2.astype(float)
dummies_2.values[dummies_2 == 1.0] = dataset_feb13_2['value_coded']
dummies_2=dummies_2.fillna(3178353473).astype(int)
#dummies_2 = dummies_2.astype('int')
dummies_2

In [124]:
concept_coded_names=pd.read_csv("concept_coded_name.csv")
value_coded=concept_coded_names["Code"].unique().tolist()
name_coded=concept_coded_names["Name"].unique().tolist()
#len(value_coded)
for i in range(len(value_coded)):
    dummies_2=dummies_2.replace(value_coded[i],name_coded[i])
dummies_2=dummies_2.replace(3178353473,np.nan)
dummies_2

Unnamed: 0,1520187,1520206,1520214,1520215,1520234,1520256,1520261,1520274,1520276,1520287,...,1520778,1520779,1520781,1520782,1520784,1520785,1520786,1520787,1520789,1520792
0,,,,,,,,,,,...,Venipuncture,,,,,,,,,
1,,,,,,,Clear,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80737,,,,,,,,,,,...,,,,,,,,,,
80738,,,,,,,,,,,...,,,,,,,,,,
80739,,,,,,,,,,,...,Venipuncture,,,,,,,,,
80740,,,,,,,,,,,...,,,,,,,,,,


### Taking the patient_id column only to merge with dummies dataframe

In [125]:
data_id_only=dataset_feb13_2[['patient_id']]
dataframe_coded=pd.concat([data_id_only,dummies_2],axis=1)
dataframe_coded=dataframe_coded.dropna(axis=1,how='all')
dataframe_coded

Unnamed: 0,patient_id,1520187,1520206,1520214,1520215,1520234,1520256,1520261,1520274,1520276,...,1520778,1520779,1520781,1520782,1520784,1520785,1520786,1520787,1520789,1520792
0,16359994,,,,,,,,,,...,Venipuncture,,,,,,,,,
1,16360107,,,,,,,Clear,,,...,,,,,,,,,,
2,16360107,,,,,,,,,,...,,,,,,,,,,
3,16360107,,,,,,,,,,...,,,,,,,,,,
4,16360107,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80737,19998497,,,,,,,,,,...,,,,,,,,,,
80738,19999625,,,,,,,,,,...,,,,,,,,,,
80739,19999625,,,,,,,,,,...,Venipuncture,,,,,,,,,
80740,19999784,,,,,,,,,,...,,,,,,,,,,


### Now if there are multiple rows for one patient, we are squeezing the dataframe to reduce the total number of rows(and making it equal to the number of patients) by taking the mode of the column values for individual patient

In [None]:
#dataframe_coded.groupby(['patient_id']).agg(pd.Series.mode).to_frame()
#dataframe_coded.groupby('patient_id').agg(lambda x: x.value_counts().index[0])
list_concepts=dataframe_coded.columns.tolist()
list_concepts.pop(0)
df=pd.DataFrame()
df["patient_id"]=dataframe_coded["patient_id"].unique()
#df
for i in range(len(list_concepts)):
    print(i)
    df1=dataframe_coded.groupby(['patient_id'])[list_concepts[i]].agg(pd.Series.mode).to_frame()
    df=pd.merge(df,df1,on="patient_id")
    
df = df.astype(str)
df=df.replace('[]',np.nan)
squeezed_coded=df

## Now merging these two dataframes

In [382]:
# squeezed_coded=squeezed_coded.dropna(axis=1, how='all')
w=squeezed_coded
#identify all categorical variables
cat_columns = w.select_dtypes(['object']).columns

#convert all categorical variables to numeric
w[cat_columns] = w[cat_columns].apply(lambda x: pd.factorize(x)[0])
w
squeezed_coded=w

In [383]:
# final=pd.merge(squeezed_numeric, squeezed_coded, on="patient_id")
# final
data_numeric=squeezed_numeric.set_index('patient_id')
data_numeric
data_coded=squeezed_coded.set_index('patient_id')
data_coded

Unnamed: 0_level_0,1520187,1520206,1520214,1520215,1520234,1520256,1520261,1520274,1520276,1520287,...,1520778,1520779,1520781,1520782,1520784,1520785,1520786,1520787,1520789,1520792
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16359994,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,0,-1,-1,-1,-1,-1,-1,-1,-1,-1
16360107,-1,0,-1,-1,-1,-1,0,-1,-1,-1,...,1,-1,-1,-1,-1,-1,0,-1,0,-1
16360590,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,2,-1,-1,-1,-1,-1,-1,-1,-1,-1
16360714,-1,0,-1,-1,-1,-1,0,-1,-1,-1,...,2,-1,-1,-1,-1,-1,-1,-1,-1,-1
16361265,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,0,-1,-1,-1,-1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19997887,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
19997922,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,0,-1,1,-1
19998497,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
19999625,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,0,-1,-1,-1,-1,-1,-1,-1,-1,-1


In [384]:
#pd.merge(data_numeric, data_coded,left_index=True, right_index=True)
df2 = pd.concat([data_numeric, data_coded], axis=1)
df2.reset_index(inplace=True)
df2

Unnamed: 0,patient_id,1482681,1520176,1520177,1520178,1520179,1520180,1520181,1520182,1520183,...,1520778,1520779,1520781,1520782,1520784,1520785,1520786,1520787,1520789,1520792
0,16359994,0.883785,1.550000,78.500000,117.0,129.0,,,,16.100000,...,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,16360107,2.967860,2.200000,92.000000,38.4,,2454.0,,7.266667,15.500000,...,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,-1.0
2,16360581,,,96.000000,,,,,,11.500000,...,,,,,,,,,,
3,16360590,2.304110,,84.500000,,,,,,11.000000,...,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,16360714,,,90.000000,,,,,,11.857143,...,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,19998350,,2.100000,81.666667,,32.0,,,,13.333333,...,,,,,,,,,,
9996,19998497,,1.500000,91.100000,24.0,199.5,,,,15.600000,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
9997,19999379,,,92.000000,,,,,,13.000000,...,,,,,,,,,,
9998,19999625,0.960741,0.100000,85.666667,65.0,,,,,11.833333,...,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


## MAking the target variable dataframe

In [388]:
dataset_feb13=dataset_feb13.astype(str)
dataset_feb13_dia=dataset_feb13
dataset_feb13_dia=dataset_feb13_dia.replace({'1385187':1,'1385109':2,'1384870':3,'1384841':4,'1384995':5,'1397715':6,'1384886':7,'1474651':8,'1385083':9,'1385121':10})
dataset_feb13_dia["diagnosis_coded"] = dataset_feb13_dia["diagnosis_coded"].astype("category")
dataset_feb13_dia['diagnosis_coded'].dtypes
dummies_3 = pd.get_dummies(dataset_feb13_dia['diagnosis_coded'])
#dummies_3=dummies_3.replace(0,np.nan)
dummies_3=dummies_3.astype(float)
dummies_3.values[dummies_3 == 1.0] = dataset_feb13_dia['diagnosis_coded']
# dummies_3=dummies_2.fillna(3178353473).astype(int)
dummies_3 = dummies_3.astype('int')
dummies_3

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
0,0,0,0,0,0,0,0,0,9,0
1,0,2,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,10
3,0,0,0,0,0,0,0,8,0,0
4,0,0,0,0,0,0,7,0,0,0
...,...,...,...,...,...,...,...,...,...,...
13016,0,0,0,0,0,6,0,0,0,0
13017,0,0,0,0,0,6,0,0,0,0
13018,0,0,3,0,0,0,0,0,0,0
13019,0,0,0,0,0,0,0,0,0,10


In [389]:
data_id_only=dataset_feb13[['patient_id']]
dataframe_coded_3=pd.concat([data_id_only,dummies_3],axis=1)
dataframe_coded_3=dataframe_coded_3.dropna(axis=1,how='all')
dataframe_coded_3

Unnamed: 0,patient_id,1,2,3,4,5,6,7,8,9,10
0,16359994,0,0,0,0,0,0,0,0,9,0
1,16360107,0,2,0,0,0,0,0,0,0,0
2,16360107,0,0,0,0,0,0,0,0,0,10
3,16360107,0,0,0,0,0,0,0,8,0,0
4,16360581,0,0,0,0,0,0,7,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
13016,19998497,0,0,0,0,0,6,0,0,0,0
13017,19999379,0,0,0,0,0,6,0,0,0,0
13018,19999379,0,0,3,0,0,0,0,0,0,0
13019,19999625,0,0,0,0,0,0,0,0,0,10


In [390]:
squeezed_diagnosis= dataframe_coded_3.groupby('patient_id').agg('max')
squeezed_diagnosis=squeezed_diagnosis.reset_index()
squeezed_diagnosis

Unnamed: 0,patient_id,1,2,3,4,5,6,7,8,9,10
0,16359994,0,0,0,0,0,0,0,0,9,0
1,16360107,0,2,0,0,0,0,0,8,0,10
2,16360581,0,0,0,0,0,0,7,0,0,0
3,16360590,0,0,3,0,0,0,0,0,0,0
4,16360714,0,0,0,0,0,0,0,0,0,10
...,...,...,...,...,...,...,...,...,...,...,...
9995,19998350,0,2,0,0,0,0,0,0,0,0
9996,19998497,0,2,3,0,0,6,0,0,0,0
9997,19999379,0,0,3,0,0,6,0,0,0,0
9998,19999625,0,0,0,0,0,0,0,0,0,10


In [394]:
squeezed_diagnosis["patient_id"]=squeezed_diagnosis["patient_id"].astype(int)

In [395]:
merged=pd.merge(df2,squeezed_diagnosis,on="patient_id")
merged

Unnamed: 0,patient_id,1482681,1520176,1520177,1520178,1520179,1520180,1520181,1520182,1520183,...,1,2,3,4,5,6,7,8,9,10
0,16359994,0.883785,1.550000,78.500000,117.0,129.0,,,,16.100000,...,0,0,0,0,0,0,0,0,9,0
1,16360107,2.967860,2.200000,92.000000,38.4,,2454.0,,7.266667,15.500000,...,0,2,0,0,0,0,0,8,0,10
2,16360581,,,96.000000,,,,,,11.500000,...,0,0,0,0,0,0,7,0,0,0
3,16360590,2.304110,,84.500000,,,,,,11.000000,...,0,0,3,0,0,0,0,0,0,0
4,16360714,,,90.000000,,,,,,11.857143,...,0,0,0,0,0,0,0,0,0,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,19998350,,2.100000,81.666667,,32.0,,,,13.333333,...,0,2,0,0,0,0,0,0,0,0
9996,19998497,,1.500000,91.100000,24.0,199.5,,,,15.600000,...,0,2,3,0,0,6,0,0,0,0
9997,19999379,,,92.000000,,,,,,13.000000,...,0,0,3,0,0,6,0,0,0,0
9998,19999625,0.960741,0.100000,85.666667,65.0,,,,,11.833333,...,0,0,0,0,0,0,0,0,0,10


In [None]:
n_columns=len(merged.columns)

In [484]:
%%time
list_patients=merged["patient_id"].unique()

list_original=[]
list_prediction=[]
list_prediction_with_zero=[]
list_original_with_zero=[]

for i in range(len(list_patients)):
    if i>299: #doing for only first 300 patients for now
        break
    print("Patient:",i+1)
    patient_id=list_patients[i]
    test_set=merged.query("patient_id == @patient_id")
    training_set=merged.query("patient_id != @patient_id")
    training_set=training_set.reset_index()
    training_set=training_set.drop(['index'], axis=1)

    y_train=training_set.iloc[:,n_columns-10:n_columns]
    y_train=y_train.values
    #y_train.shape

    y_test=test_set.iloc[:,n_columns-10:n_columns]
    y_test=y_test.values
    #y_test.shape


    X_train=training_set.iloc[:,1:n_columns-10]
    X_train=X_train.values
    a=X_train
    X_train=np.where(pd.isnull(a), ma.array(a, mask=pd.isnull(a)).mean(axis=0), a)  
    #X_train.shape


    X_test=test_set.iloc[:,1:n_columns-10]
    X_test=X_test.values
    a=X_test
    X_test=np.where(pd.isnull(a), ma.array(a, mask=pd.isnull(a)).mean(axis=0), a)  
    #X_test.shape
    
    #oversample = SMOTE()
    #X_train_res, y_train_res = oversample.fit_resample(X_train, y_train)

    # Feature Scaling 
    sc_X = StandardScaler()
    X_train = sc_X.fit_transform(X_train)
    X_test = sc_X.transform(X_test)


    n_samples, n_features = X_train.shape # 10000,605
    n_outputs = y_train.shape[1] # 10
    n_classes = 11

    forest = RandomForestClassifier(random_state=1)
    multi_target_forest = MultiOutputClassifier(forest, n_jobs=2)
    prediction=multi_target_forest.fit(X_train, y_train).predict(X_test)

    for i in range(y_test.shape[1]):
        list_prediction_with_zero.append(prediction[0][i])
        list_original_with_zero.append(y_test[0][i])
        if y_test[0][i]!=0:
            list_original.append(y_test[0][i])
            list_prediction.append(prediction[0][i])

Patient: 1
Patient: 2
Patient: 3
Patient: 4
Patient: 5
Patient: 6
Patient: 7
Patient: 8
Patient: 9
Patient: 10
Patient: 11
Patient: 12
Patient: 13
Patient: 14
Patient: 15
Patient: 22
Patient: 23
Patient: 24
Patient: 25
Patient: 26
Patient: 27
Patient: 28
Patient: 29
Patient: 30
Patient: 31
Patient: 32
Patient: 33
Patient: 34
Patient: 35
Patient: 36
Patient: 37
Patient: 38
Patient: 39
Patient: 40
Patient: 41
Patient: 42
Patient: 43
Patient: 44
Patient: 45
Patient: 46
Patient: 47
Patient: 48
Patient: 49
Patient: 50
Patient: 51
Patient: 52
Patient: 53
Patient: 54
Patient: 55
Patient: 56
Patient: 57
Patient: 58
Patient: 59
Patient: 60
Patient: 61
Patient: 62
Patient: 63
Patient: 64
Patient: 65
Patient: 66
Patient: 67
Patient: 68
Patient: 69
Patient: 70
Patient: 71
Patient: 72
Patient: 73
Patient: 74
Patient: 75
Patient: 76
Patient: 77
Patient: 78
Patient: 79
Patient: 80
Patient: 81
Patient: 82
Patient: 83
Patient: 84
Patient: 85
Patient: 86
Patient: 87
Patient: 88
Patient: 89
Patient: 90
P

In [485]:
print(list_original)
print(list_prediction)

[9, 2, 8, 10, 7, 3, 10, 10, 1, 4, 5, 7, 4, 1, 5, 1, 1, 4, 1, 9, 3, 3, 6, 5, 3, 6, 1, 7, 10, 7, 3, 6, 6, 1, 4, 4, 3, 6, 2, 1, 7, 1, 8, 7, 3, 6, 6, 2, 7, 2, 1, 4, 6, 3, 2, 1, 3, 6, 4, 7, 9, 7, 1, 3, 10, 3, 7, 1, 7, 2, 10, 3, 4, 8, 1, 2, 1, 2, 8, 4, 1, 4, 10, 1, 8, 10, 9, 4, 4, 2, 3, 3, 6, 1, 2, 6, 4, 4, 9, 4, 8, 4, 2, 7, 7, 10, 10, 1, 7, 2, 3, 6, 3, 9, 2, 6, 9, 3, 10, 1, 3, 4, 6, 2, 1, 3, 9, 7, 8, 8, 5, 1, 2, 3, 6, 6, 9, 4, 2, 4, 5, 10, 6, 1, 7, 3, 6, 1, 5, 4, 2, 2, 7, 8, 9, 2, 5, 10, 5, 9, 2, 2, 4, 10, 1, 9, 1, 3, 6, 3, 6, 9, 2, 1, 7, 1, 8, 5, 3, 3, 3, 4, 4, 3, 3, 8, 4, 7, 10, 2, 3, 6, 4, 2, 1, 2, 1, 8, 9, 2, 9, 10, 7, 4, 1, 4, 7, 10, 4, 2, 5, 4, 1, 2, 3, 4, 6, 8, 1, 9, 10, 9, 9, 6, 9, 4, 5, 2, 3, 2, 8, 3, 4, 7, 9, 10, 9, 3, 3, 10, 2, 5, 2, 4, 4, 1, 2, 8, 7, 10, 10, 6, 2, 4, 7, 9, 10, 2, 8, 2, 6, 7, 2, 1, 7, 9, 7, 2, 4, 3, 2, 10, 5, 1, 3, 6, 3, 2, 8, 9, 1, 4, 5, 9, 2, 8, 4, 8, 1, 1, 1, 2, 8, 3, 4, 10, 2, 10, 8, 9, 5, 9, 4, 7, 5, 6, 8, 7, 1, 2, 9, 7, 7, 1, 8, 5, 1, 3, 6, 4, 10, 3, 3, 4, 

In [486]:
# Y=Y.values
# prediction=prediction.values
report = classification_report(list_original,list_prediction, output_dict=True)
df_report = pd.DataFrame(report).transpose()
df_report

Unnamed: 0,precision,recall,f1-score,support
0,0.0,0.0,0.0,0.0
1,1.0,0.910714,0.953271,56.0
2,1.0,1.0,1.0,51.0
3,1.0,0.285714,0.444444,49.0
4,1.0,0.74,0.850575,50.0
5,1.0,0.826087,0.904762,23.0
6,1.0,0.5,0.666667,34.0
7,1.0,0.975,0.987342,40.0
8,1.0,0.96,0.979592,25.0
9,1.0,1.0,1.0,36.0
