In [1]:
import datetime as dt
import os
import sys

import numpy as np
import pandas as pd
from scipy import interp
import scipy.stats as stats
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, auc, confusion_matrix, roc_curve, average_precision_score, precision_recall_curve
from sklearn.model_selection import StratifiedKFold
import sqlalchemy as sa
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
%matplotlib inline

sys.path.append('../')
from utilities import sql_utils as su
from utilities import model_eval_utils as meu

# Connection URL for the database is read from the MIMIC_DWH environment
# variable so that no credentials are hard-coded in the notebook.
DWH = os.getenv('MIMIC_DWH')
if not DWH:
    # Fail fast with an actionable message instead of letting create_engine
    # choke on None further down.
    raise RuntimeError(
        "Environment variable MIMIC_DWH is not set; export it with the "
        "SQLAlchemy connection URL before running this notebook."
    )
engine = create_engine(DWH)

# Display options: allow very wide / long frames to render in full and show
# floats with three decimals.
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
pd.set_option('display.float_format', lambda x: '%.3f' % x)

  """)


## Testing One Hot Encoding
http://www.insightsbot.com/blog/McTKK/python-one-hot-encoding-with-scikit-learn

Let's say the third car (ID 3) comes in two colors, so it appears on two rows, one per color.

In [2]:
# Toy data set, described column by column. The car with ID 3 is listed twice
# because it has two colors (yellow and blue).
car_rows = {
    'color': ['green', 'blue', 'yellow', 'blue'],
    'make': ['Chevrolet', 'BMW', 'Lexus', 'Lexus'],
    'year': [2017, 2015, 2018, 2018],
    'ID': [1, 2, 3, 3],
}
df = pd.DataFrame(car_rows, columns=['color', 'make', 'year', 'ID'])

In [3]:
# Preview the toy frame; ID 3 appears on two rows, one per color.
df.head()

Unnamed: 0,color,make,year,ID
0,green,Chevrolet,2017,1
1,blue,BMW,2015,2
2,yellow,Lexus,2018,3
3,blue,Lexus,2018,3


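Note that the `OneHotEncoder` from scikit-learn (before version 0.20) requires *numerical* labels. Since we currently have text, we have to convert it into numerical labels first. (From scikit-learn 0.20 onward, `OneHotEncoder` accepts string categories directly, so this intermediate step is optional.)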

This is what `LabelEncoder` does.

In [4]:
from sklearn.preprocessing import LabelEncoder

# One encoder per column, so each keeps its own label <-> integer mapping.
le_color, le_make = LabelEncoder(), LabelEncoder()

# Add an integer-coded companion column next to each text column.
for source_col, encoded_col, encoder in (
    ('color', 'color_encoded', le_color),
    ('make', 'make_encoded', le_make),
):
    df[encoded_col] = encoder.fit_transform(df[source_col])

In [5]:
# Inspect the two integer-coded columns that were just added.
df.head()

Unnamed: 0,color,make,year,ID,color_encoded,make_encoded
0,green,Chevrolet,2017,1,1,1
1,blue,BMW,2015,2,0,0
2,yellow,Lexus,2018,3,2,2
3,blue,Lexus,2018,3,0,2


Now that we have numerical values, we can use the `OneHotEncoder` class from scikit-learn.

In [6]:
from sklearn.preprocessing import OneHotEncoder

# The encoder is fed a 2-D input, so turn each integer-coded column into a
# single-feature column vector of shape (n_rows, 1) first.
color_codes = df['color_encoded'].values.reshape(-1, 1)
make_codes = df['make_encoded'].values.reshape(-1, 1)

color_ohe, make_ohe = OneHotEncoder(), OneHotEncoder()

# .toarray() turns the encoder output into a dense NumPy array.
color_ohe_array = color_ohe.fit_transform(color_codes).toarray()
make_ohe_array = make_ohe.fit_transform(make_codes).toarray()

In [7]:
# One row per input row, one indicator column per color code (0, 1, 2).
color_ohe_array

array([[0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.]])

In [8]:
# One row per input row, one indicator column per make code (0, 1, 2).
make_ohe_array

array([[0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.]])

Back to pandas: wrap the encoded arrays in DataFrames and attach them to the original frame.

In [9]:
# Wrap each indicator matrix in a DataFrame whose columns are named after the
# integer code they represent, then attach both to the original frame at once.
n_color_cols = color_ohe_array.shape[1]
n_make_cols = make_ohe_array.shape[1]

color_frame = pd.DataFrame(
    color_ohe_array,
    columns=["Color_" + str(k) for k in range(n_color_cols)],
)
dfOneHot = pd.DataFrame(
    make_ohe_array,
    columns=["Make" + str(k) for k in range(n_make_cols)],
)

# All three frames share the same default row index, so a column-wise concat
# lines the rows up one-to-one.
df_with_one_hot = pd.concat([df, color_frame, dfOneHot], axis=1)

In [10]:
# Original columns followed by the Color_* and Make* indicator columns.
df_with_one_hot.head()

Unnamed: 0,color,make,year,ID,color_encoded,make_encoded,Color_0,Color_1,Color_2,Make0,Make1,Make2
0,green,Chevrolet,2017,1,1,1,0.0,1.0,0.0,0.0,1.0,0.0
1,blue,BMW,2015,2,0,0,1.0,0.0,0.0,1.0,0.0,0.0
2,yellow,Lexus,2018,3,2,2,0.0,0.0,1.0,0.0,0.0,1.0
3,blue,Lexus,2018,3,0,2,1.0,0.0,0.0,0.0,0.0,1.0


## Alternative Method using `LabelBinarizer`

In [11]:
from sklearn.preprocessing import LabelBinarizer

# LabelBinarizer goes from the text labels to indicator columns in one step,
# without the intermediate integer coding.
color_lb, make_lb = LabelBinarizer(), LabelBinarizer()

X = color_lb.fit_transform(df['color'].values)
Xm = make_lb.fit_transform(df['make'].values)

In [12]:
# df itself is unchanged by the binarizers; fit_transform only returned new arrays.
df.head()

Unnamed: 0,color,make,year,ID,color_encoded,make_encoded
0,green,Chevrolet,2017,1,1,1
1,blue,BMW,2015,2,0,0
2,yellow,Lexus,2018,3,2,2
3,blue,Lexus,2018,3,0,2


In [13]:
# Same indicator layout as color_ohe_array, but with integer values.
X

array([[0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0]])

In [14]:
# Same indicator layout as make_ohe_array, but with integer values.
Xm

array([[0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 0, 1]])

In [15]:
# Same assembly as for the OneHotEncoder arrays, this time from the
# LabelBinarizer outputs X (colors) and Xm (makes).
color_frame = pd.DataFrame(
    X,
    columns=["Color_" + str(k) for k in range(X.shape[1])],
)
dfOneHot = pd.DataFrame(
    Xm,
    columns=["Make" + str(k) for k in range(Xm.shape[1])],
)

# Single column-wise concat; every frame carries the same default row index.
df_with_one_hot_new = pd.concat([df, color_frame, dfOneHot], axis=1)

In [16]:
# Original columns followed by the integer-valued Color_* and Make* indicators.
df_with_one_hot_new.head()

Unnamed: 0,color,make,year,ID,color_encoded,make_encoded,Color_0,Color_1,Color_2,Make0,Make1,Make2
0,green,Chevrolet,2017,1,1,1,0,1,0,0,1,0
1,blue,BMW,2015,2,0,0,1,0,0,1,0,0
2,yellow,Lexus,2018,3,2,2,0,0,1,0,0,1
3,blue,Lexus,2018,3,0,2,1,0,0,0,0,1


In [17]:
# Indexing with a list keeps the result 2-D: a single row of shape (1, 3).
X[[0]]

array([[0, 1, 0]])

In [18]:
# One-hot row of the first car (green), kept 2-D.
green_ohe = X[[0]]

In [19]:
# Map the one-hot row back to its original text label.
color_lb.inverse_transform(green_ohe)

array(['green'], dtype='<U6')

In [20]:
# A hand-built one-hot row with the first column set decodes to 'blue'.
color_lb.inverse_transform(np.array([[1, 0, 0]]))

array(['blue'], dtype='<U6')

In [21]:
# A hand-built one-hot row with the last column set decodes to 'yellow'.
color_lb.inverse_transform(np.array([[0, 0, 1]]))

array(['yellow'], dtype='<U6')

### Group by ID to collapse

In [22]:
# Collapse to one row per ID by taking the column-wise maximum. For the 0/1
# indicator columns this acts as a logical OR, so a car with several colors
# keeps a 1 for each of them. The text and integer-coded columns are maxed as
# well, so they only retain the largest value per ID.
df_with_one_hot_new = (
    df_with_one_hot_new
    .groupby('ID')
    .max()
    .reset_index()
)

In [23]:
# ID 3 now has both Color_0 and Color_2 set; its `color` cell only shows
# 'yellow' because max() keeps the largest string.
df_with_one_hot_new.head()

Unnamed: 0,ID,color,make,year,color_encoded,make_encoded,Color_0,Color_1,Color_2,Make0,Make1,Make2
0,1,green,Chevrolet,2017,1,1,0,1,0,0,1,0
1,2,blue,BMW,2015,0,0,1,0,0,1,0,0
2,3,yellow,Lexus,2018,2,2,1,0,1,0,0,1


## Using `MultiLabelBinarizer`
https://stackoverflow.com/questions/46791626/one-hot-encoding-multi-level-column-data

In [24]:
from sklearn.preprocessing import MultiLabelBinarizer

# MultiLabelBinarizer expects one *collection* of labels per sample. Passing
# the raw `color` column would make it treat every string as a collection of
# characters, so first gather all colors recorded for a car into a list keyed
# by ID (e.g. ID 3 -> ['yellow', 'blue']).
colors_by_id = df.groupby('ID')['color'].apply(list)

mlb = MultiLabelBinarizer()
color_multi_hot = mlb.fit_transform(colors_by_id.tolist())

# One row per ID, one indicator column per color, named after the color itself.
# Stored under a new name so the toy `df` is not overwritten.
df_color_multi_hot = pd.DataFrame(
    color_multi_hot,
    columns=['COLOR_' + x for x in mlb.classes_],
    index=colors_by_id.index,
)

In [25]:
# Left disabled: MultiLabelBinarizer expects a collection of labels per sample,
# so feeding it plain strings would binarize their individual characters.
# X = mlb.fit_transform(df['color'].values)

In [26]:
# X

## Now trying it on real data

In [27]:
# Fetch the (admission id, CCS category description) pairs, skipping rows
# that have no CCS category description.
QUERY = """
select
  hadm_id,
  ccs_category_description
from datasets.admissions_diagnoses_icd_ccs_mapping
where ccs_category_description is not null
"""

# engine.begin() hands out a connection that is released when the block exits.
with engine.begin() as connection:
    df = pd.read_sql(sql=QUERY, con=connection)

In [28]:
# The same hadm_id spans several rows, one per diagnosis category.
df.head()

Unnamed: 0,hadm_id,ccs_category_description
0,172335,Htn complicn
1,172335,Pneumonia
2,172335,Nephritis
3,172335,Chr kidney disease
4,172335,Carditis


In [29]:
from sklearn.preprocessing import LabelBinarizer

# One indicator column per distinct CCS category description.
ccs_labels = df['ccs_category_description'].values
ccs_lb = LabelBinarizer()
X = ccs_lb.fit_transform(ccs_labels)

In [30]:
# Name each indicator column after its position in the binarizer output
# (ccs_0, ccs_1, ...).
ccs_columns = ["ccs_" + str(k) for k in range(X.shape[1])]
adm_dx_one_hot = pd.DataFrame(X, columns=ccs_columns)

# Column-wise concat; both frames are aligned on their row index.
df_adm_dx_one_hot = pd.concat([df, adm_dx_one_hot], axis=1)

In [31]:
# Still one row per diagnosis: each row has exactly one ccs_* column set.
df_adm_dx_one_hot.head()

Unnamed: 0,hadm_id,ccs_category_description,ccs_0,ccs_1,ccs_2,ccs_3,ccs_4,ccs_5,ccs_6,ccs_7,ccs_8,ccs_9,ccs_10,ccs_11,ccs_12,ccs_13,ccs_14,ccs_15,ccs_16,ccs_17,ccs_18,ccs_19,ccs_20,ccs_21,ccs_22,ccs_23,ccs_24,ccs_25,ccs_26,ccs_27,ccs_28,ccs_29,ccs_30,ccs_31,ccs_32,ccs_33,ccs_34,ccs_35,ccs_36,ccs_37,ccs_38,ccs_39,ccs_40,ccs_41,ccs_42,ccs_43,ccs_44,ccs_45,ccs_46,ccs_47,ccs_48,ccs_49,ccs_50,ccs_51,ccs_52,ccs_53,ccs_54,ccs_55,ccs_56,ccs_57,ccs_58,ccs_59,ccs_60,ccs_61,ccs_62,ccs_63,ccs_64,ccs_65,ccs_66,ccs_67,ccs_68,ccs_69,ccs_70,ccs_71,ccs_72,ccs_73,ccs_74,ccs_75,ccs_76,ccs_77,ccs_78,ccs_79,ccs_80,ccs_81,ccs_82,ccs_83,ccs_84,ccs_85,ccs_86,ccs_87,ccs_88,ccs_89,ccs_90,ccs_91,ccs_92,ccs_93,ccs_94,ccs_95,ccs_96,ccs_97,ccs_98,ccs_99,ccs_100,ccs_101,ccs_102,ccs_103,ccs_104,ccs_105,ccs_106,ccs_107,ccs_108,ccs_109,ccs_110,ccs_111,ccs_112,ccs_113,ccs_114,ccs_115,ccs_116,ccs_117,ccs_118,ccs_119,ccs_120,ccs_121,ccs_122,ccs_123,ccs_124,ccs_125,ccs_126,ccs_127,ccs_128,ccs_129,ccs_130,ccs_131,ccs_132,ccs_133,ccs_134,ccs_135,ccs_136,ccs_137,ccs_138,ccs_139,ccs_140,ccs_141,ccs_142,ccs_143,ccs_144,ccs_145,ccs_146,ccs_147,ccs_148,ccs_149,ccs_150,ccs_151,ccs_152,ccs_153,ccs_154,ccs_155,ccs_156,ccs_157,ccs_158,ccs_159,ccs_160,ccs_161,ccs_162,ccs_163,ccs_164,ccs_165,ccs_166,ccs_167,ccs_168,ccs_169,ccs_170,ccs_171,ccs_172,ccs_173,ccs_174,ccs_175,ccs_176,ccs_177,ccs_178,ccs_179,ccs_180,ccs_181,ccs_182,ccs_183,ccs_184,ccs_185,ccs_186,ccs_187,ccs_188,ccs_189,ccs_190,ccs_191,ccs_192,ccs_193,ccs_194,ccs_195,ccs_196,ccs_197,ccs_198,ccs_199,ccs_200,ccs_201,ccs_202,ccs_203,ccs_204,ccs_205,ccs_206,ccs_207,ccs_208,ccs_209,ccs_210,ccs_211,ccs_212,ccs_213,ccs_214,ccs_215,ccs_216,ccs_217,ccs_218,ccs_219,ccs_220,ccs_221,ccs_222,ccs_223,ccs_224,ccs_225,ccs_226,ccs_227,ccs_228,ccs_229,ccs_230,ccs_231,ccs_232,ccs_233,ccs_234,ccs_235,ccs_236,ccs_237,ccs_238,ccs_239,ccs_240,ccs_241,ccs_242,ccs_243,ccs_244,ccs_245,ccs_246,ccs_247,ccs_248,ccs_249,ccs_250,ccs_251,ccs_252,ccs_253,ccs_254,ccs_255,ccs_256,ccs_257,cc
s_258,ccs_259,ccs_260,ccs_261,ccs_262,ccs_263,ccs_264,ccs_265,ccs_266,ccs_267,ccs_268,ccs_269,ccs_270,ccs_271,ccs_272,ccs_273,ccs_274,ccs_275,ccs_276,ccs_277,ccs_278,ccs_279,ccs_280,ccs_281
0,172335,Htn complicn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,172335,Pneumonia,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,172335,Nephritis,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,172335,Chr kidney disease,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,172335,Carditis,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [32]:
# Collapse to one row per admission by taking the column-wise maximum, which
# ORs the 0/1 indicators together. The text column is maxed as well, so it
# only retains the largest description string per admission.
df_adm_dx_ohe = (
    df_adm_dx_one_hot
    .groupby('hadm_id')
    .max()
    .reset_index()
)

In [33]:
# One row per hadm_id with several ccs_* indicators set; the
# ccs_category_description cell is just the max() of the strings.
df_adm_dx_ohe.head()

Unnamed: 0,hadm_id,ccs_category_description,ccs_0,ccs_1,ccs_2,ccs_3,ccs_4,ccs_5,ccs_6,ccs_7,ccs_8,ccs_9,ccs_10,ccs_11,ccs_12,ccs_13,ccs_14,ccs_15,ccs_16,ccs_17,ccs_18,ccs_19,ccs_20,ccs_21,ccs_22,ccs_23,ccs_24,ccs_25,ccs_26,ccs_27,ccs_28,ccs_29,ccs_30,ccs_31,ccs_32,ccs_33,ccs_34,ccs_35,ccs_36,ccs_37,ccs_38,ccs_39,ccs_40,ccs_41,ccs_42,ccs_43,ccs_44,ccs_45,ccs_46,ccs_47,ccs_48,ccs_49,ccs_50,ccs_51,ccs_52,ccs_53,ccs_54,ccs_55,ccs_56,ccs_57,ccs_58,ccs_59,ccs_60,ccs_61,ccs_62,ccs_63,ccs_64,ccs_65,ccs_66,ccs_67,ccs_68,ccs_69,ccs_70,ccs_71,ccs_72,ccs_73,ccs_74,ccs_75,ccs_76,ccs_77,ccs_78,ccs_79,ccs_80,ccs_81,ccs_82,ccs_83,ccs_84,ccs_85,ccs_86,ccs_87,ccs_88,ccs_89,ccs_90,ccs_91,ccs_92,ccs_93,ccs_94,ccs_95,ccs_96,ccs_97,ccs_98,ccs_99,ccs_100,ccs_101,ccs_102,ccs_103,ccs_104,ccs_105,ccs_106,ccs_107,ccs_108,ccs_109,ccs_110,ccs_111,ccs_112,ccs_113,ccs_114,ccs_115,ccs_116,ccs_117,ccs_118,ccs_119,ccs_120,ccs_121,ccs_122,ccs_123,ccs_124,ccs_125,ccs_126,ccs_127,ccs_128,ccs_129,ccs_130,ccs_131,ccs_132,ccs_133,ccs_134,ccs_135,ccs_136,ccs_137,ccs_138,ccs_139,ccs_140,ccs_141,ccs_142,ccs_143,ccs_144,ccs_145,ccs_146,ccs_147,ccs_148,ccs_149,ccs_150,ccs_151,ccs_152,ccs_153,ccs_154,ccs_155,ccs_156,ccs_157,ccs_158,ccs_159,ccs_160,ccs_161,ccs_162,ccs_163,ccs_164,ccs_165,ccs_166,ccs_167,ccs_168,ccs_169,ccs_170,ccs_171,ccs_172,ccs_173,ccs_174,ccs_175,ccs_176,ccs_177,ccs_178,ccs_179,ccs_180,ccs_181,ccs_182,ccs_183,ccs_184,ccs_185,ccs_186,ccs_187,ccs_188,ccs_189,ccs_190,ccs_191,ccs_192,ccs_193,ccs_194,ccs_195,ccs_196,ccs_197,ccs_198,ccs_199,ccs_200,ccs_201,ccs_202,ccs_203,ccs_204,ccs_205,ccs_206,ccs_207,ccs_208,ccs_209,ccs_210,ccs_211,ccs_212,ccs_213,ccs_214,ccs_215,ccs_216,ccs_217,ccs_218,ccs_219,ccs_220,ccs_221,ccs_222,ccs_223,ccs_224,ccs_225,ccs_226,ccs_227,ccs_228,ccs_229,ccs_230,ccs_231,ccs_232,ccs_233,ccs_234,ccs_235,ccs_236,ccs_237,ccs_238,ccs_239,ccs_240,ccs_241,ccs_242,ccs_243,ccs_244,ccs_245,ccs_246,ccs_247,ccs_248,ccs_249,ccs_250,ccs_251,ccs_252,ccs_253,ccs_254,ccs_255,ccs_256,ccs_257,cc
s_258,ccs_259,ccs_260,ccs_261,ccs_262,ccs_263,ccs_264,ccs_265,ccs_266,ccs_267,ccs_268,ccs_269,ccs_270,ccs_271,ccs_272,ccs_273,ccs_274,ccs_275,ccs_276,ccs_277,ccs_278,ccs_279,ccs_280,ccs_281
0,100001,Ulcer skin,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,100003,Unclassified,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,100006,Screening and history of mental health an,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,100007,Pneumonia,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,100009,Unclassified,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


#### Going backwards: from a one-hot vector to its category label

In [45]:
# All-zero vector with one slot per indicator column of X.
checklist = [0] * X.shape[1]

In [46]:
# Number of slots, i.e. the number of indicator columns in X.
len(checklist)

282

In [47]:
# Switch on a single position to build a one-hot vector for column index 5.
checklist[5] = 1

In [60]:
# Wrap the list in another list so inverse_transform receives a 2-D, single-row
# array; the result is the category label behind column index 5.
ccs_lb.inverse_transform(np.array([checklist]))

array(['Acq foot def'], dtype='<U45')