In [1]:
%matplotlib inline

In [116]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pickle

from sklearn.datasets import fetch_california_housing, load_digits

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder, FunctionTransformer, LabelEncoder, PolynomialFeatures
from sklearn.linear_model import LinearRegression, LogisticRegression, RANSACRegressor

# Model Training and Improvement

In [104]:
EPS = 1e-10

dataset: https://www.kaggle.com/datasets/brandao/diabetes

In [4]:
diabetic_data = pd.read_csv("diabetic_data.csv")
diabetic_data

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,...,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,25,1,1,...,No,No,No,No,No,No,No,No,No,NO
1,149190,55629189,Caucasian,Female,[10-20),?,1,1,7,3,...,No,Up,No,No,No,No,No,Ch,Yes,>30
2,64410,86047875,AfricanAmerican,Female,[20-30),?,1,1,7,2,...,No,No,No,No,No,No,No,No,Yes,NO
3,500364,82442376,Caucasian,Male,[30-40),?,1,1,7,2,...,No,Up,No,No,No,No,No,Ch,Yes,NO
4,16680,42519267,Caucasian,Male,[40-50),?,1,1,7,1,...,No,Steady,No,No,No,No,No,Ch,Yes,NO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101761,443847548,100162476,AfricanAmerican,Male,[70-80),?,1,3,7,3,...,No,Down,No,No,No,No,No,Ch,Yes,>30
101762,443847782,74694222,AfricanAmerican,Female,[80-90),?,1,4,5,5,...,No,Steady,No,No,No,No,No,No,Yes,NO
101763,443854148,41088789,Caucasian,Male,[70-80),?,1,1,7,1,...,No,Down,No,No,No,No,No,Ch,Yes,NO
101764,443857166,31693671,Caucasian,Female,[80-90),?,2,3,7,10,...,No,Up,No,No,No,No,No,Ch,Yes,NO


In [5]:
diabetic_data.columns

Index(['encounter_id', 'patient_nbr', 'race', 'gender', 'age', 'weight',
       'admission_type_id', 'discharge_disposition_id', 'admission_source_id',
       'time_in_hospital', 'payer_code', 'medical_specialty',
       'num_lab_procedures', 'num_procedures', 'num_medications',
       'number_outpatient', 'number_emergency', 'number_inpatient', 'diag_1',
       'diag_2', 'diag_3', 'number_diagnoses', 'max_glu_serum', 'A1Cresult',
       'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide',
       'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
       'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
       'tolazamide', 'examide', 'citoglipton', 'insulin',
       'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone', 'change', 'diabetesMed', 'readmitted'],
      dtype='object')

In [7]:
diabetic_data.columns[diabetic_data.columns.str.contains("_id")]

Index(['encounter_id', 'admission_type_id', 'discharge_disposition_id',
       'admission_source_id'],
      dtype='object')

In [9]:
diabetic_data.pioglitazone.unique()

array(['No', 'Steady', 'Up', 'Down'], dtype=object)

In [11]:
diabetic_data.dtypes

encounter_id                 int64
patient_nbr                  int64
race                        object
gender                      object
age                         object
weight                      object
admission_type_id            int64
discharge_disposition_id     int64
admission_source_id          int64
time_in_hospital             int64
payer_code                  object
medical_specialty           object
num_lab_procedures           int64
num_procedures               int64
num_medications              int64
number_outpatient            int64
number_emergency             int64
number_inpatient             int64
diag_1                      object
diag_2                      object
diag_3                      object
number_diagnoses             int64
max_glu_serum               object
A1Cresult                   object
metformin                   object
repaglinide                 object
nateglinide                 object
chlorpropamide              object
glimepiride         

In [15]:
pd.get_dummies(diabetic_data, drop_first = True)

Unnamed: 0,encounter_id,patient_nbr,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,...,glyburide-metformin_Steady,glyburide-metformin_Up,glipizide-metformin_Steady,glimepiride-pioglitazone_Steady,metformin-rosiglitazone_Steady,metformin-pioglitazone_Steady,change_No,diabetesMed_Yes,readmitted_>30,readmitted_NO
0,2278392,8222157,6,25,1,1,41,0,1,0,...,False,False,False,False,False,False,True,False,False,True
1,149190,55629189,1,1,7,3,59,0,18,0,...,False,False,False,False,False,False,False,True,True,False
2,64410,86047875,1,1,7,2,11,5,13,2,...,False,False,False,False,False,False,True,True,False,True
3,500364,82442376,1,1,7,2,44,1,16,0,...,False,False,False,False,False,False,False,True,False,True
4,16680,42519267,1,1,7,1,51,0,8,0,...,False,False,False,False,False,False,False,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101761,443847548,100162476,1,3,7,3,51,0,16,0,...,False,False,False,False,False,False,False,True,True,False
101762,443847782,74694222,1,4,5,5,33,3,18,0,...,False,False,False,False,False,False,True,True,False,True
101763,443854148,41088789,1,1,7,1,53,0,9,1,...,False,False,False,False,False,False,False,True,False,True
101764,443857166,31693671,2,3,7,10,45,2,21,0,...,False,False,False,False,False,False,False,True,False,True


In [16]:
pd.get_dummies(diabetic_data, drop_first = True).dtypes

encounter_id                     int64
patient_nbr                      int64
admission_type_id                int64
discharge_disposition_id         int64
admission_source_id              int64
                                 ...  
metformin-pioglitazone_Steady     bool
change_No                         bool
diabetesMed_Yes                   bool
readmitted_>30                    bool
readmitted_NO                     bool
Length: 2436, dtype: object

In [18]:
diabetes_attributes = diabetic_data.drop(columns = 'readmitted')
target = diabetic_data.readmitted

In [20]:
diabetes_attributes_dummies = pd.get_dummies(diabetes_attributes)

In [21]:
logistic_regression = LogisticRegression()

In [22]:
logistic_regression.fit(diabetes_attributes_dummies, target)

In [23]:
logistic_regression.coef_

array([[-3.31701395e-09, -3.86905107e-09, -1.15814534e-15, ...,
        -3.26369841e-16, -1.63318872e-16, -3.02903976e-16],
       [-1.50329991e-10,  4.51003945e-09, -1.47130836e-16, ...,
        -1.14848644e-16, -1.11876233e-16,  7.41720589e-17],
       [ 3.46734394e-09, -6.40988385e-10,  1.30527618e-15, ...,
         4.41218484e-16,  2.75195106e-16,  2.28731917e-16]])

In [24]:
scaler = MinMaxScaler()

In [25]:
scaler.fit_transform(diabetes_attributes_dummies)

array([[5.10498143e-03, 4.33874102e-02, 7.14285714e-01, ...,
        1.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       [3.07911576e-04, 2.93553165e-01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [1.16903122e-04, 4.54071832e-01, 0.00000000e+00, ...,
        1.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       ...,
       [9.99970544e-01, 2.16823828e-01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [9.99977344e-01, 1.67246019e-01, 1.42857143e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 9.25735491e-01, 0.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 0.00000000e+00]])

In [26]:
diabetes_attribites_scaled = scaler.fit_transform(diabetes_attributes_dummies)

In [27]:
logistic_regression.fit(diabetes_attribites_scaled, target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [28]:
logistic_regression.score(diabetes_attribites_scaled, target)

0.5951693099856533

In [30]:
logistic_regression.score(diabetes_attributes_dummies, target)



0.5387162706601419

<b>Pipleine</b>

In [33]:
pipeline = Pipeline({
    ('scaler', MinMaxScaler()),
    ('model', LogisticRegression())
})

In [34]:
pipeline

In [35]:
sample_data = diabetic_data.sample(5000, random_state = 42)

In [36]:
pd.get_dummies(sample_data)

Unnamed: 0,encounter_id,patient_nbr,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,...,glimepiride-pioglitazone_No,metformin-rosiglitazone_No,metformin-pioglitazone_No,change_Ch,change_No,diabetesMed_No,diabetesMed_Yes,readmitted_<30,readmitted_>30,readmitted_NO
35956,110939484,19274094,1,1,6,11,68,0,20,0,...,True,True,True,False,True,False,True,False,False,True
60927,170328306,65634327,1,1,1,1,20,0,7,0,...,True,True,True,False,True,False,True,False,False,True
79920,245688426,100657359,3,6,1,4,21,3,23,1,...,True,True,True,False,True,False,True,False,False,True
50078,150826224,83144448,2,1,1,12,28,0,19,0,...,True,True,True,False,True,False,True,False,True,False
44080,135993852,65234214,1,2,7,1,21,0,6,0,...,True,True,True,False,True,False,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35562,110096790,33502212,6,1,7,12,65,2,19,0,...,True,True,True,True,False,False,True,False,False,True
98563,402583472,141357506,7,3,7,6,61,1,14,0,...,True,True,True,False,True,False,True,False,False,True
88066,282442506,45759951,1,1,7,3,28,6,6,0,...,True,True,True,False,True,False,True,False,False,True
55955,161139018,112510251,5,1,1,3,27,0,10,1,...,True,True,True,True,False,False,True,False,False,True


In [38]:
pd.get_dummies(sample_data).dtypes

encounter_id                int64
patient_nbr                 int64
admission_type_id           int64
discharge_disposition_id    int64
admission_source_id         int64
                            ...  
diabetesMed_No               bool
diabetesMed_Yes              bool
readmitted_<30               bool
readmitted_>30               bool
readmitted_NO                bool
Length: 1255, dtype: object

In [37]:
pd.get_dummies(sample_data).astype(int)

Unnamed: 0,encounter_id,patient_nbr,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,...,glimepiride-pioglitazone_No,metformin-rosiglitazone_No,metformin-pioglitazone_No,change_Ch,change_No,diabetesMed_No,diabetesMed_Yes,readmitted_<30,readmitted_>30,readmitted_NO
35956,110939484,19274094,1,1,6,11,68,0,20,0,...,1,1,1,0,1,0,1,0,0,1
60927,170328306,65634327,1,1,1,1,20,0,7,0,...,1,1,1,0,1,0,1,0,0,1
79920,245688426,100657359,3,6,1,4,21,3,23,1,...,1,1,1,0,1,0,1,0,0,1
50078,150826224,83144448,2,1,1,12,28,0,19,0,...,1,1,1,0,1,0,1,0,1,0
44080,135993852,65234214,1,2,7,1,21,0,6,0,...,1,1,1,0,1,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35562,110096790,33502212,6,1,7,12,65,2,19,0,...,1,1,1,1,0,0,1,0,0,1
98563,402583472,141357506,7,3,7,6,61,1,14,0,...,1,1,1,0,1,0,1,0,0,1
88066,282442506,45759951,1,1,7,3,28,6,6,0,...,1,1,1,0,1,0,1,0,0,1
55955,161139018,112510251,5,1,1,3,27,0,10,1,...,1,1,1,1,0,0,1,0,0,1


In [42]:
sample_attributes = sample_data.drop(columns = 'readmitted')
sample_target = sample_data.readmitted

In [44]:
sample_attributes = pd.get_dummies(sample_attributes)

In [45]:
pipeline.fit(sample_attributes, sample_target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


<b>OneHotEncoder</b>

In [47]:
ohe = OneHotEncoder()

In [48]:
ohe.fit(sample_attributes)

In [49]:
ohe.feature_names_in_

array(['encounter_id', 'patient_nbr', 'admission_type_id', ...,
       'change_No', 'diabetesMed_No', 'diabetesMed_Yes'], dtype=object)

In [51]:
ohe.categories_

[array([   325848,   1139226,   1212006, ..., 443730002, 443775086,
        443824292], dtype=int64),
 array([    10827,     15849,     27315, ..., 186774602, 187042703,
        189502619], dtype=int64),
 array([1, 2, 3, 5, 6, 7, 8], dtype=int64),
 array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 11, 13, 14, 15, 17, 18, 22, 23,
        24, 25, 28], dtype=int64),
 array([ 1,  2,  3,  4,  5,  6,  7,  9, 17, 20], dtype=int64),
 array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14],
       dtype=int64),
 array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
         27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
         53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
         66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
         79,  80,  81,  82,  83,  84,  85,  86,  87,  

<b>ColumnTransformer</b>

In [54]:
sample_data.columns

Index(['encounter_id', 'patient_nbr', 'race', 'gender', 'age', 'weight',
       'admission_type_id', 'discharge_disposition_id', 'admission_source_id',
       'time_in_hospital', 'payer_code', 'medical_specialty',
       'num_lab_procedures', 'num_procedures', 'num_medications',
       'number_outpatient', 'number_emergency', 'number_inpatient', 'diag_1',
       'diag_2', 'diag_3', 'number_diagnoses', 'max_glu_serum', 'A1Cresult',
       'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide',
       'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
       'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
       'tolazamide', 'examide', 'citoglipton', 'insulin',
       'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone', 'change', 'diabetesMed', 'readmitted'],
      dtype='object')

In [57]:
sample_data.dtypes[sample_data.dtypes == np.object_]

race                        object
gender                      object
age                         object
weight                      object
payer_code                  object
medical_specialty           object
diag_1                      object
diag_2                      object
diag_3                      object
max_glu_serum               object
A1Cresult                   object
metformin                   object
repaglinide                 object
nateglinide                 object
chlorpropamide              object
glimepiride                 object
acetohexamide               object
glipizide                   object
glyburide                   object
tolbutamide                 object
pioglitazone                object
rosiglitazone               object
acarbose                    object
miglitol                    object
troglitazone                object
tolazamide                  object
examide                     object
citoglipton                 object
insulin             

In [58]:
sample_data.dtypes[sample_data.dtypes == np.object_].index

Index(['race', 'gender', 'age', 'weight', 'payer_code', 'medical_specialty',
       'diag_1', 'diag_2', 'diag_3', 'max_glu_serum', 'A1Cresult', 'metformin',
       'repaglinide', 'nateglinide', 'chlorpropamide', 'glimepiride',
       'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
       'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
       'tolazamide', 'examide', 'citoglipton', 'insulin',
       'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone', 'change', 'diabetesMed', 'readmitted'],
      dtype='object')

In [59]:
sample_data.dtypes[sample_data.dtypes == np.object_].index.values

array(['race', 'gender', 'age', 'weight', 'payer_code',
       'medical_specialty', 'diag_1', 'diag_2', 'diag_3', 'max_glu_serum',
       'A1Cresult', 'metformin', 'repaglinide', 'nateglinide',
       'chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide',
       'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone',
       'acarbose', 'miglitol', 'troglitazone', 'tolazamide', 'examide',
       'citoglipton', 'insulin', 'glyburide-metformin',
       'glipizide-metformin', 'glimepiride-pioglitazone',
       'metformin-rosiglitazone', 'metformin-pioglitazone', 'change',
       'diabetesMed', 'readmitted'], dtype=object)

In [66]:
categorical_columns = sample_data.dtypes[sample_data.dtypes == np.object_].index.values

In [67]:
categorical_columns

array(['race', 'gender', 'age', 'weight', 'payer_code',
       'medical_specialty', 'diag_1', 'diag_2', 'diag_3', 'max_glu_serum',
       'A1Cresult', 'metformin', 'repaglinide', 'nateglinide',
       'chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide',
       'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone',
       'acarbose', 'miglitol', 'troglitazone', 'tolazamide', 'examide',
       'citoglipton', 'insulin', 'glyburide-metformin',
       'glipizide-metformin', 'glimepiride-pioglitazone',
       'metformin-rosiglitazone', 'metformin-pioglitazone', 'change',
       'diabetesMed', 'readmitted'], dtype=object)

In [68]:
categorical_columns = categorical_columns[:-1]

In [71]:
numerical_columns = sample_data.dtypes[sample_data.dtypes != np.object_].index.values
numerical_columns

array(['encounter_id', 'patient_nbr', 'admission_type_id',
       'discharge_disposition_id', 'admission_source_id',
       'time_in_hospital', 'num_lab_procedures', 'num_procedures',
       'num_medications', 'number_outpatient', 'number_emergency',
       'number_inpatient', 'number_diagnoses'], dtype=object)

In [72]:
numerical_columns = [
    'admission_type_id', 'discharge_disposition_id', 'time_in_hospital', 'num_lab_procedures', 'num_procedures',
    'num_medications', 'number_outpatient', 'number_emergency', 'number_inpatient', 'number_diagnoses']

In [81]:
preprocessor = ColumnTransformer([
    ('categorical', OneHotEncoder(), categorical_columns),
    ('numerical', MinMaxScaler(), numerical_columns),
], remainder = 'passthrough') # remainder = passthrough -> the rest stay in the analysis; drop -> the rest are dropped

In [82]:
preprocessor

In [87]:
log_transformer = FunctionTransformer(np.log10)

In [119]:
number_processor = Pipeline([
    ('log_transformer', FunctionTransformer(lambda x: np.log10(x + EPS))),
    ('minmax', MinMaxScaler((-5, 5)))
])

In [121]:
number_processor

In [122]:
preprocessor = ColumnTransformer([
    ('categorical', OneHotEncoder(), categorical_columns),
    ('numerical', number_processor, numerical_columns),
])

In [124]:
preprocessor

In [125]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression())
])

In [127]:
pipeline

In [128]:
pipeline.steps[0]

('preprocessor',
 ColumnTransformer(transformers=[('categorical', OneHotEncoder(),
                                  array(['race', 'gender', 'age', 'weight', 'payer_code',
        'medical_specialty', 'diag_1', 'diag_2', 'diag_3', 'max_glu_serum',
        'A1Cresult', 'metformin', 'repaglinide', 'nateglinide',
        'chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide',
        'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone',
        'acar...
                                  Pipeline(steps=[('log_transformer',
                                                   FunctionTransformer(func=<function <lambda> at 0x0000020E02BE5580>)),
                                                  ('minmax',
                                                   MinMaxScaler(feature_range=(-5,
                                                                               5)))]),
                                  ['admission_type_id',
                                   'discharge_dispos

In [130]:
pipeline.steps[0][1]

In [131]:
sample_attributes = sample_data.drop(columns = 'readmitted')
sample_target = sample_data.readmitted

In [134]:
pipeline.fit(sample_attributes, sample_target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [135]:
pipeline.score(sample_attributes, sample_target)

0.631

# Pickle

In [136]:
pickle.dump(logistic_regression, open("prediction_pipelin.pkl", "wb"))

In [137]:
pipeline.steps[0][1].feature_names_in_

array(['encounter_id', 'patient_nbr', 'race', 'gender', 'age', 'weight',
       'admission_type_id', 'discharge_disposition_id',
       'admission_source_id', 'time_in_hospital', 'payer_code',
       'medical_specialty', 'num_lab_procedures', 'num_procedures',
       'num_medications', 'number_outpatient', 'number_emergency',
       'number_inpatient', 'diag_1', 'diag_2', 'diag_3',
       'number_diagnoses', 'max_glu_serum', 'A1Cresult', 'metformin',
       'repaglinide', 'nateglinide', 'chlorpropamide', 'glimepiride',
       'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
       'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol',
       'troglitazone', 'tolazamide', 'examide', 'citoglipton', 'insulin',
       'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone', 'change', 'diabetesMed'], dtype=object)

In [138]:
pipeline.predict(sample_data.sample(10))

array(['NO', 'NO', 'NO', '>30', '>30', 'NO', '>30', 'NO', '>30', '>30'],
      dtype=object)