In [0]:
import os

import lazypredict
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from lazypredict.Supervised import LazyClassifier
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [0]:
# Read the CSV from the working directory into the notebook-wide `data` frame.
data = pd.read_csv(filepath_or_buffer='diabetic_data.csv')

In [0]:
# Display summary statistics for the freshly loaded frame.
data.describe()

In [0]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

In [0]:
# Remove four columns from `data` in place; they are not used as model inputs.
data.drop(
    labels=['encounter_id', 'patient_nbr', 'weight', 'payer_code'],
    axis='columns',
    inplace=True,
)

In [0]:
# Discard, in place, every row whose target value `readmitted` is missing.
data.dropna(axis='index', subset=['readmitted'], inplace=True)

In [0]:
# Encode the target as integers: 0 = 'NO', 1 = '<30', 2 = '>30'.
# One whole-column assignment replaces the chained
# `data['readmitted'][mask] = value` writes, which pandas flags with
# SettingWithCopyWarning and which have no effect under Copy-on-Write.
readmitted_codes = {'NO': 0, '<30': 1, '>30': 2}
# Values absent from the mapping (e.g. codes left by an earlier run of this
# cell) pass through unchanged, so re-running is harmless. The astype call
# then yields a real integer column and raises if any unmapped text remains.
data['readmitted'] = (
    data['readmitted']
    .map(lambda label: readmitted_codes.get(label, label))
    .astype('int64')
)


In [0]:
# Show how many rows fall into each value of the target column.
data['readmitted'].value_counts()

In [0]:
# Preview the first five rows again after the target column was recoded.
data.head(n=5)

In [0]:
# Display the remaining column labels of `data`.
data.columns

In [0]:
# Columns whose values are replaced by integer codes below.
enc_list= ['gender','age','race','medical_specialty','max_glu_serum', 'A1Cresult', 'metformin', 'repaglinide', 'nateglinide',
       'chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide',
       'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone', 'acarbose',
       'miglitol', 'troglitazone', 'tolazamide', 'examide', 'citoglipton',
       'insulin', 'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone', 'change', 'diabetesMed']
# Keep every fitted encoder, keyed by column name, so an integer code can be
# mapped back to its original category via label_encoders[col].classes_ or
# label_encoders[col].inverse_transform(...).
label_encoders = {}
for i in enc_list:
    # A separate encoder per column: each column gets its own code table.
    labelencoder = LabelEncoder()
    # The codes overwrite the original column (no extra column is created).
    data[i] = labelencoder.fit_transform(data[i])
    label_encoders[i] = labelencoder


In [0]:
# Swap every cell that is exactly '?' for an empty string, modifying `data` in place.
data.replace(to_replace='?', value='', inplace=True)

In [0]:
# Display the frame to inspect it after the '?' replacement.
data

In [0]:
# Convert the three diagnosis-code columns to numbers. Leading 'V' characters
# are stripped first, then leading 'E' characters (same order as before), and
# the remainder is parsed with pd.to_numeric.
# The vectorised .str accessor leaves missing entries as NaN, whereas calling
# x.lstrip() on each element raises AttributeError on a non-string value.
for diag_col in ('diag_1', 'diag_2', 'diag_3'):
    stripped = data[diag_col].str.lstrip('V').str.lstrip('E')
    data[diag_col] = pd.to_numeric(stripped)

In [0]:
# data['race'].fillna('NA',inplace=True)
# data['marital'].fillna('NA',inplace=True)
# data['inc'].fillna('NA',inplace=True)
# data['smoke'].fillna('NA',inplace=True)
# data['time'].fillna('NA',inplace=True)
# data['ed'].fillna('NA',inplace=True)
# data['ded'].fillna('NA',inplace=True)
# data['drace'].fillna('NA',inplace=True)

In [0]:
# Feature matrix: every column of `data` except the target.
X = data.drop(columns=['readmitted'])
# Target vector. The integer class codes are written into what was a string
# (object) column, and scikit-learn reports an object array of non-string
# values as an "unknown" label type. infer_objects() converts such a column
# to a proper integer dtype and leaves genuine string labels untouched.
y = data['readmitted'].infer_objects()

In [0]:
# Replace every missing feature value with 0.
X = X.fillna(value=0)

In [0]:
# Display the column labels of the feature matrix.
X.columns

In [0]:

# Shuffle and split into train/test parts with a fixed seed. test_size=0.25
# spells out the fraction scikit-learn uses when no size is given.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [0]:
# Display the dtype of each training feature column.
X_train.dtypes

In [0]:
# Run LazyClassifier on the train/test split with warnings suppressed and
# quiet output. fit() returns two objects; the first one is shown as the
# cell's output.
clf = LazyClassifier(ignore_warnings=True, verbose=0)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
models

In [0]:
# Display how many test rows fall into each target class.
y_test.value_counts()

In [0]:
# RandomForestClassifier is imported with the other dependencies in the
# notebook's first cell.

# Placeholder labels 'feature 0', 'feature 1', ... — one per column of X.
feature_names = [f'feature {i}' for i in range(X.shape[1])]
# n_jobs=-1 builds the 1000 trees on all available cores. With the fixed
# random_state the same trees are grown as in a single-core run.
forest = RandomForestClassifier(n_estimators=1000, random_state=42, n_jobs=-1)
forest.fit(X_train, y_train)
# Predicted class for every test row.
y_pred = forest.predict(X_test)

In [0]:
# Display the forest's predictions for the test rows.
y_pred

In [0]:
# Display the true target values of the test rows for comparison.
y_test

In [0]:
# precision_score and recall_score are imported with the other dependencies
# in the notebook's first cell.

# NOTE: with average='micro' on a single-label multiclass target, precision
# and recall are both equal to overall accuracy, so the two numbers printed
# below are always identical.
precision = precision_score(y_test, y_pred, average='micro')
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_test, y_pred, average='micro')
print('Recall: %f' % recall)

# Per-class precision / recall / F1 plus macro and weighted averages, which
# show how each class is handled individually. zero_division=0 reports 0
# (without a warning) for a class that is never predicted.
print(metrics.classification_report(y_test, y_pred, zero_division=0))

In [0]:
# Side-by-side table of true and predicted classes, indexed like y_test.
df = y_test.to_frame(name='Actual').assign(Predicted=y_pred)

In [0]:
# Importance of each feature as reported by the fitted forest.
importances = forest.feature_importances_
# Standard deviation of each feature's importance across the individual trees.
std = np.array(
    [estimator.feature_importances_ for estimator in forest.estimators_]
).std(axis=0)

In [0]:
# Two-column table pairing each training column name with its importance.
importances = pd.DataFrame(
    dict(
        Attribute=X_train.columns,
        Importance=forest.feature_importances_,
    )
)

In [0]:
# Order the table from the most to the least important attribute.
importances = importances.sort_values('Importance', ascending=False)

In [0]:
# Bar chart of the importance table, one bar per attribute in table order.
# The values come from a random forest's feature_importances_, not from model
# coefficients, so the title says so. Axis labels let the figure stand alone.
fig, ax = plt.subplots(figsize=(14, 6))
ax.bar(x=importances['Attribute'], height=importances['Importance'], color='#087E8B')
ax.set_title('Feature importances from the random forest', size=20)
ax.set_xlabel('Attribute')
ax.set_ylabel('Importance')
# Vertical tick labels keep the attribute names readable.
ax.tick_params(axis='x', labelrotation=90)
fig.tight_layout()
plt.show()

In [0]:
# Display the importance table.
importances