# LIVER DISEASE PREDICTION

**Importing the Necessary Libraries:**

In [81]:
import numpy as np
import pandas as pd
import pickle
import warnings
warnings.filterwarnings('ignore')

**Reading the Dataset:**

In [82]:
patients=pd.read_csv('indian_liver_patient.csv')

In [83]:
patients.head()

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.0,1
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.4,1


In [84]:
patients.shape

(583, 11)

**Let us make the Gender column into numerical format:**

In [85]:
patients.replace({"Gender":{"Male":1,"Female":0}},inplace=True)
patients['Dataset'] = patients['Dataset'].replace([2,1],[1,0])
patients

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,65,0,0.7,0.1,187,16,18,6.8,3.3,0.90,0
1,62,1,10.9,5.5,699,64,100,7.5,3.2,0.74,0
2,62,1,7.3,4.1,490,60,68,7.0,3.3,0.89,0
3,58,1,1.0,0.4,182,14,20,6.8,3.4,1.00,0
4,72,1,3.9,2.0,195,27,59,7.3,2.4,0.40,0
...,...,...,...,...,...,...,...,...,...,...,...
578,60,1,0.5,0.1,500,20,34,5.9,1.6,0.37,1
579,40,1,0.6,0.1,98,35,31,6.0,3.2,1.10,0
580,52,1,0.8,0.2,245,48,49,6.4,3.2,1.00,0
581,31,1,1.3,0.5,184,29,32,6.8,3.4,1.00,0


**Let us check for the null values:**

In [86]:
patients.isnull().sum()

Age                           0
Gender                        0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                       0
Albumin_and_Globulin_Ratio    4
Dataset                       0
dtype: int64

**Let us fill these null values by imputing the mean of that column.**

In [87]:
patients['Albumin_and_Globulin_Ratio'].mean()

0.9470639032815201

In [88]:
patients=patients.fillna(0.94)

In [89]:
patients.isnull().sum()

Age                           0
Gender                        0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                       0
Albumin_and_Globulin_Ratio    0
Dataset                       0
dtype: int64

**So Let us start building our model.**

In [90]:
from sklearn.model_selection import train_test_split

In [91]:
patients.columns

Index(['Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
       'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
       'Aspartate_Aminotransferase', 'Total_Protiens', 'Albumin',
       'Albumin_and_Globulin_Ratio', 'Dataset'],
      dtype='object')

**Now let us define our X and y.**

**Here X is our features and y is our target.**

In [92]:
X=patients.drop('Dataset',axis=1)
y=patients['Dataset']

In [93]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

### Logistic Regression

In [105]:
from sklearn.linear_model import LogisticRegression
np.random.seed(42)
logmodel = LogisticRegression()

logmodel.fit(X_train,y_train)

logmodel.score(X_test,y_test)

0.5982905982905983

#### SVM

In [95]:
from sklearn import svm
svmModel = svm.SVC()
svmModel.fit(X_train, y_train)
svmModel.score(X_test,y_test)

0.6581196581196581

#### Decision Tree

In [96]:
from sklearn.tree import DecisionTreeClassifier

dtModel=DecisionTreeClassifier()

dtModel.fit(X_train,y_train)

dtModel.score(X_test,y_test)

0.6324786324786325

#### Random Forest

In [97]:
from sklearn.ensemble import RandomForestClassifier

rmodel=RandomForestClassifier()
rmodel.fit(X_train,y_train)
rmodel.score(X_test,y_test)

0.6153846153846154

In [98]:
pickle.dump(logmodel, open('model_lr_liver.pkl','wb'))