In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from joblib import dump

from sklearn.svm import SVC

In [2]:
# Read the hepatitis data set from the notebook's working directory and
# preview the first ten rows.
df = pd.read_csv('hepatitis.csv')
df.head(10)

Unnamed: 0,class,age,sex,steroid,antivirals,fatigue,malaise,anorexia,liver_big,liver_firm,spleen_palable,spiders,ascites,varices,bilirubin,alk_phosphate,sgot,albumin,protime,histology
0,2,30,2,1,2,2,2,2,1,2,2,2,2,2,1.0,85,18,4.0,61,1
1,2,50,1,1,2,1,2,2,1,2,2,2,2,2,0.9,135,42,3.5,61,1
2,2,78,1,2,2,1,2,2,2,2,2,2,2,2,0.7,96,32,4.0,61,1
3,2,34,1,2,2,2,2,2,2,2,2,2,2,2,1.0,105,200,4.0,61,1
4,2,34,1,2,2,2,2,2,2,2,2,2,2,2,0.9,95,28,4.0,75,1
5,1,51,1,1,2,1,2,1,2,2,1,1,2,2,1.42,105,85,3.81,61,1
6,2,23,1,2,2,2,2,2,2,2,2,2,2,2,1.0,105,85,3.81,61,1
7,2,39,1,2,2,1,2,2,2,1,2,2,2,2,0.7,105,48,4.4,61,1
8,2,30,1,2,2,2,2,2,2,2,2,2,2,2,1.0,105,120,3.9,61,1
9,2,39,1,1,1,2,2,2,1,1,2,2,2,2,1.3,78,30,4.4,85,1


In [3]:
# Select features and target by column NAME instead of by position, so a
# reordered or extended CSV cannot silently leak the label into X or drop a
# real feature. With the current file ('class' is the first column) this
# yields the same 19 feature columns as the positional slice did.
X = df.drop(columns='class')
y = df['class']

In [4]:
# Sanity check: number of labelled samples.
y.shape

(142,)

In [5]:
# Split FIRST, then fit the scaler on the training rows only. Fitting it on
# the whole data set lets the held-out rows influence the mean/variance used
# for scaling, which leaks test information and makes the score optimistic.
# The same test_size / random_state give the same row partition as before.
xtrain_raw, xtest_raw, ytrain, ytest = train_test_split(
    X, y, test_size=.2, random_state=1234
)

scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain_raw)
xtest = scaler.transform(xtest_raw)

# Kept so any later cell that still refers to the fully scaled matrix works.
scaledX = scaler.transform(X)

In [6]:
# Cubic polynomial-kernel SVM; fit() returns the estimator, so construction
# and training are chained into one statement.
svc = SVC(kernel='poly', degree=3, gamma='auto').fit(xtrain, ytrain)

# Score the held-out split and tabulate true vs. predicted labels.
svc_pred = svc.predict(xtest)
cm_svc = confusion_matrix(y_true=ytest, y_pred=svc_pred)

In [7]:
# Display the confusion matrix computed on the held-out split.
cm_svc

array([[ 3,  2],
       [ 0, 24]], dtype=int64)

In [8]:
# Bundle the fitted classifier with the scaler it was trained behind, so
# inference can reproduce the same preprocessing, then persist the bundle.
model = {
    'title': 'Model for hepatitis prediction',  # fixed typo ("predictin")
    'classifier': svc,
    'scaler': scaler,
}
dump(model, 'hepatitis.pkl')

['hepatitis.pkl']

In [9]:
# Inspect the dtype of every feature column.
X.dtypes

age                 int64
sex                 int64
steroid             int64
antivirals          int64
fatigue             int64
malaise             int64
anorexia            int64
liver_big           int64
liver_firm          int64
spleen_palable      int64
spiders             int64
ascites             int64
varices             int64
bilirubin         float64
alk_phosphate       int64
sgot                int64
albumin           float64
protime             int64
histology           int64
dtype: object

In [10]:
# Count distinct values per column to tell two-valued flags apart from
# continuous measurements.
df.nunique()

class              2
age               47
sex                2
steroid            2
antivirals         2
fatigue            2
malaise            2
anorexia           2
liver_big          2
liver_firm         2
spleen_palable     2
spiders            2
ascites            2
varices            2
bilirubin         33
alk_phosphate     79
sgot              83
albumin           30
protime           43
histology          2
dtype: int64

In [11]:
# List the distinct bilirubin values present in the data.
df['bilirubin'].unique()

array([1.  , 0.9 , 0.7 , 1.42, 1.3 , 2.2 , 2.  , 1.2 , 0.6 , 0.4 , 0.8 ,
       1.4 , 2.3 , 0.5 , 0.3 , 1.8 , 3.5 , 4.1 , 1.6 , 2.8 , 1.5 , 2.5 ,
       4.6 , 3.  , 4.8 , 2.4 , 1.7 , 1.1 , 3.2 , 2.9 , 8.  , 1.9 , 7.6 ])

In [12]:
# Observed bilirubin range, shown as (max, min).
df['bilirubin'].max(), df['bilirubin'].min()

(8.0, 0.3)

In [13]:
# List the distinct albumin values present in the data.
df['albumin'].unique()

array([4.  , 3.5 , 3.81, 4.4 , 3.9 , 3.7 , 4.9 , 2.9 , 4.3 , 4.1 , 4.2 ,
       4.7 , 3.8 , 2.7 , 4.6 , 5.  , 3.3 , 4.5 , 3.4 , 3.1 , 3.  , 2.6 ,
       5.3 , 4.8 , 2.8 , 3.6 , 2.1 , 6.4 , 2.4 , 2.2 ])

In [14]:
# Observed albumin range, shown as (max, min).
df['albumin'].max(), df['albumin'].min()

(6.4, 2.1)

In [15]:
# Observed protime range, shown as (max, min).
# NOTE(review): the recorded minimum is 0 — possibly a placeholder for a
# missing measurement rather than a real value; confirm against the source.
df['protime'].max(), df['protime'].min()

(100, 0)

In [16]:
# Check whether any cell of the frame is NaN.
np.any(np.isnan(df))

False

In [17]:
# Check that every cell of the frame is finite (neither NaN nor +/-inf).
np.all(np.isfinite(df))

True

In [18]:
from joblib import load
def load_model(filepath='hepatitis.pkl'):
    """Load a persisted model bundle from disk.

    Args:
        filepath: Path of the joblib file to read. Defaults to
            'hepatitis.pkl', the file this notebook writes, so existing
            zero-argument calls behave exactly as before.

    Returns:
        Whatever object was stored in the file; for the file written by this
        notebook that is a dict with 'title', 'classifier' and 'scaler' keys.
    """
    # NOTE: joblib files are pickle-based — only load files you trust.
    return load(filepath)

def pred(age,sex,steroid,antivirals,fatigue,malaise,anorexia,liver_big,liver_firm,spleen_palable,
            spiders,ascites,varices,bilirubin,alk_phosphate,sgot,albumin,protime,histology):
    """Predict the hepatitis class for a single patient record.

    The arguments are the 19 raw (unscaled) feature values, in the same order
    as the feature columns the scaler was fitted on.

    Returns:
        The classifier's prediction array for the single input row.

    Raises:
        KeyError: if the loaded bundle lacks a 'scaler' or 'classifier' entry.
    """
    feature_names = [
        'age', 'sex', 'steroid', 'antivirals', 'fatigue', 'malaise', 'anorexia',
        'liver_big', 'liver_firm', 'spleen_palable', 'spiders', 'ascites',
        'varices', 'bilirubin', 'alk_phosphate', 'sgot', 'albumin', 'protime',
        'histology',
    ]
    values = [
        age, sex, steroid, antivirals, fatigue, malaise, anorexia,
        liver_big, liver_firm, spleen_palable, spiders, ascites,
        varices, bilirubin, alk_phosphate, sgot, albumin, protime,
        histology,
    ]

    # Deserialize the bundle once per call instead of once per component.
    bundle = load_model()

    # The scaler was fitted on a DataFrame, so hand it a DataFrame with the
    # same column names rather than a bare list of lists.
    frame = pd.DataFrame([values], columns=feature_names)
    scaled = bundle['scaler'].transform(frame)
    return bundle['classifier'].predict(scaled)

In [19]:
# Reload the bundle from disk and pull out the classifier.
# NOTE: this rebinds `model`, which earlier held the whole bundle dict, to the
# classifier object; the cells below rely on that.
m = load_model()
model = m.get('classifier')
# Number of input features the classifier was fitted with.
model.n_features_in_

19

In [20]:
# predict() needs a 2-D array of already-scaled samples; calling it with no
# argument raises TypeError. Use the first held-out row as a smoke test of
# the reloaded classifier.
model.predict(xtest[:1])[0]

In [None]:
# Inspect the class labels known to the reloaded classifier.
model.classes_

array([1, 2], dtype=int64)