# **import library**

In [5]:
import pickle
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# Seed NumPy's global random generator so stochastic steps are repeatable on a fresh run.
SEED = 42
np.random.seed(SEED)

# **load data**

In [6]:
# Read the ILPD CSV into a DataFrame.
# NOTE(review): absolute path (looks like a Colab working directory) — confirm before running elsewhere.
DATA_PATH = r"/content/Indian Liver Patient Dataset (ILPD).csv"
load_data = pd.read_csv(DATA_PATH)

#view sample data


In [7]:
# Plain-text preview of the first five rows.
preview_rows = load_data.head(n=5)
print(preview_rows)

   age  gender  tot_bilirubin  ...  sgot  alkphos  is_patient
0   65  Female            0.7  ...   3.3     0.90           1
1   62    Male           10.9  ...   3.2     0.74           1
2   62    Male            7.3  ...   3.3     0.89           1
3   58    Male            1.0  ...   3.4     1.00           1
4   72    Male            3.9  ...   2.4     0.40           1

[5 rows x 11 columns]


# **handle missing data**

In [8]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
load_data.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   age               583 non-null    int64  
 1   gender            583 non-null    object 
 2   tot_bilirubin     583 non-null    float64
 3   direct_bilirubin  583 non-null    float64
 4   tot_proteins      583 non-null    int64  
 5   albumin           583 non-null    int64  
 6   ag_ratio          583 non-null    int64  
 7   sgpt              583 non-null    float64
 8   sgot              583 non-null    float64
 9   alkphos           579 non-null    float64
 10  is_patient        583 non-null    int64  
dtypes: float64(5), int64(5), object(1)
memory usage: 50.2+ KB
None


In [9]:
# Remove every row that has at least one missing value, modifying load_data in place.
load_data.dropna(axis="index", how="any", inplace=True)

In [10]:
# Re-check the non-null counts after incomplete rows were dropped.
# DataFrame.info() prints its own summary and returns None, so it is called
# directly; print() around it only emitted an extra "None" line.
load_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 579 entries, 0 to 582
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   age               579 non-null    int64  
 1   gender            579 non-null    object 
 2   tot_bilirubin     579 non-null    float64
 3   direct_bilirubin  579 non-null    float64
 4   tot_proteins      579 non-null    int64  
 5   albumin           579 non-null    int64  
 6   ag_ratio          579 non-null    int64  
 7   sgpt              579 non-null    float64
 8   sgot              579 non-null    float64
 9   alkphos           579 non-null    float64
 10  is_patient        579 non-null    int64  
dtypes: float64(5), int64(5), object(1)
memory usage: 54.3+ KB
None


# **map labeled data**

In [11]:
# Encode gender numerically on a derived frame instead of overwriting the
# column in load_data: re-running this cell then starts from the original
# string labels rather than re-mapping already-encoded 0/1 values into NaN.
gender_codes = {"Female": 0, "Male": 1}
encoded_data = load_data.assign(gender=load_data["gender"].map(gender_codes))

# Series.map yields NaN for any label missing from the mapping; fail loudly
# here instead of passing NaN features on to model training.
if encoded_data["gender"].isna().any():
    raise ValueError("unexpected value in 'gender' column; expected 'Female' or 'Male'")

# Features are every column except the target; the target is 'is_patient'.
X = encoded_data.drop('is_patient', axis=1)
y = encoded_data['is_patient']

# **split data into train / test**

In [12]:
# Hold out 20% of the rows for testing. Passing random_state makes this cell
# return the same split every time it is re-run, instead of depending on how
# much of NumPy's global random stream earlier executions have consumed.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# **choose model**



In [15]:
# RBF-kernel support vector classifier. These values match the best_params_
# recorded in this notebook's grid-search output.
# NOTE(review): the features are passed to the SVC unscaled, and the recorded
# scores (1.0 train vs ~0.63 test) suggest overfitting — consider scaling.
svc_params = {"C": 1, "gamma": 1, "kernel": 'rbf'}
model = svm.SVC(**svc_params)

# **train model**

In [16]:
# Fit the classifier on the training split; as the cell's last expression,
# the fitted estimator's repr is shown as the cell output.
model.fit(X=x_train, y=y_train)

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf', max_iter=-1,
    probability=False, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

# **tune model params**

In [13]:
# Grid search over the SVC kernel, C and gamma, kept disabled by wrapping the
# code in a string literal so that executing this cell does no fitting.
# The log and GridSearchCV repr recorded below this cell are presumably left
# over from an earlier run with the code enabled (the log reports 120 fits
# taking about 20 minutes).
"""
from sklearn.model_selection import GridSearchCV
prams={"kernel":("linear","rbf"), "C":[1,5,10], "gamma":[1,0.1,0.01,0.001]}
grid=GridSearchCV(svm.SVC(),prams,verbose=3)
grid.fit(x_train,y_train)
"""

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] C=1, gamma=1, kernel=linear .....................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ......... C=1, gamma=1, kernel=linear, score=0.731, total=  16.7s
[CV] C=1, gamma=1, kernel=linear .....................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   16.7s remaining:    0.0s


[CV] ......... C=1, gamma=1, kernel=linear, score=0.731, total=  14.5s
[CV] C=1, gamma=1, kernel=linear .....................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   31.2s remaining:    0.0s


[CV] ......... C=1, gamma=1, kernel=linear, score=0.742, total=  10.8s
[CV] C=1, gamma=1, kernel=linear .....................................
[CV] ......... C=1, gamma=1, kernel=linear, score=0.739, total=   5.6s
[CV] C=1, gamma=1, kernel=linear .....................................
[CV] ......... C=1, gamma=1, kernel=linear, score=0.728, total=   7.1s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.753, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.731, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.742, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.750, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] .

[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed: 20.2min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [1, 5, 10], 'gamma': [1, 0.1, 0.01, 0.001],
                         'kernel': ('linear', 'rbf')},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [14]:
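# Disabled together with the grid search: `grid` only exists when that search is run.
# The values recorded below come from an earlier run with the search enabled.
#print(grid.best_params_)
#print(grid.best_score_)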

{'C': 1, 'gamma': 1, 'kernel': 'rbf'}
0.7451612903225806


# **validate model**

In [17]:
# Print the model's score on the training split first, then on the held-out test split.
for features, target in ((x_train, y_train), (x_test, y_test)):
    print(model.score(features, target))

1.0
0.6293103448275862


# **save model**


In [19]:
# Serialize the fitted model to disk. The context manager closes the file
# handle (flushing the pickle) even if dump() raises; the original passed an
# open() result straight to dump() and never closed it.
# NOTE(review): "mushrooms.pkl" looks like a leftover name from another project —
# this notebook trains on the liver-patient dataset. Confirm before renaming,
# since consumers may load the file by this name.
with open(r"mushrooms.pkl", "wb") as model_file:
    pickle.dump(model, model_file)