In [18]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import ComplementNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [3]:
df = pd.read_csv('diabetes.csv')
df.head()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


# NB

In [4]:
# Select relevant features
X = df.drop('Outcome', axis=1)
y = df['Outcome']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
clf = GaussianNB()
clf.fit(X_train, y_train)

GaussianNB()

In [7]:
y_predGB = clf.predict(X_test)
acgb = accuracy_score(y_test, y_predGB)
print(f'Accuracy: {acgb:.2f}')

Accuracy: 0.77


In [8]:
clf = MultinomialNB()
clf.fit(X_train, y_train)

MultinomialNB()

In [9]:
y_predMB = clf.predict(X_test)
acmb = accuracy_score(y_test, y_predMB)
print(f'Accuracy: {acmb:.2f}')

Accuracy: 0.66


In [10]:
clf = BernoulliNB()
clf.fit(X_train, y_train)

BernoulliNB()

In [11]:
y_predbb = clf.predict(X_test)
acbb = accuracy_score(y_test, y_predbb)
print(f'Accuracy: {acbb:.2f}')

Accuracy: 0.66


In [12]:
clf = ComplementNB()
clf.fit(X_train, y_train)

ComplementNB()

In [13]:
y_predcb = clf.predict(X_test)
accb = accuracy_score(y_test, y_predcb)
print(f'Accuracy: {accb:.2f}')

Accuracy: 0.66


In [14]:
res={'Type':['Gaussian','Bernoulli','Multinomial','Complement'],
    'Accuracy':[acgb,acbb,acmb,accb]}
pd.DataFrame(res)

Unnamed: 0,Type,Accuracy
0,Gaussian,0.766234
1,Bernoulli,0.655844
2,Multinomial,0.662338
3,Complement,0.655844


# SVM

In [15]:
# Select relevant features
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [16]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [19]:
svc = SVC(kernel="linear")
svc.fit(X_train, y_train)

SVC(kernel='linear')

In [20]:
y_predL = svc.predict(X_test)
acl = accuracy_score(y_test, y_predL)
print(f"Accuracy: {acl:.2f}")


Accuracy: 0.75


In [21]:
svc = SVC(kernel="rbf")
svc.fit(X_train, y_train)

SVC()

In [22]:
y_predr = svc.predict(X_test)
acr = accuracy_score(y_test, y_predr)
print(f"Accuracy: {acr:.2f}")

Accuracy: 0.77


In [23]:
svc = SVC(kernel="poly")
svc.fit(X_train, y_train)

SVC(kernel='poly')

In [24]:
y_predp = svc.predict(X_test)
acp = accuracy_score(y_test, y_predp)
print(f"Accuracy: {acp:.2f}")

Accuracy: 0.76


In [25]:
svc = SVC(kernel="sigmoid")
svc.fit(X_train, y_train)

SVC(kernel='sigmoid')

In [26]:
y_preds = svc.predict(X_test)
acs = accuracy_score(y_test, y_preds)
print(f"Accuracy: {acs:.2f}")

Accuracy: 0.55


In [27]:
res = {"Type": ["Linear", "RBF", "Poly", "Sigmoid"], "Accuracy": [acl, acr, acp, acs]}
pd.DataFrame(res)


Unnamed: 0,Type,Accuracy
0,Linear,0.753247
1,RBF,0.766234
2,Poly,0.75974
3,Sigmoid,0.551948
