In [5]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import ComplementNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [6]:
df = pd.read_excel('wine.xlsx')
df.head()


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1599 non-null   float64
 1   volatile acidity      1599 non-null   float64
 2   citric acid           1599 non-null   float64
 3   residual sugar        1599 non-null   float64
 4   chlorides             1599 non-null   float64
 5   free sulfur dioxide   1599 non-null   float64
 6   total sulfur dioxide  1599 non-null   float64
 7   density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   sulphates             1599 non-null   float64
 10  alcohol               1599 non-null   float64
 11  quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


# NB

In [10]:
# Select relevant features
X = df.drop('quality', axis=1)
y = df['quality']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
clf = GaussianNB()
clf.fit(X_train, y_train)

GaussianNB()

In [13]:
y_predGB = clf.predict(X_test)
acgb = accuracy_score(y_test, y_predGB)
print(f'Accuracy: {acgb:.2f}')

Accuracy: 0.55


In [14]:
clf = MultinomialNB()
clf.fit(X_train, y_train)

MultinomialNB()

In [15]:
y_predMB = clf.predict(X_test)
acmb = accuracy_score(y_test, y_predMB)
print(f'Accuracy: {acmb:.2f}')

Accuracy: 0.47


In [16]:
clf = BernoulliNB()
clf.fit(X_train, y_train)

BernoulliNB()

In [17]:
y_predbb = clf.predict(X_test)
acbb = accuracy_score(y_test, y_predbb)
print(f'Accuracy: {acbb:.2f}')

Accuracy: 0.41


In [18]:
clf = ComplementNB()
clf.fit(X_train, y_train)

ComplementNB()

In [19]:
y_predcb = clf.predict(X_test)
accb = accuracy_score(y_test, y_predcb)
print(f'Accuracy: {accb:.2f}')

Accuracy: 0.51


In [20]:
res={'Type':['Gaussian','Bernoulli','Multinomial','Complement'],
    'Accuracy':[acgb,acbb,acmb,accb]}
pd.DataFrame(res)

Unnamed: 0,Type,Accuracy
0,Gaussian,0.55
1,Bernoulli,0.40625
2,Multinomial,0.475
3,Complement,0.50625


# SVM

In [21]:
# Select relevant features
X = df.drop('quality', axis=1)
y = df['quality']

In [22]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [23]:
svc = SVC(kernel="linear")
svc.fit(X_train, y_train)

SVC(kernel='linear')

In [25]:
y_predL = svc.predict(X_test)
acl = accuracy_score(y_test, y_predL)
print(f"Accuracy: {acl:.2f}")


Accuracy: 0.57


In [26]:
svc = SVC(kernel="rbf")
svc.fit(X_train, y_train)

SVC()

In [27]:
y_predr = svc.predict(X_test)
acr = accuracy_score(y_test, y_predr)
print(f"Accuracy: {acr:.2f}")

Accuracy: 0.51


In [28]:
svc = SVC(kernel="poly")
svc.fit(X_train, y_train)

SVC(kernel='poly')

In [29]:
y_predp = svc.predict(X_test)
acp = accuracy_score(y_test, y_predp)
print(f"Accuracy: {acp:.2f}")

Accuracy: 0.50


In [30]:
svc = SVC(kernel="sigmoid")
svc.fit(X_train, y_train)

SVC(kernel='sigmoid')

In [31]:
y_preds = svc.predict(X_test)
acs = accuracy_score(y_test, y_preds)
print(f"Accuracy: {acs:.2f}")

Accuracy: 0.31


In [32]:
res = {"Type": ["Linear", "RBF", "Poly", "Sigmoid"], "Accuracy": [acl, acr, acp, acs]}
pd.DataFrame(res)


Unnamed: 0,Type,Accuracy
0,Linear,0.56875
1,RBF,0.509375
2,Poly,0.5
3,Sigmoid,0.309375
