In [1]:
# gerekli kütüphaneler
import numpy as np  # sayısal işlemler 
import pandas as pd  # veri manipülasyonu 
import seaborn as sns  # görselleştirme
import matplotlib.pyplot as plt  # görselleştirme

# görsel çıktılardaki uyarı mesajları için
import warnings 
warnings.filterwarnings('ignore')

# modellerin basarisini degerlendirmek icin
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# train ve test olarak bölmek için
from sklearn.model_selection import train_test_split

In [2]:
# Read the cleaned red-wine data (the first CSV column becomes the index) into
# `wine`, then work on an independent copy so the loaded frame stays untouched.
wine = pd.read_csv('redwine-clean.csv', index_col=0)
df = wine.copy(deep=True)

In [3]:
# Replace the spaces in the column names with underscores so that models which
# are strict about feature names do not raise errors.
underscored_names = {
    'fixed acidity': 'fixed_acidity',
    'volatile acidity': 'volatile_acidity',
    'citric acid': 'citric_acid',
    'residual sugar': 'residual_sugar',
    'chlorides': 'chlorides',
    'free sulfur dioxide': 'free_sulfur_dioxide',
    'total sulfur dioxide': 'total_sulfur_dioxide',
}
df = df.rename(columns=underscored_names)

In [4]:
# Some algorithms (e.g. xgboost) expect the target classes to be encoded as
# 0, 1, 2, ... so the textual quality labels are converted to integer codes.
#
# The codes are derived from the untouched `wine` frame rather than from `df`
# itself: mapping `df['quality']` in place is not idempotent, because running
# the cell a second time would look up the already-encoded integers in the
# string-keyed dict and turn every value into NaN.
QUALITY_CODES = {'yuksek': 2, 'orta': 1, 'zayif': 0}
quality_encoded = wine['quality'].map(QUALITY_CODES)

# Series.map yields NaN for any value missing from the dict; fail loudly
# instead of passing a NaN target on to the models.
unmapped_labels = wine.loc[quality_encoded.isna(), 'quality'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f'Unmapped quality labels: {list(unmapped_labels)}')

df['quality'] = quality_encoded

In [5]:
# Separate the frame into the target (dependent variable) `y` and the feature
# matrix (independent variables) `x`.
target_column = 'quality'
x = df.drop(columns=[target_column])
y = df[target_column]

In [6]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Preview the first five rows of the prepared frame.
df.head(n=5)

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,1
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,1


In [8]:
# Show the distinct encoded values of the target column.
pd.unique(df['quality'])

array([1, 2, 0], dtype=int64)

## Logistic Regression

In [9]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier (fixed seed) on the training split and
# predict the held-out rows.
logr = LogisticRegression(random_state=0).fit(X_train, y_train)
y_pred_logr = logr.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_logr = confusion_matrix(y_true=y_test, y_pred=y_pred_logr)
logr_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_logr)
print(
    cm_logr,
    '\n--------------------------\n',
    'Logistic Regression Accuracy Score: ',
    logr_accuracy,
)

[[  0  11   0]
 [  0 253   9]
 [  0  38   9]] 
--------------------------
 Logistic Regression Accuracy Score:  0.81875


## Support Vector Machine

In [10]:
from sklearn.svm import SVC

# Fit a support-vector classifier with an RBF kernel on the training split and
# predict the held-out rows.
svc = SVC(kernel='rbf').fit(X_train, y_train)
y_pred_svc = svc.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_svc = confusion_matrix(y_true=y_test, y_pred=y_pred_svc)
svc_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_svc)
print(
    cm_svc,
    '\n--------------------------\n',
    'Support Vector Classifier Accuracy Score: ',
    svc_accuracy,
)

[[  0  11   0]
 [  0 262   0]
 [  0  46   1]] 
--------------------------
 Support Vector Classifier Accuracy Score:  0.821875


## Naive Bayes

In [11]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian naive-Bayes classifier on the training split and predict the
# held-out rows.
gnb = GaussianNB().fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_gnb = confusion_matrix(y_true=y_test, y_pred=y_pred_gnb)
gnb_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_gnb)
print(
    cm_gnb,
    '\n--------------------------\n',
    'Naive Bayes Accuracy Score: ',
    gnb_accuracy,
)

[[  3   8   0]
 [  2 228  32]
 [  0  18  29]] 
--------------------------
 Naive Bayes Accuracy Score:  0.8125


## K Neighbors Classifier

In [12]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 1-nearest-neighbour classifier (Minkowski metric) on the training split
# and predict the held-out rows.
knn = KNeighborsClassifier(n_neighbors=1, metric='minkowski').fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_knn = confusion_matrix(y_true=y_test, y_pred=y_pred_knn)
knn_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
print(
    cm_knn,
    '\n--------------------------\n',
    'KNN Accuracy Score: ',
    knn_accuracy,
)

[[  2   9   0]
 [  4 236  22]
 [  1  24  22]] 
--------------------------
 KNN Accuracy Score:  0.8125


## Decision Tree

In [13]:
from sklearn.tree import DecisionTreeClassifier

# Fit an entropy-based decision tree on the training split.
# random_state is pinned so that the tree, and therefore the reported score,
# is the same on every Restart & Run All, matching the seeded train/test split.
dtc = DecisionTreeClassifier(criterion='entropy', random_state=42)
dtc.fit(X_train, y_train)
y_pred_dtc = dtc.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_dtc = confusion_matrix(y_test, y_pred_dtc)
dtc_accuracy = accuracy_score(y_test, y_pred_dtc)
print(cm_dtc, '\n--------------------------\n', 'Decision Tree Accuracy Score: ', dtc_accuracy)

[[  2   9   0]
 [  6 236  20]
 [  0  22  25]] 
--------------------------
 Decision Tree Accuracy Score:  0.821875


## Random Forest

In [14]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest with default hyper-parameters on the training split.
# random_state is pinned because the forest is stochastic (bootstrap samples
# and random feature subsets); without a seed the reported score changes from
# run to run.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)
y_pred_rfc = rfc.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_rfc = confusion_matrix(y_test, y_pred_rfc)
rfc_accuracy = accuracy_score(y_test, y_pred_rfc)
print(cm_rfc, '\n--------------------------\n', 'Random Forest Accuracy Score: ', rfc_accuracy)

[[  0  11   0]
 [  0 251  11]
 [  0  22  25]] 
--------------------------
 Random Forest Accuracy Score:  0.8625


## XGBoost

In [15]:
# The class is imported directly instead of `import xgboost as xgb`: the model
# is stored in the notebook-level name `xgb`, and rebinding the module alias to
# the model made the cell fail with AttributeError when it was run a second time.
from xgboost import XGBClassifier

# Fit a 3-class gradient-boosted classifier on the training split.
# 'multi:softprob' replaces 'multiclass:softmax', which is not one of the
# objective names listed in the XGBoost parameter documentation.
xgb = XGBClassifier(objective='multi:softprob', num_class=3)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_xgb = confusion_matrix(y_test, y_pred_xgb)
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
print(cm_xgb, '\n--------------------------\n', 'XGB Accuracy Score: ', xgb_accuracy)

[[  0  11   0]
 [  0 256   6]
 [  0  20  27]] 
--------------------------
 XGB Accuracy Score:  0.884375


## LightGBM

In [16]:
import lightgbm as lgb

# Fit a 3-class LightGBM classifier on the training split and predict the
# held-out rows.
lgbm = lgb.LGBMClassifier(objective='multiclass', num_class=3).fit(X_train, y_train)
y_pred_lgbm = lgbm.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_lgbm = confusion_matrix(y_true=y_test, y_pred=y_pred_lgbm)
lgbm_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_lgbm)
print(
    cm_lgbm,
    '\n--------------------------\n',
    'LightGBM Accuracy Score: ',
    lgbm_accuracy,
)

[[  0  11   0]
 [  0 248  14]
 [  0  19  28]] 
--------------------------
 LightGBM Accuracy Score:  0.8625


## CatBoost

In [19]:
# The class is imported directly instead of `import catboost as cb`: the model
# is stored in the notebook-level name `cb`, and rebinding the module alias to
# the model made the cell fail when it was run a second time.
from catboost import CatBoostClassifier

# Fit a 3-class CatBoost classifier (training log silenced) on the training split.
cb = CatBoostClassifier(loss_function='MultiClass', classes_count=3, verbose=0)
cb.fit(X_train, y_train)

# Predict with the model fitted above. The original cell called `cbm.predict`,
# a name that is never defined in the notebook, so it raised NameError on a
# fresh kernel.
y_pred_cb = cb.predict(X_test)

# Evaluate on the test split: confusion matrix plus overall accuracy.
cm_cb = confusion_matrix(y_test, y_pred_cb)
cb_accuracy = accuracy_score(y_test, y_pred_cb)
print(cm_cb, '\n--------------------------\n', 'CatBoost Accuracy Score: ', cb_accuracy)

[[  1  10   0]
 [  0 252  10]
 [  0  18  29]] 
--------------------------
 CatBoost Accuracy Score:  0.88125
