In [130]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
#from sklearn.datasets import load_wine
from sklearn import tree
import seaborn as sns
from termcolor import colored
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix, classification_report
import os
# Walk the Kaggle input directory and print the full path of every file
# found there, so the available datasets are visible before loading anything.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

In [4]:
# Read the wine-quality CSV into a DataFrame and display it as the cell output.
wine_csv_path = '../input/wine-quality-dataset/WineQT.csv'
wine_data = pd.read_csv(wine_csv_path)
wine_data

In [5]:
# Distinct labels that occur in the target column.
wine_data.quality.unique()

In [18]:
# Print a summary of the frame (columns, non-null counts, dtypes).
wine_data.info()


In [55]:
# The eleven measurement columns used as predictors.
feature_cols = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]
X = wine_data[feature_cols]  # feature matrix
y = wine_data['quality']     # target labels

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

**Decision Tree Classifier (DTC) with Gini criterion**

In [56]:
# Decision tree using the default split criterion (Gini impurity).
# random_state is pinned: scikit-learn permutes the candidate features at every
# split, so an unseeded tree can differ from run to run and the scores printed
# in the following cells would not be reproducible.
wine_clf = DecisionTreeClassifier(random_state=0)

# fit() trains in place and returns the estimator itself, so no rebinding is needed.
wine_clf.fit(X_train, y_train)

# Predicted quality labels for the held-out rows.
y_pred = wine_clf.predict(X_test)

In [57]:
# Report the held-out accuracy of the Gini tree, highlighted in blue.
gini_accuracy = accuracy_score(y_test, y_pred)
gini_message = 'Model accuracy score with criterion gini index: {0:0.4f}'.format(gini_accuracy)
print(colored(gini_message, 'blue'))

In [61]:
# Predictions of the Gini tree on the training rows.
# NOTE(review): this variable is not read again in the visible cells.
y_pred_train = wine_clf.predict(X_train)

In [72]:
# Compare the score on the rows the tree was fitted on with the score on the
# held-out rows; a large gap between the two points to overfitting.
train_score = wine_clf.score(X_train, y_train)
test_score = wine_clf.score(X_test, y_test)

print('Training set score: {:.4f}'.format(train_score))
print(colored('Test set score: {:.4f}'.format(test_score), 'red'))

In [65]:
# Draw the tree that was fitted and scored in the cells above.
# The model is deliberately NOT refitted here: calling fit() inside a plotting
# cell retrains it on whatever X_train / y_train currently hold, so the picture
# could silently stop matching the model whose scores were reported.
plt.figure(figsize=(15, 8))
# feature_names makes each node show the column name instead of an index like X[10];
# the trailing ';' suppresses the list-of-annotations repr in the cell output.
tree.plot_tree(wine_clf, feature_names=list(X_train.columns));

**Decision Tree Classifier (DTC) with entropy criterion**

In [79]:
# Shallow decision tree (depth capped at 3) that chooses splits by entropy;
# the fixed random_state keeps the fitted tree repeatable.
wine_clf_en = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)


# fit the model
wine_clf_en.fit(X_train, y_train)

# Predicted quality labels for the held-out rows.
y_pred_en = wine_clf_en.predict(X_test)
# The label names the criterion this model actually uses (entropy); it previously
# said "gini index", which mislabeled the result.
print(colored('Model accuracy score with criterion entropy: {0:0.4f}'. format(accuracy_score(y_test, y_pred_en)),'blue'))

In [83]:
# Score of the entropy tree on the fitted rows versus the held-out rows.
en_train_score = wine_clf_en.score(X_train, y_train)
en_test_score = wine_clf_en.score(X_test, y_test)

print('Training set score: {:.4f}'.format(en_train_score))
print(colored('Test set score: {:.4f}'.format(en_test_score), 'red'))

In [91]:
# Draw the fitted entropy tree.
# The model is plotted as-is rather than refitted: retraining inside a plotting
# cell would rebuild it on whatever X_train / y_train currently hold, which may
# no longer be the data it was scored on.
plt.figure(figsize=(24, 8))
# Trailing ';' keeps the list of matplotlib annotations out of the cell output.
tree.plot_tree(wine_clf_en);

In [92]:
# Labeled rendering of the fitted entropy tree: nodes show column names and the
# majority class, and are colored by that class.
# The model is plotted as-is rather than refitted, so the figure always shows
# the same tree that produced the reported scores.
plt.figure(figsize=(24, 8))
tree.plot_tree(
    wine_clf_en,
    feature_names=list(X_train.columns),
    # classes_ is sorted ascending, which is the order plot_tree expects.
    class_names=[str(label) for label in wine_clf_en.classes_],
    filled=True,
);

# **RANDOM FOREST** 

In [100]:
# Predictors and target for the random forest (same columns as the decision-tree section).
feature_cols = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar','chlorides','free sulfur dioxide','total sulfur dioxide','density', 'pH', 'sulphates', 'alcohol']
X = wine_data[feature_cols] # Features
y = wine_data.quality # Target variable

# Seed the split. Without random_state every run draws a different 75/25
# partition, so the forest's scores change from run to run and are measured on
# a different hold-out set than the decision trees. random_state=1 matches the
# split used in the decision-tree section, keeping the models comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)

In [120]:
# Random forest with default hyper-parameters.
# random_state is pinned because bootstrap sampling and per-split feature
# selection are random; without a seed the reported scores and feature
# importances change on every run.
wine_clf_rf = RandomForestClassifier(random_state=0)

# Fit once: a second fit() on the same data only retrains the forest from
# scratch and doubles the run time.
wine_clf_rf.fit(X_train, y_train)

# Predicted quality labels for the held-out rows.
y_pred_rf = wine_clf_rf.predict(X_test)

In [121]:
# Report the held-out accuracy of the random forest, highlighted in blue.
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_message = 'Model accuracy score with criterion gini index: {0:0.4f}'.format(rf_accuracy)
print(colored(rf_message, 'blue'))

In [122]:
# Forest predictions on the training rows.
# NOTE(review): y_pred_train_rf is not read again in the visible cells.
y_pred_train_rf = wine_clf_rf.predict(X_train)

# Score on the fitted rows versus the held-out rows.
rf_train_score = wine_clf_rf.score(X_train, y_train)
rf_test_score = wine_clf_rf.score(X_test, y_test)

print('Training set score: {:.4f}'.format(rf_train_score))
print(colored('Test set score: {:.4f}'.format(rf_test_score), 'red'))

In [128]:
# Held-out predictions used for the per-class report below.
preds = wine_clf_rf.predict(X_test)

# Confusion matrix of the forest on the held-out rows.
# NOTE(review): plot_confusion_matrix is deprecated in newer scikit-learn
# releases (removed in 1.2) - confirm the installed version, or migrate this
# call and its import to ConfusionMatrixDisplay.
plot_confusion_matrix(estimator=wine_clf_rf, X=X_test, y_true=y_test)
plt.show()

# Per-class precision / recall / F1 for the same predictions.
report_text = classification_report(y_true=y_test, y_pred=preds)
print(report_text)

In [132]:
# Horizontal bar chart of the forest's feature importances, one bar per
# training column, in column order.
fig, ax = plt.subplots(figsize=(10, 7))
sns.barplot(
    x=wine_clf_rf.feature_importances_,
    y=X_train.columns,
    ax=ax,
)
ax.set_title("Feature Importance")
plt.show()