In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC 
from sklearn.metrics import confusion_matrix

In [None]:
# Read the raw CSV once and leave it untouched; every later edit is applied
# to the independent working copy `data`.
red_wine_quality = pd.read_csv("winequality-red.csv")
data = red_wine_quality.copy(deep=True)
# (rows, columns) of the working copy
data.shape

In [None]:
# Print a concise summary of the frame: column names, non-null counts, dtypes.
data.info()

In [None]:
# Summary statistics, transposed so that each feature is a row.
data.describe().transpose()

In [None]:
# Fixed acidity per quality score (barplot aggregates with the mean by default).
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="fixed acidity", ax=ax)

In [None]:
# Volatile acidity per quality score (barplot aggregates with the mean by default).
# Author's observation: the bars trend downward as quality increases.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="volatile acidity", ax=ax)

In [None]:
# Citric acid per quality score (barplot aggregates with the mean by default).
# Author's observation: the bars trend upward as quality increases.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="citric acid", ax=ax)

In [None]:
# Residual sugar per quality score (barplot aggregates with the mean by default).
# Author's observation: the bars trend downward as quality increases.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="residual sugar", ax=ax)

In [None]:
# Chlorides per quality score (barplot aggregates with the mean by default).
# Author's observation: the bars trend downward as quality increases.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="chlorides", ax=ax)

In [None]:
# Free sulfur dioxide per quality score (barplot aggregates with the mean by default).
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="free sulfur dioxide", ax=ax)

In [None]:
# Total sulfur dioxide per quality score (barplot aggregates with the mean by default).
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="total sulfur dioxide", ax=ax)

In [None]:
# Sulphates per quality score (barplot aggregates with the mean by default).
# Author's observation: the bars trend upward as quality increases.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="sulphates", ax=ax)

In [None]:
# Alcohol per quality score (barplot aggregates with the mean by default).
# Author's observation: the bars trend upward as quality increases.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="alcohol", ax=ax)

In [None]:
# pH per quality score (barplot aggregates with the mean by default).
# Author's observation: the bars trend downward as quality increases.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x="quality", y="pH", ax=ax)

In [None]:
# Number of rows for each distinct value of the "quality" column.
data["quality"].value_counts()

In [None]:
# Binarise the target: 1 ("good") when the original score is above 6.5, else 0.
# The labels are derived from the untouched raw frame rather than from
# data["quality"] itself, so re-running this cell is idempotent. Thresholding
# the already-binarised column a second time would turn every label into 0.
data["quality"] = (red_wine_quality["quality"] > 6.5).astype(int)
data.head()

In [None]:
# Class balance of the binarised target, as numbers and as a bar chart.
print(data["quality"].value_counts())

fig, ax = plt.subplots(figsize=(10, 6))
# Pass the column by keyword: recent seaborn releases treat the first
# positional argument as `data`, not `x`, so a bare Series is misinterpreted.
sns.countplot(x="quality", data=data, ax=ax)

In [None]:
# Feature matrix = every column except the last; target vector = last column.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

In [None]:
# Hold out 20% of the rows for testing. `stratify=y` keeps the proportion of
# the two target classes the same in both splits, which matters because the
# binarised target is imbalanced; a fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print("X TRAIN SHAPE:", X_train.shape)
print("X TEST SHAPE:", X_test.shape)
print("Y TRAIN SHAPE:", y_train.shape)
print("Y TEST SHAPE:", y_test.shape)

In [None]:
# Standardise the features. The scaler learns its mean/std from the training
# split only; the test split is transformed with those same statistics.
# Re-fitting on the test set (fit_transform) would leak test information and
# scale the two splits inconsistently.
sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Random-forest classifier (the variable name says "regressor" but the model
# is a classifier; the name is kept because later cells refer to it).
# A fixed random_state makes the fitted forest, and therefore every metric
# reported below, reproducible across kernel restarts.
rf_regressor = RandomForestClassifier(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)
y_pred = rf_regressor.predict(X_test)

In [None]:
# Confusion matrix of the random-forest predictions (rows: true label,
# columns: predicted label).
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Draw on an explicit figure/axes pair. A bare plt.matshow() opens a figure of
# its own, which left the 10x6 figure created beforehand empty.
fig, ax = plt.subplots(figsize=(10, 6))
im = ax.matshow(cm, cmap=plt.cm.binary, interpolation="nearest")
ax.set_title("Confusion Matrix - Random Forest Classifier\n")
fig.colorbar(im, ax=ax)
ax.set_ylabel("Expected Label")
ax.set_xlabel("Predicted Label")
plt.show()

In [None]:
# Hit/miss counts and accuracy for the predictions currently held in y_pred.
# The comparison is evaluated once and summed in NumPy instead of iterating
# over the arrays with the built-in sum() four separate times.
n_total = len(y_pred)
n_correct = (y_pred == y_test).sum()
print("Total Predictions:", n_total)
print("Correct Prediction:", n_correct)
print("Incorrect Prediction:", n_total - n_correct)
print("Accuracy:", n_correct * 100 / n_total, "%")

In [None]:
# Baseline support-vector classifier with default hyper-parameters.
# fit() returns the estimator itself, so construction and fitting are chained.
svc_regressor = SVC().fit(X_train, y_train)
y_pred = svc_regressor.predict(X_test)

In [None]:
# Confusion matrix of the baseline SVC predictions (rows: true label,
# columns: predicted label).
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Draw on an explicit figure/axes pair. A bare plt.matshow() opens a figure of
# its own, which left the 10x6 figure created beforehand empty.
fig, ax = plt.subplots(figsize=(10, 6))
im = ax.matshow(cm, cmap=plt.cm.binary, interpolation="nearest")
ax.set_title("Confusion Matrix - Support Vector Classifier\n")
fig.colorbar(im, ax=ax)
ax.set_ylabel("Expected Label")
ax.set_xlabel("Predicted Label")
plt.show()

In [None]:
# Hit/miss counts and accuracy for the predictions currently held in y_pred.
# The comparison is evaluated once and summed in NumPy instead of iterating
# over the arrays with the built-in sum() four separate times.
n_total = len(y_pred)
n_correct = (y_pred == y_test).sum()
print("Total Predictions:", n_total)
print("Correct Prediction:", n_correct)
print("Incorrect Prediction:", n_total - n_correct)
print("Accuracy:", n_correct * 100 / n_total, "%")

In [None]:
# Hyper-parameter search space for the SVC.
# `gamma` only affects the RBF kernel, so it is swept for "rbf" alone; listing
# it next to "linear" in a single dict would fit the same linear model once
# per gamma value and multiply the search cost for no benefit.
svc_c_values = [0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4]
svc_gamma_values = [0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4]
param = [
    {"kernel": ["linear"], "C": svc_c_values},
    {"kernel": ["rbf"], "C": svc_c_values, "gamma": svc_gamma_values},
]
# 10-fold cross-validated search scored on accuracy. GridSearchCV clones the
# estimator it is given, so the already-fitted baseline is not modified.
grid_svc = GridSearchCV(svc_regressor, param_grid=param, scoring="accuracy", cv=10)

In [None]:
# Run the cross-validated grid search on the training split only.
# This is the expensive cell of the notebook.
grid_svc.fit(X_train,y_train)

In [None]:
# Report the winning hyper-parameter combination (typo in the label fixed).
print("Best params for SVC: ", grid_svc.best_params_)

In [None]:
# Refit an SVC with the hyper-parameters selected by the grid search.
# The values are read from grid_svc.best_params_ instead of being typed in by
# hand, so this cell stays correct whenever the data, the split or the grid
# changes and the search picks a different winner.
svc_regressor2 = SVC(**grid_svc.best_params_)
svc_regressor2.fit(X_train, y_train)
y_pred_svc = svc_regressor2.predict(X_test)
cm = confusion_matrix(y_test, y_pred_svc)
print(cm)

# Draw on an explicit figure/axes pair. A bare plt.matshow() opens a figure of
# its own, which left the 10x6 figure created beforehand empty.
fig, ax = plt.subplots(figsize=(10, 6))
im = ax.matshow(cm, cmap=plt.cm.binary, interpolation="nearest")
ax.set_title("Confusion Matrix - Support Vector Classifier\n")
fig.colorbar(im, ax=ax)
ax.set_ylabel("Expected Label")
ax.set_xlabel("Predicted Label")
plt.show()

In [None]:
# Hit/miss counts and accuracy for the tuned SVC predictions.
# The comparison is evaluated once and summed in NumPy instead of iterating
# over the arrays with the built-in sum() four separate times.
n_total = len(y_pred_svc)
n_correct = (y_pred_svc == y_test).sum()
print("Total Predictions:", n_total)
print("Correct Prediction:", n_correct)
print("Incorrect Prediction:", n_total - n_correct)
print("Accuracy:", n_correct * 100 / n_total, "%")

In [None]:
# 10-fold cross-validation of the random forest on the training split;
# the cell displays the mean of the ten fold scores.
rf_eval = cross_val_score(rf_regressor, X_train, y_train, cv=10)
rf_eval.mean()