In [None]:
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics
from sklearn.metrics import precision_recall_fscore_support
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler

# Read the wine table from disk and discard the 'Id' column in one step, so
# `df` only ever refers to the frame without it.
df = pd.read_csv('WineQT.csv').drop(columns='Id')


In [128]:
# Fit a logistic-regression classifier for `quality` and report its metrics
# on a held-out 20% split.
y = df.quality
features = [
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'sulphates', 'alcohol',
]
X = df[features]

# Numeric preprocessing: fill missing values with the column mean, then
# standardise each column.
transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="mean")), ("scaler", StandardScaler())]
)

# Only the columns named in `features` are routed through the transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", transformer, features),
    ]
)

clf = Pipeline(
    steps=[("preprocessor", preprocessor), ("classifier", LogisticRegression())]
)

# A fixed random_state keeps the train/test split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

clf.fit(X_train, y_train)
preds = clf.predict(X_test)

mae = mean_absolute_error(y_test, preds)
c_matrix = metrics.confusion_matrix(y_test, preds)

# When a quality class is never predicted its precision is undefined.
# zero_division=0 scores such classes as 0 (the same value the default
# setting produces) without emitting an UndefinedMetricWarning.
prf = precision_recall_fscore_support(
    y_test, preds, average='weighted', zero_division=0
)

print(prf)
print(c_matrix)
print(f'MAE is: {mae}')
# For a classifier pipeline, score() is the mean accuracy on the given data.
print("model score: %.3f" % clf.score(X_test, y_test))


(0.5824755388459473, 0.6157205240174672, 0.5944531363252853, None)
[[ 0  0  0  0  0  0]
 [ 0  0  3  0  0  0]
 [ 1  0 74 22  0  0]
 [ 0  0 30 60  5  0]
 [ 0  0  3 18  7  0]
 [ 0  0  0  2  4  0]]
MAE is: 0.4104803493449782
model score: 0.616


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [133]:
# Fit a decision-tree regressor for `quality` on a reduced feature set and
# report classification-style metrics alongside MAE on a held-out 20% split.
y = df.quality
features2 = [
    'volatile acidity', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'sulphates', 'alcohol',
]

# Numeric preprocessing: fill missing values with the column mean, then
# rescale each column with MinMaxScaler.
transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="mean")), ("scaler", MinMaxScaler())]
)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", transformer, features2),
    ]
)

clf = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", DecisionTreeRegressor(random_state=1)),
    ]
)

# `X` is the feature frame built in the logistic-regression cell. The
# ColumnTransformer selects the `features2` columns from it and, with its
# default `remainder` setting, drops the others.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

clf.fit(X_train, y_train)
preds = clf.predict(X_test)

# The regressor returns floats. The classification metrics below raise a
# ValueError on continuous targets, so map each prediction to the nearest
# integer quality label before using it as a class.
pred_labels = preds.round().astype(int)

c_matrix = metrics.confusion_matrix(y_test, pred_labels)

# MAE is a regression metric, so it is computed on the raw predictions.
mae = mean_absolute_error(y_test, preds)

# zero_division=0 scores classes with no predicted samples as 0 instead of
# emitting an UndefinedMetricWarning.
prf = precision_recall_fscore_support(
    y_test, pred_labels, average='weighted', zero_division=0
)

print(prf)
print(c_matrix)
print(f'MAE is: {mae}')
# For a regressor pipeline, score() is the R^2 coefficient of determination,
# not accuracy, so this number is not comparable to a classifier's score.
print("model score: %.3f" % clf.score(X_test, y_test))


(0.6467956266457321, 0.6157205240174672, 0.6236223626271538, None)
[[ 1  0  2  0  0]
 [ 4 66 23  4  0]
 [ 2 22 55 16  0]
 [ 1  2  8 17  0]
 [ 0  0  1  3  2]]
MAE is: 0.4410480349344978
model score: 0.102
