In [22]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics
from sklearn.metrics import precision_recall_fscore_support
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error

df = pd.read_csv('WineQT.csv')
df = df.drop(columns='Id')


In [25]:
y=df.quality
features=['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol']
X = df[features]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=1)
df_model = DecisionTreeRegressor(random_state=1)
df_model.fit(X_train, y_train)

qual_predictions = df_model.predict(X)

y_true = df.quality
y_actual = df.quality
y_pred = df_model.predict(X)
c_matrix = metrics.confusion_matrix(y_actual, y_pred)

prf = precision_recall_fscore_support(y_true, y_pred, average='weighted')

transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())])

preprocessor = ColumnTransformer(
    transformers=[
        ("num", transformer, features),
    ])

clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('model', df_model)
                     ])
clf.fit(X_train, y_train)

preds = clf.predict(X_test)

submitted_mae = mean_absolute_error(y_test, preds)
print(f'Your submitted MAE is: {submitted_mae}')
print(prf)


Your submitted MAE is: 0.47202797202797203
(0.894640253211521, 0.8932633420822397, 0.8931423325292326, None)


In [8]:
from sklearn.preprocessing import MinMaxScaler


y=df.quality
features=['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides']
X = df[features]

# Split into validation and training data
X_train, X_val, y_train, y_val = train_test_split(X,y,random_state=1)
# Specify Model
df_model = DecisionTreeRegressor(random_state=1)

# Fit Model
df_model_norm =  MinMaxScaler().fit(X_train)
# Normalise Model
X_train_norm = df_model_norm.transform(X_train)
X_val_norm = df_model_norm.transform(X_val)

df_model.fit(X_train_norm, y_train)

qual_predictions2 = df_model.predict(X_val_norm)

y_true = df.quality
y2_pred = df_model.predict(X)
c_matrix = metrics.confusion_matrix(y_true, y2_pred)

prf = precision_recall_fscore_support(y_true, y2_pred, average='weighted')
print(c_matrix)
print(prf)






[[  2   0   2   2   0   0]
 [  7   0  23   3   0   0]
 [ 23  38 360  42  18   2]
 [ 13  37 323  45  37   7]
 [  4  13  80  20  23   3]
 [  0   0  11   3   2   0]]
(0.3847436869547279, 0.3762029746281715, 0.3265618230192724, None)
