# Linear Regression with Polynomial Features on the Wine Quality Dataset
Dataset description: https://archive.ics.uci.edu/ml/datasets/wine+quality

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# --- Configuration: the values a reader is most likely to tune ---
DATA_PATH = 'dataset/winequality-red.csv'  # red-wine CSV, relative to the notebook
TARGET_COLUMN = 'quality'                  # column used as the regression label
POLY_DEGREE = 2                            # degree of the polynomial feature expansion
TEST_SIZE = 0.3                            # fraction of rows held out for testing
RANDOM_STATE = 1                           # fixed seed so the split is reproducible

# Load the red-wine quality data with pandas.
df = pd.read_csv(DATA_PATH)

# Separate the label y from the feature matrix X (every other column).
y = df[TARGET_COLUMN]
X = df.drop(TARGET_COLUMN, axis=1)

# Expand the features with polynomial terms up to POLY_DEGREE.
# NOTE(review): the default include_bias=True adds a constant column, which is
# redundant with the intercept LinearRegression fits on its own (its coefficient
# in the recorded output is ~1e-10). Consider include_bias=False; it is left
# unchanged here so the shape of model.coef_ stays the same.
poly = PolynomialFeatures(degree=POLY_DEGREE)
X = poly.fit_transform(X)

# Split the dataset into training data and testing data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Min-max scale the features. The scaler is fitted on the training data only
# and then re-used on the testing data, so no statistics of the test set leak
# into the preprocessing.
normalizer = preprocessing.MinMaxScaler()
X_train = normalizer.fit_transform(X_train)
X_test = normalizer.transform(X_test)

# Fit ordinary least squares on the scaled polynomial features.
model = linear_model.LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out testing data.
y_pred = model.predict(X_test)

# The fitted coefficients, one per polynomial feature
print('Coefficients: {}\n'.format(model.coef_))
# The mean squared error on the testing data
print("Mean squared error: {}".format(mean_squared_error(y_test, y_pred)))
# Coefficient of determination (R^2): 1 is perfect prediction
print('R2 score: {}'.format(r2_score(y_test, y_pred)))

Coefficients: [-1.13249644e-10 -2.09788623e+02 -2.83635410e+02 -9.98491091e+01
 -1.37039863e+02 -8.62987477e+02 -5.36486341e+02  8.45031178e+02
 -1.70903973e+02 -6.39126802e+02  4.51418756e+02 -4.05429615e+01
 -7.49565069e+00 -3.07311033e+00 -8.69409210e-01 -5.42117157e+00
 -1.14788700e+01 -3.50831755e+00  4.26520634e+00  2.32311362e+02
 -9.84583362e+00  4.67139993e+00  6.29826381e-01 -1.21634801e+00
  1.85869681e-01 -5.39659145e-01  2.21759172e+00 -3.85924002e-01
  3.08748611e+00  2.86828832e+02 -8.69282724e+00 -1.96966045e+00
  7.69038945e+00 -2.42369807e-01  1.23759892e+00  1.54482157e+00
  1.06662958e+00 -4.21575598e-01  1.11781877e+02 -1.36239101e+01
 -3.68513622e+00  5.06565652e+00 -2.00701426e+00 -3.01215127e+00
  6.03208921e-01 -9.17598123e-03  1.74131809e+02 -2.74895082e+01
  1.30653959e-01 -3.47699775e+00  1.10012434e+00 -7.63697456e-01
 -1.25138092e+00  8.73559925e+02 -6.30754707e+00  2.07529960e+00
  6.97274055e+00 -1.78544790e+00 -3.56536541e-01  5.40327815e+02
 -3.7948257