# Linear Regression  
dataset description: https://www.kaggle.com/c/boston-housing  

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import preprocessing


# load csv file
df = pd.read_csv('./dataset/housing.csv', header=None, delim_whitespace=True)

y = df[13]
X = df.drop([13], axis=1)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

model = linear_model.LinearRegression()
model.fit(X_train, y_train)

X_test = scaler.transform(X_test)
y_pred = model.predict(X_test)

# The coefficients
print('Coefficients: {}\n'.format(model.coef_))
# The mean squared error
print("Mean squared error: {}".format(mean_squared_error(y_test, y_pred)))
# Explained variance score: 1 is perfect prediction
print('R2 score: {}'.format(r2_score(y_test, y_pred)))



Coefficients: [-0.95362142  1.20782061  0.28653884  0.71729438 -2.11499766  2.65350713
  0.3476974  -2.98961137  2.78056037 -2.08243174 -2.08592516  0.89305432
 -4.00520137]

Mean squared error: 16.34524965351277
R2 score: 0.736331231941306


# Polynomial Regression  
dataset description: https://archive.ics.uci.edu/ml/datasets/wine+quality  

In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

df = pd.read_csv('./dataset/winequality-red.csv')

y = df['quality']
X = df.drop('quality',axis=1)

poly = PolynomialFeatures(degree=2).fit(X)
X = poly.transform(X)

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=1) #random_state 種子值


scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)


model = linear_model.LinearRegression()
model.fit(X_train, y_train)

X_test = scaler.transform(X_test)
y_pred = model.predict(X_test)

# The coefficients
print('Coefficients: {}\n'.format(model.coef_))
# The mean squared error
print("Mean squared error: {}".format(mean_squared_error(y_test, y_pred)))
# Explained variance score: 1 is perfect prediction
print('R2 score: {}'.format(r2_score(y_test, y_pred)))

Coefficients: [-2.20439093e-11 -3.32036477e+01 -3.46762494e+01 -1.98569253e+01
 -1.40652656e+01 -6.85273381e+01 -7.74114997e+01  1.00727869e+02
 -2.50880009e+01 -7.93478035e+01  4.63945174e+01 -6.70038809e+00
 -1.07912073e+00 -4.28103408e-01 -1.80118094e-01 -5.27741073e-01
 -9.30638006e-01 -6.23191815e-01  5.34638897e-01  3.69394013e+01
 -1.63849368e+00  6.25798078e-01  6.86240624e-02 -1.07543978e-01
  3.25708843e-02 -6.49414077e-02  2.15989372e-01 -5.66509424e-02
  3.43575308e-01  3.51401508e+01 -1.05497188e+00 -2.55960941e-01
  8.98178254e-01 -3.09937110e-02  9.34787505e-02  8.98840857e-02
  1.48808583e-01 -3.20881226e-02  2.21970228e+01 -3.13608092e+00
 -3.12173913e-01  1.11046257e+00 -1.59627758e-01 -2.03532456e-01
  3.43111110e-02 -6.51049711e-04  1.79109779e+01 -2.69409190e+00
  1.29726359e-02 -3.65247752e-01  6.44755908e-02 -5.20131174e-02
 -1.18159029e-01  6.92374227e+01 -5.56852862e-01  1.09112468e-01
  5.56498217e-01 -1.63840021e-01 -3.70231215e-02  7.79292790e+01
 -5.4702046