In [1]:
import csv
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics.classification import accuracy_score
from sklearn.metrics.regression import r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, Lasso, HuberRegressor
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

from dbn.tensorflow import SupervisedDBNRegression

In [2]:
def load_wine_csv(path):
    """Read a semicolon-delimited CSV file into feature and target arrays.

    The first row is treated as a header and skipped. In every remaining
    non-empty row, all columns except the last are parsed as floats
    (features) and the last column is parsed as an int (target).

    Args:
        path: Location of the CSV file to read.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays built from the parsed feature
        rows and the parsed targets, in file order.

    Raises:
        ValueError: If a cell cannot be parsed as a number.
    """
    features, targets = [], []

    with open(path, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=';')
        # Discard the header row; the default keeps an empty file from raising.
        next(reader, None)

        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines (for example
                # a stray trailing newline); indexing row[-1] would fail there.
                continue
            features.append([float(value) for value in row[:-1]])
            targets.append(int(row[-1]))

    return np.array(features), np.array(targets)


# Both files share the same layout, so one loader serves both.
X_red, Y_red = load_wine_csv('datasets/winequality-red.csv')
X_white, Y_white = load_wine_csv('datasets/winequality-white.csv')

In [3]:
# Red wine: hold out 20% of the samples for testing, with a fixed seed for the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_red,
    Y_red,
    test_size=0.2,
    random_state=0,
)

# Fit the min-max scaling on the training features only, then apply it to both splits.
X_scaler = MinMaxScaler().fit(X_train)
X_train, X_test = X_scaler.transform(X_train), X_scaler.transform(X_test)

# Ridge regression with default settings.
regressor = Ridge().fit(X_train, Y_train)

test_r2 = regressor.score(X_test, Y_test)
print(f'Test set r-squared score: {test_r2}')

Test set r-squared score: 0.3293642538047954


In [4]:
# Constructor arguments for the DBN regressor, collected in one mapping.
dbn_params = {
    'hidden_layers_structure': [20, 10],
    'learning_rate_rbm': 0.0003,
    'learning_rate': 0.02,
    'n_epochs_rbm': 20,
    'n_iter_backprop': 100,
    'batch_size': 8,
    'activation_function': 'relu',
    'verbose': False,
}

# Replaces the previous `regressor`; fitted and scored on the current train/test split.
regressor = SupervisedDBNRegression(**dbn_params)
regressor.fit(X_train, Y_train)

test_r2 = regressor.score(X_test, Y_test)
print(f'Test set r-squared score: {test_r2}')

Test set r-squared score: 0.3473308034958251


In [5]:
# Predict on the held-out split and print each true target next to its prediction.
pred = regressor.predict(X_test)

# Number of leading test samples to show.
n_preview = 30

# Slicing instead of indexing over a fixed range(30) keeps this from raising
# IndexError when the test split holds fewer than `n_preview` samples.
for actual, predicted in zip(Y_test[:n_preview], pred[:n_preview]):
    # Each prediction is indexable; its first element is the predicted value.
    print(actual, predicted[0])

6 5.681657
5 5.1179867
7 6.659705
6 5.4389107
5 5.679303
6 5.3897684
5 5.2437735
6 6.1053934
4 4.6525304
5 5.0121393
5 5.1854076
5 5.3249855
6 5.6137395
5 5.096298
6 5.434116
6 6.438636
7 6.922023
5 5.646512
5 5.351233
4 5.141512
7 6.4522367
6 5.2327724
6 5.5465493
4 5.8542404
6 5.4389075
5 4.992988
5 5.123665
7 6.5631995
5 5.169716
6 6.357584


In [6]:
# White wine: hold out 20% of the samples for testing, with a fixed seed for the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_white,
    Y_white,
    test_size=0.2,
    random_state=0,
)

# Fit the min-max scaling on the training features only, then apply it to both splits.
X_scaler = MinMaxScaler().fit(X_train)
X_train, X_test = X_scaler.transform(X_train), X_scaler.transform(X_test)

# Ridge regression with default settings.
regressor = Ridge().fit(X_train, Y_train)

test_r2 = regressor.score(X_test, Y_test)
print(f'Test set r-squared score: {test_r2}')

Test set r-squared score: 0.25788728218319523


In [7]:
# Constructor arguments for the DBN regressor, collected in one mapping.
dbn_params = {
    'hidden_layers_structure': [20, 10],
    'learning_rate_rbm': 0.0003,
    'learning_rate': 0.02,
    'n_epochs_rbm': 20,
    'n_iter_backprop': 100,
    'batch_size': 64,
    'activation_function': 'relu',
    'verbose': False,
}

# Replaces the previous `regressor`; fitted and scored on the current train/test split.
regressor = SupervisedDBNRegression(**dbn_params)
regressor.fit(X_train, Y_train)

test_r2 = regressor.score(X_test, Y_test)
print(f'Test set r-squared score: {test_r2}')

Test set r-squared score: 0.30096417836718434


In [8]:
# Predict on the held-out split and print each true target next to its prediction.
pred = regressor.predict(X_test)

# Number of leading test samples to show.
n_preview = 30

# Slicing instead of indexing over a fixed range(30) keeps this from raising
# IndexError when the test split holds fewer than `n_preview` samples.
for actual, predicted in zip(Y_test[:n_preview], pred[:n_preview]):
    # Each prediction is indexable; its first element is the predicted value.
    print(actual, predicted[0])

5 5.506027
6 5.431491
7 5.928523
8 6.8523192
5 5.602636
4 4.713329
6 6.4981065
5 6.202039
7 5.9339495
5 5.687347
6 5.688942
7 5.6341057
5 5.161984
5 5.475436
6 6.1718626
5 5.6558027
6 6.3385425
5 5.434805
6 6.495904
6 6.1799836
6 5.8362803
7 6.0924616
6 5.8617725
5 5.6726454
7 5.805807
8 6.39778
5 5.0400596
5 5.0820694
7 6.499633
5 5.6022863
