In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import optim
from torch import nn
from sklearn import preprocessing
from torch.utils.data import DataLoader
import torch.utils.data as Data
from torch.autograd import Variable

In [2]:
df = pd.read_csv("./datasets/winequality-white.csv",sep = ";")
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [3]:
labels = df["quality"]
df.drop("quality",axis=1,inplace=True)

In [4]:
df_transformed = preprocessing.StandardScaler().fit_transform(df)

In [5]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(df_transformed,labels,random_state = 14,test_size = 0.25)

# Linear Regression, Random Forest Regression

In [6]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train,y_train)
preds = model.predict(X_test)

In [7]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_test,preds)

0.5935162263038227

In [8]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(n_estimators = 500)
model.fit(X_train,y_train)
preds = model.predict(X_test)

In [9]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_test,preds)

0.3745857208163266

# Deep Learning Regression

In [86]:
X_train_tensor =  torch.from_numpy(X_train).float()
X_test_tensor =  torch.from_numpy(X_test).float()
y_train_tensor =  torch.from_numpy(y_train.values).float()
y_test_tensor =  torch.from_numpy(y_test.values).float()

train_set = Data.TensorDataset(X_train_tensor,y_train_tensor)
test_set = Data.TensorDataset(X_test_tensor,y_test_tensor)

train_loader = DataLoader(train_set,batch_size=128,shuffle=True)
test_loader = DataLoader(test_set,batch_size=64,shuffle=True)

In [87]:
model = nn.Sequential(
                    nn.Linear(in_features=11,out_features = 200),
                    nn.LeakyReLU(),
                    nn.Linear(in_features=200,out_features = 100),
                    nn.LeakyReLU(),
                    nn.Linear(in_features=100,out_features = 1),
)

In [97]:
model = nn.Sequential(
                    nn.Linear(in_features=11,out_features = 64),
                    nn.LeakyReLU(),
                    nn.Dropout(0.5),
                    nn.Linear(in_features=64,out_features = 128),
                    nn.LeakyReLU(),
                    nn.Dropout(0.5),
                    nn.Linear(in_features=128,out_features = 256),
                    nn.LeakyReLU(),
                    nn.Linear(in_features=256,out_features = 512),
                    nn.LeakyReLU(),
                    nn.Dropout(0.5),
                    nn.Linear(in_features=512,out_features = 1),
)

In [98]:
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.01)

In [99]:
epochs = 25
print_every = 40
steps = 0

for e in range(epochs):
    running_loss = 0
    for x,y in iter(train_loader):
        steps+= 1
        optimizer.zero_grad()
        output = model.forward(x)
        loss = criterion(output,y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if steps % print_every == 0:
            print("Epoch: {}/{}... ".format(e+1, epochs),
                  "Loss: {:.4f}".format(running_loss/print_every))
            
            running_loss = 0

Epoch: 2/25...  Loss: 0.5271
Epoch: 3/25...  Loss: 0.7839
Epoch: 5/25...  Loss: 0.1235
Epoch: 6/25...  Loss: 0.4142
Epoch: 7/25...  Loss: 0.6687
Epoch: 9/25...  Loss: 0.1975
Epoch: 10/25...  Loss: 0.4695
Epoch: 12/25...  Loss: 0.0296
Epoch: 13/25...  Loss: 0.2854
Epoch: 14/25...  Loss: 0.5314
Epoch: 16/25...  Loss: 0.1148
Epoch: 17/25...  Loss: 0.3682
Epoch: 18/25...  Loss: 0.6170
Epoch: 20/25...  Loss: 0.2082
Epoch: 21/25...  Loss: 0.4450
Epoch: 23/25...  Loss: 0.0468
Epoch: 24/25...  Loss: 0.3031
Epoch: 25/25...  Loss: 0.5423


In [100]:
model.eval()
with torch.no_grad():
    output = model.forward(X_test_tensor)
    preds = output.max(dim = 1)[0]
    MSE = criterion(y_test_tensor,preds).item()
    print("MSE=" +str(MSE))

MSE=0.7774053812026978
