In [1]:
import pandas as pd
import numpy as np
import torch
from matplotlib import pyplot as plt
from sklearn.utils import resample
import ray
from ray import tune
from ray.tune.examples.mnist_pytorch import get_data_loaders, ConvNet, train, test

%matplotlib inline

ray.shutdown()
ray.init()


TypeError: 'module' object is not callable

In [2]:
filename = "./data/wine.csv"
df = pd.read_csv(filename)
# drop col index
df = df.drop(['index'],axis = 1)
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
#Resample:
df_majority = df[df['quality']== 6]
for i in range(3,10):
    majority_len = df[df['quality']== 6].shape[0]
    if i != 6:
        minority_len = df[df['quality'] == i].shape[0]

        df_minority = df[df['quality'] == i]

        df_majority_upsampled = resample(df_minority,replace=True,n_samples = majority_len,random_state=1)

        df_majority = df_majority.append(df_majority_upsampled)
        

df = df_majority
print(df.shape)
print(df['quality'].value_counts())
df = df.sample(frac=1).reset_index(drop=True) # Shuffle dataframe

(19852, 12)
3    2836
4    2836
5    2836
6    2836
7    2836
8    2836
9    2836
Name: quality, dtype: int64


In [4]:
# Creating a test/train split

train_test_split_fraction = 0.80
split_index = int(df.shape[0] * train_test_split_fraction)
df_train = df[:split_index]
df_test = df[split_index:]

target = pd.get_dummies(df['quality']).values # One hot encode

#target[:5]
#target = df['quality'].to_numpy()
#target = target.reshape(6497,1)

target


array([[0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0]], dtype=uint8)

In [5]:
# Selecting the features and the target
X_train = df_train.drop('quality', axis = 1).values
X_test = df_test.drop('quality', axis = 1).values

y_train = target[:split_index]
y_test = target[split_index:]
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(15881, 11)
(3971, 11)
(15881, 7)
(3971, 7)


In [6]:
#X=df.drop('quality',axis=1) 
#y=df['quality'] 
#X,y
#X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=41,test_size=0.2)

In [7]:
X_train = torch.tensor(X_train.astype('int32'))
y_train = torch.tensor(y_train.astype('int32'))
X_test = torch.tensor(X_test.astype('int32'))
y_test = torch.tensor(y_test.astype('int32'))

In [8]:
nb_hidden_neurons = 100
nb_classes = len(pd.unique(df['quality']))

In [9]:
import torch.nn as nn
import torch.nn.functional as F

In [10]:
class Network(nn.Module):

    def __init__(self, nb_features):
        """Here we define the layers
        """

        super().__init__()
        
        self.layer_1 =  nn.Linear(nb_features,nb_hidden_neurons)
        self.layer_2 =  nn.Linear(nb_hidden_neurons,nb_hidden_neurons)
        self.layer_3 =  nn.Linear(nb_hidden_neurons,nb_classes)
        
        

    def forward(self,x):
        """Here we combine the layers
        """
        
        activation_function = nn.SiLU()
        last_layer_activation = nn.Softmax()
        
        x = activation_function(self.layer_1(x))
        x = activation_function(self.layer_2(x))
        x = last_layer_activation(self.layer_3(x))
    
        return x

In [11]:
my_nn = Network(nb_features=X_train.shape[1])
my_nn

Network(
  (layer_1): Linear(in_features=11, out_features=100, bias=True)
  (layer_2): Linear(in_features=100, out_features=100, bias=True)
  (layer_3): Linear(in_features=100, out_features=7, bias=True)
)

In [12]:
# Select your criterion, your learning rate and your optimizer.
criterion = nn.MSELoss()
learning_rate = 0.001

optimizer = torch.optim.Adam(my_nn.parameters(), lr=learning_rate)
#optimizer = torch.optim.Adam(my_nn.parameters(), lr=learning_rate)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)

In [13]:
def training(batch_size : int, nb_steps_loss_sum : int):
    """ Train the neural network, feeding it `batch_size` at a time
    and saving statistics every `nb_steps_loss_sum` steps.
    
    Arguments:
    
    - batch_size [int] : the number of input samples at each training step (called a batch)
    - nb_steps_loss_sum [int] : the number of batches before saving the loss for plotting
    
    Returns:
    - loss_list : [List[double]] : value of the loss every `nb_steps_loss_sum` steps
    """

    loss_list = []
    running_loss = 0
    batch_nb = 0

    for epoch in range(0,100): # Number of times to iterate through the complete dataset
        for idx in range(0, X_train.shape[0], batch_size):
            
            # Get input and output
            input_batch = X_train[idx:idx + batch_size]
            target = y_train[idx:idx + batch_size]
            
            # TO COMPLETE:
            # - zero gradient buffers
            optimizer.zero_grad()
            # - compute the forward pass
            output = my_nn(input_batch.float())
            # - compute the loss
            loss = criterion(output, target.float())
            # - backpropagate
            loss.backward()
            # - do a step
            optimizer.step()
          
            
            # Save the loss every `running_loss_steps` batches
            running_loss += loss.item()
            save_loss_condition = batch_nb % nb_steps_loss_sum == (nb_steps_loss_sum - 1)
            if save_loss_condition:    
                loss_list.append(running_loss)
                running_loss = 0.0


            batch_nb+= 1
        
    return loss_list

In [14]:
nb_steps_loss_sum = 10
loss = training(batch_size=50, nb_steps_loss_sum=nb_steps_loss_sum)

# Plotting the loss over training
plt.figure()
plt.plot(range(0, len(loss)), loss)
plt.xlabel(f"Batches/{nb_steps_loss_sum}")
plt.ylabel("Loss")
plt.title("Training loss")
plt.show()
plt.close()

In [15]:
def computeScore(X, y):
    correct = 0
    total = 0
    batch_size = 50
    with torch.no_grad():
        for idx in range(0, X.shape[0], batch_size):

            # TO COMPLETE:
            
            # - get the `batch_size` number of input samples
            # - compute the prediction of the neural network
            output = my_nn(X.float())
            # - get the max of the prediction (e.g. get the most likely class)
            max_pred = torch.max(output, 1)
            # This can be done using `torch.max`.
            # - get the max of the target (e.g. correct class)
            max_target = torch.max(y, 1)
     
            # - check if the prediction is correct and count it
            if max_pred[1][idx] == max_target[1][idx]:
                correct += 1
            total = total + 1
                
            # - count every sample
 

    accuracy = correct/total * 100
    print(f"Accuracy of the network on the {total} samples: {accuracy:.2f}%")

In [16]:
computeScore(X_train, y_train)
computeScore(X_test, y_test)

Accuracy of the network on the 318 samples: 79.56%
Accuracy of the network on the 80 samples: 77.50%


In [17]:
import torch.optim as optim


def train_mnist(config):
    
    model = Network(nb_features=X_train.shape[1])
    optimizer = optim.SGD(model.parameters(), lr=config["lr"])
    for i in range(10):
        training(batch_size=50, nb_steps_loss_sum=nb_steps_loss_sum)
        acc = compute(X_train, y_train)
        tune.report(mean_accuracy=acc)


analysis = tune.run(
    train_mnist, config={"lr": tune.grid_search([0.001, 0.01, 0.1])})

print("Best config: ", analysis.get_best_config(metric="mean_accuracy"))

# Get a dataframe for analyzing trial results.
test = analysis.dataframe()

NameError: name 'ray' is not defined

In [None]:
test