In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score
from skmultilearn.problem_transform import LabelPowerset

In [None]:
class ActivationFunctions:

    @staticmethod
    def tanh(x):
        return np.tanh(x)

    @staticmethod
    def relu(x):
        return np.maximum(0, x)

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))
    
    @staticmethod
    def softmax(z):
        exp_z = np.exp(z - np.max(z,axis=0))
        return exp_z / np.sum(exp_z,axis=0)
    
    @staticmethod
    def tanh_derivative(x):
        tanh_x = np.tanh(x)
        return 1.0 - tanh_x**2
    
    @staticmethod
    def relu_derivative(x):
        return np.where(x > 0, 1.0, 0.0)
    
    @staticmethod
    def ignore(x):
        return 0

    @staticmethod
    def sigmoid_derivative(x):
        sigmoid_x = 1.0 / (1.0 + np.exp(-x))
        return sigmoid_x * (1.0 - sigmoid_x)
    
    @staticmethod
    def softmax_derivative(z):
        s = ActivationFunctions.softmax(z).flatten()
        jacobian_m = np.diag(s)
        for i in range(len(jacobian_m)):
            for j in range(len(jacobian_m)):
                if i == j:
                    jacobian_m[i][j] = s[i] * (1-s[i])
                else:
                    jacobian_m[i][j] = -s[i]*s[j]
        return jacobian_m

<h3>Data Preprocessing</h3>

In [None]:
df = pd.read_csv('./WineQT.csv')

In [None]:
print(np.mean(df,axis=0))

In [None]:
# printing total number of labels
np.unique(df['quality'])

In [None]:
# extract the important meaningful attributes
attb_df = df[["fixed acidity","volatile acidity","citric acid","residual sugar","chlorides","free sulfur dioxide","total sulfur dioxide","density","pH","sulphates","alcohol"]]

In [None]:
attb_df.describe()

In [None]:
dict_label = df['quality'].value_counts()

In [None]:
# plot a bar graph
dict_label = dict(dict_label)
plt.bar(list(dict_label.keys()),list(dict_label.values()))
plt.title('Occurances of labels in Wine dataset')
plt.xlabel('Label')
plt.ylabel('Frequency')
plt.show()

In [None]:
dataset = np.array(df)

np.random.shuffle(dataset)

"""
CHECK AGAIN BOOKMARK
"""

# Compute the correlation matrix using numpy
corr_matrix = np.corrcoef(dataset[:,:11], rowvar=False)

# Plotting the heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
plt.show()

In [None]:
X = dataset[:,:11]
y = dataset[:,11]

# standardize
# calc z score for each entry columnwise
X = (X - np.mean(X,axis=0))/np.std(X,axis=0)

In [None]:
X.shape

In [None]:
# lets keep the split as train : 0.7, val : 0.1, test : 0.2
train_ratio = 0.65
val_ratio = 0.15
test_ratio = 0.20

In [None]:
entries = dataset.shape[0]

train_X = X[:int(train_ratio*entries)]
val_X = X[int(train_ratio*entries):int((train_ratio+val_ratio)*entries)]
test_X = X[int((train_ratio+val_ratio)*entries):]

In [None]:
train_y = y[:int(train_ratio*entries)]
val_y = y[int(train_ratio*entries):int((train_ratio+val_ratio)*entries)]
test_y = y[int((train_ratio+val_ratio)*entries):]

In [None]:
train_X.shape

In [None]:
val_y.shape

In [None]:
train_y.shape

In [None]:
train_y = train_y.reshape((-1,1))

<br>

<h3>Model training and WandB tuning</h3>

In [None]:
print(np.unique(train_y))

In [None]:
class MLP(ActivationFunctions):
  def __init__(self,input_dim,hidden_output_layer,optimizer='sgd',ep=5000,run_name="run"):
    
    self.weights = []
    self.biases=  []
    self.activation_func = []
    self.lr = 0.2
    self.ep = ep
    self.activation_func_name = []
    self.mode=0
    self.optimizer = optimizer

    self.derivatives = {
        'tanh': self.tanh_derivative,
        'relu': self.relu_derivative,
        'sigmoid': self.sigmoid_derivative,
        'softmax' : self.softmax_derivative,
        'none': self.ignore
      }
    
    #wandb
    wandb.init(project="mlp-2", config={
        "lr" : self.lr,
        "epochs" : ep
    })
    wandb.run.name = run_name

    # get all the activation functions
    self.activation_func_name.append('none')
    self.activation_func.append(0)

    for layer in hidden_output_layer:
      self.activation_func.append(getattr(self, layer[1]))
      self.activation_func_name.append(layer[1])

    # intialize eights and biases
    self.accumulator_grad_wts = []
    self.accumulator_grad_biases = []

    self.biases.append(np.zeros((input_dim,1)))
    self.accumulator_grad_biases.append(np.zeros((input_dim,1)))

    for i in range(len(hidden_output_layer)):
      l_layer = hidden_output_layer[i][0]

      if i!=0:
        prev_layer = hidden_output_layer[i-1][0]
      else:
        prev_layer = input_dim

      self.accumulator_grad_wts.append(np.zeros((l_layer,prev_layer)))
      self.accumulator_grad_biases.append(np.zeros((hidden_output_layer[i][0],1)))
      
      self.weights.append(np.random.rand(l_layer,prev_layer))
      self.biases.append(np.random.rand(hidden_output_layer[i][0],1))

    self.unactivated_outputs = []
    self.activated_outputs = []
  
  def cross_entropy_loss(self,y,yHat):
    J = -(np.sum(y * np.log(yHat)))/y.shape[0]
    return J

  def forwardProp(self,X):

    curr_output = X
    self.unactivated_outputs.append(X)
    self.activated_outputs.append(X)

    for l in range(1,len(self.weights)+1):
      z_l = self.weights[l-1] @ curr_output + self.biases[l]
      
      if self.mode==0:
          self.unactivated_outputs.append(z_l)
        
      output = self.activation_func[l](z_l)
    
      if self.mode==0:
          self.activated_outputs.append(output)

      curr_output = output
    
    curr_output = self.softmax(curr_output)

    return curr_output

  def backwardProp(self,X,y_prediction,y,sample_id,batch_size=None):

    n = len(self.activated_outputs)
    last_lay_unacout = self.unactivated_outputs[n-1]
    delta_l = (y_prediction-y)*(self.derivatives[self.activation_func_name[n-1]](last_lay_unacout))

    for i in range(n-1,0,-1):

      if self.optimizer=="sgd":
        self.weights[i-1] -= self.lr*(delta_l @ self.activated_outputs[i-1].T)
        self.biases[i] -= self.lr*(delta_l)
      
      else:
        self.accumulator_grad_wts[i-1] += delta_l @ self.activated_outputs[i-1].T
        self.accumulator_grad_biases[i] += delta_l

        if (sample_id+1)%batch_size==0:
          self.weights[i-1]-=self.lr*self.accumulator_grad_wts[i-1]
          self.biases[i]-=self.lr*self.accumulator_grad_biases[i]

      sigma_prime_zl = self.derivatives[self.activation_func_name[i-1]](self.unactivated_outputs[i-1])
      delta_l = (self.weights[i-1].T @ delta_l)*sigma_prime_zl
    
    
    self.activated_outputs.clear()
    self.unactivated_outputs.clear()

    
  def eval(self):
    self.mode=1

  def train(self):
    self.mode=0

  def fit(self,X,y,X_val, y_val):
    
    for epoch in range(self.ep):
      loss = 0
      train_acc = 0
      
      self.train()
      for sample_id in range(X.shape[0]):
        y_prediction = self.forwardProp(X[sample_id].reshape((-1,1)))
        if self.optimizer=="bgd":
          self.backwardProp(X[sample_id].reshape((-1,1)),y_prediction,y[sample_id].reshape((-1,1)),sample_id,batch_size=X.shape[0])
        
        else:
         batch_size = 64
         self.backwardProp(X[sample_id].reshape((-1,1)),y_prediction,y[sample_id].reshape((-1,1)),sample_id,batch_size=batch_size)

        loss += self.cross_entropy_loss(y[sample_id].reshape((-1,1)),y_prediction)
        comp_pred = np.argmax(y_prediction,axis=0)
        comp_act = np.argmax(y[sample_id],axis=0)
        train_acc+=(comp_pred==comp_act)
      
      train_acc = train_acc.item()/len(X)
      loss = loss/len(X)
      
      val_acc = 0
      val_loss = 0
    
      self.eval()
      # validation
      for idx in range(X_val.shape[0]):
            y_pred = self.forwardProp(X_val[idx].reshape((-1,1)))
            val_loss += self.cross_entropy_loss(y_val[idx].reshape((-1,1)),y_pred)
            comp_pred = np.argmax(y_pred,axis=0)
            comp_act = np.argmax(y_val[idx],axis=0)
            val_acc+=(comp_pred==comp_act)
      val_acc = val_acc.item()/len(X_val)
      val_loss = val_loss/len(X_val)

      self.activated_outputs.clear()
      self.unactivated_outputs.clear()
        
      wandb.log({"train_accuracy": train_acc,"train_loss": loss,"val_accuracy": val_acc , "val_loss" : val_loss})
      print(f"Epoch {epoch}, training_accuracy : {train_acc} training_loss : {loss} val_accuracy : {val_acc} val_loss : {val_loss}")


In [None]:
import wandb

# mlp = MLP(11,[(8,'sigmoid'),(8,'sigmoid'),(6,'sigmoid')],optimizer='sgd')

train_y = train_y.reshape((-1,1))
one_hot_encoder = OneHotEncoder(sparse_output=False)
y_true = one_hot_encoder.fit_transform(train_y)

val_y = val_y.reshape((-1,1))
one_hot_encoder = OneHotEncoder(sparse_output=False)
y_true_val = one_hot_encoder.fit_transform(val_y)

y_true_val.shape

In [None]:
mlp = MLP(11,[(9,'sigmoid'),(7,'sigmoid'),(6,'sigmoid')],optimizer='sgd')
mlp.fit(train_X,y_true,val_X,y_true_val)

<h3>Accuracy on Test Set</h3>

In [None]:
test_y = test_y.reshape((-1,1))
one_hot_encoder = OneHotEncoder(sparse_output=False)
y_true_test = one_hot_encoder.fit_transform(test_y)

y_pr_arr = []
y_act = []

test_acc = 0
for idx in range(test_X.shape[0]):
      y_pred = mlp.forwardProp(test_X[idx].reshape((-1,1)))
      
      comp_pred = np.argmax(y_pred,axis=0)
      comp_act = np.argmax(y_true_test[idx],axis=0)
      y_pr_arr.append(comp_pred)
      y_act.append(comp_act)
      
      test_acc+=(comp_pred==comp_act)
test_acc = test_acc.item()/len(test_X)
print("Test accuracy : ",test_acc)

In [None]:
wandb.finish()

### Classification Report on Test Set

In [None]:

y = y_act
y_pred = y_pr_arr

# Classification Report
print("Classification Report:")
print(classification_report(y, y_pred))

# Other metrics
accuracy = accuracy_score(y, y_pred)
precision = precision_score(y, y_pred,average='weighted')
recall = recall_score(y, y_pred,average='weighted')
f1 = f1_score(y, y_pred,average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

### Combinations of activation function and optimizers

In [None]:
for activn_funcn in ['sigmoid','relu','tanh']:
    for optim in ['sgd','bgd','mbgd']:
        model = MLP(11,[(9,activn_funcn),(7,activn_funcn),(6,activn_funcn)],optimizer=optim,ep=200,run_name=f"{activn_funcn}+{optim}")
        model.fit(train_X,y_true,val_X,y_true_val)

In [None]:
model = MLP(11,[(9,'relu'),(7,'relu'),(6,'relu')],optimizer='sgd',ep=200,run_name=f"trial")
model.fit(train_X,y_true,val_X,y_true_val)

### Multilabel classification

In [None]:
df = pd.read_csv('./advertisement.csv')

In [None]:
np_ds = np.array(df)

In [None]:
unique_labels  = set()

for arr in np_ds:
    lbl_str = arr[-1].split(' ')
    for lbl in lbl_str:
        unique_labels.add(lbl)

print(unique_labels)

In [None]:
df[df["gender"]=="Female"].head(50)

In [None]:
female_df = df[df["gender"]=="Female"]

In [None]:
np_ds_female = np.array(female_df)

map_labels1 = {}

for arr in np_ds_female:
    lbl_str = arr[-1].split(' ')
    for lbl in lbl_str:
        try:
            map_labels1[lbl]+=1
        except:
            map_labels1[lbl]=1

print(map_labels1)

In [None]:
np_ds_female.shape

In [None]:
print(sum(list(map_labels1.values())))

In [None]:
male_df = df[df["gender"]=="Male"]

In [None]:
np_ds_male = np.array(male_df)

map_labels2 = {}

for arr in np_ds_male:
    lbl_str = arr[-1].split(' ')
    for lbl in lbl_str:
        try:
            map_labels2[lbl]+=1
        except:
            map_labels2[lbl]=1

print(map_labels2)

In [None]:

# Find common keys
common_keys = set(map_labels1.keys()) & set(map_labels2.keys())

# Prepare data for plotting
values1 = [map_labels1[key] for key in common_keys]
values2 = [map_labels2[key] for key in common_keys]

# Plotting
import matplotlib.pyplot as plt
import numpy as np

# Setting up the x-axis positions
x = np.arange(len(common_keys))

# Setting up the width for bars
width = 0.35

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, values1, width, label='female')
rects2 = ax.bar(x + width/2, values2, width, label='male')

# Adding labels, title, and legend
ax.set_ylabel('Values')
ax.set_title('Comparison between values of common keys in Map Labels 1 and Map Labels 2')
ax.set_xticks(x)
ax.set_xticklabels(common_keys)
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Splitting  the train val test split
# keeping the ration 70 : 15 : 15
test = 0.10
val = 0.15
train = 0.80

df = pd.read_csv('./advertisement.csv')
data = np.array(df)

In [None]:
"""
We can see column : 1,3,4,6,7,9 are categorical , hence need to be encoded.
"""

for column in [1,3,4,6,7,9]:
    label_encode = LabelEncoder()
    encoded_feature = label_encode.fit_transform(data[:,column])
    data[:,column] = encoded_feature

In [None]:
print(data.shape)

In [None]:
print(data[:,:-1])

In [None]:
my_featur_sample = data[:,:-1].copy() 
my_featur_sample  = my_featur_sample.astype(np.float64)

In [None]:
my_featur_sample = (my_featur_sample - np.mean(my_featur_sample,axis=0))/(np.std(my_featur_sample,axis=0))

In [None]:
np.std(np.array([[1],[2],[3]]),axis=1)

In [None]:
print(data[0])

In [None]:
data[:,:-1] = my_featur_sample

In [None]:
label_encode_mlb = MultiLabelBinarizer()

formatted_list = []

for lbl_string in data[:,-1]:
    formatted_list.append(lbl_string.split(' '))
    
encoded_labels = label_encode_mlb.fit_transform(formatted_list)
    
train_x,train_y = data[:int(train*data.shape[0]),:-1],encoded_labels[:int(train*data.shape[0])]
val_x,val_y = data[int(train*data.shape[0]):int(train*data.shape[0])+int(val*data.shape[0]),:-1],encoded_labels[int(train*data.shape[0]):int(train*data.shape[0])+int(val*data.shape[0])]
test_x,test_y = data[int(train*data.shape[0])+int(val*data.shape[0]):int(train*data.shape[0])+int(val*data.shape[0])+int(test*data.shape[0]),:-1],encoded_labels[int(train*data.shape[0])+int(val*data.shape[0]):int(train*data.shape[0])+int(val*data.shape[0])+int(test*data.shape[0])]

print(train_x.shape,train_y.shape)
print(test_x.shape,test_y.shape)
print(val_x.shape,val_y.shape)

In [None]:
label_encode.classes_

In [None]:
print(train_y.shape)
print(train_x.shape)

In [None]:
print(train_x)

In [None]:
class ActivationFunctions:

    @staticmethod
    def tanh(x):
        return np.tanh(x)

    @staticmethod
    def relu(x):
        return np.maximum(0, x)

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))
    
    @staticmethod
    def softmax(z):
        exp_z = np.exp(z - np.max(z,axis=0))
        return exp_z / np.sum(exp_z,axis=0)
    
    @staticmethod
    def tanh_derivative(x):
        tanh_x = np.tanh(x)
        return 1.0 - tanh_x**2
    
    @staticmethod
    def relu_derivative(x):
        return np.where(x > 0, 1.0, 0.0)
    
    @staticmethod
    def ignore(x):
        return 0

    @staticmethod
    def sigmoid_derivative(x):
        sigmoid_x = 1.0 / (1.0 + np.exp(-x))
        return sigmoid_x * (1.0 - sigmoid_x)
    
    @staticmethod
    def softmax_derivative(z):
        s = ActivationFunctions.softmax(z).flatten()
        jacobian_m = np.diag(s)
        for i in range(len(jacobian_m)):
            for j in range(len(jacobian_m)):
                if i == j:
                    jacobian_m[i][j] = s[i] * (1-s[i])
                else:
                    jacobian_m[i][j] = -s[i]*s[j]
        return jacobian_m

In [205]:
class MLP(ActivationFunctions):
  def __init__(self,input_dim,hidden_output_layer,optimizer='sgd',ep=50,run_name="run"):
    
    self.weights = []
    self.biases=  []
    self.activation_func = []
    self.lr = 0.1
    self.ep = ep
    self.activation_func_name = []
    self.mode=0
    self.optimizer = optimizer

    self.derivatives = {
        'tanh': self.tanh_derivative,
        'relu': self.relu_derivative,
        'sigmoid': self.sigmoid_derivative,
        'softmax' : self.softmax_derivative,
        'none': self.ignore
      }
    
    #wandb
    # wandb.init(project="mlp-2", config={
    #     "lr" : self.lr,
    #     "epochs" : ep
    # })
    # wandb.run.name = run_name

    # get all the activation functions
    self.activation_func_name.append('none')
    self.activation_func.append(0)

    for layer in hidden_output_layer:
      self.activation_func.append(getattr(self, layer[1]))
      self.activation_func_name.append(layer[1])

    # intialize eights and biases
    self.accumulator_grad_wts = []
    self.accumulator_grad_biases = []

    self.biases.append(np.zeros((input_dim,1)))
    self.accumulator_grad_biases.append(np.zeros((input_dim,1)))

    for i in range(len(hidden_output_layer)):
      l_layer = hidden_output_layer[i][0]

      if i!=0:
        prev_layer = hidden_output_layer[i-1][0]
      else:
        prev_layer = input_dim

      self.accumulator_grad_wts.append(np.zeros((l_layer,prev_layer)))
      self.accumulator_grad_biases.append(np.zeros((hidden_output_layer[i][0],1)))
      
      self.weights.append(np.random.rand(l_layer,prev_layer))
      self.biases.append(np.random.rand(hidden_output_layer[i][0],1))

    self.unactivated_outputs = []
    self.activated_outputs = []
  
  def binary_cross_entropy_loss(self,y,yHat):
    J = -(np.sum(y * np.log(yHat) + (1-y)*np.log(1-yHat)))/y.shape[0]
    return J

  def forwardProp(self,X):

    curr_output = X
    self.unactivated_outputs.append(X)
    self.activated_outputs.append(X)

    # print(X.shape)
    for l in range(1,len(self.weights)+1):
      z_l = self.weights[l-1] @ curr_output + self.biases[l]
      
      if self.mode==0:
          self.unactivated_outputs.append(z_l)
      
      output = self.activation_func[l](z_l)
    
      if self.mode==0:
          self.activated_outputs.append(output)

      curr_output = output
    
    # curr_output = self.softmax(curr_output)

    return curr_output

  def backwardProp(self,X,y_prediction,y,sample_id,batch_size=None):

    n = len(self.activated_outputs)
    last_lay_unacout = self.unactivated_outputs[n-1]
    delta_l = (y_prediction-y)*(self.derivatives[self.activation_func_name[n-1]](last_lay_unacout))

    for i in range(n-1,0,-1):

      if self.optimizer=="sgd":
        self.weights[i-1] -= self.lr*(delta_l @ self.activated_outputs[i-1].T)
        self.biases[i] -= self.lr*(delta_l)
      
      else:
        self.accumulator_grad_wts[i-1] += delta_l @ self.activated_outputs[i-1].T
        self.accumulator_grad_biases[i] += delta_l

        if (sample_id+1)%batch_size==0:
          self.weights[i-1]-=self.lr*self.accumulator_grad_wts[i-1]
          self.biases[i]-=self.lr*self.accumulator_grad_biases[i]

      sigma_prime_zl = self.derivatives[self.activation_func_name[i-1]](self.unactivated_outputs[i-1])
      delta_l = (self.weights[i-1].T @ delta_l)*sigma_prime_zl
    
    
    self.activated_outputs.clear()
    self.unactivated_outputs.clear()

    
  def eval(self):
    self.mode=1

  def train(self):
    self.mode=0

  def multilabel_accuracy(self,y_true, y_pred, threshold=0.5):
    
    # print(y_true.shape,y_pred.shape)
    # Convert predictions to 1 or 0 based on threshold
    y_pred_thresholded = (y_pred > threshold).astype(int)
    # print(y_pred_thresholded,y_pred)
    
    # Check if the predicted labels match the true labels
    correct_predictions = (y_true == y_pred_thresholded).astype(int)
    
    # Compute accuracy for each label and then take the mean
    if len(y_pred) == correct_predictions.sum():
      return 1
    
    return 0

  def fit(self,X,y,X_val, y_val):
    
    for epoch in range(self.ep):
      loss = 0
      train_acc = 0
      
      self.train()
      for sample_id in range(X.shape[0]):
        y_prediction = self.forwardProp(X[sample_id].reshape((-1,1)))

        if self.optimizer=="bgd":
          self.backwardProp(X[sample_id].reshape((-1,1)),y_prediction,y[sample_id].reshape((-1,1)),sample_id,batch_size=X.shape[0])
        
        else:
         batch_size = 64
         self.backwardProp(X[sample_id].reshape((-1,1)),y_prediction,y[sample_id].reshape((-1,1)),sample_id,batch_size=batch_size)

        loss += self.binary_cross_entropy_loss(y[sample_id].reshape((-1,1)),y_prediction)
        # comp_pred = np.argmax(y_prediction,axis=0)
        # comp_act = np.argmax(y[sample_id],axis=0)
        # print(y_prediction.shape)
        train_acc+=self.multilabel_accuracy(y[sample_id].reshape((-1,1)),y_prediction)
      
      train_acc = train_acc/len(X)
      loss = loss/len(X)
      
      val_acc = 0
      val_loss = 0
    
      self.eval()
      # validation
      for idx in range(X_val.shape[0]):
            y_pred = self.forwardProp(X_val[idx].reshape((-1,1)))
            # print(y_pred.shape,y_val[idx].reshape((-1,1)).shape)
            val_loss += self.binary_cross_entropy_loss(y_val[idx].reshape((-1,1)),y_pred)
            # comp_pred = np.argmax(y_pred,axis=0)
            # comp_act = np.argmax(y_val[idx],axis=0)
            val_acc+=self.multilabel_accuracy(y_val[idx].reshape((-1,1)),y_pred)
      val_acc = val_acc/len(X_val)
      val_loss = val_loss/len(X_val)

      self.activated_outputs.clear()
      self.unactivated_outputs.clear()
        
      # wandb.log({"train_accuracy": train_acc,"train_loss": loss,"val_accuracy": val_acc , "val_loss" : val_loss})
      print(f"Epoch {epoch}, training_accuracy : {train_acc} training_loss : {loss} val_accuracy : {val_acc} val_loss : {val_loss}")


In [206]:

ActivationFunctions.sigmoid(np.array([[1.555],[2.333],[3.77]]))

array([[0.82563471],
       [0.91157346],
       [0.97746736]])

In [210]:
import wandb
model = MLP(10,[(6,'sigmoid'),(6,'sigmoid'),(8,'sigmoid')],optimizer='sgd',ep=5000,run_name=f"multilabel")
model.fit(train_x.astype(np.float64),train_y.astype(np.float64),val_x.astype(np.float64),val_y.astype(np.float64))

Epoch 0, training_accuracy : 0.00125 training_loss : 0.7499206473287562 val_accuracy : 0.0 val_loss : 0.6548931963306939
Epoch 1, training_accuracy : 0.0 training_loss : 0.6530588199076521 val_accuracy : 0.0 val_loss : 0.6548730154632316
Epoch 2, training_accuracy : 0.0 training_loss : 0.6529251140355268 val_accuracy : 0.0 val_loss : 0.6548406444298392
Epoch 3, training_accuracy : 0.0 training_loss : 0.6527874733645114 val_accuracy : 0.0 val_loss : 0.6547801745874343
Epoch 4, training_accuracy : 0.0 training_loss : 0.6526229656882334 val_accuracy : 0.0 val_loss : 0.6546740263684622
Epoch 5, training_accuracy : 0.0 training_loss : 0.6524060666292306 val_accuracy : 0.0 val_loss : 0.6544978190469273
Epoch 6, training_accuracy : 0.0 training_loss : 0.6521022928157909 val_accuracy : 0.0 val_loss : 0.6542133504787645
Epoch 7, training_accuracy : 0.0 training_loss : 0.6516600115320553 val_accuracy : 0.0 val_loss : 0.6537583270767794
Epoch 8, training_accuracy : 0.0 training_loss : 0.651001419