# Q2

In [6]:
import numpy as np
import pandas as pd

dat1 = pd.read_csv("./bin/WineQT.csv")
print(dat1.shape)

(1143, 13)


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

X = dat1.drop(columns=['quality','Id']).to_numpy()
Y = dat1['quality'].to_numpy()

X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, random_state=4)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=345)

# Handle Missing or Inconsistent Data (if necessary)
# Use SimpleImputer to fill missing values with the mean or other strategies
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_val = imputer.transform(X_val)
X_test = imputer.transform(X_test)

# Normalize and Standardize Data
# Use StandardScaler to standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)


### 2.1: Model Building from Scratch

In [8]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

class MLPClassifier2:
    def __init__(self, n_features, n_classes, learning_rate, n_hiddenLayers, n_nodesPerLayer, activateFunc, epochs, optimizer="Batch",printOutput=True) -> None:
        self.n_features = n_features
        self.n_classes = n_classes
        self.learning_rate = learning_rate
        self.n_hiddenLayers = n_hiddenLayers
        self.n_nodesPerLayer = n_nodesPerLayer
        self.activateFunc = activateFunc
        self.printOutput = printOutput
        self.epochs = epochs
        self.uniques = None
        self.optimizer = optimizer

        self.layeredWeights = [np.random.rand(n_features, n_nodesPerLayer)]
        self.layeredBiases = [np.random.rand(n_nodesPerLayer)]

        for i in range(1, n_hiddenLayers):
            self.layeredWeights.append(np.random.rand(n_nodesPerLayer, n_nodesPerLayer))
            self.layeredBiases.append(np.random.rand(n_nodesPerLayer))
        
        self.layeredWeights.append(np.random.rand(n_nodesPerLayer, n_classes))
        self.layeredBiases.append(np.random.rand(n_classes))
    
    def printMetrics(self,preds,true):
        print(f"Accuracy = \t {accuracy_score(preds,true)}")
        print(f"precision micro = \t {precision_score(preds,true,average='micro',zero_division=1)}",end="\t")
        print(f"precision macro = \t {precision_score(preds,true,average='macro',zero_division=1)}")
        print(f"recall micro = \t\t {recall_score(preds,true,average='micro',zero_division=1)}",end=" \t")
        print(f"recall macro = \t\t {recall_score(preds,true,average='macro',zero_division=1)}")
        print(f"f1_score micro = \t {f1_score(preds,true,average='micro',zero_division=1)}",end="\t")
        print(f"f1_score macro = \t {f1_score(preds,true,average='macro',zero_division=1)}\n")

    def softmax(self, logits):
        exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        return exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

    def cross_entropy_loss(self, oneHotTrueProbs, pred_probs):
        epsilon = 1e-15
        n = oneHotTrueProbs.shape[0]
        pred_probs = np.maximum(epsilon, pred_probs)
        loss = -np.sum(oneHotTrueProbs * np.log(pred_probs))
        
        return loss/n
    
    # def oneHotLabels2(self, Y):
    #     oneHotTrueProbs = pd.get_dummies(Y).to_numpy()
    #     return oneHotTrueProbs
    
    def oneHotLabels(self, Y):
        out = []
        categories = self.uniques
        for i in Y:
            num_categories = len(categories)
            encoding = np.eye(num_categories)
            index = np.where(categories==i)
            out.append(encoding[index].squeeze())
        return np.array(out)

    def activationFunc(self,X):
        if self.activateFunc == "ReLU":
            return np.maximum(0, X)
        elif self.activateFunc == "tanh":
            return np.tanh(X)
        elif self.activateFunc == "sigmoid":
            return 1 / (1 + np.exp(-X))

    def derivActivation(self,X):
        if self.activateFunc == "ReLU":
            return np.maximum(0, X)
        elif self.activateFunc == "tanh":
            return 1 - np.tanh(X) ** 2
        elif self.activateFunc == "sigmoid":
            return (1 / (1 + np.exp(-X))) * (1 - (1 / (1 + np.exp(-X))))
    
    def backward(self, x, y, outputs, activations):
        m = x.shape[0]
        yOneHot = self.oneHotLabels(y)
        dZs = [activations[-1] - yOneHot]
        dWs = [(1/m) * activations[-2].T @ dZs[-1]]
        dBs = [(1/m) * np.sum(dZs[-1], axis = 0)]

        for i in range(self.n_hiddenLayers):
            dZs.append(dZs[-1] @ self.layeredWeights[-1-i].T * self.derivActivation(outputs[-2-i]))
            dWs.append((1/m) * activations[-3-i].T @ dZs[-1])
            dBs.append((1/m) * np.sum(dZs[-1], axis=0))


        dWs.reverse()
        dBs.reverse()
        return dWs, dBs

    def forward(self,X):
        curMat = X
        out = []
        activ = [X]
        for i in range(self.n_hiddenLayers):
            curMat = curMat @ self.layeredWeights[i] + self.layeredBiases[i]
            out.append(curMat)
            curMat = self.activationFunc(curMat)
            activ.append(curMat)
        
        curMat = curMat @ self.layeredWeights[-1] + self.layeredBiases[-1]
        out.append(curMat)
        curMat = self.softmax(curMat)
        activ.append(curMat)
        
        return out, activ
    
    def update(self, dWs,dBs):
        for i in range(self.n_hiddenLayers + 1):
            self.layeredWeights[i] -= (self.learning_rate * dWs[i])
            self.layeredBiases[i] -= (self.learning_rate * dBs[i])
        return None
    
    def predict(self, X):
        outputs, activations = self.forward(X)
        return activations[-1]
    
    def evaluate_wandb(self,X,Y):
        probabilities = self.predict(X)
        oneHotY = self.oneHotLabels(Y)

        loss = self.cross_entropy_loss(oneHotY, probabilities)
        preds = self.predict(X)
        preds = np.argmax(preds, axis=1)
        yOut = np.argmax(oneHotY, axis=1)
        acc = accuracy_score(preds, yOut)

        return loss, acc

    def train(self, X_train, Y_train, X_val, Y_val):
        self.uniques = np.unique(Y_train)
        N = int(len(X_train))
        batchSize = int(N/10)
        for epoch in range(self.epochs):
            if (epoch%100 == 0 and self.printOutput==True):
                preds = self.predict(X_val)
                preds = np.argmax(preds, axis=1)

                yOneHot = self.oneHotLabels(Y_val)
                yOut = np.argmax(yOneHot, axis=1)
                print(f"Epoch = {epoch}")
                self.printMetrics(preds,yOut)

            
            if (self.optimizer == "SGD"):
                for i in range(N):
                    out, activations = self.forward(X_train[i].reshape(1,self.n_features))
                    dWs, dBs = self.backward(X_train[i], Y_train[i].reshape(1,1), out, activations)
                    self.update(dWs,dBs)
            elif (self.optimizer == "BatchGD"):
                out, activations = self.forward(X_train)
                dWs, dBs = self.backward(X_train, Y_train, out, activations)
                self.update(dWs,dBs)
            elif (self.optimizer == "MiniBatchGD"):
                batchSize = int(N/10)
                for i in range(0, N, batchSize):
                    out, activations = self.forward(X_train[i:min(batchSize+i, N)])
                    dWs, dBs = self.backward(X_train[i:min(batchSize+i, N)], Y_train[i:min(batchSize+i, N)], out, activations)
                    self.update(dWs,dBs)



        

In [9]:
n_features=X_train.shape[1]
n_classes=len(np.unique(Y_train))
lr = 0.01
model = MLPClassifier2(n_features, n_classes, lr, n_hiddenLayers=2,n_nodesPerLayer=5,activateFunc="ReLU", optimizer="SGD", epochs=1000)
model.train(X_train,Y_train,X_val,Y_val)

Epoch = 0
Accuracy = 	 0.029239766081871343
precision micro = 	 0.029239766081871343	precision macro = 	 0.1388888888888889
recall micro = 		 0.029239766081871343 	recall macro = 		 0.505175983436853
f1_score micro = 	 0.029239766081871343	f1_score macro = 	 0.34331337325349304



Epoch = 100
Accuracy = 	 0.5906432748538012
precision micro = 	 0.5906432748538012	precision macro = 	 0.24174951670809167
recall micro = 		 0.5906432748538012 	recall macro = 		 0.863232074438971
f1_score micro = 	 0.5906432748538012	f1_score macro = 	 0.2168276704419276

Epoch = 200
Accuracy = 	 0.5614035087719298
precision micro = 	 0.5614035087719298	precision macro = 	 0.25702581502084404
recall micro = 		 0.5614035087719298 	recall macro = 		 0.820718350870227
f1_score micro = 	 0.5614035087719298	f1_score macro = 	 0.25930921566826365



wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


Epoch = 300
Accuracy = 	 0.5730994152046783
precision micro = 	 0.5730994152046783	precision macro = 	 0.24112485369734749
recall micro = 		 0.5730994152046783 	recall macro = 		 0.8656233002027395
f1_score micro = 	 0.5730994152046783	f1_score macro = 	 0.22555663242230403

Epoch = 400
Accuracy = 	 0.49707602339181284
precision micro = 	 0.49707602339181284	precision macro = 	 0.20895142094396443
recall micro = 		 0.49707602339181284 	recall macro = 		 0.8315972222222223
f1_score micro = 	 0.49707602339181284	f1_score macro = 	 0.19581835049491284

Epoch = 500
Accuracy = 	 0.5555555555555556
precision micro = 	 0.5555555555555556	precision macro = 	 0.24919286963611734
recall micro = 		 0.5555555555555556 	recall macro = 		 0.7939431913116124
f1_score micro = 	 0.5555555555555556	f1_score macro = 	 0.24718843166547022



wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


Epoch = 600
Accuracy = 	 0.5614035087719298
precision micro = 	 0.5614035087719298	precision macro = 	 0.26747905735063976
recall micro = 		 0.5614035087719298 	recall macro = 		 0.7893448159405606
f1_score micro = 	 0.5614035087719298	f1_score macro = 	 0.2640546592079023

Epoch = 700
Accuracy = 	 0.543859649122807
precision micro = 	 0.543859649122807	precision macro = 	 0.2659223313738641
recall micro = 		 0.543859649122807 	recall macro = 		 0.78883480649785
f1_score micro = 	 0.543859649122807	f1_score macro = 	 0.26412785419028123

Epoch = 800
Accuracy = 	 0.5263157894736842
precision micro = 	 0.5263157894736842	precision macro = 	 0.2531874252048237
recall micro = 		 0.5263157894736842 	recall macro = 		 0.7918367346938776
f1_score micro = 	 0.5263157894736842	f1_score macro = 	 0.25419553711462756



wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


Epoch = 900
Accuracy = 	 0.5146198830409356
precision micro = 	 0.5146198830409356	precision macro = 	 0.2485961520758538
recall micro = 		 0.5146198830409356 	recall macro = 		 0.7987196994467203
f1_score micro = 	 0.5146198830409356	f1_score macro = 	 0.2530609121518212



### 2.2: Model Training & Hyperparameter Tuning using W&B

In [10]:
import wandb
import numpy as np

# Initialize W&B
wandb.init(project="SMAI_A3_q2-MultiLayerPerceptronClassifier", entity="its_mrpsycho")

myDict = {
    "ReLU":1,
    "tanh":2,
    "sigmoid":3,
    "SGD":4,
    "BatchGD":5,
    "MiniBatchGD":6
}

data = [
    ["ReLU", 1],
    ["tanh", 2],
    ["sigmoid", 3],
    ["SGD", 4],
    ["BatchGD", 5],
    ["MiniBatchGD", 6]
]

# Define a function to log to W&B
def log_to_wandb(loss, accuracy, learning_rate, epochs, hLayers, nodesPerLayer, optimizer, activFunc):
    wandb.log({
        "loss": loss,
        "accuracy": accuracy,
        "Hidden Layers": hLayers,
        "nodesPerLayer": nodesPerLayer,
        "Optimization method": optimizer,
        "Activation Function": activFunc,
        "learning_rate": learning_rate,
        "epochs": epochs
    })

# Hyperparameter Search
learning_rates = [0.01, 0.1]
epochs = [1000]
hiddenLayers = [1,2,3]
nodesPerLayer = [3,4,5]
activationFuncs = ["ReLU","tanh","sigmoid"]
optimizers = ["SGD","BatchGD","MiniBatchGD"]

columns = ["Category", "Value"]
table = wandb.Table(data=data, columns=columns)
wandb.log({"custom_table": table})

for lr in learning_rates:
    for num_epochs in epochs:
        for nLayers in hiddenLayers:
            for nNodes in nodesPerLayer:
                for activFunc in activationFuncs:
                    for optimizer in optimizers:
                        # Create and train your model with the current hyperparameters
                        model = MLPClassifier2(n_features, n_classes, lr, n_hiddenLayers=nLayers,n_nodesPerLayer=nNodes,activateFunc=activFunc,epochs=num_epochs, optimizer=optimizer ,printOutput=False)
                        model.train(X_train, Y_train, X_val, Y_val)

                        # Evaluate on the validation set
                        val_loss, val_accuracy = model.evaluate_wandb(X_val, Y_val)

                        # Log the results to W&B
                        log_to_wandb(val_loss, val_accuracy, float(lr), num_epochs, nLayers, nNodes, myDict[optimizer], myDict[activFunc])
# Analyze the results in the W&B dashboard



0,1
Activation Function,▁▁▅▅██▁▁▅▅██▁▁▅▅█▁▁▅▅██▁▁▅▅██▁▁▅▅█▁▁▅▅██
Hidden Layers,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅██████
Optimization method,▁▅▁▅▁▅▁█▁█▁█▅█▅█▅▁▅▁▅▁▅▁█▁█▁█▅█▅█▅▁▅▁▅▁▅
accuracy,▇▅▆▆▇▄▇██▇█▇▄▇▆▇▄▆██▆█▃▄▆▇▇▇▃▅▅▅█▃▁▇▆▄▃▃
epochs,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss,▁█▂▇▂█▁▂▂▂▂▂▇▂▆▃█▅▃▂█▂▇▆▆▂▂▂▇▅▃▇▂▇▇▆▃█▇▇
nodesPerLayer,▁▁▁▁▁▁▅▅▅▅▅▅█████▁▁▁▁▁▁▅▅▅▅▅▅█████▁▁▁▁▁▁

0,1
Activation Function,3.0
Hidden Layers,3.0
Optimization method,6.0
accuracy,0.4152
epochs,1000.0
learning_rate,0.01
loss,1.22907
nodesPerLayer,3.0


KeyboardInterrupt: 

### 2.3: Evaluating the model

In [11]:
import numpy as np
from sklearn.metrics import classification_report

modelOptimal = MLPClassifier2(n_features, n_classes, learning_rate=0.1, n_hiddenLayers=3,n_nodesPerLayer=7,activateFunc="ReLU",epochs=1500,printOutput=False)

modelOptimal.train(X_train, Y_train, X_val, Y_val)
preds = modelOptimal.predict(X_test)
preds = np.argmax(preds, axis=1)
yOut = np.argmax(modelOptimal.oneHotLabels(Y_test), axis=1)
report = classification_report(yOut, preds,zero_division=1)
print(report)

              precision    recall  f1-score   support

           0       1.00      0.00      0.00         1
           1       1.00      0.00      0.00         3
           2       1.00      0.00      0.00        74
           3       1.00      0.00      0.00        76
           4       1.00      0.00      0.00        17
           5       0.01      1.00      0.01         1

    accuracy                           0.01       172
   macro avg       0.83      0.17      0.00       172
weighted avg       0.99      0.01      0.00       172

