In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import make_classification

In [2]:
# Read data
train_data = pd.read_excel('DataSets/THA2train.xlsx')
validate_data = pd.read_excel('DataSets/THA2validate.xlsx')

NameError: name 'pd' is not defined

In [None]:
# data separation features and labels
X_train = train_data.iloc[:, :-1].values
y_train = pd.get_dummies(train_data.iloc[:, -1]).values

In [None]:
# data separation features and labels
X_validate = validate_data.iloc[:, :-1].values
y_validate = pd.get_dummies(validate_data.iloc[:, -1]).values

In [None]:
#this function is for sigmoid by handeling overflow
def sigmoid(z):
    return np.where(z >= 0, 
                    1 / (1 + np.exp(-z)), 
                    np.exp(z) / (1 + np.exp(z)))

In [None]:
#derivative of sigmoid
def sigmoid_derivative(a):
    return a * (1 - a)

In [None]:
#softmax function
def softmax(z):
    e_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return e_z / np.sum(e_z, axis=1, keepdims=True)

In [None]:
#the MLP class
class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2 / (input_size + hidden_size))
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2 / (hidden_size + output_size))
        self.b2 = np.zeros((1, output_size))

        

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = softmax(self.z2)
        return self.a2

    def backward(self, X, y):
        output_error = self.a2 - y
        dW2 = np.dot(self.a1.T, output_error)
        db2 = np.sum(output_error, axis=0, keepdims=True)
        
        hidden_error = np.dot(output_error, self.W2.T) * sigmoid_derivative(self.a1)
        dW1 = np.dot(X.T, hidden_error)
        db1 = np.sum(hidden_error, axis=0, keepdims=True)
        
        return dW1, db1, dW2, db2

    def update_weights(self, dW1, db1, dW2, db2, learning_rate):
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def compute_loss(self, y_true, y_pred):
        m = y_true.shape[0]
        loss = -np.sum(y_true * np.log(y_pred + 1e-8)) / m  
        return loss



In [None]:
# Normalize the data
X_train_norm = (X_train - np.mean(X_train, axis=0)) / np.std(X_train, axis=0)
X_validate_norm = (X_validate - np.mean(X_validate, axis=0)) / np.std(X_validate, axis=0)


In [None]:
# Training hyperparameters
epochs = 2000
learning_rate = 0.01
batch_size = 32

# Lists to store loss for plotting
training_losses = []
validation_losses = []

In [None]:
param_dist = {
    'hidden_size': [50, 100, 150],
    'learning_rate': [0.001, 0.01, 0.1],
}

mlp = MLP(input_size=X_train_norm.shape[1], hidden_size=10, output_size=y_train.shape[1])
random_search = RandomizedSearchCV.fit(mlp, param_distributions=param_dist, n_iter=10, cv=3, scoring='accuracy', random_state=42, n_jobs=-1)

# Fit RandomizedSearchCV to your training data
random_search.fit(X_train, y_train)

# Get the best model
best_mlp = random_search.best_estimator_
random_search.fit(X_train_norm, y_train)

# Get the best and worst models based on validation accuracy
# Get the best and worst models based on validation accuracy
best_index = np.argmax(random_search.cv_results_['mean_test_score'])
worst_index = np.argmin(random_search.cv_results_['mean_test_score'])

best_params = random_search.cv_results_['params'][best_index]
worst_params = random_search.cv_results_['params'][worst_index]

best_model = MLP(input_size=X_train_norm.shape[1], output_size=y_train.shape[1], **best_params)
worst_model = MLP(input_size=X_train_norm.shape[1], output_size=y_train.shape[1], **worst_params)

# Train the best and worst models again to obtain training and validation losses
train_losses_best, validation_losses_best = [], []
train_losses_worst, validation_losses_worst = [], []

for epoch in range(epochs):
    indices_best = np.arange(X_train_norm.shape[0])
    np.random.shuffle(indices_best)
    X_train_norm_shuffled_best = X_train_norm[indices_best]
    y_train_shuffled_best = y_train[indices_best]

    indices_worst = np.arange(X_train_norm.shape[0])
    np.random.shuffle(indices_worst)
    X_train_norm_shuffled_worst = X_train_norm[indices_worst]
    y_train_shuffled_worst = y_train[indices_worst]

    for start_idx in range(0, X_train_norm.shape[0], batch_size):
        end_idx = min(start_idx + batch_size, X_train_norm.shape[0])

        # Training for the best model
        X_batch_best = X_train_norm_shuffled_best[start_idx:end_idx]
        y_batch_best = y_train_shuffled_best[start_idx:end_idx]
        predictions_best = best_model.forward(X_batch_best)
        dW1_best, db1_best, dW2_best, db2_best = best_model.backward(X_batch_best, y_batch_best)
        best_model.update_weights(dW1_best, db1_best, dW2_best, db2_best, learning_rate)

        # Training for the worst model
        X_batch_worst = X_train_norm_shuffled_worst[start_idx:end_idx]
        y_batch_worst = y_train_shuffled_worst[start_idx:end_idx]
        predictions_worst = worst_model.forward(X_batch_worst)
        dW1_worst, db1_worst, dW2_worst, db2_worst = worst_model.backward(X_batch_worst, y_batch_worst)
        worst_model.update_weights(dW1_worst, db1_worst, dW2_worst, db2_worst, learning_rate)

    # Compute losses for the best model
    train_loss_best = best_model.compute_loss(y_train, best_model.forward(X_train_norm))
    validation_loss_best = best_model.compute_loss(y_validate, best_model.forward(X_validate_norm))
    train_losses_best.append(train_loss_best)
    validation_losses_best.append(validation_loss_best)

    # Compute losses for the worst model
    train_loss_worst = worst_model.compute_loss(y_train, worst_model.forward(X_train_norm))
    validation_loss_worst = worst_model.compute_loss(y_validate, worst_model.forward(X_validate_norm))
    train_losses_worst.append(train_loss_worst)
    validation_losses_worst.append(validation_loss_worst)

    if epoch % 100 == 0:
        print(
            f"Epoch {epoch}, Best Model: Training Loss: {train_loss_best:.4f}, Validation Loss: {validation_loss_best:.4f}, "
            f"Worst Model: Training Loss: {train_loss_worst:.4f}, Validation Loss: {validation_loss_worst:.4f}"
        )
mlp = MLPClassifier(max_iter=100)

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(mlp, param_distributions=param_dist, n_iter=10, cv=3, scoring='accuracy', random_state=42, n_jobs=-1)

# Fit RandomizedSearchCV to your training data
random_search.fit(X_train, y_train)

# Get the best model
best_mlp = random_search.best_estimator_

# Predict on validation set and calculate accuracy
y_pred = best_mlp.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy of Best Model: {accuracy:.4f}")
# Plot training and validation losses for the best and worst models
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(train_losses_best, label='Best Model - Training')
plt.plot(validation_losses_best, label='Best Model - Validation')
plt.title('Best Model - Training and Validation Losses')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_losses_worst, label='Worst Model - Training')
plt.plot(validation_losses_worst, label='Worst Model - Validation')
plt.title('Worst Model - Training and Validation Losses')
plt.ylabel('Loss')
plt.legend()



AttributeError: 'MLP' object has no attribute '_validate_params'