In [49]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [50]:
# Labels applied to the 35 comma-separated fields of each record, in file order.
# The last two entries are the patient's age and the class label; every other
# entry names one feature column.
column_names = [
    "erythema",
    "scaling",
    "definite borders",
    "itching",
    "koebner phenomenon",
    "polygonal papules",
    "follicular papules",
    "oral mucosal involvement",
    "knee and elbow involvement",
    "scalp involvement",
    "family history",
    "melanin incontinence",
    "eosinophils in the infiltrate",
    "PNL infiltrate",
    "fibrosis of the papillary dermis",
    "exocytosis",
    "acanthosis",
    "hyperkeratosis",
    "parakeratosis",
    "clubbing of the rete ridges",
    "elongation of the rete ridges",
    "thinning of the suprapapillary epidermis",
    "spongiform pustule",
    "munro microabcess",
    "focal hypergranulosis",
    "disappearance of the granular layer",
    "vacuolisation and damage of basal layer",
    "spongiosis",
    "saw-tooth appearance of retes",
    "follicular horn plug",
    "perifollicular parakeratosis",
    "inflammatory monoluclear inflitrate",
    "band-like infiltrate",
    "Age",
    "class",
]

# header=None makes pandas treat the first line as data and use the labels
# above as column names instead.
dermatology_data = pd.read_csv(
    'dermatology/dermatology.data',
    header=None,
    names=column_names,
)

In [51]:
# Missing entries are encoded as the literal string "?"; turn them into real
# missing values so they can be handled numerically.
dermatology_data = dermatology_data.replace("?", pd.NA)
# Anything in 'Age' that is not parseable as a number becomes NaN.
dermatology_data['Age'] = pd.to_numeric(dermatology_data['Age'], errors='coerce')

# Fill the missing values in 'Age' with the median age.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form operates on a temporary under pandas
# Copy-on-Write and would silently leave the NaNs in the frame.
age_median = dermatology_data['Age'].median()
dermatology_data['Age'] = dermatology_data['Age'].fillna(age_median)

# NOTE(review): the reason for dropping this feature is not stated in the notebook.
# errors='ignore' keeps the cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
dermatology_data = dermatology_data.drop(
    columns='perifollicular parakeratosis', errors='ignore'
)

In [52]:
# Min-max scale every feature column to [0, 1]; the class label is not scaled.
# NOTE(review): min/max are computed over the whole dataset before the
# train/test split, so test-set statistics leak into the scaling. Consider
# deriving them from the training rows only.
features = dermatology_data.drop(columns='class')
feature_min = features.min()
# A constant column has zero range; divide by 1 instead so it maps to 0
# rather than NaN (0 / 0).
feature_range = (features.max() - feature_min).replace(0, 1)
dermatology_data_scaled = (features - feature_min) / feature_range
dermatology_data_scaled['class'] = dermatology_data['class']

X = dermatology_data_scaled.drop('class', axis=1).values
y = dermatology_data_scaled['class'].values

# One-hot encode the class labels as a dense array.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and later
# removed the old name, so try the new spelling first and fall back for
# older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
y_encoded = encoder.fit_transform(y.reshape(-1, 1))

# Hold out 20% of the samples for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Checking the shape of the training and test data
X_train.shape, X_test.shape, y_train.shape, y_test.shape



((292, 33), (74, 33), (292, 6), (74, 6))

In [53]:
# Sigmoid activation function
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), computed stably.

    The naive formula evaluates exp(-x), which overflows (RuntimeWarning,
    intermediate inf) for large negative x. Here only exp(-|x|) is evaluated,
    which always lies in (0, 1], and the algebraically equivalent form is
    selected per element based on the sign of x.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x`` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))  # never overflows; may underflow harmlessly to 0
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) — the same function.
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () unwraps a 0-d array to a scalar and is a no-op otherwise.
    return result[()]

# Derivative of the sigmoid, expressed in terms of the sigmoid's OUTPUT
def sigmoid_derivative(x):
    """Return x * (1 - x).

    This equals the derivative of the sigmoid only when ``x`` is already a
    sigmoid activation s = sigmoid(z), since d/dz sigmoid(z) = s * (1 - s).
    Pass the activation, not the pre-activation z.
    """
    complement = 1 - x
    return x * complement

# Softmax activation function for the output layer
def softmax(x):
    """Row-wise softmax of a 2-D array.

    Each row's maximum is subtracted before exponentiating, which leaves the
    result unchanged but keeps the exponentials from overflowing. The
    reduction runs over axis 1, so ``x`` must have at least two dimensions.

    Returns:
        Array of the same shape as ``x`` whose rows each sum to 1.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    totals = exps.sum(axis=1, keepdims=True)
    return exps / totals

# Function to compute the loss using cross-entropy
def cross_entropy_loss(y_true, y_pred, eps=1e-12):
    """Total (summed, not averaged) cross-entropy between targets and predictions.

    Args:
        y_true: target distribution(s), e.g. one-hot rows.
        y_pred: predicted probabilities, same shape as ``y_true``.
        eps: lower bound applied to ``y_pred`` before taking the log. Without
            it a predicted probability of exactly 0 yields log(0) = -inf,
            making the loss inf (or nan via 0 * -inf).

    Returns:
        ``-sum(y_true * log(max(y_pred, eps)))`` over all elements.
    """
    safe_pred = np.maximum(y_pred, eps)
    return -np.sum(y_true * np.log(safe_pred))

In [54]:
# ANN class
class TwoLayerANN:
    """Fully connected network: input -> sigmoid hidden layer -> softmax output.

    Trained with full-batch gradient descent on the cross-entropy loss.
    Gradients are summed over the samples (not averaged), so the effective
    step size grows with the number of rows passed to ``train``.

    Attributes set by the methods:
        weights1, bias1: hidden-layer parameters, shapes
            (input_size, hidden_size) and (1, hidden_size).
        weights2, bias2: output-layer parameters, shapes
            (hidden_size, output_size) and (1, output_size).
        layer1, output: activations cached by the most recent ``feedforward``.
        loss_history: list of (epoch, loss) pairs recorded by ``train``.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Initialise weights from a standard normal and biases to zero.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output classes.
            seed: optional integer. When given, weights are drawn from a
                private ``RandomState(seed)`` so construction is reproducible
                and the global NumPy RNG is left untouched. When None, the
                global ``np.random`` state is used, as before.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Draw order (weights1 then weights2) is kept fixed for reproducibility.
        self.weights1 = rng.randn(input_size, hidden_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.weights2 = rng.randn(hidden_size, output_size)
        self.bias2 = np.zeros((1, output_size))
        self.loss_history = []

    def feedforward(self, X):
        """Run the forward pass and cache the activations.

        Overwrites ``self.layer1`` and ``self.output``; ``backpropagation``
        reads both, so it must follow a forward pass on the same ``X``.

        Returns:
            The softmax class probabilities, one row per row of ``X``.
        """
        self.layer1 = sigmoid(np.dot(X, self.weights1) + self.bias1)
        self.output = softmax(np.dot(self.layer1, self.weights2) + self.bias2)
        return self.output

    def backpropagation(self, X, y, learning_rate):
        """Apply one gradient-descent update using the cached forward pass.

        Args:
            X: the inputs that were given to the preceding ``feedforward``.
            y: one-hot targets with the same shape as ``self.output``.
            learning_rate: step size multiplying the summed gradients.
        """
        # For softmax combined with cross-entropy, the gradient with respect
        # to the output pre-activations reduces to (prediction - target).
        output_delta = self.output - y

        # Propagate through weights2 BEFORE it is updated below.
        layer1_error = output_delta.dot(self.weights2.T)
        # self.layer1 is already a sigmoid output, which is what
        # sigmoid_derivative expects.
        layer1_delta = layer1_error * sigmoid_derivative(self.layer1)

        # Update weights and biases
        self.weights2 -= self.layer1.T.dot(output_delta) * learning_rate
        self.bias2 -= np.sum(output_delta, axis=0, keepdims=True) * learning_rate
        self.weights1 -= X.T.dot(layer1_delta) * learning_rate
        self.bias1 -= np.sum(layer1_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate, log_every=100):
        """Train with full-batch gradient descent.

        Args:
            X: training inputs.
            y: one-hot training targets.
            epochs: number of forward/backward passes over ``X``.
            learning_rate: step size passed to ``backpropagation``.
            log_every: print and record the loss every this many epochs
                (starting at epoch 0). 0 or None disables logging.

        The logged loss is the one computed from the forward pass of that
        epoch, i.e. before that epoch's weight update. Recorded values are
        available afterwards in ``self.loss_history``.
        """
        self.loss_history = []
        for epoch in range(epochs):
            self.feedforward(X)
            self.backpropagation(X, y, learning_rate)
            if log_every and epoch % log_every == 0:
                # self.output is not modified by backpropagation, so this is
                # still the pre-update prediction for this epoch.
                loss = cross_entropy_loss(y, self.output)
                self.loss_history.append((epoch, loss))
                print("Epoch:", epoch, "Loss:", loss)

In [55]:
# ANN parameters
input_size = X_train.shape[1]   # one input node per feature column
hidden_size = 10  # Number of nodes in the hidden layer
output_size = y_train.shape[1]  # one output node per one-hot class column

# The network draws its initial weights from NumPy's global random state;
# seed it so that Restart & Run All reproduces the same losses and accuracy.
np.random.seed(42)

# Creating the ANN
ann = TwoLayerANN(input_size, hidden_size, output_size)

# Training the ANN
ann.train(X_train, y_train, epochs=1000, learning_rate=0.01)

Epoch: 0 Loss: 975.9841230520677
Epoch: 100 Loss: 22.936869873981312
Epoch: 200 Loss: 10.621595110967936
Epoch: 300 Loss: 6.789306972459894
Epoch: 400 Loss: 4.919639174035273
Epoch: 500 Loss: 3.8086832462635196
Epoch: 600 Loss: 3.073378751324366
Epoch: 700 Loss: 2.5533008519717955
Epoch: 800 Loss: 2.1683685198343134
Epoch: 900 Loss: 1.8737067187691712


In [56]:
# Function to predict classes using the trained ANN
def predict(X, model):
    """Return the most probable class index for every row of ``X``.

    Args:
        X: 2-D array of input samples.
        model: object exposing ``feedforward(X)`` that returns one row of
            class scores per sample.

    Returns:
        1-D array of column indices (argmax over axis 1). These are positions
        in the model's output, not the original class label values.
    """
    return np.argmax(model.feedforward(X), axis=1)

# Class indices predicted by the trained network for the held-out samples
y_pred = predict(X_test, ann)

# Recover each test sample's true class index from its one-hot row
y_test_labels = y_test.argmax(axis=1)

# Accuracy: fraction of test samples whose predicted index matches the true one
accuracy = (y_pred == y_test_labels).mean()
accuracy

0.9864864864864865