<a href="https://colab.research.google.com/github/WhizpyH/Neural-Networks/blob/main/Assignment_2_Neural_Network_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [65]:
#####################################################################################################################
#   Assignment 2: Neural Network Analysis
#   This is a starter code in Python 3.6 for a neural network.
#   You need to have numpy and pandas installed before running this code.
#   You need to complete all TODO marked sections
#   You are free to modify this code in any way you want, but need to mention it
#       in the README file.
#
#####################################################################################################################


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [66]:
class NeuralNet:
    def __init__(self, X, y):
        self.raw_input = pd.concat([X, y], axis=1)




    # Pre-processing the dataset, which would include
    # standardization, normalization,
    # categorical to numerical, etc
    def preprocess(self):
        self.processed_data = self.raw_input

        # No null variables
        self.processed_data = self.processed_data.dropna()

        # Removing any redundant rows
        self.processed_data = self.processed_data.drop_duplicates()

        # Categorical to numerical
        for column in self.processed_data.columns:
            if self.processed_data[column].dtype == 'object':
                if column == self.raw_input.columns[-1]: # This is the target column
                    le = LabelEncoder()
                    self.processed_data[column] = le.fit_transform(self.processed_data[column])
                    self.processed_data[column] = to_categorical(self.processed_data[column])

                else:
                    self.processed_data[column] = pd.Categorical(self.processed_data[column]).codes


        # Standardization
        for column in self.processed_data.columns:
            if self.processed_data[column].dtype in [np.float64, np.int64]:
                mean = self.processed_data[column].mean()
                std = self.processed_data[column].std()
                if std != 0:
                  self.processed_data[column] = (self.processed_data[column] - mean) / std

        # Normalization
        for column in self.processed_data.columns:
            if self.processed_data[column].dtype in [np.float64, np.int64]:
                min_val = self.processed_data[column].min()
                max_val = self.processed_data[column].max()
                if max_val - min_val != 0:
                  self.processed_data[column] = (self.processed_data[column] - min_val) / (max_val - min_val)


        return 0

    # Train and evaluate models for all combinations of parameters
    # specified in the init method. Obtain following outputs:
    #   1. Training Accuracy and Error (Loss) for every model
    #   2. Test Accuracy and Error (Loss) for every model
    #   3. History Curve (Plot of Accuracy against training steps) for all
    #       the models in a single plot. The plot should be color coded i.e.
    #       different color for each model

    def train_evaluate(self):
        ncols = len(self.processed_data.columns)
        nrows = len(self.processed_data.index)
        X = self.processed_data.iloc[:, 0:(ncols - 3)] # Adjusting for one-hot encoded target
        y = self.processed_data.iloc[:, (ncols-3):] # Adjusting for one-hot encoded target
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2)

        # Below are the hyperparameters that you need to use for model evaluation
        # You can assume any fixed number of neurons for each hidden layer.

        activations = ['sigmoid', 'tanh', 'relu']
        learning_rate = [0.01, 0.1]
        max_iterations = [100, 200] # also known as epochs
        num_hidden_layers = [2, 3]

        # A loop to go through each activation
        for h in num_hidden_layers:
            for a in activations:
                # 2 Hidden layers
                if h == 2:
                    model = tf.keras.Sequential([
                        keras.Input(shape=(X_train.shape[1],)),
                        tf.keras.layers.Dense(3000, activation=a),
                        tf.keras.layers.Dense(1000, activation=a),
                        tf.keras.layers.Dense(3, activation='softmax') # Changed to 3 units and softmax for multi-class
                    ])
                # 3 Hidden layers
                else:
                    model = tf.keras.Sequential([
                        keras.Input(shape=(X_train.shape[1],)),
                        tf.keras.layers.Dense(3000, activation=a),
                        tf.keras.layers.Dense(1000, activation=a),
                        tf.keras.layers.Dense(300, activation=a),
                        tf.keras.layers.Dense(3, activation='softmax') # Changed to 3 units and softmax for multi-class
                    ])
                for lr in learning_rate:
                    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                      loss='categorical_crossentropy', # Changed loss for multi-class
                      metrics=['accuracy'])

                    for i in max_iterations:
                        history = model.fit(X_train, y_train, epochs=i)
                        test_loss, test_acc = model.evaluate(X_test, y_test)

                        # Plot the Training Accuracy and Error (Loss) for this model
                        plt.plot(history.history['accuracy'])
                        plt.title('Training Accuracy')
                        plt.ylabel('Accuracy')
                        plt.xlabel('Epoch')
                        plt.show()

                        # Test Accuracy and Error (Loss) for this model
                        test_loss, test_acc = model.evaluate(X_test, y_test)
                        print('Test accuracy:', test_acc)

                        # Test Accuracy and Error (Loss) for this model in a plot
                        plt.plot(history.history['loss'])
                        plt.title('Training Error')
                        plt.ylabel('Error')
                        plt.xlabel('Epoch')
                        plt.show()

                        # History Curve (Plot of Accuracy against training steps) for the model in a single plot
                        plt.plot(history.history['accuracy'])
                        plt.title('History Curve')
                        plt.ylabel('Accuracy')
                        plt.xlabel('Epoch')
                        plt.show()


        # Table of model hyperparameters, training and test accuracies, and training and testing errors
        table = pd.DataFrame(columns=['Activation', 'Learning Rate', 'Max Iterations', 'Num Hidden Layers', 'Training Accuracy', 'Training Error', 'Test Accuracy', 'Test Error'])
        print(table)

        return 0

In [67]:
pip install ucimlrepo



In [68]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
predict_students_dropout_and_academic_success = fetch_ucirepo(id=697)

# data (as pandas dataframes)
X = predict_students_dropout_and_academic_success.data.features
y = predict_students_dropout_and_academic_success.data.targets

# metadata
print(predict_students_dropout_and_academic_success.metadata)

# variable information
print(predict_students_dropout_and_academic_success.variables)


{'uci_id': 697, 'name': "Predict Students' Dropout and Academic Success", 'repository_url': 'https://archive.ics.uci.edu/dataset/697/predict+students+dropout+and+academic+success', 'data_url': 'https://archive.ics.uci.edu/static/public/697/data.csv', 'abstract': "A dataset created from a higher education institution (acquired from several disjoint databases) related to students enrolled in different undergraduate degrees, such as agronomy, design, education, nursing, journalism, management, social service, and technologies.\nThe dataset includes information known at the time of student enrollment (academic path, demographics, and social-economic factors) and the students' academic performance at the end of the first and second semesters. \nThe data is used to build classification models to predict students' dropout and academic sucess. The problem is formulated as a three category classification task, in which there is a strong imbalance towards one of the classes.", 'area': 'Social Sc

In [70]:
if __name__ == "__main__":
    neural_network = NeuralNet(X, y)
    neural_network.preprocess()
    neural_network.train_evaluate()

Epoch 1/100
[1m 83/111[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m1s[0m 43ms/step - accuracy: 0.3284 - loss: 5.3382

KeyboardInterrupt: 