In [12]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [13]:
# Load the per-species feature and label arrays and stack them in a fixed order.
# Driving both file names from one list keeps features and labels paired.
species_codes = ['comcuc', 'cowpig1', 'eucdov', 'eueowl1', 'grswoo', 'tawowl1']

feature_parts = []
label_parts = []
for species_code in species_codes:
    species_X = np.load('X_' + species_code + '.npy')
    species_y = np.load(species_code + '_combined_labels.npy')

    # Fail early when one species' features and labels are misaligned: checking
    # only the concatenated totals would let two offsetting errors slip through.
    if len(species_X) != len(species_y):
        raise ValueError(
            "Row count mismatch for {}: {} feature rows vs {} labels".format(
                species_code, len(species_X), len(species_y)))

    feature_parts.append(species_X)
    label_parts.append(species_y)

X = np.concatenate(feature_parts)
y = np.concatenate(label_parts)

# Combine the feature and label arrays into a single dataset
# (the labels become the last column)
dataset = np.hstack((X, y.reshape(-1, 1)))

In [14]:
# Count the distinct label values present in y
num_labels = np.unique(y).size

# Per-class index arrays for the stratified sample are collected in this list
stratified_samples = list()

In [15]:
# Draw a reproducible 50% sample from every class and gather the selected rows.

# Read the labels from the last column of `dataset` rather than from `y`: a later
# cell rebinds `y` to the sub-sample, and indices computed from that array would
# no longer correspond to rows of `dataset` if this cell were re-run.
all_labels = dataset[:, -1]

# Start from an empty list on every execution so that re-running this cell does
# not accumulate indices from a previous run.
stratified_samples = []

# Loop over the label values actually present (they need not be 0..k-1)
for label in np.unique(all_labels):
    # Get the indices of instances with the current label
    indices = np.where(all_labels == label)[0]

    # Keep a random half of this class. Every entry in `indices` has the same
    # label, so a `stratify` argument would add nothing here; the fixed seed
    # makes the selection repeatable.
    train_indices, _ = train_test_split(indices, test_size=0.5, random_state=42)

    # Add the kept instances to the stratified sample list
    stratified_samples.append(train_indices)

# Concatenate the stratified samples into a single array of indices
stratified_indices = np.concatenate(stratified_samples)

# Use the stratified indices to select a subset of instances from the dataset
subset_dataset = dataset[stratified_indices, :]

In [16]:
# Show (rows, columns) of the sub-sampled dataset as a sanity check
print(np.shape(subset_dataset))

(59999, 255)


In [17]:
# Split dataset into training and testing sets
X = subset_dataset[:, :-1]  # Features
y = subset_dataset[:, -1]   # Labels

In [18]:
# define the classifier with default hyper-parameters; the seed makes the
# network's random initialisation repeatable so the scores can be reproduced
clf = MLPClassifier(random_state=42)

# define the number of folds
n_folds = 5

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
# NOTE(review): plain KFold does not stratify by class; if the classes are
# imbalanced, StratifiedKFold may give more stable macro-F1 estimates - confirm.
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [19]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
Average accuracy: 0.8973483179153818
Average F1 score: 0.8006921301430111
Average training accuracy: 0.999729161979069
Average training F1 score: 0.9994850675582849


In [20]:
# define the classifier with default hyper-parameters; the seed makes the
# network's random initialisation repeatable so the scores can be reproduced
clf = MLPClassifier(random_state=42)

# define the number of folds (10 instead of 5 for this run)
n_folds = 10

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [21]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
1
1
1
1
1
Average accuracy: 0.9011816747235649
Average F1 score: 0.8084369876724053
Average training accuracy: 0.9997296250685025
Average training F1 score: 0.9996400954325411


In [23]:
# define the classifier with a stronger L2 penalty (alpha=0.01), other
# hyper-parameters left at their defaults; the seed makes runs repeatable
clf = MLPClassifier(alpha=0.01, random_state=42)

# define the number of folds
n_folds = 5

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [24]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
Average accuracy: 0.8990150206961692
Average F1 score: 0.8042310550062298
Average training accuracy: 0.9996958277776621
Average training F1 score: 0.9996345553469828


In [25]:
# define the classifier with a strong L2 penalty (alpha=1), other
# hyper-parameters left at their defaults; the seed makes runs repeatable
clf = MLPClassifier(alpha=1, random_state=42)

# define the number of folds
n_folds = 5

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [26]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
Average accuracy: 0.911348540156124
Average F1 score: 0.8203034176255903
Average training accuracy: 0.9398448284512873
Average training F1 score: 0.8783286200648023


In [30]:
# define the classifier with a very strong L2 penalty (alpha=10), other
# hyper-parameters left at their defaults; the seed makes runs repeatable
clf = MLPClassifier(alpha=10, random_state=42)

# define the number of folds
n_folds = 5

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [31]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
Average accuracy: 0.8684311637080867
Average F1 score: 0.6815909694653289
Average training accuracy: 0.8713603496079779
Average training F1 score: 0.6876611606859893


In [32]:
# define the classifier with two hidden layers (100 and 50 units), other
# hyper-parameters left at their defaults; the seed makes runs repeatable
clf = MLPClassifier(hidden_layer_sizes=(100, 50), random_state=42)

# define the number of folds
n_folds = 5

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [33]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
Average accuracy: 0.9022816665277661
Average F1 score: 0.8121034954261823
Average training accuracy: 0.9998333307291125
Average training F1 score: 0.9998107879159586


In [34]:
# define the classifier with three hidden layers (100, 100 and 50 units), other
# hyper-parameters left at their defaults; the seed makes runs repeatable
clf = MLPClassifier(hidden_layer_sizes=(100, 100, 50), random_state=42)

# define the number of folds
n_folds = 5

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [35]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
Average accuracy: 0.8992315929105203
Average F1 score: 0.8049075540202008
Average training accuracy: 0.9970957901900734
Average training F1 score: 0.9948547121265833


In [36]:
# define the classifier with four hidden layers of 100 units each, other
# hyper-parameters left at their defaults; the seed makes runs repeatable
clf = MLPClassifier(hidden_layer_sizes=(100, 100, 100, 100), random_state=42)

# define the number of folds
n_folds = 5

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [37]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
Average accuracy: 0.8995816429146872
Average F1 score: 0.8069268859950898
Average training accuracy: 0.9967666211796079
Average training F1 score: 0.9937670246610851


In [40]:
# define the classifier combining the two-hidden-layer architecture (100, 50)
# with the strong L2 penalty (alpha=1); solver, activation and max_iter are
# spelled out explicitly. `learning_rate='adaptive'` was dropped because
# scikit-learn only uses that option with solver='sgd', so it had no effect
# under 'adam'. The seed makes runs repeatable.
clf = MLPClassifier(hidden_layer_sizes=(100, 50), alpha=1, solver='adam', activation='relu',
                    max_iter=200, random_state=42)

# define the number of folds
n_folds = 5

# initialize lists to store the per-fold results
acc_scores = []
f1_scores = []
train_acc_scores = []
train_f1_scores = []

# initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [41]:
# Evaluate `clf` with the folds from `kf`, recording held-out and training scores.

# Reset the score lists here so that re-running this cell on its own does not
# append to (and average over) results left behind by an earlier run.
acc_scores, f1_scores = [], []
train_acc_scores, train_f1_scores = [], []

# loop over the folds
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):

    # split the data into training and testing sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # fit the classifier to the training data
    clf.fit(X_train, y_train)

    # score the predictions on the held-out fold
    y_pred = clf.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    # score the predictions on the training data too, to expose over-fitting
    y_train_pred = clf.predict(X_train)
    train_acc_scores.append(accuracy_score(y_train, y_train_pred))
    train_f1_scores.append(f1_score(y_train, y_train_pred, average='macro'))

    # progress indicator
    print("Fold", fold, "of", kf.get_n_splits(), "done")

# print the average accuracy and F1 score across the folds
# (np.mean averages over the scores actually collected in this run)
print("Average accuracy:", np.mean(acc_scores))
print("Average F1 score:", np.mean(f1_scores))

# print the average training accuracy and F1 score across the folds
print("Average training accuracy:", np.mean(train_acc_scores))
print("Average training F1 score:", np.mean(train_f1_scores))

1
1
1
1
1
Average accuracy: 0.9104818554323971
Average F1 score: 0.823201031677069
Average training accuracy: 0.9574992892213032
Average training F1 score: 0.9124209346160888
