## Import libraries and load data


In [153]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.base import clone

from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt



from sklearn.metrics import accuracy_score, mean_squared_error
# Single seed reused by the CV splitters and the scikit-learn estimators below.
seed = 1462474
# Seeds NumPy's legacy global RNG only.
# NOTE(review): whether the network returned by build_cnn is seeded is not visible here — confirm.
np.random.seed(seed)

In [154]:
# Location of the training feature table (relative to the notebook's working directory).
data_path = "data/train/Features/all_features.csv"
# Expected to contain the ClassId, image_path and id columns that the next cell separates out.
train_df = pd.read_csv(data_path) 


In [155]:
# Split the data into features and output
# y holds the class label of every row; X keeps every other column except the
# non-feature image_path and id columns.
y = train_df["ClassId"]
X = train_df.drop(columns=["ClassId", "image_path", "id"])

## Feature Selection

In [156]:
# Since our data is mostly continuous, we will use mutual information to select features.
# NOTE(review): the scores are computed on the full training set, before the CV
# folds are created, so the cross-validated metrics further down are measured on
# rows that already influenced which features were kept.

# Tunable settings for the mutual-information filter (change while testing).
MI_N_NEIGHBORS = 7   # number of neighbours used by the MI estimator
MI_THRESHOLD = 0.1   # keep features whose MI score is strictly above this value

mi_array = mutual_info_classif(X, y, random_state=0, n_neighbors=MI_N_NEIGHBORS)

# Convert to a Series indexed by feature name so each score stays attached to its column
mi_series = pd.Series(mi_array, index=X.columns)

# Sort features by importance (highest MI first)
mi_sorted = mi_series.sort_values(ascending=False)

# Show top features
print(mi_sorted.head(10))

# Select features whose score exceeds MI_THRESHOLD; columns stay in descending-MI order
top_features = mi_sorted.index[mi_sorted > MI_THRESHOLD]
X_selected = X[top_features]



hog_pca_0          0.882652
hog_pca_3          0.796677
Edge_Hist_Bin_6    0.596259
Edge_Hist_Bin_2    0.566049
hog_pca_1          0.554871
Edge_Hist_Bin_7    0.530975
Edge_Hist_Bin_3    0.519416
hog_pca_2          0.493234
Edge_Hist_Bin_5    0.429996
H_hist_bin_16      0.402375
dtype: float64


In [157]:
# Preview the first rows of the MI-selected feature table.
X_selected.head()

Unnamed: 0,hog_pca_0,hog_pca_3,Edge_Hist_Bin_6,Edge_Hist_Bin_2,hog_pca_1,Edge_Hist_Bin_7,Edge_Hist_Bin_3,hog_pca_2,Edge_Hist_Bin_5,H_hist_bin_16,...,hog_pca_16,ch_9,hog_pca_14,ch_34,ch_40,ch_30,ch_2,hog_pca_15,ch_3,ch_33
0,-0.763458,-0.638673,0.038306,0.199623,0.92788,0.060268,0.256466,0.264329,0.111521,0.022676,...,-0.213796,0.129677,-0.97962,0.335851,0.095503,0.0,0.268901,0.450825,0.355617,0.002388
1,1.049284,0.90438,0.047018,0.185522,3.6082,0.233097,0.071324,-1.81719,0.112443,0.18335,...,-0.886182,0.018133,0.101737,0.070214,0.024119,0.0,0.657901,-0.335682,0.641467,0.0
2,-1.55244,0.671877,0.125505,0.058121,-0.432374,0.08851,0.089124,-0.318422,0.146615,0.016829,...,-0.421448,0.273625,0.377116,0.269911,0.271127,0.052706,0.385766,0.501386,0.091956,0.323407
3,-1.556871,0.613876,0.113744,0.122933,0.214406,0.117298,0.081,0.973758,0.129953,0.189296,...,-0.481589,0.03695,-0.5956,0.801271,0.056455,0.0,0.947646,0.285638,0.21735,0.509585
4,-0.944294,-0.607014,0.10688,0.130112,-0.334833,0.128805,0.126042,0.415215,0.101269,0.0,...,0.062209,0.565515,0.601939,0.0,0.110113,0.026269,0.0,-0.520063,0.0,0.0


In [158]:
# See if we have any NaN values: prints the total number of missing cells
# across all selected feature columns (0 means none).
print(X_selected.isnull().sum().sum())  

0


In [159]:

# Step 1: drop zero-variance (constant) columns.
selector = VarianceThreshold(threshold=0)
X_var = selector.fit_transform(X_selected)
selected_columns = X_selected.columns[selector.get_support()]

# Step 2: drop one feature from every highly correlated pair.
# Only the strict upper triangle of the absolute correlation matrix is kept, so
# each pair is inspected once and it is the later column of a pair that is dropped.
X_var_df = pd.DataFrame(X_var, columns=selected_columns)
corr_matrix = X_var_df.corr().abs()
upper_triangle_mask = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
upper = corr_matrix.where(upper_triangle_mask)
to_drop = [name for name in upper.columns if (upper[name] > 0.95).any()]
X_final = X_var_df.drop(columns=to_drop)


In [160]:
# Get X final column names for test data
# (reused later to pick the same columns, in the same order, from the test features).
X_final_columns = X_final.columns
X_final_columns

Index(['hog_pca_0', 'hog_pca_3', 'Edge_Hist_Bin_6', 'Edge_Hist_Bin_2',
       'hog_pca_1', 'Edge_Hist_Bin_7', 'Edge_Hist_Bin_3', 'hog_pca_2',
       'Edge_Hist_Bin_5', 'H_hist_bin_16',
       ...
       'hog_pca_16', 'ch_9', 'hog_pca_14', 'ch_34', 'ch_40', 'ch_30', 'ch_2',
       'hog_pca_15', 'ch_3', 'ch_33'],
      dtype='object', length=156)

## Setup Stacking Pipeline

In [161]:
# Build the stratified folds once and materialise them, so every base model
# (SVM, RF, CNN) is trained and validated on exactly the same row partitions.
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
splits = [(fold_train, fold_val) for fold_train, fold_val in skf.split(X_final, y)]
# Number of distinct class labels; used to size the probability matrices below.
num_classes = np.unique(y).size

#### 1. SVM

In [162]:
# Metric helpers: accuracy, bias, variance and sparse categorical cross-entropy loss
from sklearn.metrics import log_loss

def sparse_categorical_crossentropy(y_true, y_proba):
    """Return the log loss of integer labels against predicted class probabilities.

    y_true:  array-like of shape (n_samples,) holding integer class ids.
    y_proba: array-like of shape (n_samples, n_classes); column j is treated as
             the probability of class id j.
    """
    # Pass the complete label set explicitly so the call does not depend on
    # every class being present in y_true.
    class_ids = np.arange(y_proba.shape[1])
    return log_loss(y_true, y_proba, labels=class_ids)

def calculate_metrics(y_true, y_pred, y_proba):
    """Summarise one fold's predictions.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Integer class ids in ``range(num_classes)`` (they index an identity matrix).
    y_pred : array-like of shape (n_samples,)
        Predicted class ids.
    y_proba : array-like of shape (n_samples, num_classes)
        Predicted class probabilities.

    Returns
    -------
    tuple
        ``(accuracy, bias, variance, cross_entropy)`` where

        * ``accuracy`` is the fraction of rows with ``y_pred == y_true``;
        * ``bias`` is the square root of the mean squared difference between
          ``y_proba`` and the one-hot encoding of ``y_true``;
        * ``variance`` is the mean, over rows, of the variance of each row's
          probability vector;
        * ``cross_entropy`` is the value of ``sparse_categorical_crossentropy``.

    Note: "bias" and "variance" are summaries of the predicted probabilities of
    a single fold; they are not the terms of a bias-variance decomposition
    estimated over resampled models.
    """
    # Work on plain arrays so the element-wise comparison is purely positional,
    # whatever index a pandas input carries.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Row i of the identity matrix is the one-hot vector of class i.
    y_true_onehot = np.eye(num_classes)[y_true]
    accuracy = np.mean(y_pred == y_true)
    bias2 = np.mean((y_proba - y_true_onehot) ** 2)
    variance = np.mean(np.var(y_proba, axis=1))
    cross_entropy = sparse_categorical_crossentropy(y_true, y_proba)
    return accuracy, np.sqrt(bias2), variance, cross_entropy

In [163]:
# SVM base model: standardise the features, then fit an RBF-kernel SVC with
# probability estimates enabled (the probabilities feed the stacking meta-model).
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='rbf', C = 5, gamma ='scale', probability=True, random_state=seed))
])

# Optional grid search over C, off by default. It is placed after the pipeline
# definition because it needs `svm_pipeline` to exist.
RUN_SVM_GRID_SEARCH = False
if RUN_SVM_GRID_SEARCH:
    param_grid_svm = {
        'svc__C': [1, 5, 10, 100],
        'svc__gamma': ['scale'],
    }

    # Grid search for SVM pipeline, evaluated on the same folds as the base models
    grid_search_svm = GridSearchCV(
        svm_pipeline,
        param_grid_svm,
        cv=splits,
        scoring='accuracy',
        verbose=4
    )

    grid_search_svm.fit(X_final, y)

# One row of class probabilities per training sample, filled fold by fold
svm_oof_probs = np.zeros((X_final.shape[0], num_classes))
svm_accuracies, svm_biases, svm_variances, svm_crossentropies = [], [], [], []

# Fit the model and get out-of-fold predictions.
# Each row's probabilities come from a model that was not trained on that row.
for train_idx, val_idx in splits:
    # Fresh, unfitted copy for every fold so no state carries over between folds
    svm_model = clone(svm_pipeline)
    svm_model.fit(X_final.iloc[train_idx], y.iloc[train_idx])
    y_pred = svm_model.predict(X_final.iloc[val_idx])
    y_pred_probs = svm_model.predict_proba(X_final.iloc[val_idx])
    svm_oof_probs[val_idx] = y_pred_probs

    # Calculate performance metrics
    acc, bias, var, ce = calculate_metrics(y.iloc[val_idx], y_pred, y_pred_probs)
    svm_accuracies.append(acc)
    svm_biases.append(bias)
    svm_variances.append(var)
    svm_crossentropies.append(ce)

print(f"SVM Accuracies: {svm_accuracies}")
print(f"Mean Accuracy: {np.mean(svm_accuracies):.4f}")
print(f"SVM Biases: {svm_biases}")
print(f"Mean Bias: {np.mean(svm_biases):.4f}")
print(f"SVM Variances: {svm_variances}")
print(f"Mean Variance: {np.mean(svm_variances):.4f}")
print(f"SVM Cross-Entropy Losses: {svm_crossentropies}")
print(f"Mean Cross-Entropy Loss: {np.mean(svm_crossentropies):.4f}")


SVM Accuracies: [0.8633879781420765, 0.8806921675774135, 0.8788706739526412, 0.8650865998176845, 0.8678213309024613]
Mean Accuracy: 0.8712
SVM Biases: [0.07099380977963383, 0.0694180229265965, 0.06810788418274377, 0.07018323816080485, 0.07005293562073735]
Mean Bias: 0.0698
SVM Variances: [0.014204018616744666, 0.013865727754214194, 0.013927436097491754, 0.01410532617506165, 0.013910578142156838]
Mean Variance: 0.0140
SVM Cross-Entropy Losses: [0.5618294322974211, 0.5518763085912034, 0.5185287938446979, 0.5385448755310718, 0.5490627699989261]
Mean Cross-Entropy Loss: 0.5440


Code for the SVM grid-search plot (mean CV accuracy vs. C)

In [164]:
"""
results = grid_search_svm.cv_results_

# Plot mean test accuracy for each C value
C_values = results['param_svc__C'].data
mean_scores = results['mean_test_score']

plt.figure(figsize=(8, 5))
plt.plot(C_values, mean_scores, marker='o')
plt.xlabel('SVM: C value')
plt.ylabel('Mean CV Accuracy')
plt.title('SVM Grid Search: Accuracy vs C')
plt.xscale('log')
plt.grid(True)
plt.show()



print("Best SVM parameters:", grid_search_svm.best_params_)
print("Best SVM accuracy:", grid_search_svm.best_score_)

# Use the best SVM pipeline for stacking
svm_pipeline = grid_search_svm.best_estimator_"""



'\nresults = grid_search_svm.cv_results_\n\n# Plot mean test accuracy for each C value\nC_values = results[\'param_svc__C\'].data\nmean_scores = results[\'mean_test_score\']\n\nplt.figure(figsize=(8, 5))\nplt.plot(C_values, mean_scores, marker=\'o\')\nplt.xlabel(\'SVM: C value\')\nplt.ylabel(\'Mean CV Accuracy\')\nplt.title(\'SVM Grid Search: Accuracy vs C\')\nplt.xscale(\'log\')\nplt.grid(True)\nplt.show()\n\n\n\nprint("Best SVM parameters:", grid_search_svm.best_params_)\nprint("Best SVM accuracy:", grid_search_svm.best_score_)\n\n# Use the best SVM pipeline for stacking\nsvm_pipeline = grid_search_svm.best_estimator_'

#### 2. RF

In [165]:
# Disabled grid search, kept for reference.
# (To re-enable it, run it after `rf_pipeline` has been defined.)
# param_grid = {
#     'rf__n_estimators': [100, 200,300],
#     'rf__max_depth': [None, 10, 20],
#     'rf__min_samples_split': [2, 5, 10],
#     'rf__min_samples_leaf': [1, 2, 4],
#     'rf__max_features': ['sqrt', 'log2'],
# }

# grid_search = GridSearchCV(rf_pipeline, param_grid, cv=splits, scoring='accuracy', verbose=4)
# grid_search.fit(X_final, y)
# print("Best RF params:", grid_search.best_params_)
# print("Best RF accuracy:", grid_search.best_score_)
# rf_pipeline = grid_search.best_estimator_


# Random-forest base model with fixed hyper-parameters
# (n_estimators=400 is not one of the values listed in the disabled grid above).
rf_classifier = RandomForestClassifier(
    n_estimators=400,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=seed,
)
rf_pipeline = Pipeline([('rf', rf_classifier)])


# Out-of-fold predicted probabilities for the random forest: every row is
# scored by a model fitted on the other folds only.
n_rows = X_final.shape[0]
rf_oof_probs = np.zeros((n_rows, num_classes))
rf_accuracies, rf_biases, rf_variances, rf_crossentropies = [], [], [], []

for train_idx, val_idx in splits:
    X_fold_val = X_final.iloc[val_idx]
    y_fold_val = y.iloc[val_idx]

    # Fresh, unfitted copy of the pipeline for this fold
    rf_model = clone(rf_pipeline)
    rf_model.fit(X_final.iloc[train_idx], y.iloc[train_idx])

    y_pred = rf_model.predict(X_fold_val)
    y_pred_proba = rf_model.predict_proba(X_fold_val)
    rf_oof_probs[val_idx] = y_pred_proba

    # Per-fold metrics
    acc, bias, var, ce = calculate_metrics(y_fold_val, y_pred, y_pred_proba)
    rf_accuracies.append(acc)
    rf_biases.append(bias)
    rf_variances.append(var)
    rf_crossentropies.append(ce)

print(f"RF Accuracies: {rf_accuracies}")
print(f"Mean Accuracy: {np.mean(rf_accuracies):.4f}")
print(f"RF Biases: {rf_biases}")
print(f"Mean Bias: {np.mean(rf_biases):.4f}")
print(f"RF Variances: {rf_variances}")
print(f"Mean Variance: {np.mean(rf_variances):.4f}")
print(f"RF Cross-Entropy Losses: {rf_crossentropies}")
print(f"Mean Cross-Entropy Loss: {np.mean(rf_crossentropies):.4f}")



RF Accuracies: [0.8306010928961749, 0.8387978142076503, 0.8324225865209471, 0.8295350957155879, 0.8176845943482224]
Mean Accuracy: 0.8298
RF Biases: [0.10118599117473713, 0.1018328584425691, 0.10166850385520275, 0.10250704793373537, 0.10366542259934101]
Mean Bias: 0.1022
RF Variances: [0.0059781125043222305, 0.0058335671893732744, 0.005859107548165158, 0.005731420726569784, 0.005720939975684705]
Mean Variance: 0.0058
RF Cross-Entropy Losses: [1.0684956938660009, 1.0852161803179308, 1.0724528506669464, 1.0915140080127173, 1.1137000604081098]
Mean Cross-Entropy Loss: 1.0863


#### 3. CNN

We train the CNN separately and use its out-of-fold (OOF) predicted probabilities as features for the meta-model.

In [166]:
# Load the pre-built image array used as CNN input.
# NOTE(review): assumes its rows are in the same order as train_df, because the
# same `splits` indices are applied to both — confirm where X_nn.npy is generated.
X_nn = np.load("X_nn.npy")


In [167]:

print(f"X_nn shape: {X_nn.shape}")
print(f"y shape: {y.shape}")
print(f"y dtype: {y.dtype}")
print(f"X_nn dtype: {X_nn.dtype}")

# Check unique labels
print(np.unique(y))


# Convert to np.int64 and np.float32 and normalise X
X_nn = X_nn.astype(np.float32) / 255
y_nn = y.astype(np.int64)
X_nn = X_nn.astype(np.float32)


X_nn shape: (5488, 64, 64, 3)
y shape: (5488,)
y dtype: int64
X_nn dtype: uint8
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42]


In [168]:
"""from cnn import build_cnn
param_grid = {
    "num_filters": [32],
    "kernel_size": [5]
}
fixed_params = {
    "dropout_rate": 0.2,
    "optimizer": "adam",
    "epochs": 10
}

best_score = 0
best_params = None

for num_filters in param_grid["num_filters"]:
    for kernel_size in param_grid["kernel_size"]:
        cv_scores = []
        for train_idx, val_idx in splits:
            X_train, X_val = X_nn[train_idx], X_nn[val_idx]
            y_train = y_nn.iloc[train_idx] if hasattr(y_nn, "iloc") else y_nn[train_idx]
            y_val = y_nn.iloc[val_idx] if hasattr(y_nn, "iloc") else y_nn[val_idx]
            model = build_cnn(
                dropout_rate=fixed_params["dropout_rate"],
                num_filters=num_filters,
                kernel_size=kernel_size,
                optimizer=fixed_params["optimizer"]
            )
            model.fit(X_train, y_train, epochs=fixed_params["epochs"], batch_size=32, verbose=0)
            val_probs = model.predict(X_val)
            val_preds = val_probs.argmax(axis=1)
            acc = np.mean(val_preds == y_val)
            cv_scores.append(acc)
        mean_score = np.mean(cv_scores)
        print(f"Params: nf={num_filters}, ks={kernel_size} | CV Acc: {mean_score:.4f}")
        if mean_score > best_score:
            best_score = mean_score
            best_params = {
                "num_filters": num_filters,
                "kernel_size": kernel_size,
                **fixed_params
            }

print("Best CNN params:", best_params)
print("Best CNN CV accuracy:", best_score)

"""

'from cnn import build_cnn\nparam_grid = {\n    "num_filters": [32],\n    "kernel_size": [5]\n}\nfixed_params = {\n    "dropout_rate": 0.2,\n    "optimizer": "adam",\n    "epochs": 10\n}\n\nbest_score = 0\nbest_params = None\n\nfor num_filters in param_grid["num_filters"]:\n    for kernel_size in param_grid["kernel_size"]:\n        cv_scores = []\n        for train_idx, val_idx in splits:\n            X_train, X_val = X_nn[train_idx], X_nn[val_idx]\n            y_train = y_nn.iloc[train_idx] if hasattr(y_nn, "iloc") else y_nn[train_idx]\n            y_val = y_nn.iloc[val_idx] if hasattr(y_nn, "iloc") else y_nn[val_idx]\n            model = build_cnn(\n                dropout_rate=fixed_params["dropout_rate"],\n                num_filters=num_filters,\n                kernel_size=kernel_size,\n                optimizer=fixed_params["optimizer"]\n            )\n            model.fit(X_train, y_train, epochs=fixed_params["epochs"], batch_size=32, verbose=0)\n            val_probs = model.

In [169]:
from cnn import build_cnn

# One row of class probabilities per training image, filled fold by fold
cnn_oof_probs = np.zeros((X_nn.shape[0], num_classes))
cnn_accuracies, cnn_biases, cnn_variances, cnn_crossentropies = [], [], [], []

# The fold indices in `splits` are row positions. Index a plain NumPy copy of the
# labels so selection is positional and does not rely on a pandas index whose
# labels happen to equal the positions.
y_nn_values = np.asarray(y_nn)

for train_idx, val_idx in splits:
    X_train, X_val = X_nn[train_idx], X_nn[val_idx]
    y_train, y_val = y_nn_values[train_idx], y_nn_values[val_idx]

    # Build and train a new CNN for each fold
    cnn_model = build_cnn(
                dropout_rate=0.2,
                num_filters=32,
                kernel_size=5
            )
    # Fit the model. No callbacks are passed here, so the validation data is
    # presumably used only for the per-epoch progress report — confirm in build_cnn.
    cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1, validation_data=(X_val, y_val))

    # Predict on the validation fold
    y_pred_probs = cnn_model.predict(X_val)
    # Get the predicted classes
    y_pred = y_pred_probs.argmax(axis=1)
    # Store the out-of-fold predictions
    cnn_oof_probs[val_idx] = y_pred_probs

    # Calculate metrics
    acc, bias, var, ce = calculate_metrics(y_val, y_pred, y_pred_probs)
    cnn_accuracies.append(acc)
    cnn_biases.append(bias)
    cnn_variances.append(var)
    cnn_crossentropies.append(ce)

print(f"CNN Accuracies: {cnn_accuracies}")
print(f"Mean Accuracy: {np.mean(cnn_accuracies):.4f}")
print(f"CNN Biases: {cnn_biases}")
print(f"Mean Bias: {np.mean(cnn_biases):.4f}")
print(f"CNN Variances: {cnn_variances}")
print(f"Mean Variance: {np.mean(cnn_variances):.4f}")
print(f"CNN Cross-Entropy Losses: {cnn_crossentropies}")
print(f"Mean Cross-Entropy Loss: {np.mean(cnn_crossentropies):.4f}")



Epoch 1/10
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.1180 - loss: 3.4501 - val_accuracy: 0.5109 - val_loss: 1.7852
Epoch 2/10
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.5612 - loss: 1.5328 - val_accuracy: 0.8179 - val_loss: 0.6463
Epoch 3/10
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.7998 - loss: 0.6636 - val_accuracy: 0.9016 - val_loss: 0.3782
Epoch 4/10
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.8889 - loss: 0.3601 - val_accuracy: 0.9208 - val_loss: 0.2946
Epoch 5/10
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - accuracy: 0.9185 - loss: 0.2469 - val_accuracy: 0.9454 - val_loss: 0.2381
Epoch 6/10
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - accuracy: 0.9557 - loss: 0.1490 - val_accuracy: 0.9426 - val_loss: 0.2531
Epoch 7/10
[1m138/138

In [170]:
# Build the meta-model's training matrix by placing the three base models'
# out-of-fold class-probability blocks (SVM, RF, CNN) side by side.
X_stacking = np.concatenate([svm_oof_probs, rf_oof_probs, cnn_oof_probs], axis=1)

## Setup & Train Stacking Classifier

In [171]:
# Stratified K-Folds cross-validator for evaluating the meta-classifier. It uses a
# different random_state (seed + 7) from the splitter used for the base models,
# so the meta-level folds are drawn independently of the base-model folds.
stacking_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed + 7)


In [172]:
print(f"X_stacking shape: {X_stacking.shape}")

X_stacking shape: (5488, 129)


In [173]:
# Per-fold metric histories for the meta-classifier
validation_scores = []
stacking_accuracies, stacking_biases = [], []
stacking_variances, stacking_crossentropies = [], []

for train_idx, val_idx in stacking_cv.split(X_stacking, y):
    # Training and validation portions of the stacked probability features
    X_train, y_train = X_stacking[train_idx], y.iloc[train_idx]
    X_val, y_val = X_stacking[val_idx], y.iloc[val_idx]

    # A new logistic-regression meta-classifier is fitted for each fold
    meta_classifier = LogisticRegression(max_iter=1000, random_state=seed, C=10, penalty="l2")
    meta_classifier.fit(X_train, y_train)

    # Score the held-out fold
    y_pred_proba = meta_classifier.predict_proba(X_val)
    y_pred = meta_classifier.predict(X_val)

    # Record this fold's metrics
    acc, bias, var, ce = calculate_metrics(y_val, y_pred, y_pred_proba)
    stacking_accuracies.append(acc)
    stacking_biases.append(bias)
    stacking_variances.append(var)
    stacking_crossentropies.append(ce)

print(f"Validation Accuracies: {stacking_accuracies}")
print(f"Mean Accuracy: {np.mean(stacking_accuracies):.4f}")
print(f"Biases: {stacking_biases}")
print(f"Mean Bias: {np.mean(stacking_biases):.4f}")
print(f"Variances: {stacking_variances}")
print(f"Mean Variance: {np.mean(stacking_variances):.4f}")
print(f"Stacking Cross-Entropy Losses: {stacking_crossentropies}")
print(f"Mean Cross-Entropy Loss: {np.mean(stacking_crossentropies):.4f}")


Validation Accuracies: [0.97632058287796, 0.970856102003643, 0.9754098360655737, 0.9708295350957156, 0.97538742023701]
Mean Accuracy: 0.9738
Biases: [0.029321283887853482, 0.03141951399793983, 0.02977982676325855, 0.030899207133407353, 0.030568753886076872]
Mean Bias: 0.0304
Variances: [0.021622438760832263, 0.0216650178474359, 0.021497890391464283, 0.021536767241778578, 0.021615689147973263]
Mean Variance: 0.0216
Stacking Cross-Entropy Losses: [0.09728800327747238, 0.11048873977705401, 0.09588311260398208, 0.10429084308763643, 0.10538783508076081]
Mean Cross-Entropy Loss: 0.1027


### Final Models for Testing

In [174]:
# Retrain base models on the full training data
# (these fitted models produce the test-time inputs of the meta-classifier).
svm_pipeline.fit(X_final, y)
rf_pipeline.fit(X_final, y)
# Same CNN settings as in the out-of-fold loop, now trained on every training image.
cnn_model = build_cnn(
    dropout_rate=0.2,
    num_filters=32,
    kernel_size=5
)
cnn_model.fit(X_nn, y_nn, epochs=10, batch_size=32, verbose=1)

# Retrain the meta classifier on the full OOF predictions
# (same hyper-parameters as the meta-classifier evaluated in the stacking CV loop).
final_classifier = LogisticRegression(max_iter=1000,
                                      random_state=seed, C=10, penalty="l2")
final_classifier.fit(X_stacking, y)

Epoch 1/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.1745 - loss: 3.2057
Epoch 2/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.6349 - loss: 1.2763
Epoch 3/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - accuracy: 0.8359 - loss: 0.5391
Epoch 4/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - accuracy: 0.9108 - loss: 0.3101
Epoch 5/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - accuracy: 0.9452 - loss: 0.1750
Epoch 6/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - accuracy: 0.9559 - loss: 0.1399
Epoch 7/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 26ms/step - accuracy: 0.9694 - loss: 0.0986
Epoch 8/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - accuracy: 0.9800 - loss: 0.0656
Epoch 9/10
[1m172/172[0m [32m

### Read in Test Data

In [175]:
# Location of the test feature table (relative to the notebook's working directory).
test_path = "data/test/Features/all_features.csv"
# Expected to contain the ClassId, image_path and id columns read in the next cell.
test_df = pd.read_csv(test_path)

In [176]:
# ClassId column of the test table (not referenced again in the cells shown in this notebook).
y_test = test_df["ClassId"]
# The id column is still present here; it is removed by the column selection below.
X_test = test_df.drop(columns=["ClassId", "image_path"])

# Check for NaN values
print(X_test.isnull().sum().sum())

# Get id's
test_ids = test_df["id"]

# Get the same features as the training data (same columns, same order as X_final)
X_test = X_test[X_final_columns]

# Image array for the CNN.
# NOTE(review): assumes its rows are in the same order as test_df — confirm.
X_test_nn = np.load("X_nn_test.npy")

# Convert to float32 and scale pixel values by 1/255, as done for the training images
X_test_nn = X_test_nn.astype(np.float32) / 255.0


0


In [177]:
# Generate test predictions from base models
# (class probabilities from the models refitted on the full training data).
svm_test_probs = svm_pipeline.predict_proba(X_test)
rf_test_probs = rf_pipeline.predict_proba(X_test)
cnn_test_probs = cnn_model.predict(X_test_nn)


[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step


In [178]:
# Combine test predictions into a single feature set.
# The block order (SVM, RF, CNN) matches the order used to build X_stacking,
# which is what final_classifier was fitted on.
X_test_stacking = np.concatenate([svm_test_probs, rf_test_probs, cnn_test_probs], axis=1)

# Use the metamodel to predict final test outputs
test_predictions = final_classifier.predict(X_test_stacking)

In [179]:
# Destination of the submission file
submission_file_path = "submission.csv"

# Two-column table: sample id and the class predicted by the meta-model
submission_columns = {"id": test_ids, "ClassId": test_predictions}
submission_df = pd.DataFrame(submission_columns)

# Write the table without the DataFrame index
submission_df.to_csv(submission_file_path, index=False)

print(f"Submission file saved to {submission_file_path}")

Submission file saved to submission.csv


In [180]:
# Show (rows, columns) of the submission table as a final sanity check.
submission_df.shape

(2353, 2)