In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import LocalOutlierFactor
from sklearn.manifold import Isomap
from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import QuantileTransformer
from scipy import stats

import os
# Print the full path of every file available under the Kaggle input directory.
for input_path in (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
):
    print(input_path)

/kaggle/input/demodata/test.csv
/kaggle/input/sml-project/sample_submission.csv
/kaggle/input/sml-project/train.csv
/kaggle/input/sml-project/test.csv


# Data Analysis

In [2]:
# Load the labelled training set and the unlabelled test features.
# NOTE(review): the test file is read from the "demodata" dataset while the
# training file comes from "sml-project" - confirm this is the intended test set.
train_df = pd.read_csv('/kaggle/input/sml-project/train.csv') #1216 samples
test_df = pd.read_csv('/kaggle/input/demodata/test.csv')

#Splitting into features & categories
train_x_df = train_df.drop(['ID', 'category'], axis=1)
train_y_df = train_df['category']
test_df = test_df.drop('ID', axis=1)

#Counting the samples of each category, keyed in order of first appearance.
#Iterating over the values (instead of indexing by position) does not depend
#on the Series having a default 0..n-1 index.
category_count = {}
for category in train_y_df:
    category_count[category] = category_count.get(category, 0) + 1

#Total number of labelled samples
total = sum(category_count.values())

#Converting category names into int labels (0, 1, 2, ... in order of first appearance)
labels = {category: label_id for label_id, category in enumerate(category_count)}

#Replacing category names in train_y_df with corresponding labels.
#Every category in the Series is a key of `labels`, so the mapping is total.
train_y_df = train_y_df.map(labels)
print(train_y_df)

0        0
1        1
2        2
3        3
4        4
        ..
1211    14
1212     1
1213     1
1214     6
1215     6
Name: category, Length: 1216, dtype: int64


# Normalising Data

In [3]:
def normalise_data(x_train, x_test=None):
    """Standardise features with a StandardScaler fitted on ``x_train`` only.

    Parameters:
        x_train: training features used to fit the scaler.
        x_test: optional features transformed with the already-fitted scaler.

    Returns:
        (x_train_scaled, x_test_scaled); the second item is None when no
        ``x_test`` was supplied.
    """
    scaler = preprocessing.StandardScaler()
    x_train_scaled = scaler.fit_transform(x_train)

    # The test data reuses the training statistics; it is never used for fitting.
    x_test_scaled = None if x_test is None else scaler.transform(x_test)

    return x_train_scaled, x_test_scaled

# Outlier Detection

In [4]:
def lof(x_train, y_train, n = 20):
    """Drop the rows that LocalOutlierFactor flags as outliers.

    Parameters:
        x_train: feature array the detector is fitted on.
        y_train: labels, indexed with the same row selection as ``x_train``.
        n: number of neighbours passed to LocalOutlierFactor.

    Returns:
        (x_train, y_train) restricted to the rows predicted as inliers.
        The number of detected outliers is printed as a side effect.
    """
    detector = LocalOutlierFactor(n_neighbors=n)
    flags = detector.fit_predict(x_train)

    # Rows flagged 1 are kept; rows flagged -1 are counted as outliers.
    keep = np.nonzero(flags == 1)
    x_kept = x_train[keep]
    y_kept = y_train[keep]

    print(f"{np.sum(flags == -1)} outliers detected.")
    return x_kept, y_kept

# LDA

In [5]:
def lda(x_train, y_train, x_val, y_val, n, x_test=None, y_test=None):
    """Project the data with Linear Discriminant Analysis fitted on the training split.

    Parameters:
        x_train, y_train: features and labels used to fit the projection.
        x_val: validation features, projected with the fitted model.
        y_val: unused; kept so existing callers keep working.
        n: number of discriminant components to keep.
        x_test: optional test features, projected with the fitted model.
        y_test: unused; kept so existing callers keep working.

    Returns:
        (x_train, x_val, x_test) after projection; ``x_test`` is returned
        as None when it was not supplied.
    """
    projector = LinearDiscriminantAnalysis(solver='svd', n_components = n)
    x_train = projector.fit_transform(x_train, y_train)
    x_val = projector.transform(x_val)

    # Transforming needs features only, so the test set is projected even when
    # its labels are unknown (previously it was silently returned untransformed
    # unless y_test was also passed).
    if x_test is not None:
        x_test = projector.transform(x_test)

    return x_train, x_val, x_test

# PCA

In [6]:
def pca(x_train, x_val, n='mle', x_test=None):
    """Fit PCA on ``x_train`` and project every supplied split with it.

    Parameters:
        x_train: features used to fit the decomposition.
        x_val: validation features, projected with the fitted model.
        n: value forwarded to ``PCA(n_components=...)``.
        x_test: optional test features, projected with the fitted model.

    Returns:
        (components, x_train, x_val, x_test): the fitted ``components_``
        first, followed by the projected splits. ``x_test`` stays None when
        it was not supplied.
    """
    reducer = PCA(n_components=n)
    train_projected = reducer.fit_transform(x_train)
    val_projected = reducer.transform(x_val)
    test_projected = reducer.transform(x_test) if x_test is not None else x_test

    return reducer.components_, train_projected, val_projected, test_projected

# Models

In [7]:
# Width of the softmax output layer shared by all ten networks.
_NUM_CLASSES = 20


def _dense(units, regularizer=None):
    """Return a ReLU Dense layer with an optional kernel regularizer."""
    return tf.keras.layers.Dense(units, activation='relu', kernel_regularizer=regularizer)


def _dropout(rate, seed):
    """Return a seeded Dropout layer."""
    return tf.keras.layers.Dropout(rate=rate, seed=seed)


def _orthogonal(factor):
    """Return a column-mode orthogonal kernel regularizer with the given factor."""
    return tf.keras.regularizers.OrthogonalRegularizer(factor=factor, mode="columns")


def _l2(factor):
    """Return an L2 kernel regularizer with the given factor."""
    return tf.keras.regularizers.l2(factor)


def _build_classifier(hidden_layers):
    """Wrap ``hidden_layers`` into a compiled Sequential classifier.

    The network is Flatten -> hidden_layers -> Dense(_NUM_CLASSES, softmax),
    compiled with sparse categorical cross-entropy, Adam and an accuracy metric.

    Parameters:
        hidden_layers: list of already-constructed Keras layers, in order.

    Returns:
        The compiled ``tf.keras.models.Sequential`` model.
    """
    network = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        *hidden_layers,
        tf.keras.layers.Dense(_NUM_CLASSES, activation='softmax'),
    ])
    network.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


def model_1():
    """128 (orthogonal 0.4) -> dropout 0.5 -> 64 (L2 0.075) -> 32 -> softmax."""
    # The session is cleared before any layer is constructed so that the
    # layers of this model are created against the fresh Keras state.
    tf.keras.backend.clear_session()
    return _build_classifier([
        _dense(128, _orthogonal(0.4)),
        _dropout(0.5, 42),
        _dense(64, _l2(0.075)),
        _dense(32),
    ])


def model_2():
    """128 (L2 0.05) -> 64 (L2 0.025) -> 64 (L2 0.01) -> softmax; no dropout."""
    return _build_classifier([
        _dense(128, _l2(0.05)),
        _dense(64, _l2(0.025)),
        _dense(64, _l2(0.01)),
    ])


def model_3():
    """128 (orthogonal 0.1) -> dropout 0.4 -> 64 (L2 0.01) -> softmax."""
    return _build_classifier([
        _dense(128, _orthogonal(0.1)),
        _dropout(0.4, 42),
        _dense(64, _l2(0.01)),
    ])


def model_4():
    """256 (orthogonal 0.1) -> dropout 0.4 -> 64 (L2 0.025) -> softmax."""
    return _build_classifier([
        _dense(256, _orthogonal(0.1)),
        _dropout(0.4, 42),
        _dense(64, _l2(0.025)),
    ])


def model_5():
    """128 (orthogonal 0.1) -> dropout 0.4 -> 64 (L2 0.025) -> softmax."""
    return _build_classifier([
        _dense(128, _orthogonal(0.1)),
        _dropout(0.4, 42),
        _dense(64, _l2(0.025)),
    ])


def model_6():
    """256 (orthogonal 0.25) -> dropout 0.4 -> 64 (L2 0.075) -> dropout 0.2 -> 32 (L2 0.05) -> softmax."""
    # The session is cleared before any layer is constructed so that the
    # layers of this model are created against the fresh Keras state.
    tf.keras.backend.clear_session()
    return _build_classifier([
        _dense(256, _orthogonal(0.25)),
        _dropout(0.4, 9),
        _dense(64, _l2(0.075)),
        _dropout(0.2, 9),
        _dense(32, _l2(0.05)),
    ])


def model_7():
    """256 (L2 0.1) -> dropout 0.4 -> 128 (L2 0.05) -> 32 (L2 0.001) -> softmax."""
    return _build_classifier([
        _dense(256, _l2(0.1)),
        _dropout(0.4, 3),
        _dense(128, _l2(0.05)),
        _dense(32, _l2(0.001)),
    ])


def model_8():
    """128 (orthogonal 0.1) -> dropout 0.3 -> 64 (L2 0.005) -> softmax."""
    return _build_classifier([
        _dense(128, _orthogonal(0.1)),
        _dropout(0.3, 42),
        _dense(64, _l2(0.005)),
    ])


def model_9():
    """256 (orthogonal 0.2) -> dropout 0.2 -> 128 (L2 0.075) -> softmax."""
    return _build_classifier([
        _dense(256, _orthogonal(0.2)),
        _dropout(0.2, 70),
        _dense(128, _l2(0.075)),
    ])


def model_10():
    """128 (orthogonal 0.1) -> dropout 0.4 -> 64 (L2 0.001) -> softmax."""
    return _build_classifier([
        _dense(128, _orthogonal(0.1)),
        _dropout(0.4, 105),
        _dense(64, _l2(0.001)),
    ])

# Data cleanup and pipeline definitions

In [8]:
k = 5  # number of cross-validation folds used by the training cells
NN_EPOCHS = 10  # training epochs for every Keras classifier
LDA_COMPONENTS = 19  # discriminant components kept by every pipeline

x_train_np = train_x_df.to_numpy() #1216x4096
y_train_np = train_y_df.to_numpy().reshape(-1,1) #1216x1
x_test_np = test_df.to_numpy()

#LOF Outlier detection and removal from training dataset
#NOTE: despite its name, this value is passed to lof() as the number of neighbours.
n_components_lof = 2
x_train_np, y_train_np = lof(x_train_np, y_train_np, n_components_lof)

#K-Means Clustering
#LOF and K-Means are fitted here on all remaining training rows, outside the
#pipelines, so they are not refit inside each cross-validation fold.
kmeans_seed = 42
#n_init is pinned because scikit-learn changed its default from 10 to 'auto' in 1.4.
kmeans = KMeans(n_clusters=15, n_init=10, random_state = kmeans_seed)
kmeans.fit(x_train_np)
x_train_clusters = kmeans.predict(x_train_np)
x_test_clusters = kmeans.predict(x_test_np)

#Adding cluster labels as feature column
x_train_np = np.c_[x_train_np, x_train_clusters]
x_test_np = np.c_[x_test_np, x_test_clusters]


def _make_nn(build_fn):
    """Wrap a Keras model builder as a scikit-learn classifier trained for NN_EPOCHS epochs."""
    return tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn = build_fn, epochs = NN_EPOCHS)


def _make_pipeline(nn, pca_components, preprocess=None):
    """Build a [preprocess ->] PCA -> LDA -> neural-network pipeline.

    Parameters:
        nn: the wrapped Keras classifier used as the final step.
        pca_components: number of principal components kept before LDA.
        preprocess: optional ``(name, transformer)`` step placed before PCA.

    Returns:
        The assembled, unfitted ``Pipeline``.
    """
    steps = [] if preprocess is None else [preprocess]
    steps.extend([
        ('pca', PCA(n_components=pca_components)),
        ('lda', LinearDiscriminantAnalysis(solver='svd', n_components=LDA_COMPONENTS)),
        ('nn', nn),
    ])
    return Pipeline(steps)


nn_1 = _make_nn(model_1)
nn_2 = _make_nn(model_2)
nn_3 = _make_nn(model_3)
nn_4 = _make_nn(model_4)
nn_5 = _make_nn(model_5)
nn_6 = _make_nn(model_6)
nn_7 = _make_nn(model_7)
nn_8 = _make_nn(model_8)
nn_9 = _make_nn(model_9)
nn_10 = _make_nn(model_10)

#Pipelines for the ten neural networks
pipe_1 = _make_pipeline(nn_1, 150)
pipe_2 = _make_pipeline(nn_2, 256)
pipe_3 = _make_pipeline(nn_3, 100, ('normalisation', preprocessing.StandardScaler()))
pipe_4 = _make_pipeline(nn_4, 100, ('quantile', QuantileTransformer(n_quantiles = 500, random_state = 2)))
pipe_5 = _make_pipeline(nn_5, 100)
pipe_6 = _make_pipeline(nn_6, 200)
pipe_7 = _make_pipeline(nn_7, 150)
pipe_8 = _make_pipeline(nn_8, 100, ('normalisation', preprocessing.StandardScaler()))
pipe_9 = _make_pipeline(nn_9, 150, ('normalisation', preprocessing.StandardScaler()))
pipe_10 = _make_pipeline(nn_10, 150, ('quantile', QuantileTransformer(n_quantiles = 500, random_state = 42)))

45 outliers detected.




# Training the Neural Networks 

In [9]:
#NN_1
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
# NOTE(review): with an integer cv scikit-learn only stratifies the folds when
# it recognises the estimator as a classifier - confirm which splitter is used here.
print("Training Neural Network #1")
results_1 = cross_validate(
    estimator=pipe_1,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_1.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_1 = np.reshape(pipe_1.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_1['train_score'].mean())
print("Testing score:", results_1['test_score'].mean())

Training Neural Network #1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.9359523892402649
Testing score: 0.7591816782951355


In [10]:
#NN_2
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #2")
results_2 = cross_validate(
    estimator=pipe_2,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_2.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_2 = np.reshape(pipe_2.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_2['train_score'].mean())
print("Testing score:", results_2['test_score'].mean())

Training Neural Network #2
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.9752341985702515
Testing score: 0.7848154187202454


In [11]:
#NN_3
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #3")
results_3 = cross_validate(
    estimator=pipe_3,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_3.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_3 = np.reshape(pipe_3.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_3['train_score'].mean())
print("Testing score:", results_3['test_score'].mean())


Training Neural Network #3
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.9024361729621887
Testing score: 0.7591853141784668


In [12]:
#NN_4
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #4")
results_4 = cross_validate(
    estimator=pipe_4,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_4.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_4 = np.reshape(pipe_4.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_4['train_score'].mean())
print("Testing score:", results_4['test_score'].mean())

Training Neural Network #4
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.9257071495056153
Testing score: 0.7796835660934448


In [13]:
#NN_5
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #5")
results_5 = cross_validate(
    estimator=pipe_5,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_5.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_5 = np.reshape(pipe_5.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_5['train_score'].mean())
print("Testing score:", results_5['test_score'].mean())

Training Neural Network #5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.9212215662002563
Testing score: 0.7813857078552247


In [14]:
#NN_6
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #6")
results_6 = cross_validate(
    estimator=pipe_6,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_6.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_6 = np.reshape(pipe_6.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_6['train_score'].mean())
print("Testing score:", results_6['test_score'].mean())

Training Neural Network #6
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.9415008783340454
Testing score: 0.7532169580459595


In [15]:
#NN_7
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #7")
results_7 = cross_validate(
    estimator=pipe_7,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_7.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_7 = np.reshape(pipe_7.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_7['train_score'].mean())
print("Testing score:", results_7['test_score'].mean())

Training Neural Network #7
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.8979519605636597
Testing score: 0.7326968550682068


In [16]:
#NN_8
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #8")
results_8 = cross_validate(
    estimator=pipe_8,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_8.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_8 = np.reshape(pipe_8.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_8['train_score'].mean())
print("Testing score:", results_8['test_score'].mean())

Training Neural Network #8
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.908840274810791
Testing score: 0.7583342552185058


In [17]:
#NN_9
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #9")
results_9 = cross_validate(
    estimator=pipe_9,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_9.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_9 = np.reshape(pipe_9.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_9['train_score'].mean())
print("Testing score:", results_9['test_score'].mean())

Training Neural Network #9
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.9111886501312256
Testing score: 0.736119294166565


In [18]:
#NN_10
# Score the pipeline with k-fold cross-validation, then refit it on every
# training row and predict the test set.
print("Training Neural Network #10")
results_10 = cross_validate(
    estimator=pipe_10,
    X=x_train_np,
    y=y_train_np.reshape(-1),
    cv=k,
    return_train_score=True,
)
pipe_10.fit(X=x_train_np, y=y_train_np.reshape(-1))
y_test_pred_10 = np.reshape(pipe_10.predict(x_test_np), (-1, 1))

# 'test_score' holds the held-out fold scores, not scores on x_test_np.
print("Training score:", results_10['train_score'].mean())
print("Testing score:", results_10['test_score'].mean())

Training Neural Network #10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.9485494256019592
Testing score: 0.773733401298523


# Generation of final predictions by calculating modes 

In [19]:
#Stacking the ten per-model predictions side by side:
#one row per test sample, one column per model.
model_predictions = np.hstack([
    y_test_pred_1, y_test_pred_2, y_test_pred_3, y_test_pred_4, y_test_pred_5,
    y_test_pred_6, y_test_pred_7, y_test_pred_8, y_test_pred_9, y_test_pred_10,
])

#Majority vote per sample: count the votes each label id received.
#argmax returns the first maximum, so ties go to the smallest label id,
#which is the same tie-break the per-row scipy.stats.mode call applied.
label_ids = np.arange(len(labels))
vote_counts = (model_predictions[:, :, np.newaxis] == label_ids).sum(axis=1)

#Reshaping prediction array
y_test_pred = vote_counts.argmax(axis=1).reshape(-1,1)
print(x_test_np.shape, y_test_pred.shape)

#Converting the voted int labels back into category names
category_names = list(labels.keys())
category_ids = list(labels.values())
id_to_category = dict(zip(category_ids, category_names))
y_pred = np.array(
    [id_to_category[label_id] for label_id in y_test_pred.ravel().tolist()]
).reshape(-1,1)

#Building the submission table; IDs are regenerated as 0..n-1 in row order.
y_pred = np.c_[np.arange(0, y_test_pred.shape[0]), y_pred].reshape(-1,2)
y_pred_df = pd.DataFrame(y_pred, columns=['ID', 'Category'])
#NOTE(review): to_csv also writes the DataFrame index as an extra unnamed first
#column - confirm the submission format accepts it (otherwise pass index=False).
y_pred_df.to_csv('/kaggle/working/output_final_final.csv')
print("All done!")

(415, 4097) (415, 1)
All done!
