In [121]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import os
import p_power
from scikeras.wrappers import KerasClassifier
from keras.initializers import Zeros
from tensorflow.keras.utils import to_categorical
from keras import Input, optimizers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score, roc_curve, auc
from sklearn import svm
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split, KFold, cross_val_score, cross_validate
from sklearn import preprocessing, linear_model, tree
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier


In [77]:
# np.random.seed(7)
%pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


1. Read in Data

In [3]:
df = pd.read_csv('../data/liqdata_augmented.csv')

In [4]:
print(df.describe())

         velocity  weightpercentw  diametermm  thicknessmm    heightin  \
count  256.000000      256.000000  256.000000   256.000000  256.000000   
mean     4.447728        0.375000    0.017500     0.002250   43.750000   
std      1.424205        0.365578    0.005601     0.001349   25.636983   
min      2.545294        0.100000    0.010000     0.000500   13.000000   
25%      3.487517        0.137500    0.013750     0.001250   25.000000   
50%      4.446090        0.200000    0.017500     0.002250   40.500000   
75%      5.406302        0.437500    0.021250     0.003250   59.250000   
max      6.353439        1.000000    0.025000     0.004000   81.000000   

       craterdiameterfromouteredgesmm  craterdiameterfromouteredgesmmno  \
count                      256.000000                        256.000000   
mean                        53.556250                         64.258594   
std                         21.430322                         25.712146   
min                         13.50

2. Separate Column Paramters into separate int pd vects and scalars

In [5]:
out_full = df.iloc[:, 0]
vel = df.iloc[:, 2]
wp = df.iloc[:, 3]
d_mm = df.iloc[:, 4]
t_mm = df.iloc[:, 5]
sigma = df.iloc[:, 24]
nu = df.iloc[:, 25]
pi1 = df.iloc[:, 26]
pi2 = df.iloc[:, 27]
pi3 = df.iloc[:, 28]
pi4 = df.iloc[:, 29]
pi5 = df.iloc[:, 30]
pi6 = df.iloc[:, 31]
out_trunk = df.iloc[:, 41]
rho = 1000
g = 9.82

print(out_trunk)

0                      Splash
1                      Splash
2                      Splash
3      Broken or Intact Sheet
4                      Splash
                ...          
251                      Lump
252                      Lump
253                      Lump
254                      Lump
255                      Lump
Name: newcat1, Length: 256, dtype: object


3. Concat pi groups together and the dimension paramters together

In [6]:
params = pd.concat([vel, wp, d_mm, t_mm, sigma, nu], axis=1)
print(params)

     velocity  weightpercentw  diametermm  thicknessmm  sigma    nu
0    6.353439             0.1       0.010       0.0005     13  0.23
1    6.353439             0.1       0.010       0.0015     13  0.23
2    6.353439             0.1       0.010       0.0030     13  0.23
3    6.353439             0.1       0.010       0.0040     13  0.23
4    6.353439             0.1       0.015       0.0005     13  0.23
..        ...             ...         ...          ...    ...   ...
251  2.545294             1.0       0.020       0.0040    106  2.10
252  2.545294             1.0       0.025       0.0005    106  2.10
253  2.545294             1.0       0.025       0.0015    106  2.10
254  2.545294             1.0       0.025       0.0030    106  2.10
255  2.545294             1.0       0.025       0.0040    106  2.10

[256 rows x 6 columns]


In [7]:
pi_groups = pd.concat([pi1, pi2, pi3, pi4, pi5, pi6], axis=1)
print(pi_groups)

             pi1           pi2       pi3       pi4         pi5         pi6
0    3105.091400  2.457000e+09  0.061437  0.050000   13.750660  275.013180
1    3105.091400  2.457000e+09  0.552930  0.150000   40.889824  272.598820
2    3105.091400  2.457000e+09  2.211720  0.300000   80.716718  269.055720
3    3105.091400  2.457000e+09  3.931947  0.400000  106.697760  266.744380
4    3105.091400  1.092000e+09  0.061437  0.033333   13.750660  412.519770
..           ...           ...       ...       ...         ...         ...
251    61.118151  6.009071e+07  0.384581  0.200000    4.491864   22.459320
252    61.118151  3.845805e+07  0.006009  0.020000    0.600072   30.003622
253    61.118151  3.845805e+07  0.054082  0.060000    1.765548   29.425803
254    61.118151  3.845805e+07  0.216327  0.120000    3.431956   28.599632
255    61.118151  3.845805e+07  0.384581  0.160000    4.491864   28.074151

[256 rows x 6 columns]


4. Create the Model

In [8]:
def train_and_predict_using_model(model_name = "", model =None):
    model.fit(X_train, Y_train)
    Y_pred_train = model.predict(X_train)
    cm_train = confusion_matrix(Y_train, Y_pred_train)
    print(model_name)
    print("====================================")
    print("Training Confusion Matrix: ")
    print(cm_train)
    acc_train = accuracy_score(Y_train, Y_pred_train)
    
    print("Training Accuracy: %.2f%%" % (acc_train*100))
    print("====================================")
    
    Y_pred = model.predict(X_test)
    cm_test = confusion_matrix(Y_test, Y_pred)
    print("Testing Confusion Matrix: ")
    print(cm_test)
    acc_test = acc_train = accuracy_score(Y_test, Y_pred)
    
    print("Testing Accuracy: %.2f%%" % (acc_test*100))
    print("====================================")

In [9]:
def train_and_predict_using_pimodel(model_name = "", model =None):
    model.fit(scaled_X_train_pi, Y_train_pi)
    Y_pred_train_pi = model.predict(scaled_X_train_pi)
    cm_train_pi = confusion_matrix(Y_train_pi, Y_pred_train_pi)
    print(model_name)
    print("====================================")
    print("Training Confusion Matrix: ")
    print(cm_train_pi)
    acc_train = (np.trace(cm_train_pi)) / np.sum(np.sum(cm_train_pi))
    
    print("Training Accuracy: %.2f%%" % (acc_train*100))
    print("====================================")
    
    Y_pred_pi = model.predict(scaled_X_test_pi)
    cm_test_pi = confusion_matrix(Y_test_pi, Y_pred_pi)
    print("Testing Confusion Matrix: ")
    print(cm_test_pi)
    acc_test = acc_train = np.trace(cm_test_pi) / np.sum(np.sum(cm_test_pi))
    
    print("Testing Accuracy: %.2f%%" % (acc_test*100))
    print("====================================")

In [10]:
X_train, X_test, Y_train, Y_test = train_test_split(params, out_trunk, test_size=0.2, random_state=42)
X_train_pi, X_test_pi, Y_train_pi, Y_test_pi = train_test_split(pi_groups, out_trunk, test_size=0.2, random_state=0)

# scaler = preprocessing.StandardScaler().fit(X_train)
# scaled_X_train = scaler.transform(X_train)
# scaled_X_test = scaler.transform(X_test)

scaler_pi = preprocessing.StandardScaler().fit(X_train_pi)
scaled_X_train_pi = scaler_pi.transform(X_train_pi)
scaled_X_test_pi = scaler_pi.transform(X_test_pi)
encoder = LabelEncoder()
encoder.fit(out_trunk)
encoded_ytest_pi = encoder.transform(Y_test_pi)
encoded_ytrain_pi = encoder.transform(Y_train_pi)
encoded_y_pi = encoder.transform(out_trunk)
# One-hot encode the target variable
encoded_ytrain_pi_onehot = to_categorical(encoded_ytrain_pi)
encoded_ytest_pi_onehot = to_categorical(encoded_ytest_pi)
encoded_y_pi_onehot = to_categorical(encoded_y_pi)

In [11]:


print(encoded_y_pi_onehot)



[[0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 ...
 [0. 0. 1. 0.]
 [0. 0. 1. 0.]
 [0. 0. 1. 0.]]


5. Try some stuff


In [12]:
# Logistic Regression
linear_classifier = linear_model.LogisticRegression(random_state=123)
train_and_predict_using_model("Logistic Regression", linear_classifier)

Logistic Regression
Training Confusion Matrix: 
[[54  6  0 20]
 [11 13  0  0]
 [ 0  2 14  0]
 [15  0  0 69]]
Training Accuracy: 73.53%
Testing Confusion Matrix: 
[[10  4  0  6]
 [ 2  6  0  0]
 [ 0  1  2  0]
 [ 4  0  0 17]]
Testing Accuracy: 67.31%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [13]:
linear_classifier_pi = linear_model.LogisticRegression()
train_and_predict_using_pimodel("Pi Logistic Regression", linear_classifier_pi)

Pi Logistic Regression
Training Confusion Matrix: 
[[73  0  0  5]
 [15 10  0  0]
 [ 5  5  6  0]
 [11  0  0 74]]
Training Accuracy: 79.90%
Testing Confusion Matrix: 
[[20  1  0  1]
 [ 3  4  0  0]
 [ 0  1  2  0]
 [ 0  0  0 20]]
Testing Accuracy: 88.46%


Decision Tree



In [14]:
# Decision Tree
decision_tree_clf = tree.DecisionTreeClassifier()
train_and_predict_using_model('Decision Tree', decision_tree_clf)

Decision Tree
Training Confusion Matrix: 
[[80  0  0  0]
 [ 0 24  0  0]
 [ 0  0 16  0]
 [ 0  0  0 84]]
Training Accuracy: 100.00%
Testing Confusion Matrix: 
[[13  3  0  4]
 [ 2  5  1  0]
 [ 0  0  3  0]
 [ 2  0  0 19]]
Testing Accuracy: 76.92%


In [15]:
# Decision Tree and Random Forests
decision_tree_clf_pi = tree.DecisionTreeClassifier()
train_and_predict_using_pimodel('Decision Tree', decision_tree_clf_pi)

Decision Tree
Training Confusion Matrix: 
[[78  0  0  0]
 [ 0 25  0  0]
 [ 0  0 16  0]
 [ 0  0  0 85]]
Training Accuracy: 100.00%
Testing Confusion Matrix: 
[[19  0  0  3]
 [ 1  6  0  0]
 [ 0  0  3  0]
 [ 1  0  0 19]]
Testing Accuracy: 90.38%


In [16]:
model4 = GradientBoostingClassifier(n_estimators=500, learning_rate=1.0, max_depth=6, random_state=357, loss='log_loss', criterion='squared_error', min_samples_split=5, min_samples_leaf=3, max_features=4, max_leaf_nodes=None, min_impurity_decrease=0.0, init=None, subsample=1.0)
train_and_predict_using_pimodel('Gradient Boosting', model4)

# 93.515% accuracy

# model4 = GradientBoostingClassifier(n_estimators=100, learning_rate=1.20, max_depth=6, random_state=0, loss='log_loss', criterion='squared_error', min_samples_split=3, min_samples_leaf=2, max_features=5, max_leaf_nodes=None, min_impurity_decrease=0.0, init=None, subsample=1.0)
# train_and_predict_using_pimodel('Gradient Boosting', model4)

# 93.75% accuracy

# model4 = GradientBoostingClassifier(n_estimators=250, learning_rate=1.255, max_depth=9, random_state=0, loss='log_loss', criterion='friedman_mse', min_samples_split=4, min_samples_leaf=2, max_features=4, max_leaf_nodes=None, min_impurity_decrease=0, init=None, subsample=1.0)
# train_and_predict_using_pimodel('Gradient Boosting', model4)

# model4 = GradientBoostingClassifier(n_estimators=300, learning_rate=1.3, max_depth=12, random_state=357, loss='log_loss', criterion='friedman_mse', min_samples_split=4, min_samples_leaf=2, max_features=4, max_leaf_nodes=None, min_impurity_decrease=0, init=None, subsample=1.0)
# train_and_predict_using_pimodel('Gradient Boosting', model4)

Gradient Boosting
Training Confusion Matrix: 
[[74  0  4  0]
 [ 0 23  2  0]
 [ 0  0 16  0]
 [ 0  0  0 85]]
Training Accuracy: 97.06%
Testing Confusion Matrix: 
[[18  0  1  3]
 [ 1  6  0  0]
 [ 0  0  2  1]
 [ 1  0  0 19]]
Testing Accuracy: 86.54%


Random Forest

In [17]:
forest = RandomForestClassifier(n_estimators=100, random_state=123, max_depth=5, max_features=6)
train_and_predict_using_model('Random Forest', forest)

Random Forest
Training Confusion Matrix: 
[[78  0  0  2]
 [ 2 22  0  0]
 [ 0  0 16  0]
 [ 7  0  0 77]]
Training Accuracy: 94.61%
Testing Confusion Matrix: 
[[15  2  0  3]
 [ 4  3  1  0]
 [ 0  0  3  0]
 [ 2  0  0 19]]
Testing Accuracy: 76.92%


In [18]:
forest = RandomForestClassifier(n_estimators=100, random_state=123, max_depth=5, max_features=8)
train_and_predict_using_pimodel('Random Forest', forest)

Random Forest
Training Confusion Matrix: 
[[77  1  0  0]
 [ 2 23  0  0]
 [ 0  0 16  0]
 [ 0  0  0 85]]
Training Accuracy: 98.53%
Testing Confusion Matrix: 
[[19  0  0  3]
 [ 1  6  0  0]
 [ 0  0  3  0]
 [ 3  0  0 17]]
Testing Accuracy: 86.54%


Neural Net


In [19]:
pi1_wghts = tf.exp(tf.constant([1.0, -1.0, 2.0, 0.0, 0.0, 0.0])) / tf.linalg.norm(tf.exp(tf.constant([1.0, -1.0, 2.0, 0.0, 0.0, 0.0])))
pi2_wghts = tf.exp(tf.constant([1.0, 1.0, 0.0, -2.0, -2.0, 0.0])) / tf.linalg.norm(tf.exp(tf.constant([1.0, 1.0, 0.0, -2.0, -2.0, 0.0])))
pi3_wghts =tf.exp(tf.constant([1.0, 1.0, 0.0, -2.0, 0.0, 2.0])) / tf.linalg.norm(tf.exp(tf.constant([1.0, 1.0, 0.0, -2.0, 0.0, 2.0])))

pi_weights = tf.concat([[pi1_wghts], [pi2_wghts], [pi3_wghts]], 0)

print(pi_weights.shape)

(3, 6)


In [112]:
def train_and_predict_pinets(opt=None, model=None):
    
    if opt is None:
        model.compile(loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    else:
    
        model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['categorical_accuracy'])
    
    model.fit(scaled_X_train_pi, encoded_ytrain_pi_onehot, epochs=200, batch_size=4, verbose=0)
    
    
    Y_pred_train_pi = model.predict(scaled_X_train_pi)

    # Evaluate the model
    scores = model.evaluate(scaled_X_train_pi, encoded_ytrain_pi_onehot)

    print("Neural Network Trainset: \n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

    Y_pred_pi = model.predict(scaled_X_test_pi)

    # Convert predictions to class labels
    Y_pred_pi_labels = np.argmax(Y_pred_pi, axis=1)
    Y_test_pi_labels = np.argmax(encoded_ytest_pi_onehot, axis=1)
    # import pdb; pdb.set_trace()
    cm_test = confusion_matrix(Y_test_pi_labels, Y_pred_pi_labels)
    print("Testing Confusion Matrix: ")
    print(cm_test)
    acc_test = accuracy_score(Y_test_pi_labels, Y_pred_pi_labels)

    print("Testing Accuracy: %.2f%%" % (acc_test * 100))
    print("====================================")
    
    print("Classification Report")
    print(classification_report(Y_test_pi_labels, Y_pred_pi_labels))
    
    print("====================================")
    
    return Y_test_pi_labels, Y_pred_pi_labels

    # print(Y_pred_pi)

In [118]:
# Neural Network
def create_pinet(comp = False):

    pi_net = Sequential()
   
    # 94.23% accuracy
    # pi_net.add(Input(shape=(6,)))
    pi_net.add(Dense(32, activation='silu'))
    pi_net.add(Dense(64, activation='tanh'))
    pi_net.add(Dense(128, activation='leaky_relu'))
    pi_net.add(Dropout(0.5))
    pi_net.add(Dense(64, activation='relu'))
    pi_net.add(Dropout(0.5))
    pi_net.add(Dense(32, activation='gelu'))
    pi_net.add(Dense(4, activation='softmax'))
    if comp is True:
        pi_net.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(), metrics=['categorical_accuracy'])
    
    return pi_net




In [83]:
train_and_predict_pinets(model=create_pinet(False), opt=optimizers.Adam())

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - categorical_accuracy: 0.9548 - loss: 0.0819  
Neural Network Trainset: 
compile_metrics: 96.08%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Testing Confusion Matrix: 
[[20  0  0  2]
 [ 0  7  0  0]
 [ 0  0  3  0]
 [ 1  0  0 19]]
Testing Accuracy: 94.23%
Classification Report
              precision    recall  f1-score   support

           0       0.95      0.91      0.93        22
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00         3
           3       0.90      0.95      0.93        20

    accuracy                           0.94        52
   macro avg       0.96      0.96      0.96        52
weighted avg       0.94      0.94      0.94        52



In [None]:
# 90.625% accuracy
""" pi_net.add(Input(shape=(6,))) 
pi_net.add(Dense(32, activation='leaky_relu'))
pi_net.add(Dense(64, activation='leaky_relu'))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(256, activation='tanh'))
pi_net.add(Dropout(0.5))
pi_net.add(Dense(256, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(64, activation='leaky_relu'))
pi_net.add(Dense(32, activation='leaky_relu'))
pi_net.add(Dense(4, activation='softmax')) """

In [None]:
# 92.19% accuracy
""" pi_net.add(Input(shape=(6,))) 
pi_net.add(Dense(32, activation='leaky_relu'))
pi_net.add(Dense(64, activation='leaky_relu'))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(256, activation='gelu'))
pi_net.add(Dropout(0.5))
pi_net.add(Dense(256, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(64, activation='leaky_relu'))
pi_net.add(Dense(32, activation='leaky_relu'))
pi_net.add(Dense(16))
pi_net.add(Dense(4, activation='softmax')) """

In [733]:
"""  92.31% accuracy
    pi_net.add(Input(shape=(6,)))
    pi_net.add(Dense(32, activation='relu'))
    pi_net.add(Dropout(0.3))
    pi_net.add(Dense(64, activation='relu'))
    pi_net.add(Dropout(0.3))
    pi_net.add(Dense(32, activation='leaky_relu'))
    pi_net.add(Dense(64, activation='leaky_relu'))
    pi_net.add(Dropout(0.3))
    pi_net.add(Dense(128, activation='tanh'))
    pi_net.add(Dropout(0.5))
    pi_net.add(Dense(64, activation='relu'))
    pi_net.add(Dropout(0.5))
    pi_net.add(Dense(32, activation='gelu'))
    pi_net.add(Dense(4, activation='softmax')) """

"  92.31% accuracy\n    pi_net.add(Input(shape=(6,)))\n    pi_net.add(Dense(32, activation='relu'))\n    pi_net.add(Dropout(0.3))\n    pi_net.add(Dense(64, activation='relu'))\n    pi_net.add(Dropout(0.3))\n    pi_net.add(Dense(32, activation='leaky_relu'))\n    pi_net.add(Dense(64, activation='leaky_relu'))\n    pi_net.add(Dropout(0.3))\n    pi_net.add(Dense(128, activation='tanh'))\n    pi_net.add(Dropout(0.5))\n    pi_net.add(Dense(64, activation='relu'))\n    pi_net.add(Dropout(0.5))\n    pi_net.add(Dense(32, activation='gelu'))\n    pi_net.add(Dense(4, activation='softmax')) "

In [None]:
# 93.75% accuracy
""" pi_net.add(Dense(32, activation='leaky_relu'))
pi_net.add(Dense(64, activation='leaky_relu'))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.5))
pi_net.add(Dense(64, activation='relu'))
pi_net.add(Dropout(0.5))
pi_net.add(Dense(32, activation='gelu'))
pi_net.add(Dense(4, activation='softmax')) """

In [None]:
""" # 94.23% accuracy
    pi_net.add(Input(shape=(6,)))
    pi_net.add(Dense(32, activation='silu'))
    pi_net.add(Dense(64, activation='silu'))
    pi_net.add(Dense(128, activation='tanh'))
    pi_net.add(Dropout(0.25))
    pi_net.add(Dense(256, activation='leaky_relu'))
    pi_net.add(Dense(512, activation='silu'))
    pi_net.add(Dropout(0.5))
    pi_net.add(Dense(64, activation='leaky_relu'))
    pi_net.add(Dropout(0.25))
    pi_net.add(Dense(32, activation='gelu'))
    pi_net.add(Dense(4, activation='softmax')) """

In [150]:
# 95.83% accuracy
""" pi_net.add(Input(shape=(6,))) 
pi_net.add(Dense(32, activation='leaky_relu'))
pi_net.add(Dense(64, activation='leaky_relu'))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(256, activation='tanh'))
pi_net.add(Dropout(0.5))
pi_net.add(Dense(128, activation='tanh'))
pi_net.add(Dropout(0.5))
pi_net.add(Dense(64, activation='leaky_relu'))
pi_net.add(Dropout(0.25))
pi_net.add(Dense(32, activation='gelu'))
pi_net.add(Dense(16, activation='leaky_relu'))
pi_net.add(Dense(4, activation='softmax')) """

In [None]:
# 96.15% accuracy 2 wrong
# pi_net.add(Dense(32, activation='silu'))
# pi_net.add(Dense(64, activation='tanh'))
# pi_net.add(Dense(128, activation='leaky_relu'))
# pi_net.add(Dropout(0.5))
# pi_net.add(Dense(64, activation='relu'))
# pi_net.add(Dropout(0.5))
# pi_net.add(Dense(32, activation='gelu'))
# pi_net.add(Dense(4, activation='softmax'))


Keras Classifier

In [123]:
# Use KerasClassifier for scikit-learn compatibility
net = create_pinet(False)
Y_test_pi_labels, Y_pred_pi_labels = train_and_predict_pinets(model=net, opt=optimizers.Adam())
model = KerasClassifier(model=net, epochs=200, batch_size=-1, verbose=1, metrics=['categorical_accuracy'])

# Perform cross-validation
kfold = KFold(n_splits=4, shuffle=True)
results = cross_validate(model, X=pi_groups, y=encoded_y_pi_onehot, cv=kfold, scoring=recall_score(y_true=Y_test_pi_labels, y_pred=Y_pred_pi_labels, average='weighted'))
# recall_score(y_true=Y_test_pi_labels, y_pred=Y_pred_pi_labels, average='weighted')
print(f'Cross-Validation Accuracy: {results.mean():.2f} (+/- {results.std():.2f})')

# train_and_predict_pinets(model=create_pinet(True), opt='adam')

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - categorical_accuracy: 0.9849 - loss: 0.0783  
Neural Network Trainset: 
compile_metrics: 97.55%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Testing Confusion Matrix: 
[[20  0  0  2]
 [ 0  7  0  0]
 [ 0  0  3  0]
 [ 1  0  0 19]]
Testing Accuracy: 94.23%
Classification Report
              precision    recall  f1-score   support

           0       0.95      0.91      0.93        22
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00         3
           3       0.90      0.95      0.93        20

    accuracy                           0.94        52
   macro avg       0.96      0.96      0.96        52
weighted avg       0.94      0.94      0.94        52



InvalidParameterError: The 'scoring' parameter of cross_validate must be a str among {'roc_auc_ovr', 'positive_likelihood_ratio', 'jaccard_micro', 'neg_brier_score', 'fowlkes_mallows_score', 'balanced_accuracy', 'neg_mean_squared_log_error', 'neg_mean_absolute_error', 'precision_weighted', 'explained_variance', 'jaccard_weighted', 'rand_score', 'jaccard', 'recall_macro', 'adjusted_rand_score', 'roc_auc_ovo_weighted', 'normalized_mutual_info_score', 'f1_samples', 'matthews_corrcoef', 'precision_macro', 'f1_macro', 'neg_root_mean_squared_log_error', 'neg_mean_poisson_deviance', 'precision', 'neg_negative_likelihood_ratio', 'precision_micro', 'roc_auc_ovr_weighted', 'f1_micro', 'recall', 'r2', 'neg_log_loss', 'jaccard_samples', 'f1', 'neg_mean_gamma_deviance', 'f1_weighted', 'precision_samples', 'top_k_accuracy', 'neg_median_absolute_error', 'neg_root_mean_squared_error', 'roc_auc', 'completeness_score', 'mutual_info_score', 'neg_mean_squared_error', 'recall_samples', 'homogeneity_score', 'accuracy', 'd2_absolute_error_score', 'adjusted_mutual_info_score', 'jaccard_macro', 'roc_auc_ovo', 'max_error', 'neg_mean_absolute_percentage_error', 'recall_micro', 'recall_weighted', 'average_precision', 'v_measure_score'}, a callable, an instance of 'list', an instance of 'tuple', an instance of 'dict' or None. Got 0.9423076923076923 instead.

Support Vector Machines

In [549]:
clf = svm.SVC()
train_and_predict_using_pimodel('SVM (Classiefier)', clf)

SVM (Classiefier)
Training Confusion Matrix: 
[[74  1  0  1]
 [14 14  0  0]
 [ 6  9  0  0]
 [10  0  0 63]]
Training Accuracy: 78.65%
Testing Confusion Matrix: 
[[23  0  0  1]
 [ 2  2  0  0]
 [ 2  2  0  0]
 [ 4  0  0 28]]
Testing Accuracy: 82.81%


In [551]:
rbf_clf = svm.SVC(kernel='rbf')
train_and_predict_using_pimodel('SVM (Classiefier) - RBF', rbf_clf)

SVM (Classiefier) - RBF
Training Confusion Matrix: 
[[74  1  0  1]
 [14 14  0  0]
 [ 6  9  0  0]
 [10  0  0 63]]
Training Accuracy: 78.65%
Testing Confusion Matrix: 
[[23  0  0  1]
 [ 2  2  0  0]
 [ 2  2  0  0]
 [ 4  0  0 28]]
Testing Accuracy: 82.81%


In [552]:
rbf_svc = svm.SVC(kernel='poly')
train_and_predict_using_model('SVM (Classiefier) - Poly', rbf_svc)

SVM (Classiefier) - Poly
Training Confusion Matrix: 
[[43  0  0 31]
 [20  0  0  0]
 [ 1  0 12  0]
 [13  0  0 59]]
Training Accuracy: 63.69%
Testing Confusion Matrix: 
[[15  0  0 11]
 [12  0  0  0]
 [ 2  0  4  0]
 [ 6  0  0 27]]
Testing Accuracy: 59.74%


In [553]:
rbf_svc = svm.SVC(kernel='sigmoid')
train_and_predict_using_model('SVM (Classiefier) - Sigmoid', rbf_svc)

SVM (Classiefier) - Sigmoid
Training Confusion Matrix: 
[[41  0  0 33]
 [10  0  0 10]
 [ 0  0  0 13]
 [42  0  0 30]]
Training Accuracy: 39.66%
Testing Confusion Matrix: 
[[13  0  0 13]
 [ 5  0  0  7]
 [ 0  0  0  6]
 [17  0  0 16]]
Testing Accuracy: 37.66%


ValueError: The classes, ['Broken or Intact Sheet', 'Crater', 'Lump', 'Splash'], are not in class_weight