In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import decomposition
from skimage.filters import gabor_kernel
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.initializers import VarianceScaling

In [None]:
# Read the feature table (X) and the target table (y) from local parquet files,
# using the pyarrow engine for both.
X, y = (
    pd.read_parquet(parquet_path, engine="pyarrow")
    for parquet_path in ("./data/Class1.parquet", "./data/validation.parquet")
)

In [None]:
# Preview the first five rows of the target table.
y.head(n=5)

In [None]:
# Bar chart of the values in column 10 of y (one bar per row) on a single
# wide 12x4-inch axes; tick labels are kept horizontal.
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(12, 4))
y[10].plot.bar(rot=0, ax=axs)


In [None]:
# RandomForest Test
#
# Cross-validates a shallow, class-balanced random forest with 10 stratified
# folds and ROC-AUC scoring. Results (including the fitted estimators) are
# stored in `output`.

# StratifiedKFold and the ROC-AUC scorers need one class label per row.
# When y has several columns, the label is the column holding the row maximum
# (the same rule used for `y_max` in the deep-learning cell); a single-column
# y is used as-is.
y_labels = y.idxmax(axis=1) if y.shape[1] > 1 else y.iloc[:, 0]

# np.unique returns the sorted class names plus, for every row, the position
# of its class in that list (0 .. num_classes - 1).
class_names, y_encoded = np.unique(y_labels, return_inverse=True)
num_classes = len(class_names)

model = RandomForestClassifier(
    random_state=42,
    class_weight="balanced",
    criterion="gini",
    max_depth=3,
    # 'auto' meant 'sqrt' for classifiers and was removed in scikit-learn 1.3.
    max_features="sqrt",
    n_estimators=500,
)

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

# kf.split yields positional row indices, so rows must be taken with .iloc
# (plain X[...] would look the integers up as column labels).
# After the loop these names hold the LAST fold only; cross_validate below
# performs its own splitting and does not use them.
for train_index, val_index in kf.split(X, y_labels):
    x_train_kf, x_val_kf = X.iloc[train_index], X.iloc[val_index]
    y_train_kf, y_val_kf = y.iloc[train_index], y.iloc[val_index]

# One-hot matrix of shape (n_rows, num_classes) built from the integer codes.
y_train_categorical = keras.utils.to_categorical(y_encoded, num_classes)

# The plain 'roc_auc' scorer only handles binary targets; multiclass targets
# need the one-vs-rest variant.
scoring = "roc_auc" if num_classes == 2 else "roc_auc_ovr"
output = cross_validate(model, X, y_labels, cv=kf, scoring=scoring, return_estimator=True)


In [None]:
# Deep Learning Model
#
# Trains a fresh two-layer dense network on every stratified fold of `kf`
# and collects the per-fold accuracy (in percent) in `cvscores`.
cvscores = []

# Stratification needs one label per row: the column of y holding the row maximum.
y_max = y.idxmax(axis=1)

# Layer widths follow the data instead of hard-coded 20 inputs / 5 outputs.
n_features = X.shape[1]
n_outputs = y.shape[1]

for train_idx, test_idx in kf.split(X, y_max):
    # kf.split yields positional row indices; select rows with .iloc and hand
    # plain arrays to Keras.
    x_train, y_train = X.iloc[train_idx].to_numpy(), y.iloc[train_idx].to_numpy()
    x_test, y_test = X.iloc[test_idx].to_numpy(), y.iloc[test_idx].to_numpy()

    # A new model per fold so no weights carry over between folds.
    model = Sequential()
    model.add(
        Dense(
            10,
            input_dim=n_features,
            kernel_regularizer=l2(0.001),
            kernel_initializer=VarianceScaling(),
            activation='sigmoid',
        )
    )
    model.add(
        Dense(
            n_outputs,
            kernel_regularizer=l2(0.01),
            kernel_initializer=VarianceScaling(),
            activation='sigmoid',
        )
    )

    # NOTE(review): sigmoid outputs with binary_crossentropy score every
    # output column independently. If the classes are mutually exclusive,
    # softmax + categorical_crossentropy may be what is intended - confirm.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    # The held-out fold is used both for validation monitoring during
    # training and for the final evaluation below.
    model.fit(
        x_train,
        y_train,
        epochs=50,
        batch_size=25,
        verbose=0,
        validation_data=(x_test, y_test),
    )

    # evaluate() returns [loss, acc] for this compile() call, so the accuracy
    # is at index 1 (index 2 does not exist).
    scores = model.evaluate(x_test, y_test, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
    cvscores.append(scores[1] * 100)

In [None]:
# Display the column labels of the feature table X.
X.columns