In [5]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import numpy as np

dataset_path = '/content/dataset.csv'
dataset = pd.read_csv(dataset_path)


label_encoder = LabelEncoder()
dataset['classification'] = label_encoder.fit_transform(dataset['classification'])

X = dataset.drop(columns=['hash', 'classification'])
y = dataset['classification']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=2)

test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


def permutation_importance(model, X, y, metric, n_repeats=10):
    baseline_score = metric(model, X, y)
    scores = np.zeros((n_repeats, X.shape[1]))

    for i in range(X.shape[1]):
        X_permuted = X.copy()
        for n in range(n_repeats):
            np.random.shuffle(X_permuted[:, i])
            score = metric(model, X_permuted, y)
            scores[n, i] = baseline_score - score

    importances = np.mean(scores, axis=0)
    return importances

def accuracy_metric(model, X, y):
    _, accuracy = model.evaluate(X, y, verbose=0)
    return accuracy

importances = permutation_importance(model, X_test, y_test, accuracy_metric, n_repeats=10)

feature_importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

print(feature_importance_df)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


2000/2000 - 6s - 3ms/step - accuracy: 0.9199 - loss: 0.2060 - val_accuracy: 0.9903 - val_loss: 0.0385
Epoch 2/50
2000/2000 - 4s - 2ms/step - accuracy: 0.9846 - loss: 0.0505 - val_accuracy: 0.9961 - val_loss: 0.0142
Epoch 3/50
2000/2000 - 3s - 2ms/step - accuracy: 0.9907 - loss: 0.0290 - val_accuracy: 0.9977 - val_loss: 0.0083
Epoch 4/50
2000/2000 - 4s - 2ms/step - accuracy: 0.9933 - loss: 0.0215 - val_accuracy: 0.9983 - val_loss: 0.0061
Epoch 5/50
2000/2000 - 4s - 2ms/step - accuracy: 0.9951 - loss: 0.0162 - val_accuracy: 0.9973 - val_loss: 0.0064
Epoch 6/50
2000/2000 - 3s - 2ms/step - accuracy: 0.9951 - loss: 0.0160 - val_accuracy: 0.9980 - val_loss: 0.0051
Epoch 7/50
2000/2000 - 3s - 2ms/step - accuracy: 0.9955 - loss: 0.0146 - val_accuracy: 0.9985 - val_loss: 0.0033
Epoch 8/50
2000/2000 - 5s - 3ms/step - accuracy: 0.9957 - loss: 0.0138 - val_accuracy: 0.9991 - val_loss: 0.0038
Epoch 9/50
2000/2000 - 4s - 2ms/step - accuracy: 0.9961 - loss: 0.0121 - val_accuracy: 0.9988 - val_loss: 0