In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Model
from keras.layers import Conv1D, Activation, GlobalMaxPool1D, Dense
from keras import Input, optimizers

file_path = '/Users/pratiktale/Research Project/dataset_malwares.csv'
df = pd.read_csv(file_path)

selected_features = df[["MinorOperatingSystemVersion", "MajorSubsystemVersion",
                        "SizeOfStackReserve", "MajorLinkerVersion",
                        "MajorOperatingSystemVersion", "MinorImageVersion",
                        "Subsystem", "MinorSubsystemVersion"]]
labels = df["Malware"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, labels, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

max_size = len(selected_features.columns)
X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], 1, max_size))
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], 1, max_size))

inputs = Input(shape=(1, max_size))
conv = Conv1D(kernel_size=3, filters=32, strides=1, padding='same')(inputs)
act = Activation('relu')(conv)
p = GlobalMaxPool1D()(act)
d = Dense(16, activation='relu')(p)
predictions = Dense(1, activation='sigmoid')(d)

model = Model(inputs=inputs, outputs=predictions)

my_opt = optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=my_opt, loss="binary_crossentropy", metrics=["acc"])

history = model.fit(X_train_scaled, y_train, batch_size=32, epochs=20, validation_split=0.2)

test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"\nTest Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

model.save('my_malconv_model.h5')

history_df = pd.DataFrame(history.history)
history_df.index.name = 'Epoch'
print("\nTraining and Validation Metrics:")
print(history_df.to_markdown())




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Test Loss: 0.0974, Test Accuracy: 0.9750

Training and Validation Metrics:
|   Epoch |     loss |      acc |   val_loss |   val_acc |
|--------:|---------:|---------:|-----------:|----------:|
|       0 | 0.302613 | 0.92255  |   0.18626  |  0.939452 |
|       1 | 0.172564 | 0.946693 |   0.170326 |  0.945188 |
|       2 | 0.159509 | 0.952908 |   0.15981  |  0.951562 |
|       3 | 0.150141 | 0.957211 |   0.150249 |  0.952836 |
|       4 | 0.141728 | 0.959123 |   0.143903 |  0.956342 |
|       5 | 0.135097 | 0.961753 |   0.135117 |  0.960166 |
|       6 | 0.130213 | 0.963108 |   0.129395 |  0.962715 |
|       7 | 0.126649 | 0.96255  |   0.126842 |  0.962715 |
|       8 | 0.122501 | 0.96502  |   0.123282 |  0.96622  |
|       9 | 0.119595 | 0.964701 |   0.1217  

  saving_api.save_model(
