In [None]:


from google.colab import files
import pandas as pd

# Ask the user for the dataset through Colab's browser upload dialog.
print("➡️ Upload your dataset file (e.g., malware_dataset.csv)")
uploaded = files.upload()

# `uploaded` maps saved file names to file contents. Guard against an empty
# result (e.g. the dialog was dismissed) so the failure is explicit instead of
# an opaque IndexError on the next line.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose a CSV file.")

# Only the first uploaded file is used; any additional files are ignored.
data_file = next(iter(uploaded))
print("Dataset uploaded as:", data_file)

# The upload is read back from disk under the reported name.
df = pd.read_csv(data_file)

print("Shape:", df.shape)
df.head()


➡️ Upload your dataset file (e.g., malware_dataset.csv)


Saving malware_dataset.csv to malware_dataset.csv
Dataset uploaded as: malware_dataset.csv
Shape: (198350, 12)


Unnamed: 0,Label,FileSizeKB,PackerUsed,SuspiciousSections,DLLCount,DroppedFiles,APICallCount,MutexCount,FileAccessCount,RegistryAccessCount,NetworkConnectionCount,ProcessActivityCount
0,1,936,1,0,6,2,350,1,18,13,25,0
1,0,3388,0,1,48,0,80,0,4,3,3,5
2,0,3172,0,0,22,0,140,0,18,1,1,1
3,1,665,1,2,46,7,104,1,10,3,8,5
4,0,737,0,0,34,0,279,1,10,5,1,3


In [None]:

import pandas as pd

# Prefer the file name reported by the upload cell: the saved name can differ
# from the default (NOTE(review): Colab appears to save duplicates under a
# suffixed name such as "name (1).csv" - confirm). Fall back to the default
# path so this cell still runs on its own when the file is already on disk.
try:
    csv_path = data_file
except NameError:
    csv_path = "malware_dataset.csv"

df = pd.read_csv(csv_path)

print("Shape:", df.shape)
print("\nColumns:\n", df.columns.tolist())

# Name of the target column; later cells read this variable.
label_col = "Label"
if label_col not in df.columns:
    raise KeyError(f"Label column {label_col!r} not found in {csv_path!r}")

print("\nUsing label column:", label_col)
# Class balance of the target.
print(df[label_col].value_counts())

df.head()


Shape: (198350, 12)

Columns:
 ['Label', 'FileSizeKB', 'PackerUsed', 'SuspiciousSections', 'DLLCount', 'DroppedFiles', 'APICallCount', 'MutexCount', 'FileAccessCount', 'RegistryAccessCount', 'NetworkConnectionCount', 'ProcessActivityCount']

Using label column: Label
Label
1    100200
0     98150
Name: count, dtype: int64


Unnamed: 0,Label,FileSizeKB,PackerUsed,SuspiciousSections,DLLCount,DroppedFiles,APICallCount,MutexCount,FileAccessCount,RegistryAccessCount,NetworkConnectionCount,ProcessActivityCount
0,1,936,1,0,6,2,350,1,18,13,25,0
1,0,3388,0,1,48,0,80,0,4,3,3,5
2,0,3172,0,0,22,0,140,0,18,1,1,1
3,1,665,1,2,46,7,104,1,10,3,8,5
4,0,737,0,0,34,0,279,1,10,5,1,3


In [None]:
# Morphological ELM

import numpy as np
from sklearn.preprocessing import StandardScaler
from typing import List
import json
from pathlib import Path

class MorphologicalELM:
    """Extreme Learning Machine with a morphological-dilation hidden layer.

    The hidden layer uses fixed random weights and computes
    ``h_j(x) = max_i(x_i + W[j, i]) + b[j]``. Only the linear output weights
    ``beta`` are learned, in closed form, via the Moore-Penrose pseudoinverse
    of the hidden activations.

    Parameters
    ----------
    input_dim : int
        Number of input features expected by ``fit`` / ``predict``.
    hidden_dim : int, default 500
        Number of hidden dilation units.
    random_state : int, default 42
        Seed for the random hidden weights.

    Attributes (populated by ``fit``)
    ---------------------------------
    W : float32 array, shape (hidden_dim, input_dim)
    b : float32 array, shape (hidden_dim,)
    beta : array, shape (hidden_dim, n_classes)
    scaler : fitted StandardScaler
    class_names : list of the distinct integer labels, sorted ascending
    """

    def __init__(self, input_dim: int, hidden_dim: int = 500, random_state: int = 42):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        # The seed is only stored here. Weights are drawn from a private
        # generator in `_init_weights`, so constructing a model does not
        # disturb NumPy's global random state.
        self.random_state = random_state

        self.W = None
        self.b = None
        self.beta = None
        self.scaler = None
        self.class_names = None

    def _init_weights(self):
        """Draw the random hidden weights ``W`` and biases ``b`` from U(-1, 1).

        A fresh ``RandomState(random_state)`` is used on every call, so
        repeated fits of the same model produce identical weights. The stream
        matches ``np.random.seed(random_state)`` followed by the same draws.
        """
        rng = np.random.RandomState(self.random_state)
        self.W = rng.uniform(-1, 1, (self.hidden_dim, self.input_dim)).astype(np.float32)
        self.b = rng.uniform(-1, 1, (self.hidden_dim,)).astype(np.float32)

    def _hidden_dilation(self, X, batch_size: int = 1024):
        """
        Morphological dilation hidden layer:
        h_j(x) = max_i (x_i + w_ji) + b_j

        Rows are processed ``batch_size`` at a time so the temporary
        (rows, hidden, features) tensor stays small; the returned values are
        the same as a single full-size broadcast.

        Returns an array of shape (n_samples, n_hidden).
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        out_dtype = np.result_type(X.dtype, self.W.dtype, self.b.dtype)
        H = np.empty((n_samples, self.W.shape[0]), dtype=out_dtype)

        step = max(1, int(batch_size))
        for start in range(0, n_samples, step):
            stop = start + step
            # (rows, 1, features) + (1, hidden, features) -> (rows, hidden, features)
            shifted = X[start:stop, None, :] + self.W[None, :, :]
            H[start:stop] = shifted.max(axis=2) + self.b[None, :]
        return H

    def fit(self, X, y):
        """Train the morphological ELM.

        Parameters
        ----------
        X : array-like, shape (n_samples, input_dim)
            Feature matrix; converted to float32 and standardized.
        y : array-like, shape (n_samples,)
            Class labels; cast to ``int``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D with ``input_dim`` columns, or if ``X`` and
            ``y`` disagree on the number of samples.
        """
        X = np.asarray(X, dtype=np.float32)
        # Checked explicitly: a single-column X would otherwise broadcast
        # silently against W when input_dim is wrong.
        if X.ndim != 2 or X.shape[1] != self.input_dim:
            raise ValueError(
                f"X must have shape (n_samples, {self.input_dim}); got {X.shape}"
            )

        y_int = np.asarray(y).ravel().astype(int)
        if y_int.shape[0] != X.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} vs {y_int.shape[0]}"
            )

        # scale features
        self.scaler = StandardScaler()
        Xs = self.scaler.fit_transform(X)

        # initialize random morphological kernels
        self._init_weights()

        # compute hidden activations
        H = self._hidden_dilation(Xs)

        # one-hot encode the labels (vectorized); class_idx[i] is the column
        # of sample i's class within the sorted unique labels
        classes, class_idx = np.unique(y_int, return_inverse=True)
        self.class_names = list(classes)

        Y = np.zeros((y_int.shape[0], classes.shape[0]), dtype=np.float32)
        Y[np.arange(y_int.shape[0]), class_idx.reshape(-1)] = 1.0

        # compute output weights using pseudoinverse
        self.beta = np.linalg.pinv(H) @ Y
        return self

    def predict(self, X):
        """Predict class labels.

        Applies the fitted scaler and hidden layer, then returns, for each
        row, the entry of ``class_names`` with the highest output score.
        """
        Xs = self.scaler.transform(np.asarray(X, dtype=np.float32))
        H = self._hidden_dilation(Xs)
        scores = H @ self.beta
        idx = np.argmax(scores, axis=1)
        return np.asarray(self.class_names)[idx]

    def export_to_json(self, path="melm_model.json"):
        """Export model to JSON for browser inference.

        Writes the dimensions, class labels, hidden weights, output weights
        and the scaler's mean/scale to ``path`` as plain Python numbers.
        """
        data = {
            "input_dim": int(self.input_dim),
            "hidden_dim": int(self.hidden_dim),
            "class_names": [int(c) for c in self.class_names],
            "W": self.W.astype(float).tolist(),
            "b": self.b.astype(float).tolist(),
            "beta": self.beta.astype(float).tolist(),
            "scaler_mean": self.scaler.mean_.astype(float).tolist(),
            "scaler_scale": self.scaler.scale_.astype(float).tolist(),
        }

        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

        print(f"Saved JSON model → {path}")


In [None]:
# Sanity check: distinct values present in the target column. Uses `label_col`
# (set when the dataset was loaded) rather than repeating the literal name.
df[label_col].unique()


array([1, 0])

In [None]:
# Prepare data, train the mELM, and evaluate it on a held-out split

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Target vector, and feature matrix built from every column except the label.
y = df[label_col].to_numpy()
X = df.drop(columns=[label_col]).to_numpy(dtype=np.float32)

print("Feature matrix shape:", X.shape)
print("Labels shape:", y.shape)

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
    stratify=y,
)

# Fit the morphological ELM on the training portion only.
n_features = X.shape[1]
melm = MorphologicalELM(input_dim=n_features, hidden_dim=500, random_state=123)
melm.fit(X_train, y_train)

# Score the held-out portion.
y_pred = melm.predict(X_test)

print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))


Feature matrix shape: (198350, 11)
Labels shape: (198350,)

Classification Report:

              precision    recall  f1-score   support

           0       0.97      0.99      0.98     19630
           1       0.99      0.97      0.98     20040

    accuracy                           0.98     39670
   macro avg       0.98      0.98      0.98     39670
weighted avg       0.98      0.98      0.98     39670

Confusion Matrix:

[[19438   192]
 [  533 19507]]


In [None]:

# Serialize the trained model, then hand the file to the browser for download.
model_json_path = "melm_model.json"
melm.export_to_json(model_json_path)

from google.colab import files
files.download(model_json_path)


Saved JSON model → melm_model.json


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>