In [58]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Load saved LSTM model (ensure the model was saved using model.save("LSTM_model.h5"))
model = tf.keras.models.load_model("../../models/dridex/LSTM.h5")

# Load encoders and scaler
with open("../../variables/dridex/Protocol_Encoder.pkl", "rb") as f:
    protocol_encoder = pickle.load(f)

with open("../../variables/dridex/Flags_Encoder.pkl", "rb") as f:
    flags_encoder = pickle.load(f)

with open("../../variables/dridex/Direction_Encoder.pkl", "rb") as f:
    direction_encoder = pickle.load(f)

with open("../../variables/dridex/scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

# Paths to data folders
folders = [
    "../../data/collected/normal_machines",
    "../../data/collected/infected_machines",
]

# Prediction threshold
THRESHOLD = 0.5  # Since sigmoid activation is used


results = []


def process_file(file_path):
    df = pd.read_csv(file_path)
    df.loc[((df["Flags"] == "SYN,RST") | (df["Flags"] == "RST,ACK")), "Flags"] = (
        "RST"
    )
    df.loc[((df["Protocol"] == "DATA-TEXT-LINES")|(df["Protocol"] == "XML")), "Protocol"] = "MEDIA"
    print(df.columns)

    df["Protocol"] = protocol_encoder.transform(df["Protocol"])
    df["Flags"] = flags_encoder.transform(df["Flags"])
    df["Direction"] = direction_encoder.transform(df["Direction"])

    columns = df.columns
    df = scaler.transform(df.to_numpy())
    df = pd.DataFrame(df, columns=columns)

    # Reshape for LSTM: (samples, time steps, features)
    X_lstm = np.expand_dims(df.values, axis=1)  # Fix input shape

    # Make predictions
    predictions = model.predict(X_lstm)

    # Convert probabilities to class labels
    predicted_labels = (predictions > THRESHOLD).astype(int)  # 1 = Dridex, 0 = Benign

    # Count
    benign_count = np.sum(predicted_labels == 0)
    dridex_count = np.sum(predicted_labels == 1)

    # Determine file classification
    file_status = "Malicious" if dridex_count > 0 else "Benign"

    # Store results in a list
    results.append(
        {
            "File": file_path,
            "Benign_Count": benign_count,
            "Dridex_Count": dridex_count,
            "Final_Classification": file_status,
        }
    )


# Process all files in both folders
for folder in folders:
    for file_name in os.listdir(folder):
        file_path = os.path.join(folder, file_name)
        process_file(file_path)

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Save to CSV (optional)
results_df.to_csv("classification_results.csv", index=False)

# Display the DataFrame
print(results_df)



Index(['Duration', 'Source Port', 'Destination Port', 'Protocol', 'Flags',
       'Packets', 'Bytes', 'Mean Payload Size', 'Std Payload Size',
       'Min Payload Size', 'Max Payload Size', 'Mean Entropy', 'Min Entropy',
       'Max Entropy', 'Mean Inter-Packet Interval',
       'Min Inter-Packet Interval', 'Max Inter-Packet Interval',
       'Bytes per Packet', 'Packets per Second', 'Bytes per Second',
       'Destination Common Port Usage', 'Flags Count', 'SYN Count',
       'ACK Count', 'FIN Count', 'Is HTTP', 'Is Internal IP', 'Direction',
       'Short Duration', 'Single Packet'],
      dtype='object')
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Index(['Duration', 'Source Port', 'Destination Port', 'Protocol', 'Flags',
       'Packets', 'Bytes', 'Mean Payload Size', 'Std Payload Size',
       'Min Payload Size', 'Max Payload Size', 'Mean Entropy', 'Min Entropy',
       'Max Entropy', 'Mean Inter-Packet Interval',
       'Min Inter-Packet Interval', 'M

ValueError: X has 31 features, but StandardScaler is expecting 30 features as input.

In [48]:
results_df

Unnamed: 0,File,Benign_Count,Dridex_Count,Final_Classification
0,../../data/collected/normal_machines/benign_3.csv,528,133,Malicious
1,../../data/collected/normal_machines/benign_2.csv,517,107,Malicious
2,../../data/collected/normal_machines/benign_5.csv,480,114,Malicious
3,../../data/collected/normal_machines/benign_4.csv,417,104,Malicious
4,../../data/collected/normal_machines/benign_1.csv,492,139,Malicious
5,../../data/collected/infected_machines/infecte...,485,141,Malicious
6,../../data/collected/infected_machines/infecte...,430,118,Malicious
7,../../data/collected/infected_machines/infecte...,533,161,Malicious
8,../../data/collected/infected_machines/infecte...,547,133,Malicious
9,../../data/collected/infected_machines/infecte...,525,137,Malicious


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

with open("../../models/dridex/logreg.pkl", "rb") as f:
    model = pickle.load(f)

# Load encoders and scaler
with open("../../variables/dridex/Protocol_Encoder.pkl", "rb") as f:
    protocol_encoder = pickle.load(f)

with open("../../variables/dridex/Flags_Encoder.pkl", "rb") as f:
    flags_encoder = pickle.load(f)

with open("../../variables/dridex/Direction_Encoder.pkl", "rb") as f:
    direction_encoder = pickle.load(f)

with open("../../variables/dridex/scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

# Paths to data folders
folders = [
    "../../data/collected/normal_machines",
    "../../data/collected/infected_machines",
]

# Prediction threshold
THRESHOLD = 0.5  # Since sigmoid activation is used


results = []


def process_file(file_path):
    df = pd.read_csv(file_path)
    df.loc[((df["Flags"] == "SYN,RST") | (df["Flags"] == "RST,ACK")), "Flags"] = (
        "RST"
    )
    df.loc[((df["Protocol"] == "DATA-TEXT-LINES")|(df["Protocol"] == "XML")), "Protocol"] = "MEDIA"

    df["Protocol"] = protocol_encoder.transform(df["Protocol"])
    df["Flags"] = flags_encoder.transform(df["Flags"])
    df["Direction"] = direction_encoder.transform(df["Direction"])

    columns = df.columns
    df = scaler.transform(df.to_numpy())
    df = pd.DataFrame(df, columns=columns)
    # Make predictions
    predicted_labels = model.predict(df)


    benign_count = np.sum(predicted_labels == 0)
    dridex_count = np.sum(predicted_labels == 1)

    # Determine file classification
    file_status = "Malicious" if dridex_count > 0 else "Benign"

    # Store results in a list
    results.append(
        {
            "File": file_path,
            "Benign_Count": benign_count,
            "Dridex_Count": dridex_count,
            "Final_Classification": file_status,
        }
    )


# Process all files in both folders
for folder in folders:
    for file_name in os.listdir(folder):
        file_path = os.path.join(folder, file_name)
        process_file(file_path)

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Save to CSV (optional)
results_df.to_csv("classification_results.csv", index=False)

# Display the DataFrame
print(results_df)

                                                File  Benign_Count  \
0  ../../data/collected/normal_machines/benign_3.csv           606   
1  ../../data/collected/normal_machines/benign_2.csv           572   
2  ../../data/collected/normal_machines/benign_5.csv           544   
3  ../../data/collected/normal_machines/benign_4.csv           481   
4  ../../data/collected/normal_machines/benign_1.csv           574   
5  ../../data/collected/infected_machines/infecte...           457   
6  ../../data/collected/infected_machines/infecte...           384   
7  ../../data/collected/infected_machines/infecte...           486   
8  ../../data/collected/infected_machines/infecte...           487   
9  ../../data/collected/infected_machines/infecte...           474   

   Dridex_Count Final_Classification  
0            55            Malicious  
1            52            Malicious  
2            50            Malicious  
3            40            Malicious  
4            57            Malicio

In [51]:
results_df

Unnamed: 0,File,Benign_Count,Dridex_Count,Final_Classification
0,../../data/collected/normal_machines/benign_3.csv,606,55,Malicious
1,../../data/collected/normal_machines/benign_2.csv,572,52,Malicious
2,../../data/collected/normal_machines/benign_5.csv,544,50,Malicious
3,../../data/collected/normal_machines/benign_4.csv,481,40,Malicious
4,../../data/collected/normal_machines/benign_1.csv,574,57,Malicious
5,../../data/collected/infected_machines/infecte...,457,169,Malicious
6,../../data/collected/infected_machines/infecte...,384,164,Malicious
7,../../data/collected/infected_machines/infecte...,486,208,Malicious
8,../../data/collected/infected_machines/infecte...,487,193,Malicious
9,../../data/collected/infected_machines/infecte...,474,188,Malicious


In [56]:
folders = [
    "../../data/collected/normal_machines",
]                         
def process_file(file_path):
    df = pd.read_csv(file_path)
    dfc = df.copy()
    df.loc[((df["Flags"] == "SYN,RST") | (df["Flags"] == "RST,ACK")), "Flags"] = "RST"
    df.loc[
        ((df["Protocol"] == "DATA-TEXT-LINES") | (df["Protocol"] == "XML")), "Protocol"
    ] = "MEDIA"

    df["Protocol"] = protocol_encoder.transform(df["Protocol"])
    df["Flags"] = flags_encoder.transform(df["Flags"])
    df["Direction"] = direction_encoder.transform(df["Direction"])

    columns = df.columns
    df = scaler.transform(df.to_numpy())
    df = pd.DataFrame(df, columns=columns)
    # Make predictions
    predicted_labels = model.predict(df)
    dfc["Label"] = predicted_labels
    dfc = dfc[dfc["Label"] <= 0.5]
    dfc.drop("Label", axis=1)
    dfc.reset_index()
    dfc.to_csv(f"{file_path}.csv")


# Process all files in both folders
for folder in folders:
    for file_name in os.listdir(folder):
        file_path = os.path.join(folder, file_name)
        process_file(file_path)

ValueError: X has 31 features, but StandardScaler is expecting 30 features as input.