# Emotet
- Probably Emotet
- MD5 8baa9b809b591a11af423824f4d9726a
- [VirusTotal](https://www.virustotal.com/gui/file/6393fe8dd4721190f240e22feeb769675b6194a70cabd5a415c2364686a9089c/detection)
- [Malware Link](https://mcfp.felk.cvut.cz/publicDatasets/CTU-Malware-Capture-Botnet-114-3/)

## Imports and DataFrame

In [8]:
import pandas as pd

In [9]:
# Load the modelling dataset and discard its existing "Label" column;
# the labels are re-derived by clustering further down.
emotet = pd.read_csv(r"../../../data/modelling/emotet.csv").drop(columns="Label")

## Labelling

In [11]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.cluster import (
    KMeans,
    HDBSCAN,
    MeanShift,
    AffinityPropagation,
    MiniBatchKMeans,
    AgglomerativeClustering,
    BisectingKMeans,
    DBSCAN,
    HDBSCAN,
    OPTICS,
)
from sklearn.metrics import (
    silhouette_score,
    calinski_harabasz_score,
    davies_bouldin_score,
)
from sklearn.decomposition import PCA


# Fixed seed: the cluster ids produced below are stored as labels and written
# to disk, so they must be identical on every Restart & Run All.
RANDOM_STATE = 42

# Project the data onto its first 20 principal components.
# NOTE(review): the projection `df` is only inspected here — the clustering and
# all three scores below are computed on the original `emotet` columns. Confirm
# whether the PCA output was meant to be clustered instead.
pca = PCA(n_components=20, random_state=RANDOM_STATE)
df = pca.fit_transform(emotet)

# Print the shape of the transformed data (a bare expression in the middle of a
# cell is not displayed, so it has to be printed explicitly).
print(f"PCA-transformed shape: {df.shape}")

# Two clusters. n_init is pinned because its default differs between
# scikit-learn releases; random_state makes the initialisation repeatable.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=RANDOM_STATE)

# fit_predict returns the fitted model's labels_, i.e. exactly the assignment
# that is stored as the label column later, so the scores describe those labels.
y_pred = kmeans.fit_predict(emotet)

# Silhouette Score (higher is better).
# NOTE: computed over all rows by default; silhouette_score accepts
# sample_size=... if this becomes too slow.
sil_score = silhouette_score(emotet, y_pred)
print(f"Silhouette Score: {sil_score}")

# Calinski-Harabasz Score (higher is better).
ch_score = calinski_harabasz_score(emotet, y_pred)
print(f"CH Score: {ch_score}")

# Davies-Bouldin Score (lower is better).
db_score = davies_bouldin_score(emotet, y_pred)
print(f"DB Score: {db_score}")

Silhouette Score: 0.6267249071636996
CH Score: 238994.5068490245
DB Score: 0.5000000297918739


In [13]:
# Store the cluster id that the fitted KMeans model assigned to each row.
emotet["Label"] = kmeans.labels_

In [14]:
# Number of rows that fell into each cluster.
label_counts = emotet["Label"].value_counts()
label_counts

Label
0    39862
1    39805
Name: count, dtype: int64

## Flag Formatting

In [7]:
# TCP flag names, listed in the order they appear in the formatted output
tcp_flag_mapping = [
    'FIN',
    'SYN',
    'RST',
    'PSH',
    'ACK',
    'URG',
    'ECE',
    'CWR',
    'NS',
]


def format_flags(flags_str):
    """Normalise a raw flags value into a comma-separated list of TCP flag names.

    The input is upper-cased and every name from ``tcp_flag_mapping`` that
    occurs in it is kept, in the order of ``tcp_flag_mapping`` (not the order
    found in the input).

    Parameters
    ----------
    flags_str : str or any
        Raw flags text. Non-string values (``None``, ``NaN`` from an empty CSV
        cell, numbers) are treated as "no flags".

    Returns
    -------
    str
        Flag names joined with ``','``; ``''`` when none are found or the input
        is not a string.

    Notes
    -----
    Matching is by substring, so a short name can match inside adjacent text
    (e.g. ``'NS'`` occurs in ``'FINSYN'``).
    """
    # Missing cells arrive as float NaN / None and have no .upper(); treat them
    # as an empty flag set instead of raising AttributeError.
    if not isinstance(flags_str, str):
        return ''

    upper = flags_str.upper()

    # Keep every known flag that occurs in the text, in mapping order
    return ','.join(flag for flag in tcp_flag_mapping if flag in upper)

# Rewrite every value of the "Flags" column with its normalised form
emotet['Flags'] = emotet['Flags'].map(format_flags)

In [8]:
# Display the frame (bare last expression) to check the reformatted Flags column.
emotet

Unnamed: 0,Duration,Source IP,Destination IP,Source Port,Destination Port,Protocol,Flags,Packets,Bytes,Mean Payload Size,Std Payload Size,Min Payload Size,Max Payload Size,Mean Entropy,Min Entropy,Max Entropy,Mean Inter-Packet Interval,Min Inter-Packet Interval,Max Inter-Packet Interval,Label
0,0.003800,10.0.2.102,195.113.232.90,49157,80,TCP,"FIN,SYN,ACK",4,148,37.000000,5.196152,34,46,0.000000,0.000000,0.000000,0.001267,0.000128,0.002249,Benign
1,0.002695,195.113.232.90,10.0.2.102,80,49157,TCP,"FIN,SYN,ACK",4,140,35.000000,1.732051,34,38,0.000000,0.000000,0.000000,0.000898,0.000457,0.001246,Benign
2,0.000000,10.0.2.102,195.113.232.90,49157,80,HTTP,"PSH,ACK",1,131,131.000000,0.000000,131,131,3.293545,3.293545,3.293545,0.000000,0.000000,0.000000,Benign
3,0.000000,195.113.232.90,10.0.2.102,80,49157,DATA-TEXT-LINES,"PSH,ACK",1,213,213.000000,0.000000,213,213,3.328824,3.328824,3.328824,0.000000,0.000000,0.000000,Benign
4,8.993695,10.0.2.102,68.169.52.10,49158,8080,TCP,SYN,3,134,44.666667,1.885618,42,46,0.000000,0.000000,0.000000,4.496848,2.996990,5.996705,Benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79662,9.001526,10.0.2.102,103.1.186.61,54564,8080,TCP,SYN,3,134,44.666667,1.885618,42,46,0.000000,0.000000,0.000000,4.500763,2.993013,6.008513,Benign
79663,16.209324,10.0.2.102,202.44.54.4,54565,8080,TCP,"SYN,ACK",4,148,37.000000,5.196152,34,46,0.000000,0.000000,0.000000,5.403108,0.230861,14.808685,Emotet
79664,15.978455,202.44.54.4,10.0.2.102,8080,54565,TCP,"FIN,SYN,ACK",3,106,35.333333,1.885618,34,38,0.000000,0.000000,0.000000,7.989228,0.000472,15.977983,Emotet
79665,0.000000,10.0.2.102,202.44.54.4,54565,8080,HTTP,"PSH,ACK",1,454,454.000000,0.000000,454,454,3.523060,3.523060,3.523060,0.000000,0.000000,0.000000,Emotet


## Saving to CSV

In [16]:
# Write the labelled frame to disk without the row index.
emotet.to_csv(
    '../../../data/labelled/kmeans/emotet.csv',
    index=False,
)