In [1]:
%pip install --upgrade pip
%pip install --force-reinstall "nfstream==6.5.3" pandas

Collecting pip
  Downloading pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-24.3.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.3
    Uninstalling pip-23.3:
      Successfully uninstalled pip-23.3
Successfully installed pip-24.3.1
Note: you may need to restart the kernel to use updated packages.
Collecting nfstream==6.5.3
  Downloading nfstream-6.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (29 kB)
Collecting pandas
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting cffi>=1.15.0 (from nfstream==6.5.3)
  Downloading cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting psutil>=5.8.0 (from nfstream==6.5.3)
  Downloadin

In [2]:
import nfstream
print(nfstream.__version__)

6.5.3


In [10]:
import os

# Print the current directory content
print(os.listdir('work'))

['hfm.ipynb', 'things.txt', 'nonvpn_netflix_capture1.pcap']


In [None]:
from nfstream import NFStreamer, NFPlugin
from math import log2

def sample_entropy(X):
    p = {}
    # calculate frequencies
    for xi in X:
        p[xi] = p.get(xi, 0) + 1
    # normalize frequencies
    for xi in p:
        p[xi] /= len(X)

    # calculate Sample entropy
    return -sum(p[xi] * log2(p[xi] / len(X)) for xi in p)

class FingerprintPlugin(NFPlugin):
    def __init__(self, win_size, buf_size, u, t, sigma, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.payload_buffers = {}
        
        self.win_size = win_size
        self.buf_size = buf_size
        self.u = u
        self.t = t
        self.sigma = sigma

    def processing(self, packet, flow):
        # append the hex encoded payload to the flow
        # note: packet.ip_packet is a bytes object
        if len(self.payload_buffers[flow.id]) < self.buf_size:
            self.payload_buffers[flow.id] += packet.ip_packet.hex()
        
    def on_init(self, packet, flow):
        self.payload_buffers[flow.id] = ''
        self.processing(packet, flow)
        

    def on_update(self, packet, flow):
        self.processing(packet, flow)

    def on_expire(self, flow):
        # if len(self.payload_buffers[flow.id]) < self.buf_size:  # add padding to payload buffer if it does not exceed the buffer size
        #     self.payload_buffers[flow.id] += ['\0'.encode().hex()] * (self.buf_size - len(self.payload_buffers[flow.id]))
        actual_buf_size = min(self.buf_size, len(self.payload_buffers[flow.id]))

        count = 0
        Hf = []
        for i in range(actual_buf_size - self.win_size + 1):
            win_buffer = self.payload_buffers[flow.id][i : i + self.win_size]
            Hi = sample_entropy(win_buffer)
            Hf.append(Hi)
            if Hi > self.u - self.t * self.sigma:
                count += 1

        flow.udps.sus = (count == actual_buf_size - self.win_size + 1)
        flow.udps.Hf = Hf

# parameters
IDLE_TIMEOUT = 10000
ACTIVE_TIMEOUT = 100000
WIN_SIZE = 32  # Luo
BUF_SIZE = 1024  # during the analysis
U = 4.8817  # LUo
T = 3  # to gain 99.4% confidence
SIGMA = 0.08134
PATH = "work/test.pcapng"
# PATH = "work/nonvpn_netflix_capture1.pcap"
# PATH = "work/vpn_netflix_capture2.pcap"

df = NFStreamer(
    source=PATH,
    # how to handle packets of a flow
    decode_tunnels=True,
    idle_timeout=IDLE_TIMEOUT,
    active_timeout=ACTIVE_TIMEOUT,
    n_dissections=20,
    accounting_mode=1,
    # what to look for
    statistical_analysis=True,
    splt_analysis=20,
    udps=FingerprintPlugin(WIN_SIZE, BUF_SIZE, U, T, SIGMA),
).to_pandas()

df

Unnamed: 0,id,expiration_id,src_ip,src_mac,src_oui,src_port,dst_ip,dst_mac,dst_oui,dst_port,...,application_category_name,application_is_guessed,application_confidence,requested_server_name,client_fingerprint,server_fingerprint,user_agent,content_type,udps.sus,udps.Hf
0,0,0,2001:4c4e:1e84:3b00:4eb4:ccca:4c6c:989f,b4:8c:9d:a3:b7:2f,b4:8c:9d,59726,2604:a880:4:1d0::1f1:2000,10:d7:b0:9a:30:cd,10:d7:b0,80,...,Web,1,1,,,,,,True,"[8.211897470347699, 8.161596205521548, 8.22206..."
1,1,0,2001:4c4e:1e84:3b00:4eb4:ccca:4c6c:989f,b4:8c:9d:a3:b7:2f,b4:8c:9d,48002,2a00:1450:400d:80d::2003,10:d7:b0:9a:30:cd,10:d7:b0,80,...,Web,1,1,,,,,,True,"[8.007048827786958, 7.980337797403416, 8.04942..."
2,2,0,2001:4c4e:1e84:3b00:4eb4:ccca:4c6c:989f,b4:8c:9d:a3:b7:2f,b4:8c:9d,56968,2600:1f13:37c:1400:ba21:7165:5fc7:736e,10:d7:b0:9a:30:cd,10:d7:b0,80,...,Web,0,6,quietsilverlushtreasure.neverssl.com,,,Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Geck...,text/html,True,"[7.9275496777656045, 7.900838647382062, 7.9505..."
3,3,0,2001:4c4e:1e84:3b00:4eb4:ccca:4c6c:989f,b4:8c:9d:a3:b7:2f,b4:8c:9d,59720,2604:a880:4:1d0::1f1:2000,10:d7:b0:9a:30:cd,10:d7:b0,80,...,Web,0,6,httpforever.com,,,Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Geck...,text/html,True,"[8.078626767019873, 8.051915736636332, 8.09706..."


In [22]:
# test
df["udps.Hf"].describe()

count                                                     4
unique                                                    4
top       [8.211897470347699, 8.161596205521548, 8.22206...
freq                                                      1
Name: udps.Hf, dtype: object

In [None]:
# vpn_netflix_capture2
df["udps.Hf"].describe()

count                                                     1
unique                                                    1
top       [7.956747562960807, 7.910606130062574, 7.91060...
freq                                                      1
Name: udps.Hf, dtype: object

In [16]:
# nonvpn_netflix_capture1
df["udps.Hf"].describe()

count                                                   140
unique                                                    7
top       [8.117536532577267, 8.028959443322996, 7.99004...
freq                                                     22
Name: udps.Hf, dtype: object