# Fingerprinting

In [17]:
# Imports

import psutil
import numpy as np
import pyshark
import tarfile
import nest_asyncio
nest_asyncio.apply()
from collections import Counter
import json
from scipy.stats import entropy
import copy
from sklearn.preprocessing import LabelEncoder

from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import DBSCAN

## 0 Current RAM usage monitoring

In [2]:
class RamUsage:
    """
    Report the resident memory (RSS) of the current Python process.

    Calling the instance returns the current usage in GiB; converting it to a
    string gives a one-line summary relative to `total_gb`.
    """

    def __init__(self, total_gb: float = 24):
        """
        Args:
            total_gb: amount of RAM the usage is compared against in `__str__`
                (defaults to 24, the value that used to be hard-coded).
        """
        self.total_gb = total_gb
        # Initialised so that the attribute can be read before the first measurement.
        self.memory_info: float = 0.0

    def __call__(self):
        """
        Return Occupied RAM in GiB
        """
        self.memory_info = psutil.Process().memory_info().rss / (1024 ** 3)
        return self.memory_info

    def __str__(self):
        # Take a fresh measurement every time the summary is rendered.
        used = self()
        return f"Current RAM usage: {used:.2f} / {self.total_gb:g} GB ({used / self.total_gb * 100:.1f} %)"
# Notebook-wide memory monitor; TorFiles.__str__ also reads this `ram` object.
ram = RamUsage()
print(str(ram))

Current RAM usage: 0.15 / 24 GB (0.6 %)


## 1. Data loading and feature extraction

In [4]:
class TorFiles:
    """
    List the pcap captures of a train and a test archive and turn them into feature matrices.

    Typical use: build the object with the two ``.tar.gz`` paths, call the instance to list
    the captures, then call ``extract_features`` for the training set and for the test set.
    Each capture becomes one row: scalar statistics (mean / std / entropy) plus, for every
    categorical feature, one count column per value observed in the training captures.
    """

    # Suffixes of the features that are reduced to a single scalar per capture.
    # Every other feature is kept as a Counter of the values seen in the capture.
    _SCALAR_SUFFIXES = ('_mean', '_std', '_entropy', '_value')

    def __init__(self, file_directory_train: str, file_directory_test: str):
        """
        Args:
            file_directory_train: path of the gzip-compressed tar archive with the training captures.
            file_directory_test: path of the gzip-compressed tar archive with the test captures.
        """
        self.file_directory_train = file_directory_train
        self.file_directory_test = file_directory_test
        # The attributes below are only declared here; they are assigned by __call__,
        # init_cap_features, read_single_pcap and extract_features.
        self.filenames_train: list
        self.filenames_test: list
        self.nb_file_train: int
        self.nb_file_test: int
        self.single_cap_features: dict
        self.all_cap_features_train: dict
        self.all_cap_features_test: dict
        self.final_features_train: np.ndarray
        self.final_features_test: np.ndarray

    def get_filenames(self, train=True):
        """
        Return the names of the regular files stored in the train (or test) archive.

        Directory entries (e.g. a leading 'train' or 'test' folder) are skipped whatever
        their name is, so every returned name refers to an actual file.
        """
        archive_path = self.file_directory_train if train else self.file_directory_test
        with tarfile.open(archive_path, 'r:gz') as tar:
            return [member.name for member in tar.getmembers() if member.isfile()]

    def __call__(self, mode: str = 'train'):
        """
        List the captures of both archives and store their names and counts.

        `mode` is kept for backward compatibility; it is not used.
        """
        self.filenames_train = self.get_filenames(train=True)
        self.filenames_test = self.get_filenames(train=False)
        self.nb_file_train = len(self.filenames_train)
        self.nb_file_test = len(self.filenames_test)

    def __str__(self):
        # Relies on the module-level `ram` monitor and on __call__ having been run.
        return f"Number of training example: {self.nb_file_train}\nNumber of test example: {self.nb_file_test}\n{ram}"

    def init_cap_features(self, single=True, train=True):
        """
        Initialize the features dictionary 'self.single_cap_features' if single=True, 'self.all_cap_features(train/test)' if single=False

        With single=True every feature maps to an empty list (one entry is appended per packet);
        with single=False every feature maps to an empty Counter (filled across captures).
        """
        keys = [
            'src_ip', 'dst_ip', 'src_port', 'dst_port', 'transport_protocol', 'application_protocol', 'packet_length_mean', 'packet_length_std',
            'packet_length_entropy', 'payload_length_mean', 'payload_length_std', 'payload_length_entropy', 'tcp_flags', 'ip_options', 'dns_query',
            'dns_response', 'http_method', 'http_status_code', 'http_user_agent', 'http_host', 'tls_handshake', 'inter_arrival_time_mean',
            'inter_arrival_time_std', 'window_size_mean', 'window_size_std', 'window_size_entropy', 'rtt_mean', 'rtt_std', 'jitter_mean', 'jitter_std', 'packet_loss',
            'retransmission', 'syn_ack_flags', 'icmp_type', 'icmp_code',
            'dns_query_type', 'dns_response_code', 'http_content_type', 'http_content_length_mean', 'http_content_length_std', 'tls_version',
            'tls_cipher_suite', 'ip_ttl_mean', 'ip_ttl_std', 'ip_ttl_entropy', 'ip_fragmentation', 'tcp_sequence_number_mean',
            'tcp_sequence_number_std', 'tcp_acknowledgment_number_mean', 'tcp_acknowledgment_number_std', 'udp_checksum_mean', 'udp_checksum_std',
            'icmp_checksum_mean', 'icmp_checksum_std',
        ]
        if single:
            self.single_cap_features = {key: [] for key in keys}
        elif train:
            self.all_cap_features_train = {key: Counter() for key in keys}
        else:
            self.all_cap_features_test = {key: Counter() for key in keys}

    @staticmethod
    def _has_layer_field(packet, layer, name):
        """Tell whether `packet` has the given layer and that layer exposes the field `name`."""
        return layer.upper() in packet and hasattr(getattr(packet, layer), name)

    @staticmethod
    def _layer_field(packet, layer, name, default=''):
        """Return packet.<layer>.<name>, or `default` when the layer or the field is absent."""
        if layer.upper() not in packet:
            return default
        return getattr(getattr(packet, layer), name, default)

    def add_packet_features(self, packet, previous_timestamp=None):
        """
        For a given packet, append the extracted features (1 word/int per feature) to the lists in 'self.single_cap_features'

        Every layer field is read through a guarded lookup, so a packet that has a layer but
        not the requested field (e.g. a DNS query without 'resp_name') contributes '' instead
        of raising AttributeError.

        Args:
            packet: packet object yielded by the capture.
            previous_timestamp: sniff timestamp of the previous packet of the capture, or None
                for the first packet (its inter-arrival time is recorded as 0).
        """
        def field(layer, name):
            return self._layer_field(packet, layer, name)

        def flag(layer, name):
            return 'True' if self._has_layer_field(packet, layer, name) else 'False'

        has_ip = 'IP' in packet
        packet_length = int(packet.length)
        # NOTE(review): len(packet) presumably is the length of the whole packet rather than
        # of its payload only - confirm against pyshark before trusting the feature name.
        payload_length = len(packet) if hasattr(packet, 'data') else 0
        if previous_timestamp:
            inter_arrival_time = float(packet.sniff_timestamp) - float(previous_timestamp)
        else:
            inter_arrival_time = 0
        # Ports come from TCP when present, otherwise from UDP.
        port_layer = 'tcp' if 'TCP' in packet else 'udp'
        window_size = field('tcp', 'window_size')
        rtt = field('tcp', 'analysis_ack_rtt')
        jitter = field('tcp', 'analysis_ack_rtt_var')
        content_length = field('http', 'content_length')
        ttl = field('ip', 'ttl')
        sequence_number = field('tcp', 'seq')
        acknowledgment_number = field('tcp', 'ack')
        udp_checksum = field('udp', 'checksum')
        icmp_checksum = field('icmp', 'checksum')

        single_packet_features = {
            # Bag-of-words features: the raw value is stored and counted per capture.
            'src_ip': field('ip', 'src'),
            'dst_ip': field('ip', 'dst'),
            'src_port': field(port_layer, 'srcport'),
            'dst_port': field(port_layer, 'dstport'),
            # `or ''` keeps the vocabulary made of strings only if no transport layer is reported.
            'transport_protocol': (packet.transport_layer or '') if has_ip else '',
            'application_protocol': packet.highest_layer if has_ip else '',
            'tcp_flags': field('tcp', 'flags'),
            'ip_options': flag('ip', 'options'),
            'dns_query': field('dns', 'qry_name'),
            'dns_response': field('dns', 'resp_name'),
            'http_method': field('http', 'request_method'),
            'http_status_code': field('http', 'response_code'),
            'http_user_agent': field('http', 'user_agent'),
            'http_host': field('http', 'host'),
            'tls_handshake': field('tls', 'handshake'),
            # Extracted but not listed in init_cap_features, hence currently not stored.
            'tls_certificates': field('tls', 'certificates'),
            'packet_loss': flag('tcp', 'analysis_lost_segment'),
            'retransmission': flag('tcp', 'analysis_retransmission'),
            'syn_ack_flags': field('tcp', 'flags_syn'),
            'icmp_type': field('icmp', 'type'),
            'icmp_code': field('icmp', 'code'),
            'dns_query_type': field('dns', 'qry_type'),
            'dns_response_code': field('dns', 'resp_code'),
            'http_content_type': field('http', 'content_type'),
            'tls_version': field('tls', 'version'),
            'tls_cipher_suite': field('tls', 'cipher_suite'),
            'ip_fragmentation': field('ip', 'frag_offset'),
            # Numeric features: the same raw value feeds the mean / std / entropy variants.
            'packet_length_mean': packet_length,
            'packet_length_std': packet_length,
            'packet_length_entropy': packet_length,
            'payload_length_mean': payload_length,
            'payload_length_std': payload_length,
            'payload_length_entropy': payload_length,
            'inter_arrival_time_mean': inter_arrival_time,
            'inter_arrival_time_std': inter_arrival_time,
            'window_size_mean': window_size,
            'window_size_std': window_size,
            'window_size_entropy': window_size,
            'rtt_mean': rtt,
            'rtt_std': rtt,
            'jitter_mean': jitter,
            'jitter_std': jitter,
            'http_content_length_mean': content_length,
            'http_content_length_std': content_length,
            'ip_ttl_mean': ttl,
            'ip_ttl_std': ttl,
            'ip_ttl_entropy': ttl,
            'tcp_sequence_number_mean': sequence_number,
            'tcp_sequence_number_std': sequence_number,
            'tcp_acknowledgment_number_mean': acknowledgment_number,
            'tcp_acknowledgment_number_std': acknowledgment_number,
            'udp_checksum_mean': udp_checksum,
            'udp_checksum_std': udp_checksum,
            'icmp_checksum_mean': icmp_checksum,
            'icmp_checksum_std': icmp_checksum,
        }
        for key in self.single_cap_features.keys():
            self.single_cap_features[key].append(single_packet_features[key])

    @staticmethod
    def _to_float(value):
        """Convert a number or a numeric string to float; prefixed literals such as '0x1a2b' are accepted."""
        try:
            return float(value)
        except ValueError:
            # Presumably how checksums are rendered (hexadecimal text) - base 0 honours the prefix.
            return float(int(value, 0))

    @classmethod
    def _numeric_stat(cls, values, reducer):
        """Apply `reducer` (np.mean / np.std) to the non-empty entries of `values`; 0 if there are none."""
        numbers = [cls._to_float(value) for value in values if value != '']
        return reducer(numbers) if numbers else 0

    def _summarize_capture(self):
        """Reduce the per-packet lists of 'self.single_cap_features' to per-capture statistics and Counters."""
        features = self.single_cap_features
        packet_lengths = features["packet_length_mean"]
        # NOTE: these '_value' statistics are not part of the keys of init_cap_features, so
        # extract_features does not put them in the final matrix (layout kept unchanged).
        features["packet_length_max_value"] = max(packet_lengths, default=0)
        features["flow_length_value"] = sum(packet_lengths)
        features["packet_count_value"] = len(packet_lengths)
        features["flow_time_value"] = sum(features["inter_arrival_time_mean"])
        for key in list(features):
            values = features[key]
            if key in ('retransmission', 'packet_loss') and 'True' in values:
                print(f"One {key} here.")
            if key.endswith('_mean'):
                features[key] = self._numeric_stat(values, np.mean)
            elif key.endswith('_std'):
                features[key] = self._numeric_stat(values, np.std)
            elif key.endswith('_value'):
                continue
            else:
                # Count how many packets carried each value.
                counter = Counter(values)
                if key.endswith('_entropy'):
                    # Shannon entropy of the empirical distribution of the values.
                    counts = np.array(list(counter.values()), dtype=float)
                    features[key] = entropy(counts / counts.sum()) if counts.size else 0.0
                else:
                    features[key] = counter

    def read_single_pcap(self, filename, train=True):
        """
        Compute, for the capture associated with a filename, the content of 'self.single_cap_features'.

        Categorical features end up as a Counter giving the number of packets in which each
        value has been found, e.g. '192.168.1.0': 400; features ending in '_mean', '_std' or
        '_entropy' end up as one number. `filename` is handed to pyshark.FileCapture as is.
        The capture is always closed, even if reading a packet fails.
        """
        self.init_cap_features(single=True, train=train)
        capture = pyshark.FileCapture(filename)
        try:
            previous_timestamp = None
            for packet in capture:
                self.add_packet_features(packet, previous_timestamp)
                previous_timestamp = packet.sniff_timestamp
        finally:
            capture.close()
        self._summarize_capture()

    def _vectorize(self, intermediate_features):
        """
        Build the (n_captures, n_features) matrix from the per-capture feature dictionaries.

        Columns follow the key order of 'self.all_cap_features_train'. A scalar feature gives
        one column; a categorical feature gives one count column per value of the training
        vocabulary, in sorted order (the order LabelEncoder gave them). Values that are not
        in the training vocabulary are ignored.
        """
        n_captures = len(intermediate_features)
        blocks = []
        for key, vocabulary in self.all_cap_features_train.items():
            if key.endswith(self._SCALAR_SUFFIXES):
                block = np.array(
                    [features[key] for features in intermediate_features], dtype=float
                ).reshape(n_captures, 1)
            else:
                column_of = {token: column for column, token in enumerate(sorted(vocabulary))}
                block = np.zeros((n_captures, len(column_of)))
                for row, features in enumerate(intermediate_features):
                    for token, count in features[key].items():
                        column = column_of.get(token)
                        if column is not None:
                            block[row, column] = count
            blocks.append(block)
        if not blocks:
            return np.empty((n_captures, 0))
        return np.concatenate(blocks, axis=1)

    def extract_features(self, final_features_filename: str, train: bool = True, load: bool = False):
        """
        Create the feature matrix of the train (or test) captures and save it with np.save.
        If load=True, don't compute anything. Just load the matrix from the given filename.

        The result is stored in 'self.final_features_train' or 'self.final_features_test'.
        Test features are encoded with the vocabulary gathered while extracting the training
        features, so extract_features(train=True, load=False) has to be run first.

        Raises:
            AttributeError: in test mode when the training vocabulary is not available.
        """
        target = 'final_features_train' if train else 'final_features_test'
        if load:
            setattr(self, target, np.load(final_features_filename))
            return
        if not train and not hasattr(self, 'all_cap_features_train'):
            raise AttributeError(
                "all_cap_features_train is not set: run extract_features(train=True, load=False) "
                "before extracting the test features"
            )

        self.init_cap_features(single=False, train=train)
        filenames = self.filenames_train if train else self.filenames_test
        intermediate_features = []
        for i, filename in enumerate(filenames):
            print(f'{i = }, {filename = }')
            self.read_single_pcap(filename, train=train)
            intermediate_features.append(copy.deepcopy(self.single_cap_features))
            if train:
                # Accumulate the vocabulary of every categorical feature over all training captures.
                for key in self.all_cap_features_train.keys():
                    if not key.endswith(self._SCALAR_SUFFIXES):
                        self.all_cap_features_train[key] += self.single_cap_features[key]

        print('In the final post-processing loop of self.extract_features')
        final_features = self._vectorize(intermediate_features)
        setattr(self, target, final_features)
        np.save(final_features_filename, final_features)
        print(f'self.{target}.shape = {final_features.shape}')

# Point the loader at both archives, list their captures, then print the counts.
torfiles = TorFiles(
    file_directory_train='open-train.tar.gz',
    file_directory_test='open-test.tar.gz',
)
torfiles()
print(str(torfiles))

Number of training example: 590
Number of test example: 591
Current RAM usage: 0.15 / 24 GB (0.6 %)


In [5]:
# Compute the training feature matrix from the captures and save it to disk.
# Alternative output name: 'final_features_train.npy'
torfiles.extract_features(
    final_features_filename='final_features_train_10.npy',
    train=True,
    load=False,
)

i = 0, filename = 'train/108.pcap', label = ''
i = 1, filename = 'train/109.pcap', label = ''
i = 2, filename = 'train/11.pcap', label = ''
i = 3, filename = 'train/110.pcap', label = ''
One retransmission here.
i = 4, filename = 'train/111.pcap', label = ''
i = 5, filename = 'train/112.pcap', label = ''
i = 6, filename = 'train/113.pcap', label = ''
i = 7, filename = 'train/114.pcap', label = ''
i = 8, filename = 'train/115.pcap', label = ''
i = 9, filename = 'train/116.pcap', label = ''
i = 10, filename = 'train/117.pcap', label = ''
i = 11, filename = 'train/118.pcap', label = ''
i = 12, filename = 'train/119.pcap', label = ''
i = 13, filename = 'train/12.pcap', label = ''
i = 14, filename = 'train/120.pcap', label = ''
i = 15, filename = 'train/121.pcap', label = ''
i = 16, filename = 'train/122.pcap', label = ''
i = 17, filename = 'train/123.pcap', label = ''
i = 18, filename = 'train/124.pcap', label = ''
i = 19, filename = 'train/125.pcap', label = ''
i = 20, filename = 'train/1

## 2. Model training and selection

In [71]:
# Three outlier detectors are fitted on the training features. `predict` returns +1 for
# captures considered normal and -1 for captures considered outliers.
# Evaluation idea (not run here): also predict the features of task 1, that is the censored
# websites ('final_features_train1.npy' + 'final_features_test1.npy'), to see if they all
# are detected as outliers.

SEED = 42  # fixed seed so that the randomised Isolation Forest is reproducible
X_train = torfiles.final_features_train

###
# One-Class SVM
###

ocsvm_model = OneClassSVM(nu=0.5, kernel="rbf", gamma='scale')
ocsvm_model.fit(X_train)
y_pred0_oc = ocsvm_model.predict(X_train)

###
# Isolation Forest
###

if_model = IsolationForest(contamination=0.1, random_state=SEED)
if_model.fit(X_train)
y_pred0_if = if_model.predict(X_train)

###
# Local Outlier Factor (LOF)
###

# Previously tried: LocalOutlierFactor(n_neighbors=600, contamination=0.0001)
lof_model = LocalOutlierFactor(n_neighbors=20, contamination=0.0001, novelty=True)
lof_model.fit(X_train)
# NOTE(review): with novelty=True scikit-learn advises using predict on new data only;
# predictions on the training set itself should be read with care.
y_pred0_lof = lof_model.predict(X_train)
y_pred0 = y_pred0_lof  # former name, kept in case other cells still read it

###
# DBSCAN
###

# DBSCAN(eps=.1) was considered, but the estimator has no `predict` method, so it cannot
# label captures it was not fitted on; it is left out.

## 3. Prediction on test dataset + save submission

In [72]:
# Number of training captures the One-Class SVM labels -1.
np.sum(y_pred0_oc == -1)

296

In [73]:
##### Extraction of test features #####

# torfiles.extract_features('final_features_test.npy', train=False, load=False)
# torfiles.extract_features('final_features_test_2.npy', train=False, load=False)

##### Predict of test dataset  #####

# Load the test features from the file written by the "missing files" section instead of
# relying on the in-memory `features_test`, which is only defined further down the notebook.
X_test = np.load('final_features_test_9_591.npy')
y_pred = ocsvm_model.predict(X_test)

# Code specific to unit4-fingerprint.ipynb
# with open('class_dict.json', 'r') as file:
#     class_encoder_lukas = json.load(file)
# y_pred = torfiles.decode_labels(y_pred)
# y_pred = [class_encoder_lukas[website] for website in y_pred]
# y_pred = np.array(y_pred)

In [74]:
# Number of test captures labelled -1 by the model used for `y_pred`.
np.sum(y_pred == -1)

307

In [75]:
##### Save submission #####

# List the captures of the test archive; directory entries are skipped whatever their name.
with tarfile.open('open-test.tar.gz', 'r:gz') as tar:
    filenames = [member.name for member in tar.getmembers() if member.isfile()]
filenames_test = filenames
print(len(filenames_test))

# Every capture needs exactly one prediction, otherwise the rows below would be misaligned.
# NOTE(review): this also assumes the rows of `y_pred` follow the archive order - confirm.
assert len(y_pred) == len(filenames_test), (
    f'{len(y_pred)} predictions for {len(filenames_test)} files'
)

# Write the prediction as expected output: "<file name>;<1 if outlier else 0>"
submission = [
    filename[filename.find('/') + 1:] + ';' + ('1' if label == -1 else '0')
    for filename, label in zip(filenames_test, y_pred)
]
# Save the output as a text file
print(f'Length of our submission: {len(submission)} | Length of zip file: {len(filenames_test)}')
np.savetxt('output_ocsvm_600_05.csv', np.array(submission), fmt='%s', delimiter=',')

600
Length of our submission: 600 | Length of zip file: 600


### Deal with the 9 missing test files

In [37]:
# Keep a reference to the test feature matrix currently held by `torfiles`:
# a later extract_features(train=False) call rebinds torfiles.final_features_test.
final_features_test = torfiles.final_features_test

In [39]:
# List the files of the test archive; directory entries are skipped whatever their name
# (the former check only removed a leading entry called 'test').
with tarfile.open('open-test.tar.gz', 'r:gz') as tar:
    filenames = [member.name for member in tar.getmembers() if member.isfile()]

In [40]:
# Restrict the test file list to the first 9 archive entries
# (presumably the captures missing from the earlier 591-row extraction).
torfiles.filenames_test = filenames_rem = filenames[:9]
torfiles.filenames_test

['test/1.pcap',
 'test/10.pcap',
 'test/100.pcap',
 'test/101.pcap',
 'test/102.pcap',
 'test/103.pcap',
 'test/104.pcap',
 'test/105.pcap',
 'test/106.pcap']

In [41]:
# Keep the counter in sync with the file list instead of repeating its length by hand.
torfiles.nb_file_test = len(torfiles.filenames_test)

In [42]:
# Extract the features of the files currently listed in torfiles.filenames_test and save them.
torfiles.extract_features(
    final_features_filename='final_features_test_rem.npy',
    train=False,
    load=False,
)

i = 0, filename = 'test/1.pcap'
i = 1, filename = 'test/10.pcap'
i = 2, filename = 'test/100.pcap'
i = 3, filename = 'test/101.pcap'
i = 4, filename = 'test/102.pcap'
i = 5, filename = 'test/103.pcap'
i = 6, filename = 'test/104.pcap'
i = 7, filename = 'test/105.pcap'
i = 8, filename = 'test/106.pcap'
In the final post-processing loop of self.extract_features
self.final_features_test.shape = (9, 2357)


In [43]:
# Feature matrix produced by the extraction just above.
final_features_test_rem = torfiles.final_features_test
np.shape(final_features_test_rem)

(9, 2357)

In [45]:
# Stack the rows of the remaining files on top of the rows extracted earlier.
features_test = np.vstack((final_features_test_rem, final_features_test))
np.shape(features_test)

(600, 2357)

In [46]:
# Persist the combined test feature matrix.
np.save(file='final_features_test_9_591.npy', arr=features_test)