In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Set random seed for reproducibility
np.random.seed(42)

# Generate synthetic Bluetooth log data
num_samples = 5000  # Total logs
attack_ratio = 0.1  # 10% attack logs

# Features: Device ID, Connection Duration, Pairing Attempts, Unauthorized Access, RSSI Signal Strength
device_ids = np.random.randint(1000, 2000, num_samples)
connection_duration = np.random.randint(1, 300, num_samples)  # in seconds
pairing_attempts = np.random.randint(1, 5, num_samples)
unauthorized_access = np.random.choice([0, 1], size=num_samples, p=[0.95, 0.05])
rssi_signal = np.random.uniform(-90, -30, num_samples)  # RSSI (dBm)

# Generate attack logs (high pairing attempts, unauthorized access, abnormal connection time)
attack_indices = np.random.choice(range(num_samples), int(num_samples * attack_ratio), replace=False)
pairing_attempts[attack_indices] = np.random.randint(5, 15, len(attack_indices))
unauthorized_access[attack_indices] = 1
connection_duration[attack_indices] = np.random.randint(300, 600, len(attack_indices))

# Labels: 0 (Normal), 1 (Attack)
labels = np.zeros(num_samples)
labels[attack_indices] = 1

# Create DataFrame
bluetooth_logs = pd.DataFrame({
    'Device_ID': device_ids,
    'Connection_Duration': connection_duration,
    'Pairing_Attempts': pairing_attempts,
    'Unauthorized_Access': unauthorized_access,
    'RSSI_Signal': rssi_signal,
    'Label': labels  # 0 = Normal, 1 = Attack
})

# Select features and labels
features = ['Connection_Duration', 'Pairing_Attempts', 'Unauthorized_Access', 'RSSI_Signal']
X = bluetooth_logs[features].values
y = bluetooth_logs['Label'].values

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Reshape for Transformer (samples, timesteps, features)
X_reshaped = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y, test_size=0.2, random_state=42)

# Define Transformer block
def transformer_block(inputs, num_heads=4, ff_dim=32, dropout=0.1):
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)(inputs, inputs)
    attn_output = Dropout(dropout)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(inputs + attn_output)

    ffn_output = Dense(ff_dim, activation='relu')(out1)
    ffn_output = Dense(inputs.shape[-1])(ffn_output)
    ffn_output = Dropout(dropout)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

    return out2

# Model Input
inputs = Input(shape=(1, X_reshaped.shape[2]))
x = transformer_block(inputs)
x = GlobalAveragePooling1D()(x)
x = Dense(25, activation='relu')(x)
x = Dropout(0.2)(x)
outputs = Dense(1, activation='sigmoid')(x)

# Build Model
model = Model(inputs=inputs, outputs=outputs)

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')


Epoch 1/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 13ms/step - accuracy: 0.9245 - loss: 0.4086 - val_accuracy: 0.9960 - val_loss: 0.0724
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.9980 - loss: 0.0611 - val_accuracy: 1.0000 - val_loss: 0.0137
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9987 - loss: 0.0208 - val_accuracy: 1.0000 - val_loss: 0.0061
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9985 - loss: 0.0115 - val_accuracy: 1.0000 - val_loss: 0.0029
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9994 - loss: 0.0074 - val_accuracy: 1.0000 - val_loss: 0.0035
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 1.0000 - loss: 0.0046 - val_accuracy: 0.9990 - val_loss: 0.0019
Epoch 7/20
[1m125/125[0m 

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# Load BrakTooth Attack Dataset
file_path = 'BrakTooth_Dataset.xlsx'  # Update with actual file path
data = pd.read_excel(file_path)

# Display dataset info
data.info()

# Select relevant features based on dataset columns
features = ['Length', 'Delta']  # Using numerical features
label_column = 'Type'  # Assuming 'Type' is the attack label

# Extract feature matrix and labels
X = data[features].values

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data[label_column])

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Reshape for Transformer (samples, timesteps, features)
X_reshaped = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y, test_size=0.2, random_state=42)

# Define Transformer block
def transformer_block(inputs, num_heads=4, ff_dim=32, dropout=0.1):
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)(inputs, inputs)
    attn_output = Dropout(dropout)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(inputs + attn_output)

    ffn_output = Dense(ff_dim, activation='relu')(out1)
    ffn_output = Dense(inputs.shape[-1])(ffn_output)
    ffn_output = Dropout(dropout)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

    return out2

# Model Input
inputs = Input(shape=(1, X_reshaped.shape[2]))
x = transformer_block(inputs)
x = GlobalAveragePooling1D()(x)
x = Dense(25, activation='relu')(x)
x = Dropout(0.2)(x)
outputs = Dense(len(label_encoder.classes_), activation='softmax')(x)  # Multi-class classification

# Build Model
model = Model(inputs=inputs, outputs=outputs)

# Compile model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9002 entries, 0 to 9001
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Protocol  9002 non-null   object 
 1   Info      9002 non-null   object 
 2   Length    9002 non-null   int64  
 3   Delta     9002 non-null   float64
 4   Type      9002 non-null   object 
dtypes: float64(1), int64(1), object(3)
memory usage: 351.8+ KB
Epoch 1/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.3721 - loss: 2.0856 - val_accuracy: 0.6891 - val_loss: 1.0926
Epoch 2/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6952 - loss: 1.1307 - val_accuracy: 0.6891 - val_loss: 1.0550
Epoch 3/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6944 - loss: 1.0893 - val_accuracy: 0.6891 - val_loss: 1.0512
Epoch 4/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D, Conv1D, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# Load BrakTooth Attack Dataset
file_path = 'BrakTooth_Dataset.xlsx'  # Update with actual file path
data = pd.read_excel(file_path)

# Display dataset info
data.info()

# Select relevant features based on dataset columns
features = ['Length', 'Delta']  # Using numerical features
label_column = 'Type'  # Assuming 'Type' is the attack label

# Extract feature matrix and labels
X = data[features].values

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data[label_column])

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Reshape for Transformer (samples, timesteps, features)
X_reshaped = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y, test_size=0.2, random_state=42)

# Define Transformer block
def transformer_block(inputs, num_heads=8, ff_dim=64, dropout=0.2):
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)(inputs, inputs)
    attn_output = Dropout(dropout)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(inputs + attn_output)

    ffn_output = Dense(ff_dim, activation='relu')(out1)
    ffn_output = Dense(inputs.shape[-1])(ffn_output)
    ffn_output = Dropout(dropout)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

    return out2

# Model Input
inputs = Input(shape=(1, X_reshaped.shape[2]))
x = Conv1D(filters=32, kernel_size=1, activation='relu')(inputs)  # Added CNN layer for feature extraction
x = BatchNormalization()(x)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dense(50, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(len(label_encoder.classes_), activation='softmax')(x)  # Multi-class classification

# Build Model
model = Model(inputs=inputs, outputs=outputs)

# Compile model with improved optimizer and learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model with increased epochs and batch size
model.fit(X_train, y_train, epochs=50, batch_size=64, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9002 entries, 0 to 9001
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Protocol  9002 non-null   object 
 1   Info      9002 non-null   object 
 2   Length    9002 non-null   int64  
 3   Delta     9002 non-null   float64
 4   Type      9002 non-null   object 
dtypes: float64(1), int64(1), object(3)
memory usage: 351.8+ KB
Epoch 1/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.5394 - loss: 1.6462 - val_accuracy: 0.6891 - val_loss: 1.0803
Epoch 2/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - accuracy: 0.6892 - loss: 1.0929 - val_accuracy: 0.6891 - val_loss: 1.0674
Epoch 3/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.6995 - loss: 1.0597 - val_accuracy: 0.6891 - val_loss: 1.0531
Epoch 4/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [None]:
!pip install pyshark

Collecting pyshark
  Downloading pyshark-0.6-py3-none-any.whl.metadata (806 bytes)
Collecting appdirs (from pyshark)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Downloading pyshark-0.6-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: appdirs, pyshark
Successfully installed appdirs-1.4.4 pyshark-0.6


In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [None]:
!unzip "/content/drive/My Drive/Bluebugging.zip" -d "/content/"

Archive:  /content/drive/My Drive/Bluebugging.zip
   creating: /content/Bluebugging/
   creating: /content/Bluebugging/train/
  inflating: /content/Bluebugging/train/Bluetooth_Benign_train.pcap  
  inflating: /content/Bluebugging/train/Bluetooth_DoS_train.pcap  
   creating: /content/Bluebugging/test/
  inflating: /content/Bluebugging/test/Bluetooth_Benign_test.pcap  
  inflating: /content/Bluebugging/test/Bluetooth_DoS_test.pcap  


In [None]:
!apt-get update
!apt-get install -y tshark


0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:6 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:10 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [8,784 kB]
Get:11 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,239 kB]
Get:12 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:13 http://archive.ubuntu.com/ub

In [None]:
!setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /usr/bin/dumpcap


In [None]:
!tshark -v


Running as user "root" and group "root". This could be dangerous.
TShark (Wireshark) 3.6.2 (Git v3.6.2 packaged as 3.6.2-2)

Copyright 1998-2022 Gerald Combs <gerald@wireshark.org> and contributors.
License GPLv2+: GNU GPL version 2 or later <https://www.gnu.org/licenses/gpl-2.0.html>
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

Compiled (64-bit) using GCC 11.2.0, with libpcap, with POSIX capabilities
(Linux), with libnl 3, with GLib 2.71.2, with zlib 1.2.11, with Lua 5.2.4, with
GnuTLS 3.7.3 and PKCS #11 support, with Gcrypt 1.9.4, with MIT Kerberos, with
MaxMind DB resolver, with nghttp2 1.43.0, with brotli, with LZ4, with Zstandard,
with Snappy, with libxml2 2.9.12, with libsmi 0.4.8.

Running on Linux 6.1.85+, with Intel(R) Xeon(R) CPU @ 2.20GHz (with SSE4.2),
with 12978 MB of physical memory, with GLib 2.72.4, with zlib 1.2.11, with
libpcap 1.10.1 (with TPACKET_V3), with c-are

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D, Conv1D, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import pyshark
import os
import nest_asyncio

nest_asyncio.apply()

# Function to extract features from PCAP file
def extract_pcap_features(pcap_file, label):
    cap = pyshark.FileCapture(pcap_file, display_filter="bluetooth")
    data = []

    for pkt in cap:
        try:
            protocol = pkt.highest_layer
            length = int(pkt.length)
            timestamp = float(pkt.sniff_time.timestamp())

            data.append([protocol, length, timestamp, label])
        except AttributeError:
            continue

    cap.close()
    return pd.DataFrame(data, columns=['Protocol', 'Length', 'Timestamp', 'Label'])

# Define dataset paths
data_folder = "/content/Bluebugging/"
train_files = {
    "Benign": os.path.join(data_folder, "train", "Bluetooth_Benign_train.pcap"),
    "Attack": os.path.join(data_folder, "train", "Bluetooth_DoS_train.pcap")
}
test_files = {
    "Benign": os.path.join(data_folder, "test", "Bluetooth_Benign_test.pcap"),
    "Attack": os.path.join(data_folder, "test", "Bluetooth_DoS_test.pcap")
}

# Process train data
data_train = pd.concat([
    extract_pcap_features(train_files["Benign"], "Benign"),
    extract_pcap_features(train_files["Attack"], "Attack")
])

# Process test data
data_test = pd.concat([
    extract_pcap_features(test_files["Benign"], "Benign"),
    extract_pcap_features(test_files["Attack"], "Attack")
])

# Select relevant features
features = ['Length', 'Timestamp']  # Numerical features
label_column = 'Label'  # Classification target

# Encode labels
label_encoder = LabelEncoder()
data_train[label_column] = label_encoder.fit_transform(data_train[label_column])
data_test[label_column] = label_encoder.transform(data_test[label_column])

# Extract feature matrix and labels
X_train = data_train[features].values
y_train = data_train[label_column].values
X_test = data_test[features].values
y_test = data_test[label_column].values

# Normalize features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape for Transformer (samples, timesteps, features)
X_train_reshaped = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# Define Transformer block
def transformer_block(inputs, num_heads=8, ff_dim=64, dropout=0.2):
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)(inputs, inputs)
    attn_output = Dropout(dropout)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(inputs + attn_output)

    ffn_output = Dense(ff_dim, activation='relu')(out1)
    ffn_output = Dense(inputs.shape[-1])(ffn_output)
    ffn_output = Dropout(dropout)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

    return out2

# Model Input
inputs = Input(shape=(1, X_train_reshaped.shape[2]))
x = Conv1D(filters=32, kernel_size=1, activation='relu')(inputs)  # Added CNN layer for feature extraction
x = BatchNormalization()(x)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dense(50, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(len(label_encoder.classes_), activation='softmax')(x)  # Binary classification

# Build Model
model = Model(inputs=inputs, outputs=outputs)

# Compile model with improved optimizer and learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model with increased epochs and batch size
model.fit(X_train_reshaped, y_train, epochs=50, batch_size=64, validation_data=(X_test_reshaped, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test_reshaped, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')


Epoch 1/50
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 16ms/step - accuracy: 0.9944 - loss: 0.0227 - val_accuracy: 0.7941 - val_loss: 1.8883
Epoch 2/50
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 15ms/step - accuracy: 0.9993 - loss: 0.0045 - val_accuracy: 0.7941 - val_loss: 1.2718
Epoch 3/50
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 16ms/step - accuracy: 0.9995 - loss: 0.0034 - val_accuracy: 0.7940 - val_loss: 1.1927
Epoch 4/50
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m293s[0m 15ms/step - accuracy: 0.9995 - loss: 0.0033 - val_accuracy: 0.8585 - val_loss: 0.7142
Epoch 5/50
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m291s[0m 15ms/step - accuracy: 0.9996 - loss: 0.0012 - val_accuracy: 0.9788 - val_loss: 0.3689
Epoch 6/50
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m323s[0m 15ms/step - accuracy: 0.9998 - loss: 8.8655e-04 - val_accuracy: 0.9789 -

KeyboardInterrupt: 

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D, Conv1D, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import pyshark
import os
import nest_asyncio

nest_asyncio.apply()

def extract_pcap_features(pcap_file, label):
    cap = pyshark.FileCapture(pcap_file, display_filter="bluetooth")
    data = []

    for pkt in cap:
        try:
            protocol = pkt.highest_layer
            length = int(pkt.length)
            timestamp = float(pkt.sniff_time.timestamp())

            data.append([protocol, length, timestamp, label])
        except AttributeError:
            continue

    cap.close()
    return pd.DataFrame(data, columns=['Protocol', 'Length', 'Timestamp', 'Label'])

# Define dataset paths
data_folder = "/content/Bluebugging/"
train_files = {
    "Benign": os.path.join(data_folder, "train", "Bluetooth_Benign_train.pcap"),
    "Attack": os.path.join(data_folder, "train", "Bluetooth_DoS_train.pcap")
}
test_files = {
    "Benign": os.path.join(data_folder, "test", "Bluetooth_Benign_test.pcap"),
    "Attack": os.path.join(data_folder, "test", "Bluetooth_DoS_test.pcap")
}

# Process train data
data_train = pd.concat([
    extract_pcap_features(train_files["Benign"], "Benign"),
    extract_pcap_features(train_files["Attack"], "Attack")
])

# Process test data
data_test = pd.concat([
    extract_pcap_features(test_files["Benign"], "Benign"),
    extract_pcap_features(test_files["Attack"], "Attack")
])

# Select relevant features
features = ['Length', 'Timestamp']  # Numerical features
label_column = 'Label'  # Classification target

# Encode labels
label_encoder = LabelEncoder()
data_train[label_column] = label_encoder.fit_transform(data_train[label_column])
data_test[label_column] = label_encoder.transform(data_test[label_column])

# Extract feature matrix and labels
X_train = data_train[features].values
y_train = data_train[label_column].values
X_test = data_test[features].values
y_test = data_test[label_column].values

# Normalize features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape for Transformer (samples, timesteps, features)
X_train_reshaped = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# Define Transformer block
def transformer_block(inputs, num_heads=8, ff_dim=64, dropout=0.2):
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)(inputs, inputs)
    attn_output = Dropout(dropout)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(inputs + attn_output)

    ffn_output = Dense(ff_dim, activation='relu')(out1)
    ffn_output = Dense(inputs.shape[-1])(ffn_output)
    ffn_output = Dropout(dropout)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

    return out2

# Model Input
inputs = Input(shape=(1, X_train_reshaped.shape[2]))
x = Conv1D(filters=32, kernel_size=1, activation='relu')(inputs)  # Added CNN layer for feature extraction
x = BatchNormalization()(x)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dense(50, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(len(label_encoder.classes_), activation='softmax')(x)  # Binary classification

# Build Model
model = Model(inputs=inputs, outputs=outputs)

# Compile model with improved optimizer and learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model with increased epochs and batch size
model.fit(X_train_reshaped, y_train, epochs=10, batch_size=64, validation_data=(X_test_reshaped, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test_reshaped, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')


Epoch 1/10
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m326s[0m 17ms/step - accuracy: 0.9935 - loss: 0.0228 - val_accuracy: 0.7940 - val_loss: 1.7062
Epoch 2/10
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 16ms/step - accuracy: 0.9991 - loss: 0.0054 - val_accuracy: 0.7939 - val_loss: 1.3847
Epoch 3/10
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 16ms/step - accuracy: 0.9991 - loss: 0.0047 - val_accuracy: 0.7939 - val_loss: 0.4354
Epoch 4/10
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 16ms/step - accuracy: 0.9995 - loss: 0.0028 - val_accuracy: 0.9788 - val_loss: 0.3632
Epoch 5/10
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 16ms/step - accuracy: 0.9996 - loss: 0.0017 - val_accuracy: 0.7939 - val_loss: 1.2525
Epoch 6/10
[1m18999/18999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m303s[0m 16ms/step - accuracy: 0.9996 - loss: 0.0017 - val_accuracy: 0.7941 - val