In [1]:
# Load MATLAB file containing the dataset into Python
import scipy.io
import urllib.request
import os
matlab_file = 'url.mat'

# Download the dataset from the following URL: https://www.sysnet.ucsd.edu/projects/url/url.mat

if(not os.path.exists(matlab_file)):
    download = 'https://www.sysnet.ucsd.edu/projects/url/url.mat'
    print('Downloading the dataset from the following URL: ', download )
    urllib.request.urlretrieve(download, matlab_file)
    print('The dataset has been downloaded successfully.')
# Save the dataset in the same directory as the Python script

print('Loading the MATLAB file containing the dataset...')

data = scipy.io.loadmat(matlab_file)

print('The MATLAB file has been loaded.')
print(data.keys())

Loading the MATLAB file containing the dataset...
The MATLAB file has been loaded.
dict_keys(['__header__', '__version__', '__globals__', 'FeatureTypes', 'Day120', 'Day119', 'Day118', 'Day117', 'Day116', 'Day115', 'Day114', 'Day113', 'Day112', 'Day111', 'Day110', 'Day109', 'Day108', 'Day107', 'Day106', 'Day105', 'Day104', 'Day103', 'Day102', 'Day101', 'Day100', 'Day99', 'Day98', 'Day97', 'Day96', 'Day95', 'Day94', 'Day93', 'Day92', 'Day91', 'Day90', 'Day89', 'Day88', 'Day87', 'Day86', 'Day85', 'Day84', 'Day83', 'Day82', 'Day81', 'Day80', 'Day79', 'Day78', 'Day77', 'Day76', 'Day75', 'Day74', 'Day73', 'Day72', 'Day71', 'Day70', 'Day69', 'Day68', 'Day67', 'Day66', 'Day65', 'Day64', 'Day63', 'Day62', 'Day61', 'Day60', 'Day59', 'Day58', 'Day57', 'Day56', 'Day55', 'Day54', 'Day53', 'Day52', 'Day51', 'Day50', 'Day49', 'Day48', 'Day47', 'Day46', 'Day45', 'Day44', 'Day43', 'Day42', 'Day41', 'Day40', 'Day39', 'Day38', 'Day37', 'Day36', 'Day35', 'Day34', 'Day33', 'Day32', 'Day31', 'Day30', 'Day29

In [2]:
import torch

# Load the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)

Device: cuda


In [3]:
print(data['Day2']['labels'][0][0])

[[0]
 [0]
 [1]
 ...
 [1]
 [1]
 [1]]


In [4]:

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Extract relevant information
FeatureTypes = data['FeatureTypes']

# Initialize lists to store preprocessed data
X_all = []
y_all = []

# Iterate over each day
for day in range(120):
    day_str = "Day{}".format(day)
    if day_str in data:
        print('Processing', day_str)
        X_day = data[day_str]['data'][0, 0]
        y_day = data[day_str]['labels'][0, 0]
        
        # Convert to PyTorch tensors
        X_day = torch.tensor(X_day.shape, dtype=torch.float32)
        y_day = torch.tensor(y_day.shape, dtype=torch.float32)
        
        # Normalize the data
        scaler = StandardScaler()
        X_day_normalized = scaler.fit_transform(X_day.numpy().reshape(-1, 1))
        X_day = torch.tensor(X_day_normalized, dtype=torch.float32)
        
        # Normalize the labels
        y_day_normalized = scaler.fit_transform(y_day.numpy().reshape(-1, 1))
        y_day = torch.tensor(y_day_normalized, dtype=torch.float32)
        
        X_all.append(X_day)
        y_all.append(y_day)

# Concatenate data from all days
X_all_concatenated = torch.cat(X_all, dim=0)
y_all_concatenated = torch.cat(y_all, dim=0)

print('X_all_concatenated shape:', X_all_concatenated.shape)
print('y_all_concatenated shape:', y_all_concatenated.shape)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_all_concatenated, y_all_concatenated, test_size=0.2, random_state=42, shuffle=True)

print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

Processing Day0
Processing Day1
Processing Day2
Processing Day3
Processing Day4
Processing Day5
Processing Day6
Processing Day7
Processing Day8
Processing Day9
Processing Day10
Processing Day11
Processing Day12
Processing Day13
Processing Day14
Processing Day15
Processing Day16
Processing Day17
Processing Day18
Processing Day19
Processing Day20
Processing Day21
Processing Day22
Processing Day23
Processing Day24
Processing Day25
Processing Day26
Processing Day27
Processing Day28
Processing Day29
Processing Day30
Processing Day31
Processing Day32
Processing Day33
Processing Day34
Processing Day35
Processing Day36
Processing Day37
Processing Day38
Processing Day39
Processing Day40
Processing Day41
Processing Day42
Processing Day43
Processing Day44
Processing Day45
Processing Day46
Processing Day47
Processing Day48
Processing Day49
Processing Day50
Processing Day51
Processing Day52
Processing Day53
Processing Day54
Processing Day55
Processing Day56
Processing Day57
Processing Day58
Process

In [5]:
# Print all labels in the test and training sets
print('y_train:', y_train)
print('y_test:', y_test)
print('x_train:', X_train)
print('x_test:', X_test)

y_train: tensor([[-1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [-1.],
        [-1.],
        [ 1.],
        [-1.],
        [-1.],
        [-1.],
        [-1.],
        [ 1.],
        [-1.],
        [ 1.],
        [ 1.],
        [-1.],
        [-1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [-1.],
        [ 1.],
        [-1.],
        [-1.],
        [-1.],
        [ 1.],
        [-1.],
        [ 1.],
        [-1.],
        [ 1.],
        [ 1.],
        [-1.],
        [-1.],
        [-1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [-1.],
        [ 1.],
        [-1.],
        [ 1.],
        [ 1.],
        [-1.],
        [ 1.],
        [-1.],
        [-1.],
        [-1.],
        [ 1.],
        [ 1.],
        [-1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [-1.],
        [ 1.],
        [-1.],
        [-1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [-1.],
        [ 1.],
        [ 1.],
        [-1.],
        [ 1.],
 

In [6]:
import torch.nn as nn

# Define the neural network architecture
class URLClassifier(nn.Module):
    def __init__(self, num_features):
        super(URLClassifier, self).__init__()
        self.fc1 = nn.Linear(num_features, 128)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.2)
        self.fc3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Create an instance of the neural network
num_features = X_train.shape[1]
print('Number of features:', num_features)
model = URLClassifier(num_features)
model.to(device)
print(model)

Number of features: 1
URLClassifier(
  (fc1): Linear(in_features=1, out_features=128, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (dropout2): Dropout(p=0.2, inplace=False)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [7]:
import torch.optim as optim

# Assuming you have defined your model and device earlier
# Assuming X_train, X_test, y_train, y_test are already defined

# Move data and labels to the same device 
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

# Define the loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)

    # Ensure the output has the same shape as y_train
    outputs = outputs  # Remove the extra dimension

    loss = criterion(outputs, y_train)  # Remove the extra dimension from y_train as well

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss for the current epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_outputs = test_outputs.squeeze()  # Remove the extra dimension
    test_loss = criterion(test_outputs, y_test.squeeze())  # Remove the extra dimension from y_test as well
    print(f'Test Loss: {test_loss.item():.4f}')

Epoch [1/100], Loss: 0.9741
Epoch [2/100], Loss: 0.9492
Epoch [3/100], Loss: 0.9252
Epoch [4/100], Loss: 0.9009
Epoch [5/100], Loss: 0.8804
Epoch [6/100], Loss: 0.8525
Epoch [7/100], Loss: 0.8312
Epoch [8/100], Loss: 0.8053
Epoch [9/100], Loss: 0.7864
Epoch [10/100], Loss: 0.7660
Epoch [11/100], Loss: 0.7465
Epoch [12/100], Loss: 0.7339
Epoch [13/100], Loss: 0.7109
Epoch [14/100], Loss: 0.6932
Epoch [15/100], Loss: 0.6781
Epoch [16/100], Loss: 0.6623
Epoch [17/100], Loss: 0.6492
Epoch [18/100], Loss: 0.6391
Epoch [19/100], Loss: 0.6267
Epoch [20/100], Loss: 0.6112
Epoch [21/100], Loss: 0.6035
Epoch [22/100], Loss: 0.6001
Epoch [23/100], Loss: 0.5909
Epoch [24/100], Loss: 0.5790
Epoch [25/100], Loss: 0.5733
Epoch [26/100], Loss: 0.5665
Epoch [27/100], Loss: 0.5609
Epoch [28/100], Loss: 0.5582
Epoch [29/100], Loss: 0.5508
Epoch [30/100], Loss: 0.5471
Epoch [31/100], Loss: 0.5423
Epoch [32/100], Loss: 0.5395
Epoch [33/100], Loss: 0.5376
Epoch [34/100], Loss: 0.5335
Epoch [35/100], Loss: 0

In [8]:
# Print accuracy
threshold = 0.5
predictions = (test_outputs > threshold).float()
accuracy = (predictions == y_test).float().mean()
print(f'Accuracy: {accuracy.item():.4f}')

Accuracy: 0.2500
