In [1]:
# Load MATLAB file containing the dataset into Python
import scipy.io
import urllib.request
import os
matlab_file = 'url.mat'

# Download the dataset from the following URL: https://www.sysnet.ucsd.edu/projects/url/url.mat

if(not os.path.exists(matlab_file)):
    download = 'https://www.sysnet.ucsd.edu/projects/url/url.mat'
    print('Downloading the dataset from the following URL: ', download )
    urllib.request.urlretrieve(download, matlab_file)
    print('The dataset has been downloaded successfully.')
# Save the dataset in the same directory as the Python script

print('Loading the MATLAB file containing the dataset...')

data = scipy.io.loadmat(matlab_file)

print('The MATLAB file has been loaded.')
print(data.keys())

The MATLAB file has been loaded.
dict_keys(['__header__', '__version__', '__globals__', 'FeatureTypes', 'Day120', 'Day119', 'Day118', 'Day117', 'Day116', 'Day115', 'Day114', 'Day113', 'Day112', 'Day111', 'Day110', 'Day109', 'Day108', 'Day107', 'Day106', 'Day105', 'Day104', 'Day103', 'Day102', 'Day101', 'Day100', 'Day99', 'Day98', 'Day97', 'Day96', 'Day95', 'Day94', 'Day93', 'Day92', 'Day91', 'Day90', 'Day89', 'Day88', 'Day87', 'Day86', 'Day85', 'Day84', 'Day83', 'Day82', 'Day81', 'Day80', 'Day79', 'Day78', 'Day77', 'Day76', 'Day75', 'Day74', 'Day73', 'Day72', 'Day71', 'Day70', 'Day69', 'Day68', 'Day67', 'Day66', 'Day65', 'Day64', 'Day63', 'Day62', 'Day61', 'Day60', 'Day59', 'Day58', 'Day57', 'Day56', 'Day55', 'Day54', 'Day53', 'Day52', 'Day51', 'Day50', 'Day49', 'Day48', 'Day47', 'Day46', 'Day45', 'Day44', 'Day43', 'Day42', 'Day41', 'Day40', 'Day39', 'Day38', 'Day37', 'Day36', 'Day35', 'Day34', 'Day33', 'Day32', 'Day31', 'Day30', 'Day29', 'Day28', 'Day27', 'Day26', 'Day25', 'Day24', 'D

In [2]:
import torch

# Load the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)

Device: cuda


In [3]:

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Extract relevant information
FeatureTypes = data['FeatureTypes']

# Initialize lists to store preprocessed data
X_all = []
y_all = []

# Iterate over each day
for day in range(120):
    day_str = "Day{}".format(day)
    if day_str in data:
        X_day = data[day_str]['data'][0, 0]
        y_day = data[day_str]['labels'][0, 0]
        
        # Convert to PyTorch tensors
        X_day = torch.tensor(X_day.shape, dtype=torch.float32)
        y_day = torch.tensor(y_day.shape, dtype=torch.float32)
        
        # Normalize the data
        scaler = StandardScaler()
        X_day_normalized = scaler.fit_transform(X_day.numpy().reshape(-1, 1))
        X_day = torch.tensor(X_day_normalized, dtype=torch.float32)
        
        X_all.append(X_day)
        y_all.append(y_day)

# Concatenate data from all days
X_all_concatenated = torch.cat(X_all, dim=0)
y_all_concatenated = torch.cat(y_all, dim=0)

print('X_all_concatenated shape:', X_all_concatenated.shape)
print('y_all_concatenated shape:', y_all_concatenated.shape)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_all_concatenated, y_all_concatenated, test_size=0.2, random_state=42, shuffle=True)

print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

X_all_concatenated shape: torch.Size([240, 1])
y_all_concatenated shape: torch.Size([240])
X_train shape: torch.Size([192, 1])
X_test shape: torch.Size([48, 1])
y_train shape: torch.Size([192])
y_test shape: torch.Size([48])


In [4]:
# Print all labels in the test and training sets
print('y_train:', y_train)
print('y_test:', y_test)

y_train: tensor([1.0000e+00, 2.0000e+04, 2.0000e+04, 2.0000e+04, 1.0000e+00, 1.0000e+00,
        2.0000e+04, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 2.0000e+04,
        1.0000e+00, 2.0000e+04, 2.0000e+04, 1.0000e+00, 1.0000e+00, 2.0000e+04,
        2.0000e+04, 2.0000e+04, 1.0000e+00, 2.0000e+04, 1.0000e+00, 1.0000e+00,
        1.0000e+00, 2.0000e+04, 1.0000e+00, 2.0000e+04, 1.0000e+00, 2.0000e+04,
        2.0000e+04, 1.0000e+00, 1.0000e+00, 1.0000e+00, 2.0000e+04, 2.0000e+04,
        2.0000e+04, 1.0000e+00, 2.0000e+04, 1.0000e+00, 2.0000e+04, 2.0000e+04,
        1.0000e+00, 2.0000e+04, 1.0000e+00, 1.0000e+00, 1.0000e+00, 2.0000e+04,
        2.0000e+04, 1.0000e+00, 2.0000e+04, 2.0000e+04, 2.0000e+04, 1.0000e+00,
        2.0000e+04, 1.0000e+00, 1.0000e+00, 2.0000e+04, 2.0000e+04, 1.6000e+04,
        2.0000e+04, 1.0000e+00, 2.0000e+04, 2.0000e+04, 1.0000e+00, 2.0000e+04,
        1.0000e+00, 1.3000e+02, 1.0000e+00, 1.0000e+00, 2.0000e+04, 2.0000e+04,
        2.0000e+04, 2.0000e+04,

In [5]:
import torch.nn as nn

# Define the neural network architecture
class URLClassifier(nn.Module):
    def __init__(self, num_features):
        super(URLClassifier, self).__init__()
        self.fc1 = nn.Linear(num_features, 128)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.2)
        self.fc3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Create an instance of the neural network
num_features = X_train.shape[1]
model = URLClassifier(num_features)
model.to(device)
print(model)

URLClassifier(
  (fc1): Linear(in_features=1, out_features=128, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (dropout2): Dropout(p=0.2, inplace=False)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [7]:
# Move data and labels to the same device
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

# Define the loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train.unsqueeze(1))

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss for the current epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_loss = criterion(test_outputs, y_test.unsqueeze(1))
    print(f'Test Loss: {test_loss.item():.4f}')

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
