In [1]:
import numpy as np
import pandas as pd
import os
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from tqdm import tqdm

df = pd.read_csv('drive/MyDrive/ML_PROJECT_DATASET/data.csv', error_bad_lines=False)
df = df.dropna()

X = df["password"]
y = df["strength"]

n = 100000
random_samples = df.sample(n, random_state=42)

X_random = random_samples["password"]
y_random = random_samples["strength"]

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X_random, y_random, test_size=0.2)

# Vectorize the password data
vectorizer = CountVectorizer(analyzer="char")
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

y_train_array = y_train.to_numpy()
y_test_array = y_test.to_numpy()

y_train_tensor = torch.tensor(y_train_array, dtype=torch.int64)
y_test_tensor = torch.tensor(y_test_array, dtype=torch.int64)
X_train_tensor = torch.tensor(X_train_vec.toarray(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_vec.toarray(), dtype=torch.float32)



  df = pd.read_csv('drive/MyDrive/ML_PROJECT_DATASET/data.csv', error_bad_lines=False)
Skipping line 2810: expected 2 fields, saw 5
Skipping line 4641: expected 2 fields, saw 5
Skipping line 7171: expected 2 fields, saw 5
Skipping line 11220: expected 2 fields, saw 5
Skipping line 13809: expected 2 fields, saw 5
Skipping line 14132: expected 2 fields, saw 5
Skipping line 14293: expected 2 fields, saw 5
Skipping line 14865: expected 2 fields, saw 5
Skipping line 17419: expected 2 fields, saw 5
Skipping line 22801: expected 2 fields, saw 5
Skipping line 25001: expected 2 fields, saw 5
Skipping line 26603: expected 2 fields, saw 5
Skipping line 26742: expected 2 fields, saw 5
Skipping line 29702: expected 2 fields, saw 5
Skipping line 32767: expected 2 fields, saw 5
Skipping line 32878: expected 2 fields, saw 5
Skipping line 35643: expected 2 fields, saw 5
Skipping line 36550: expected 2 fields, saw 5
Skipping line 38732: expected 2 fields, saw 5
Skipping line 40567: expected 2 fields, 

In [2]:
"""First NN: simple model with a single hidden layer"""

class PasswordClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(PasswordClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # self.activation1 = nn.Tanh() # , nn.LeakyReLU() ecc.
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        # x = self.activation1(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

input_size = X_train_tensor.shape[1]
hidden_size = 64
output_size = len(label_encoder.classes_)

model = PasswordClassifier(input_size, hidden_size, output_size)

In [None]:
"""Second NN: model with two hidden layer"""

class PasswordClassifier(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(PasswordClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x

input_size = X_train_tensor.shape[1]
hidden_size1 = 64
hidden_size2 = 32

output_size = len(label_encoder.classes_)
model = PasswordClassifier(input_size, hidden_size1, hidden_size2, output_size)

In [3]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 2
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(num_epochs):
    progress_bar = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]")
    for batch_X, batch_y in progress_bar:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        progress_bar.set_postfix({"Loss": f"{loss.item():.4f}"})

model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, predicted = torch.max(test_outputs, 1)
    accuracy = accuracy_score(y_test, predicted)
    print(f"Test Accuracy: {accuracy:.4f}")


Epoch [1/2]: 100%|██████████| 2500/2500 [00:07<00:00, 321.66it/s, Loss=0.7177]
Epoch [2/2]: 100%|██████████| 2500/2500 [00:08<00:00, 302.22it/s, Loss=0.7418]


Test Accuracy: 0.8798


In [5]:
test_password = "To#rkw1zxXzfY7$4^*89bHs7Xb5!#A!4Ve8xGb4jW9arQdU61k" # strong
#test_password = "LIserEptInguISEd" # medium
#test_password = "hello"  # weak

test_password_vec = vectorizer.transform([test_password])
test_password_tensor = torch.tensor(test_password_vec.toarray(), dtype=torch.float32)

model.eval()
with torch.no_grad():
    prediction = model(test_password_tensor)
    _, predicted_class = torch.max(prediction, 1)

predicted_class_label = label_encoder.inverse_transform(predicted_class.numpy())

print(f"The predicted strength of the password '{test_password}' is: {predicted_class_label[0]}")


The predicted strength of the password 'To#rkw1zxXzfY7$4^*89bHs7Xb5!#A!4Ve8xGb4jW9arQdU61k' is: 1
