# Password Strength Checker using Python

Author:Claudia Magliano

Date: 18/04/2024


In [3]:
import pandas as pd
import numpy as np
import io
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

Get the dataset from kaggle:
https://www.kaggle.com/datasets/bhavikbb/password-strength-classifier-dataset

There are lines with errors so we have to ignore them.

In [6]:
#data = pd.read_csv("data.csv")
def func(x):
    return x[0:1]
# df = pd.read_csv(io.StringIO(t), converters={'column': func})
data = pd.read_csv('data.csv', on_bad_lines = "skip", names=['Password','Strength'], skiprows =1,
                    converters={'Strength': func})
#data = pd.read_csv('data.csv', delimiter='\t', names=(range(1)))
print(data.head())

      Password Strength
0     kzde5577        1
1     kino3434        1
2    visi7k1yr        1
3     megzy123        1
4  lamborghin1        1


In [7]:
data.dtypes

Password    object
Strength    object
dtype: object

In [8]:
data.shape

(320239, 2)

The dataset has two columns; password and strength. In the strength column:

0 means: the password’s strength is weak;

1 means: the password’s strength is medium;

2 means: the password’s strength is strong;

Before moving forward, I will convert 0, 1, and 2 values in the strength column to weak, medium, and strong:

In [9]:
data.Strength.value_counts()

Strength
1    237643
0     42869
2     39657
         70
Name: count, dtype: int64

In [10]:
data.Strength = data.Strength.replace('', np.nan).dropna()

In [11]:
data.isna().sum()

Password     0
Strength    70
dtype: int64

In [12]:
data.shape

(320239, 2)

In [13]:
data = data.dropna(axis=0, how='any')

In [14]:
data.shape

(320169, 2)

# Password Strength Prediction Model
Now let’s move to train a machine learning model to predict the strength of the password. Before we start preparing the model, we need to tokenize the passwords as we need the model to learn from the combinations of digits, letters, and symbols to predict the password’s strength. So here’s how we can tokenize and split the data into training and test sets:

In [15]:
def word(password):
    character=[]
    for i in password:
        character.append(i)
    return character

x = np.array(data["Password"])
y = np.array(data["Strength"])

tdif = TfidfVectorizer(tokenizer=word)
x = tdif.fit_transform(x)
xtrain, xtest, ytrain, ytest = train_test_split(x, y,
                                                test_size=0.05,
                                                random_state=42)



In [16]:
indices = np.mat([xtrain.nonzero()[0], xtrain.nonzero()[1]]).transpose()
values = xtrain.data
dense_shape = xtrain.shape

xtrain_tensor = tf.sparse.SparseTensor(indices=indices,
                                  values=values,
                                  dense_shape=dense_shape)

In [17]:
indices = np.mat([xtest.nonzero()[0], xtest.nonzero()[1]]).transpose()
values = xtest.data
dense_shape = xtest.shape

xtest_tensor = tf.sparse.SparseTensor(indices=indices,
                                  values=values,
                                  dense_shape=dense_shape)

In [18]:
ytrain = ytrain.astype('int')
ytest = ytest.astype('int')

Now here’s how to train a classification model to predict the strength of the password:

In [20]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(xtest_tensor.shape[1],)),  # Primeira camada densa com 64 neurônios
    Dense(32, activation='relu'),  # Segunda camada densa com 32 neurônios
    Dense(3, activation='softmax')  # Camada de saída com 3 neurônios, um para cada classe
])

# Compilação do modelo
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Resumo do modelo
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                7872      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 3)                 99        
                                                                 
Total params: 10051 (39.26 KB)
Trainable params: 10051 (39.26 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [21]:
xtrain_tensor = tf.sparse.reorder(xtrain_tensor)
xtest_tensor = tf.sparse.reorder(xtest_tensor)

In [22]:
# Treinamento do modelo
history = model.fit(xtrain_tensor, ytrain, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


Now here’s how we can check the strength of a password using the trained model:

In [23]:
import getpass
user = getpass.getpass("Enter Password: ")
data = tdif.transform([user]).toarray()
output = model.predict(data)
print(output)

Enter Password: ··········
[[7.6481587e-22 5.2400813e-03 9.9475986e-01]]


# Summary
So this is how you can use machine learning to create a password’s strength checker using the Python programming language. A password strength checker works by understanding the combination of digits, letters, and special symbols you use in your password. I hope you liked this article on creating a password’s strength checker with Machine Learning using Python. Feel free to ask valuable questions in the comments section below.