Cracking a password involves a systematic effort to uncover the secure system's password. This process may include trying common passwords, exploring clearly generated candidate passwords or employing a straightforward brute-force exhaustive search. Enhancing password complexity is crucial to heighten resistance against cracking attempts.

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
%cd /content/drive/My Drive/Colab Notebooks

/content/drive/My Drive/Colab Notebooks


In [12]:

import pandas as pd # import pandas and read  the passwords in the passwordDataset.csv

df = pd.read_csv(
    "passwordDataset.csv", dtype={"password": "str", "strength": "int"}, index_col=None
)

In [None]:
df

Unnamed: 0,password,strength
0,kzde5577,1
1,kino3434,1
2,visi7k1yr,1
3,megzy123,1
4,lamborghin1,1
...,...,...
669634,10redtux10,1
669635,infrared1,1
669636,184520socram,1
669637,marken22a,1


In [13]:
df = df.sample(frac=1) # Shuffle

In [14]:
#It is necessary to partition the DataFrame into two distinct sets – one designated for training purposes and the other for testing.
l = len(df.index)
train_df = df.head(int(l * 0.8))
test_df = df.tail(int(l * 0.2))

In [15]:
#Preparing the labels and feature data for training and testing
y_train = train_df.pop("strength").values
y_test = test_df.pop("strength").values
X_train = train_df.values.flatten()
X_test = test_df.values.flatten()

In [16]:
#We have to create a function that separates the characters of a given string
def character_tokens(input_string):
    """Break string into characters."""
    return [x for x in input_string]

In [17]:
#Sets up a pipeline to apply TF-IDF (Term Frequency-Inverse Document Frequency) on the characters of passwords, followed by training a Gradient Boosting classifier.
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier

password_clf = Pipeline(
    [("vect", TfidfVectorizer(tokenizer=character_tokens)), ("clf", XGBClassifier()),]
)

In [18]:
password_clf.fit(X_train, y_train)
password_clf.score(X_test, y_test)



0.9797352288933523

In [19]:
#Assigns values to two variables, one representing a commonly used password and the other a computer-generated, high-entropy password
common_password = "qwerty"
strong_computer_generated_password = "c9lCwLBFmdLbG6iWla4H"

In [20]:
#Determine the classifier's strength predictions for the two passwords provided
password_clf.predict([common_password,strong_computer_generated_password])

array([0, 2])