# **Weak Password Detection**

In [1]:
import os
import pickle
from embedding import tfidf
from models import svm_clf

In [2]:
def save_model(vectorizer, cls_model):
    """Pickle the vectorizer and the classifier under ./cache/weak_password/.

    The cache directory is created first when it is missing, so the call
    also succeeds on a fresh checkout instead of raising FileNotFoundError
    after training has already finished.

    Args:
        vectorizer: Picklable object written to ``vectorizer.pickle``.
        cls_model: Picklable object written to ``svm_clf.pickle``.

    Raises:
        OSError: If the directory or either file cannot be created/written.
        pickle.PicklingError: If one of the objects cannot be pickled.
    """
    vectorizer_path = "./cache/weak_password/vectorizer.pickle"
    cls_model_path = "./cache/weak_password/svm_clf.pickle"

    # open(..., "wb") does not create parent directories; make sure they exist.
    os.makedirs(os.path.dirname(vectorizer_path), exist_ok=True)

    with open(vectorizer_path, "wb") as pickle_file:
        pickle.dump(vectorizer, pickle_file)

    with open(cls_model_path, "wb") as pickle_file:
        pickle.dump(cls_model, pickle_file)

def load_model():
    """Read the two pickled artifacts back from ./cache/weak_password/.

    Returns:
        tuple: ``(vectorizer, cls_model)`` — the objects unpickled from
        ``vectorizer.pickle`` and ``svm_clf.pickle``, in that order.

    Raises:
        FileNotFoundError: If either pickle file is missing.
    """
    # NOTE: unpickling can execute arbitrary code; only load cache files
    # that were produced by a trusted run.
    artifact_paths = (
        "./cache/weak_password/vectorizer.pickle",
        "./cache/weak_password/svm_clf.pickle",
    )

    loaded = []
    for artifact_path in artifact_paths:
        with open(artifact_path, "rb") as handle:
            loaded.append(pickle.load(handle))

    vectorizer, cls_model = loaded
    return vectorizer, cls_model

## **Train**

In [4]:
# Path to the CSV consumed by the TF-IDF step.
data_file_path = os.path.join("data/weak_password", "data.csv")

# make_vector returns three values; presumably the feature matrix, the
# labels and the vectorizer that produced them — confirm in embedding/tfidf.
X, y, vectorizer = tfidf.make_vector(data_file_path)

# Train the SVM classifier, then run the project's extra evaluation helper
# on the vectorizer/model pair (its behaviour is defined in models/svm_clf).
cls_model = svm_clf.train_and_eval(X, y)
svm_clf.eval(vectorizer, cls_model)

# Persist both artifacts so they can be reloaded later via load_model().
save_model(vectorizer, cls_model)

b'Skipping line 2810: expected 2 fields, saw 5\nSkipping line 4641: expected 2 fields, saw 5\nSkipping line 7171: expected 2 fields, saw 5\nSkipping line 11220: expected 2 fields, saw 5\nSkipping line 13809: expected 2 fields, saw 5\nSkipping line 14132: expected 2 fields, saw 5\nSkipping line 14293: expected 2 fields, saw 5\nSkipping line 14865: expected 2 fields, saw 5\nSkipping line 17419: expected 2 fields, saw 5\nSkipping line 22801: expected 2 fields, saw 5\nSkipping line 25001: expected 2 fields, saw 5\nSkipping line 26603: expected 2 fields, saw 5\nSkipping line 26742: expected 2 fields, saw 5\nSkipping line 29702: expected 2 fields, saw 5\nSkipping line 32767: expected 2 fields, saw 5\nSkipping line 32878: expected 2 fields, saw 5\nSkipping line 35643: expected 2 fields, saw 5\nSkipping line 36550: expected 2 fields, saw 5\nSkipping line 38732: expected 2 fields, saw 5\nSkipping line 40567: expected 2 fields, saw 5\nSkipping line 40576: expected 2 fields, saw 5\nSkipping line 

            password  strength
0           kzde5577         1
1           kino3434         1
2          visi7k1yr         1
3           megzy123         1
4        lamborghin1         1
...              ...       ...
669635    10redtux10         1
669636     infrared1         1
669637  184520socram         1
669638     marken22a         1
669639      fxx4pw4g         1

[669640 rows x 2 columns]
              precision    recall  f1-score   support

           0    0.94287   0.83320   0.88465      8795
           1    0.96416   0.98973   0.97678     49763
           2    0.99026   0.95527   0.97245      8406

    accuracy                        0.96485     66964
   macro avg    0.96576   0.92607   0.94463     66964
weighted avg    0.96464   0.96485   0.96413     66964

[1 1 1 1 0 1 0 1 1 1 1 1]


## **Inference**

In [5]:
# Restore the pickled vectorizer and classifier from the cache directory.
vectorizer, cls_model = load_model()

# Sample passwords to classify.
password_list = [
    "WUt9IZzE0OQ7PkNE",
    "elyass15@ajilent-ci",
    "jerusalem393",
    "g067057895",
    "intel1",
]

# Vectorize the raw strings with the loaded vectorizer, then predict.
X_predict = vectorizer.transform(password_list)
y_Predict = cls_model.predict(X_predict)

# Print each password next to its predicted label.
for password, label in zip(password_list, y_Predict):
    print(password, label)

WUt9IZzE0OQ7PkNE 2
elyass15@ajilent-ci 2
jerusalem393 1
g067057895 1
intel1 0
