## Import libraries 

In [123]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfVectorizer 

## Data Exploration

In [2]:
df = pd.read_csv("data.csv", on_bad_lines='skip')

In [3]:
df.head()

Unnamed: 0,password,strength
0,kzde5577,1
1,kino3434,1
2,visi7k1yr,1
3,megzy123,1
4,lamborghin1,1


In [4]:
df.shape

(669640, 2)

**Observations:** The data contains 669640 observations of passwords and two columns

In [5]:
df["strength"].unique()

array([1, 2, 0])

**Observations:** The target column has three unique values

In [6]:
df.describe()

Unnamed: 0,strength
count,669640.0
mean,0.990196
std,0.507948
min,0.0
25%,1.0
50%,1.0
75%,1.0
max,2.0


**Observations:** The highest and lowest value on the target column is 2 and 0 respectively

In [7]:
df.isnull().sum()

password    1
strength    0
dtype: int64

**Observations:** The data contains only one missing value on the password column, so it is safe to drop the missing value

In [8]:
df=df.dropna()

## Setting up Validation framework

In [57]:
X=df.drop("strength",axis=1).values.flatten()
y=df["strength"].values
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

In [58]:
y_train

array([1, 1, 2, ..., 1, 1, 2])

## Tokenization

In [60]:
def tokens(string):
    """break strings into characters"""
    return [x for x in string]

## Machine learning pipeline

In [61]:
password_clf=Pipeline([("vect",TfidfVectorizer(tokenizer=tokens)),("clf",DecisionTreeClassifier())])

In [62]:
password_clf.fit(X_train,y_train)


Pipeline(steps=[('vect',
                 TfidfVectorizer(tokenizer=<function tokens at 0x125051280>)),
                ('clf', DecisionTreeClassifier())])

In [64]:
password_clf.score(X_test,y_test)

0.9286183621050117

In [118]:
strong="sakaryal&#305;"
medium="ilovemum12"
weak="qwerty"

In [119]:
password_clf.predict([strong,medium,weak])

array([2, 1, 0])