# K Nearest Neighbours Algorithm

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
import random
import nltk
from nltk.corpus import words
from sklearn.pipeline import Pipeline


In [2]:
data =pd.read_excel('3passwords_train.xlsx')
passwords1 = data['passwords'].tolist()
passwords = [str(item) for item in passwords1] #converts all strings and integer passwords into strings
labels = data['label'].tolist()


In [3]:
vectorizer = TfidfVectorizer(analyzer='char', lowercase=False)
X = vectorizer.fit_transform(passwords)
print('Passwords have been converted into tokens')


Passwords have been converted into tokens


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)


In [5]:
k = 5  # Number of neighbors to consider
knn_classifier = KNeighborsClassifier(n_neighbors=k)
knn_classifier.fit(X_train, y_train)


In [6]:
predictions = knn_classifier.predict(X_test)

In [7]:
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions, average='weighted')
recall = recall_score(y_test, predictions,average='weighted')
print(f"Accuracy: {accuracy:.2f}")
print("Precision:", precision)
print("Recall:", recall)


Accuracy: 0.89
Precision: 0.911764705882353
Recall: 0.8928571428571429


In [None]:
print('Phase 1 complete!')

In [183]:
#function that makes a password stronger

# Download the NLTK words dataset if you haven't already
import nltk


# Get the list of English words from the NLTK corpus
word_list = words.words()

def strengthen_password(password, num_words, num_numbers):
    new_password = password

    # Add random words
    for _ in range(num_words):
        new_password += random.choice(word_list)

    # Add random numbers
    for _ in range(num_numbers):
        new_password += str(random.randint(0, 9))

    return new_password

In [None]:


# Build a pipeline combining the vectorizer and classifier
pipeline = Pipeline([('vectorizer', vectorizer), ('classifier', knn_classifier)])

# Fit the pipeline on the data
pipeline.fit(passwords, labels)

# Function to rate a password
def rate_password(password):
    # Convert the password into a list to match the input format
    password_list = [password]
    
    # Predict the label (good or bad)
    prediction = pipeline.predict(password_list)
    
    return prediction[0]

# Test the rate_password function
test_password = input('')
rating = rate_password(test_password)

if rating == 0:
    rating_text = "very weak"
    while rating == 0:
        new_pass = strengthen_password(test_password,2,3)
        rating = rate_password(new_pass)
if rating == 1:
    rating_text = "weak"
    while rating == 1:
        new_pass = strengthen_password(test_password,1,2)
        rating = rate_password(new_pass)
if rating == 2:
    rating_text = "good"
elif rating == 3:
    rating_text = " ideal"

print(f"The password '{test_password}' is rated as {rating_text}")


In [167]:
#following snippet combines original password with a desireable word to create a new stronger password

#nltk.download('words')


# Load the NLTK words corpus
nltk_words = set(words.words())

def generate_password(input_password, target_word):
    # Identify words in the input password
    input_words = input_password.split()
    
    # Find similar words in the NLTK corpus
    similar_words = [word for word in nltk_words if target_word in word]
    
    # Select a random similar word
    if similar_words:
        selected_word = random.choice(similar_words)
    else:
        selected_word = target_word
    
    # Generate a random number to append to the password
    random_number = random.randint(10, 99)
    
    # Construct the new password
    new_password = f"{input_password}{selected_word}{random_number}"
    
    return new_password

# Test the password generation
input_password = "s3cur3"
target_word = "carbon"
new_password = generate_password(input_password, target_word)

print("Original Password:", input_password)
print("New Password:", new_password)

Original Password: s3cur3
New Password: s3cur3carbonic45
