# In [None]:
import pandas as pd
import numpy as np
import random
import string
import re
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Function to calculate entropy
def calculate_entropy(password):
    """Return the Shannon entropy (in bits) of the password's character-class mix.

    Characters are bucketed into four ASCII classes (lowercase, uppercase,
    digits, punctuation) and the entropy is computed over the share of the
    password that falls into each class. This measures how evenly the
    classes are mixed, not the entropy of the individual characters.
    Characters outside those four classes (whitespace, non-ASCII) count
    toward the length but belong to no bucket.

    Args:
        password: The string to score.

    Returns:
        0 for an empty password or when every character falls in a single
        class; larger values for a more even mix (2.0 when all four classes
        are equally represented).
    """
    if not password:
        # No characters to distribute; also avoids dividing by zero below.
        return 0

    total = len(password)
    entropy = 0
    for char_type in (string.ascii_lowercase, string.ascii_uppercase,
                      string.digits, string.punctuation):
        proportion = sum(c in char_type for c in password) / total
        # Skip empty classes: log2(0) is undefined and they contribute nothing.
        if proportion > 0:
            entropy -= proportion * np.log2(proportion)
    return entropy

# Function to generate random passwords
def generate_password(length, include_digits, include_uppercase, include_special):
    """Build a random string of `length` characters from the enabled classes.

    Lowercase ASCII letters are always part of the alphabet; digits,
    uppercase letters and punctuation are appended (in that order) when the
    corresponding flag is truthy. Each character is drawn independently with
    `random.choice`, so a class being enabled does not guarantee that it
    appears in the result.
    """
    optional_sets = (
        (include_digits, string.digits),
        (include_uppercase, string.ascii_uppercase),
        (include_special, string.punctuation),
    )
    alphabet = string.ascii_lowercase + ''.join(
        charset for enabled, charset in optional_sets if enabled
    )

    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

# Create a larger dataset of synthetic passwords with heuristic strength labels
passwords = []
strengths = []

for _ in range(2000):
    # Draw the generation settings: length 5-16 plus three independent flags.
    length = random.randint(5, 16)
    include_digits = random.choice([True, False])
    include_uppercase = random.choice([True, False])
    include_special = random.choice([True, False])
    password = generate_password(length, include_digits, include_uppercase, include_special)

    # Label the strength from the generation settings (not from the characters
    # actually drawn). Rules run from most to least specific: the 'excellent'
    # check must precede the generic 'good' fallback, otherwise a weaker
    # "length >= 8 with digits and uppercase" rule would swallow every
    # excellent candidate and the label would never be produced.
    if length < 8 or not (include_digits or include_uppercase or include_special):
        strength = 'bad'
    elif length >= 12 and include_digits and include_uppercase and include_special:
        strength = 'excellent'
    else:
        strength = 'good'

    passwords.append(password)
    strengths.append(strength)

# Create a DataFrame
df = pd.DataFrame({'password': passwords, 'strength': strengths})

# Feature engineering: Length, digits, special chars, uppercase, lowercase, entropy
def password_features(password):
    """Extract the numeric features used to classify a password.

    Args:
        password: The password string to describe.

    Returns:
        A `pd.Series` of six values, in this order: length, digit count,
        special-character count, uppercase count, lowercase count, and the
        value of `calculate_entropy(password)`.
    """
    length = len(password)
    digit_count = len(re.findall(r'\d', password))
    # `\w` matches the underscore, so `[^\w\s]` on its own would leave `_`
    # uncounted even though it is part of `string.punctuation`; match it
    # explicitly so every punctuation character is treated as special.
    special_char_count = len(re.findall(r'[^\w\s]|_', password))
    uppercase_count = len(re.findall(r'[A-Z]', password))
    lowercase_count = len(re.findall(r'[a-z]', password))
    entropy = calculate_entropy(password)
    return pd.Series([length, digit_count, special_char_count, uppercase_count, lowercase_count, entropy])

# Single source of truth for the feature columns, in the order that
# password_features() returns them.
FEATURE_COLUMNS = ['length', 'digit_count', 'special_char_count',
                   'uppercase_count', 'lowercase_count', 'entropy']

# Compute the features for every password and store them as named columns.
df[FEATURE_COLUMNS] = df['password'].apply(password_features)

# Encode the labels (bad, good, excellent) as integers
label_encoder = LabelEncoder()
df['strength_label'] = label_encoder.fit_transform(df['strength'])

# Prepare data for model training
X = df[FEATURE_COLUMNS]
y = df['strength_label']

# Train-test split (70% train / 30% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Model training
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

# Prediction
y_pred = model.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Test with new passwords
new_passwords = ['helloworld', 'P@55word!', '123abcDEF!', 'Qwerty123!$']
# Build a DataFrame with the same column names the model was fitted on;
# passing unnamed rows makes scikit-learn warn about missing feature names
# and hides any accidental column-order mismatch.
new_password_features = pd.DataFrame(
    [password_features(pwd).tolist() for pwd in new_passwords],
    columns=FEATURE_COLUMNS,
)
new_password_predictions = model.predict(new_password_features)

# Decode all predictions back to their string labels in one call.
predicted_strengths = label_encoder.inverse_transform(new_password_predictions)
for pwd, strength_name in zip(new_passwords, predicted_strengths):
    print(f"Password: {pwd}, Strength: {strength_name}")
