<a href="https://colab.research.google.com/github/al3inten/ROCKYOU.AI/blob/main/Another_copy_of_rock.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Quietly (-q) install the Hugging Face transformers, datasets and accelerate packages.
# NOTE(review): no cell visible in this notebook imports any of these three
# packages; the analysis below uses scikit-learn and matplotlib. Confirm
# whether this install is still needed.
!pip install transformers datasets accelerate -q


In [None]:
# ✅ STEP 1: Path to the leaked-password list on the local machine
# (adjust "USER" to your system username)
file_path = r"C:\Users\USER\Downloads\leaked_passwords.txt"


# ✅ STEP 2: Read the file, one password per line.
# latin-1 maps every possible byte value to a character, so decoding with it
# can never raise UnicodeDecodeError and a second-encoding fallback would be
# dead code. What can actually fail is opening/reading the file (missing path,
# permissions), so that is the error handled here: report it and continue with
# an empty list instead of crashing the cell.
try:
    with open(file_path, 'r', encoding='latin-1') as f:
        # Trim surrounding whitespace and skip lines that are blank afterwards.
        leaked_passwords = [entry for entry in map(str.strip, f) if entry]
except OSError as e:
    print(f"❌ Could not read file. Error: {e}")
    leaked_passwords = []

print(f"\n✅ Loaded {len(leaked_passwords)} passwords from Drive")
print("📌 Sample passwords from file:")
print(leaked_passwords[:10])  # Show first 10


In [None]:
# Prompt for the password to check; surrounding whitespace is discarded and
# letter case is kept exactly as typed.
user_input = input("🔑 Enter the password you want to check: ").strip()

print(f"✅ Your password: {user_input}")


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


In [None]:
# Lowercase every leaked password so that matching ignores letter case
password_corpus = list(map(str.lower, leaked_passwords))

# Character-level TF-IDF over 2- to 4-character n-grams, fitted on the corpus
vectorizer = TfidfVectorizer(
    analyzer='char',
    ngram_range=(2, 4),
)
tfidf_matrix = vectorizer.fit_transform(password_corpus)


In [None]:
# Lowercase the input the same way as the corpus, then project it into the
# fitted TF-IDF space as a one-document batch.
user_input_cleaned = user_input.lower()
user_vector = vectorizer.transform(
    [user_input_cleaned],
)


In [None]:
# Cosine similarity of the input against every corpus row, flattened to 1-D
pairwise = cosine_similarity(user_vector, tfidf_matrix)
similarity_scores = pairwise.flatten()


In [None]:
# Rank corpus entries from most to least similar and keep the first 10
ranked_indices = similarity_scores.argsort()[::-1]
top_indices = ranked_indices[:10]
top_matches = [
    (password_corpus[idx], similarity_scores[idx])
    for idx in top_indices
]

print("\n🔍 Top similar passwords using AI (TF-IDF + Cosine):")
for candidate, similarity in top_matches:
    print(f"🔸 {candidate} (score: {similarity:.4f})")


In [None]:
import string
import random
import os

# ✅ Define password strength scoring logic
def password_strength_score(pwd, similarity_score):
    """Rate a password on a nominal 0-10 scale.

    Three components are blended:

    * length (weight 0.4): ``len(pwd) / 12``, capped at 1.0;
    * variety (weight 0.4): one quarter for each character class present
      among lowercase, uppercase, digit and ASCII punctuation;
    * dissimilarity (weight 0.2): ``1 - similarity_score``, floored at 0.

    Args:
        pwd: Password to rate.
        similarity_score: Similarity to a leaked password; a higher value
            lowers the rating.

    Returns:
        The weighted sum multiplied by 10 and rounded to two decimals.
    """
    # Length contribution saturates once the password reaches 12 characters.
    length_score = len(pwd) / 12 if len(pwd) < 12 else 1.0

    # Booleans add up as 0/1, giving the number of character classes present.
    has_lower = any(map(str.islower, pwd))
    has_upper = any(map(str.isupper, pwd))
    has_digit = any(map(str.isdigit, pwd))
    has_symbol = any(ch in string.punctuation for ch in pwd)
    variety_score = (has_lower + has_upper + has_digit + has_symbol) / 4

    # The less the password resembles a leaked one, the larger this term.
    dissimilarity = max(0, 1 - similarity_score)

    weighted = (0.4 * length_score) + (0.4 * variety_score) + (0.2 * dissimilarity)
    return round(weighted * 10, 2)

# ✅ Generate a stronger, human-readable password
def suggest_strong_password(base_pwd):
    """Derive a harder-to-guess variant of ``base_pwd``.

    Each character that has look-alike replacements (for example ``a`` ->
    ``@`` or ``4``; the lookup is case-insensitive) is swapped for one of
    them with probability 0.7. A two-digit number is appended when the result
    contains no digit, and one of ``!@#$%&*?`` is appended when it contains
    none of those symbols.

    Randomness is drawn from ``secrets.SystemRandom`` rather than the
    ``random`` module: the output is password material, and ``random`` is a
    deterministic generator not meant for security purposes. As a
    consequence the global ``random`` state is neither consulted nor changed.

    Args:
        base_pwd: Password to strengthen.

    Returns:
        The transformed password string.
    """
    import secrets  # function-scope import keeps this block self-contained

    rng = secrets.SystemRandom()
    substitutions = {
        'a': ['@', '4'], 'e': ['3'], 'i': ['1', '!'],
        'o': ['0'], 's': ['$', '5'], 't': ['7'], 'l': ['1'],
        'h': ['#']
    }
    symbols = "!@#$%&*?"

    # Collect pieces and join once instead of growing a string with +=.
    pieces = []
    for ch in base_pwd:
        options = substitutions.get(ch.lower())
        if options and rng.random() < 0.7:
            pieces.append(rng.choice(options))
        else:
            pieces.append(ch)
    strong_pwd = "".join(pieces)

    # Guarantee at least one digit and one symbol in the result.
    if not any(c.isdigit() for c in strong_pwd):
        strong_pwd += str(rng.randint(10, 99))
    if not any(c in symbols for c in strong_pwd):
        strong_pwd += rng.choice(symbols)
    return strong_pwd

# ✅ Scoring and reporting top matches
# The same input password is scored once per match; only the similarity term
# differs between the lines.
print("\n📋 Password Risk Log:")
log_lines = []
for pwd, score in top_matches:
    strength = password_strength_score(user_input, score)
    line = f"Input: {user_input} | Similar: {pwd} | Similarity: {score:.2f} | Score: {strength}/10"
    log_lines.append(line)
    print("•", line)

# ✅ Suggest improved version
suggested_password = suggest_strong_password(user_input)
print(f"\n💡 Suggested stronger password: {suggested_password}")
# ✅ Define your local path (adjust "USER" to your system username)
folder_path = r"C:\Users\USER\Downloads\password-ai"
output_path = os.path.join(folder_path, "password_risk_report.txt")

# ✅ Create the folder if it doesn't exist
os.makedirs(folder_path, exist_ok=True)

# ✅ Save the report locally (append mode keeps earlier checks in the file)
# An explicit UTF-8 encoding is required: without it open() uses the locale's
# codec, and the leaked passwords (decoded as latin-1) or the user's input can
# contain characters that codec cannot encode, raising UnicodeEncodeError.
# NOTE(review): the report stores the checked password and the suggested one
# in plain text on disk — confirm this is acceptable for your use.
with open(output_path, 'a', encoding='utf-8') as f:
    f.write("=== New Password Check ===\n")
    f.writelines(f"{entry}\n" for entry in log_lines)
    f.write(f"Suggested Stronger Password: {suggested_password}\n\n")

print(f"\n📁 Report saved locally to: {output_path}")



In [None]:
import matplotlib.pyplot as plt

# ✅ Collect the plotted series: one point per top match
passwords = [match for match, _ in top_matches]
similarities = [sim for _, sim in top_matches]
strength_scores = [password_strength_score(user_input, sim) for sim in similarities]

# ✅ Scatter similarity (x axis) against strength score (y axis)
plt.figure(figsize=(10, 6))
plt.scatter(
    similarities,
    strength_scores,
    c='orange',
    s=100,
    alpha=0.7,
    edgecolors='black',
)

# Label each point with its leaked password, nudged up and right of the marker
for label, x, y in zip(passwords, similarities, strength_scores):
    plt.annotate(label, (x + 0.005, y + 0.2), fontsize=9)

plt.title("📊 Password Similarity vs Strength Score", fontsize=14)
plt.xlabel("Cosine Similarity with Leaked Passwords (0 = safe, 1 = dangerous)")
plt.ylabel("Strength Score (0–10)")
plt.grid(True)

# Reference lines for the two thresholds named in the legend
plt.axhline(y=5, color='red', linestyle='--', label='Minimum Safe Score')
plt.axvline(x=0.5, color='blue', linestyle='--', label='High Similarity Threshold')
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
def suggest_strong_password_v2(base_pwd, min_score=7.5, max_attempts=10):
    """Build a stronger password from ``base_pwd`` and report its score.

    The candidate is built once: well-known weak fragments are removed,
    letters with a leet equivalent are substituted, a two-digit number is
    appended if no digit is present, and the result is wrapped in ``#``.
    It is then scored with ``password_strength_score(candidate, 0.0)``.

    While the score is below ``min_score`` and evaluations remain, one extra
    character is inserted before the closing ``#`` (an uppercase letter if
    none is present, else a lowercase letter if none is present, else a
    random letter or digit) and the candidate is scored again. Rebuilding the
    same candidate from ``base_pwd`` on every retry could never change the
    score, so each retry has to add something.

    Randomness comes from ``secrets.SystemRandom`` because the output is
    password material.

    Args:
        base_pwd: Password to strengthen.
        min_score: Score at which the candidate is accepted.
        max_attempts: Maximum number of score evaluations. The candidate is
            always evaluated at least once, so values below 1 behave like 1.

    Returns:
        A ``(password, score)`` tuple. The score may be below ``min_score``
        if the evaluations ran out first.
    """
    import re
    import secrets
    import string

    rng = secrets.SystemRandom()

    substitutions = {
        'a': '@', 'e': '3', 'i': '1',
        'o': '0', 's': '$', 't': '7',
        'l': '1', 'h': '#'
    }

    weak_patterns = ['123', 'password', 'qwerty', 'admin', 'letmein', 'login']

    # Step 1: Clean up weak patterns (sequential passes, case-insensitive)
    pwd = base_pwd
    for pattern in weak_patterns:
        pwd = re.sub(pattern, '', pwd, flags=re.IGNORECASE)

    # Step 2: Apply consistent substitutions (leet style)
    strong_pwd = ''.join(substitutions.get(ch.lower(), ch) for ch in pwd)

    # Step 3: Add a random number at the end if no digit
    if not any(c.isdigit() for c in strong_pwd):
        strong_pwd += str(rng.randint(10, 99))

    # Step 4: Add # at start and end if missing
    if not strong_pwd.startswith("#"):
        strong_pwd = "#" + strong_pwd
    if not strong_pwd.endswith("#"):
        strong_pwd += "#"

    # Step 5: Evaluate strength, reinforcing the candidate between evaluations
    score = password_strength_score(strong_pwd, 0.0)
    evaluations = 1
    while score < min_score and evaluations < max_attempts:
        # After step 4 the candidate always ends with '#'; keep it as the
        # closing character and grow the part before it.
        inner = strong_pwd[:-1]
        if not any(c.isupper() for c in inner):
            pool = string.ascii_uppercase
        elif not any(c.islower() for c in inner):
            pool = string.ascii_lowercase
        else:
            pool = string.ascii_letters + string.digits
        strong_pwd = inner + rng.choice(pool) + "#"

        score = password_strength_score(strong_pwd, 0.0)
        evaluations += 1

    return strong_pwd, score


In [None]:
# Run the v2 generator on the entered password and show the result with its score
v2_result = suggest_strong_password_v2(user_input)
strong_pwd, new_score = v2_result

print(f"\n💡 AI-Enhanced Stronger Password: {strong_pwd} (Score: {new_score}/10)")
