<a href="https://colab.research.google.com/github/canstralian/Wordlists/blob/main/Password_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IMPORT SECTION

In [14]:
!pip install transformers==4.31.0



In [4]:
import base64

In [5]:
import os

# Setting environment variables
os.environ['GOOGLE_API_KEY'] = 'your-google-api-key'
os.environ['OPENAI_API_KEY'] = 'your-openai-api-key'
os.environ['WAND_API_KEY'] = 'your-wand-api-key'
os.environ['HF_ACCESS_TOKEN'] = 'your-huggingface-token'
os.environ['COHERE_API_KEY'] = 'cohere_api_key'
os.environ['GITHUB_TOKEN'] = 'your-github-token'
os.environ['GOOGLE_API_KEY_1'] = 'your-google-api-key-1'


In [6]:
# Accessing environment variables securely
google_api_key = os.getenv('GOOGLE_API_KEY')
openai_api_key = os.getenv('OPENAI_API_KEY')
wand_api_key = os.getenv('WAND_API_KEY')
hf_access_token = os.getenv('HF_ACCESS_TOKEN')
cohere_api_key = os.getenv('COHERE_API_KEY')
github_token = os.getenv('GITHUB_TOKEN')
google_api_key_1 = os.getenv('GOOGLE_API_KEY_1')

print(google_api_key)  # Example: Printing the GOOGLE_API_KEY (Avoid printing sensitive keys in practice)


your-google-api-key


In [7]:
from google.colab import auth
from google.colab import drive
auth.authenticate_user()

# Your code to access files in Google Drive, for example


In [8]:
# Import necessary libraries
import hashlib
import random
import string
import csv

# Function to generate strong passwords
def generate_password(length=12):
    characters = string.ascii_letters + string.digits + string.punctuation
    return ''.join(random.choice(characters) for _ in range(length))

# Function to hash passwords
def hash_password(password):
    return hashlib.sha256(password.encode()).hexdigest()

# Generate 200 passwords and their hashes
passwords = []
for _ in range(200):  # 200 passwords
    password = generate_password(random.randint(10, 14))  # Random length between 10-14
    hashed_password = hash_password(password)
    passwords.append([password, hashed_password])

# Write to CSV file
csv_filename = '/content/passwords_and_hashes.csv'
with open(csv_filename, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['password', 'hash'])  # Write header
    writer.writerows(passwords)

# Output the file path
csv_filename

'/content/passwords_and_hashes.csv'

In [9]:
from transformers import pipeline

def generate_passwords_huggingface(model_name, prompt):
    """
    Generates passwords using a Hugging Face model
    :param model_name: Hugging Face model name
    :param prompt: Prompt to guide password generation
    :return: List of generated passwords
    """
    generator = pipeline('text-generation', model=model_name)
    generated_passwords = generator(prompt, max_length=10, num_return_sequences=5)

    return [entry['generated_text'].strip() for entry in generated_passwords]


In [10]:
import wandb

def augment_password_dataset(api_key, password_dataset):
    """
    Uses the Wand API for enhancing and tracking password dataset changes.
    :param api_key: Wand API key
    :param password_dataset: List of passwords to be tracked and enhanced
    :return: Updated password dataset
    """
    wandb.login(api_key)  # Log into WandB
    wandb.init(project="password-dataset-enhancement")

    augmented_data = []

    for password in password_dataset:
        # Example of transformation or augmentation (just a placeholder for real logic)
        augmented_password = password[::-1]  # Reverse the password as an example augmentation
        augmented_data.append(augmented_password)

    # Log the augmented dataset
    wandb.log({"augmented_passwords": augmented_data})

    return augmented_data


In [11]:
from google.cloud import storage

def upload_password_dataset_to_gcs(api_key, dataset, bucket_name, file_name):
    """
    Uploads the password dataset to Google Cloud Storage
    :param api_key: Google API key
    :param dataset: Password dataset to be uploaded
    :param bucket_name: Google Cloud Storage bucket name
    :param file_name: Name of the file to be saved
    """
    # Initialize Google Cloud Storage client
    client = storage.Client.from_service_account_json(api_key)
    bucket = client.get_bucket(bucket_name)

    # Create a blob and upload the dataset
    blob = bucket.blob(file_name)
    blob.upload_from_string(str(dataset))

    print(f"Password dataset uploaded to gs://{bucket_name}/{file_name}")


In [12]:
import requests

def push_dataset_to_github(api_key, repo_owner, repo_name, file_path, commit_message):
    """
    Pushes the password dataset to a GitHub repository
    :param api_key: GitHub API token
    :param repo_owner: GitHub repository owner
    :param repo_name: GitHub repository name
    :param file_path: Path to the file to upload
    :param commit_message: Commit message for the push
    """
    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{file_path}"

    with open(file_path, 'r') as file:
        content = file.read()

    # Encode content to Base64
    content_base64 = base64.b64encode(content.encode()).decode()

    headers = {
        'Authorization': f'token {api_key}'
    }

    data = {
        "message": commit_message,
        "content": content_base64,
    }

    response = requests.put(url, headers=headers, json=data)
    if response.status_code == 201:
        print("Dataset successfully pushed to GitHub!")
    else:
        print(f"Error: {response.json()}")


In [13]:
def enhance_password_dataset(api_keys, dataset):
    # 1. Generate password embeddings using Cohere
    embeddings = generate_password_embeddings(api_keys['cohere'], dataset)

    # 2. Generate creative password suggestions using OpenAI
    openai_suggestions = generate_password_suggestions(api_keys['openai'], "Generate secure password suggestions.")

    # 3. Use Hugging Face to generate text-based passwords
    hf_passwords = generate_passwords_huggingface("gpt2", "Create strong password combinations.")

    # 4. Use Wand to augment dataset
    augmented_data = augment_password_dataset(api_keys['wand'], dataset)

    # 5. Upload the final dataset to Google Cloud Storage
    upload_password_dataset_to_gcs(api_keys['google'], augmented_data, "your_bucket", "augmented_passwords.jsonl")

    # 6. Push to GitHub for version control
    push_dataset_to_github(api_keys['github'], "your_repo_owner", "your_repo_name", "augmented_passwords.jsonl", "Added new augmented passwords.")

    return augmented_data
