1. Generator and Discriminator

In [1]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define the Generator class
class Discriminator(nn.Module):
    def __init__(self, vocab_size, hidden_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(vocab_size * max_length, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        # Flatten the input tensor
        x = x.view(x.size(0), -1)
        return self.model(x)


# Define the Discriminator class
class Discriminator(nn.Module):
    def __init__(self, vocab_size, hidden_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(vocab_size, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()  # Output is a probability of being a real password
        )
    
    def forward(self, x):
        return self.model(x)


Using device: cuda


2. Password Dataset

In [3]:
# Custom dataset class
class PasswordDataset(Dataset):
    def __init__(self, passwords, char2idx, max_length):
        self.passwords = passwords
        self.char2idx = char2idx
        self.max_length = max_length

    def __len__(self):
        return len(self.passwords)

    def __getitem__(self, idx):
        password = self.passwords[idx]
        padded_password = self.pad_sequence([self.char2idx[char] for char in password])
        return torch.tensor(padded_password, dtype=torch.long)
    
    def pad_sequence(self, seq):
        padded = [self.char2idx[' ']] * self.max_length
        padded[:len(seq)] = seq
        return padded

# Function to read passwords from a file
def read_passwords(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        passwords = [line.strip() for line in file.readlines()]
    return passwords

# Load passwords from the file
file_path = 'filtered_words_5_rockyou.txt'  # Replace with your file path
passwords = read_passwords(file_path)

# Create character mappings
char2idx = {char: idx for idx, char in enumerate(sorted(set(''.join(passwords) + ' ')))}
idx2char = {idx: char for char, idx in char2idx.items()}
max_length = max(len(password) for password in passwords)

# Create dataset and dataloader
dataset = PasswordDataset(passwords, char2idx, max_length)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

3. Training Loop

In [4]:
def one_hot_encode(sequence, vocab_size):
    one_hot = torch.zeros(len(sequence), vocab_size)
    for idx, char_idx in enumerate(sequence):
        one_hot[idx, char_idx] = 1
    return one_hot


In [5]:
# Model parameters
noise_dim = 100
embedding_dim = 128
hidden_dim = 256
vocab_size = len(char2idx)

# Instantiate the models
G = Generator(noise_dim, vocab_size, hidden_dim).to(device)
D = Discriminator(vocab_size, hidden_dim).to(device)

# Loss and optimizers
criterion = nn.BCELoss()
optimizer_G = optim.Adam(G.parameters(), lr=0.0002)
optimizer_D = optim.Adam(D.parameters(), lr=0.0002)

# Training loop
num_epochs = 200
for epoch in range(num_epochs):
    for i, real_passwords in enumerate(dataloader):
        real_passwords = real_passwords.to(device)
        batch_size = real_passwords.size(0)

        # One-hot encode real passwords
        real_passwords_one_hot = torch.stack([one_hot_encode(pwd, vocab_size) for pwd in real_passwords])
        
        # Labels for real and fake passwords
        real_labels = torch.ones(batch_size, 1).to(device)
        fake_labels = torch.zeros(batch_size, 1).to(device)

        # Train the discriminator
        optimizer_D.zero_grad()
        
        # Discriminator loss on real passwords
        outputs = D(real_passwords_one_hot.float())
        d_loss_real = criterion(outputs, real_labels)
        
        # Generate fake passwords
        z = torch.randn(batch_size, noise_dim).to(device)
        fake_passwords = G(z)
        
        # Discriminator loss on fake passwords
        outputs = D(fake_passwords.detach())
        d_loss_fake = criterion(outputs, fake_labels)
        
        # Backprop and optimize
        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        optimizer_D.step()
        
        # Train the generator
        optimizer_G.zero_grad()
        
        # Generator loss
        outputs = D(fake_passwords)
        g_loss = criterion(outputs, real_labels)
        
        # Backprop and optimize
        g_loss.backward()
        optimizer_G.step()
        
    print(f'Epoch [{epoch+1}/{num_epochs}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}')

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

4. Generate Passwords

In [None]:
# Generate new passwords
G.eval()
generated_passwords = []
with torch.no_grad():
    for _ in range(100):
        z = torch.randn(1, noise_dim).to(device)
        fake_password = G(z)
        password = ''.join([idx2char[idx] for idx in torch.argmax(fake_password, dim=1).cpu().numpy()])
        generated_passwords.append(password.strip())

# Print generated passwords
for i, password in enumerate(generated_passwords):
    print(f"Password {i+1}: {password}")


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

# Custom dataset class
class PasswordDataset(Dataset):
    def __init__(self, passwords, char2idx, max_length):
        self.passwords = passwords
        self.char2idx = char2idx
        self.max_length = max_length

    def __len__(self):
        return len(self.passwords)

    def __getitem__(self, idx):
        password = self.passwords[idx]
        padded_password = self.pad_sequence([self.char2idx[char] for char in password])
        return torch.tensor(padded_password, dtype=torch.long)
    
    def pad_sequence(self, seq):
        padded = [self.char2idx[' ']] * self.max_length
        padded[:len(seq)] = seq
        return padded

# One-hot encode function
def one_hot_encode(sequence, vocab_size):
    one_hot = torch.zeros(len(sequence), vocab_size)
    for idx, char_idx in enumerate(sequence):
        one_hot[idx, char_idx] = 1
    return one_hot

# Generator class
class Generator(nn.Module):
    def __init__(self, noise_dim, vocab_size, max_length):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, 256),
            nn.ReLU(),
            nn.Linear(256, vocab_size * max_length),
            nn.Tanh()
        )
        self.vocab_size = vocab_size
        self.max_length = max_length
    
    def forward(self, z):
        gen_output = self.model(z)
        gen_output = gen_output.view(z.size(0), self.max_length, self.vocab_size)
        return gen_output

# Discriminator class
class Discriminator(nn.Module):
    def __init__(self, vocab_size, hidden_dim, max_length):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(vocab_size * max_length, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        # Flatten the input tensor
        x = x.view(x.size(0), -1)
        return self.model(x)

# Read passwords from a file
def read_passwords(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        passwords = [line.strip() for line in file.readlines()]
    return passwords

# Training parameters
noise_dim = 100
hidden_dim = 256
max_length = 8
batch_size = 128
num_epochs = 16
learning_rate = 0.0002

# Load passwords from the file
file_path = 'D:\\Osama Khalid\\Osama Khalid BSCY 7\\FYP Part 1\\AI Codes\\Filtered_Passwords_Dataset\\length_5\\filtered_words_00000001.txt'  # Replace with your file path
print("Loading passwords...")
passwords = read_passwords(file_path)
print(f"Loaded {len(passwords)} passwords.")

# Create character mappings
char2idx = {char: idx for idx, char in enumerate(sorted(set(''.join(passwords) + ' ')))}
idx2char = {idx: char for char, idx in char2idx.items()}
vocab_size = len(char2idx)
print(f"Vocabulary size: {vocab_size}")

# Create dataset and dataloader
dataset = PasswordDataset(passwords, char2idx, max_length)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
print("DataLoader created.")

# Initialize models
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
G = Generator(noise_dim, vocab_size, max_length).to(device)
D = Discriminator(vocab_size, hidden_dim, max_length).to(device)

# Loss and optimizers
criterion = nn.BCELoss()
optimizer_G = optim.Adam(G.parameters(), lr=learning_rate)
optimizer_D = optim.Adam(D.parameters(), lr=learning_rate)

print("Starting training...")
# Training loop
for epoch in range(num_epochs):
    running_d_loss = 0.0
    running_g_loss = 0.0
    with tqdm(dataloader, unit="batch") as tepoch:
        for real_passwords in tepoch:
            tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")

            real_passwords = real_passwords.to(device)
            batch_size = real_passwords.size(0)

            # One-hot encode real passwords
            real_passwords_one_hot = torch.stack([one_hot_encode(pwd, vocab_size) for pwd in real_passwords]).to(device)
            
            # Labels for real and fake passwords
            real_labels = torch.ones(batch_size, 1).to(device)
            fake_labels = torch.zeros(batch_size, 1).to(device)

            # Train the discriminator
            optimizer_D.zero_grad()
            
            # Discriminator loss on real passwords
            outputs = D(real_passwords_one_hot.float())
            d_loss_real = criterion(outputs, real_labels)
            
            # Generate fake passwords
            z = torch.randn(batch_size, noise_dim).to(device)
            fake_passwords = G(z)
            
            # Discriminator loss on fake passwords
            outputs = D(fake_passwords.detach())
            d_loss_fake = criterion(outputs, fake_labels)
            
            # Backprop and optimize
            d_loss = d_loss_real + d_loss_fake
            d_loss.backward()
            optimizer_D.step()
            
            # Train the generator
            optimizer_G.zero_grad()
            
            # Generator loss
            outputs = D(fake_passwords)
            g_loss = criterion(outputs, real_labels)
            
            # Backprop and optimize
            g_loss.backward()
            optimizer_G.step()

            running_d_loss += d_loss.item()
            running_g_loss += g_loss.item()

            tepoch.set_postfix(d_loss=d_loss.item(), g_loss=g_loss.item())

    print(f"Epoch [{epoch+1}/{num_epochs}], d_loss: {running_d_loss/len(dataloader):.4f}, g_loss: {running_g_loss/len(dataloader):.4f}")

print("Training finished.")

# Save the models
torch.save(G.state_dict(), 'password_generator.pth')
torch.save(D.state_dict(), 'password_discriminator.pth')
print("Models saved.")




OSError: [WinError 126] The specified module could not be found. Error loading "c:\Users\Osama Khalid\.conda\envs\Testpytroch\Lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.

CPU CODE Only

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

# Custom dataset class
class PasswordDataset(Dataset):
    def __init__(self, passwords, char2idx, max_length):
        self.passwords = passwords
        self.char2idx = char2idx
        self.max_length = max_length

    def __len__(self):
        return len(self.passwords)

    def __getitem__(self, idx):
        password = self.passwords[idx]
        padded_password = self.pad_sequence([self.char2idx[char] for char in password])
        return torch.tensor(padded_password, dtype=torch.long)
    
    def pad_sequence(self, seq):
        padded = [self.char2idx[' ']] * self.max_length
        padded[:len(seq)] = seq
        return padded

# One-hot encode function
def one_hot_encode(sequence, vocab_size):
    one_hot = torch.zeros(len(sequence), vocab_size)
    for idx, char_idx in enumerate(sequence):
        one_hot[idx, char_idx] = 1
    return one_hot

# Generator class
class Generator(nn.Module):
    def __init__(self, noise_dim, vocab_size, max_length):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, 256),
            nn.ReLU(),
            nn.Linear(256, vocab_size * max_length),
            nn.Tanh()
        )
        self.vocab_size = vocab_size
        self.max_length = max_length
    
    def forward(self, z):
        gen_output = self.model(z)
        gen_output = gen_output.view(z.size(0), self.max_length, self.vocab_size)
        return gen_output

# Discriminator class
class Discriminator(nn.Module):
    def __init__(self, vocab_size, hidden_dim, max_length):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(vocab_size * max_length, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        # Flatten the input tensor
        x = x.view(x.size(0), -1)
        return self.model(x)

# Read passwords from a file
def read_passwords(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        passwords = [line.strip() for line in file.readlines()]
    return passwords

# Training parameters
noise_dim = 100
hidden_dim = 256
max_length = 8
batch_size = 64
num_epochs = 100
learning_rate = 0.0002

# Load passwords from the file
file_path = 'filtered_words_5_rockyou.txt'  # Replace with your file path
print("Loading passwords...")
passwords = read_passwords(file_path)
print(f"Loaded {len(passwords)} passwords.")

# Create character mappings
char2idx = {char: idx for idx, char in enumerate(sorted(set(''.join(passwords) + ' ')))}
idx2char = {idx: char for char, idx in char2idx.items()}
vocab_size = len(char2idx)
print(f"Vocabulary size: {vocab_size}")

# Create dataset and dataloader
dataset = PasswordDataset(passwords, char2idx, max_length)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
print("DataLoader created.")

# Initialize models
device = torch.device('cpu')
G = Generator(noise_dim, vocab_size, max_length).to(device)
D = Discriminator(vocab_size, hidden_dim, max_length).to(device)

# Loss and optimizers
criterion = nn.BCELoss()
optimizer_G = optim.Adam(G.parameters(), lr=learning_rate)
optimizer_D = optim.Adam(D.parameters(), lr=learning_rate)

print("Starting training...")
# Training loop
for epoch in range(num_epochs):
    running_d_loss = 0.0
    running_g_loss = 0.0
    with tqdm(dataloader, unit="batch") as tepoch:
        for real_passwords in tepoch:
            tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")

            real_passwords = real_passwords.to(device)
            batch_size = real_passwords.size(0)

            # One-hot encode real passwords
            real_passwords_one_hot = torch.stack([one_hot_encode(pwd, vocab_size) for pwd in real_passwords]).to(device)
            
            # Labels for real and fake passwords
            real_labels = torch.ones(batch_size, 1).to(device)
            fake_labels = torch.zeros(batch_size, 1).to(device)

            # Train the discriminator
            optimizer_D.zero_grad()
            
            # Discriminator loss on real passwords
            outputs = D(real_passwords_one_hot.float())
            d_loss_real = criterion(outputs, real_labels)
            
            # Generate fake passwords
            z = torch.randn(batch_size, noise_dim).to(device)
            fake_passwords = G(z)
            
            # Discriminator loss on fake passwords
            outputs = D(fake_passwords.detach())
            d_loss_fake = criterion(outputs, fake_labels)
            
            # Backprop and optimize
            d_loss = d_loss_real + d_loss_fake
            d_loss.backward()
            optimizer_D.step()
            
            # Train the generator
            optimizer_G.zero_grad()
            
            # Generator loss
            outputs = D(fake_passwords)
            g_loss = criterion(outputs, real_labels)
            
            # Backprop and optimize
            g_loss.backward()
            optimizer_G.step()

            running_d_loss += d_loss.item()
            running_g_loss += g_loss.item()

            tepoch.set_postfix(d_loss=d_loss.item(), g_loss=g_loss.item())

    print(f"Epoch [{epoch+1}/{num_epochs}], d_loss: {running_d_loss/len(dataloader):.4f}, g_loss: {running_g_loss/len(dataloader):.4f}")

print("Training finished.")

# Save the models
torch.save(G.state_dict(), 'password_generator_cpu.pth')
torch.save(D.state_dict(), 'password_discriminator_cpu.pth')
print("Models saved.")


In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Define the Generator class
class Generator(nn.Module):
    def __init__(self, noise_dim, vocab_size, max_length):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, 256),
            nn.ReLU(),
            nn.Linear(256, vocab_size * max_length),
            nn.Tanh()
        )
        self.vocab_size = vocab_size
        self.max_length = max_length
    
    def forward(self, z):
        gen_output = self.model(z)
        gen_output = gen_output.view(z.size(0), self.max_length, self.vocab_size)
        return gen_output

# Define the Discriminator class
class Discriminator(nn.Module):
    def __init__(self, vocab_size, hidden_dim, max_length):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(vocab_size * max_length, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        # Flatten the input tensor
        x = x.view(x.size(0), -1)
        return self.model(x)

# Load the saved generator and discriminator models
generator = Generator(noise_dim=100, vocab_size=vocab_size, max_length=max_length)
generator.load_state_dict(torch.load('password_generator.pth'))
generator.eval()

discriminator = Discriminator(vocab_size=vocab_size, hidden_dim=hidden_dim, max_length=max_length)
discriminator.load_state_dict(torch.load('password_discriminator.pth'))
discriminator.eval()

# Generate 100 passwords using the generator
num_passwords = 100000
noise = torch.randn(num_passwords, noise_dim)
with torch.no_grad():
    generated_passwords = generator(noise).argmax(dim=-1)

# Decode the generated passwords
generated_passwords_decoded = []
for password in generated_passwords:
    password_decoded = ''.join([idx2char[idx.item()] for idx in password if idx.item() != char2idx[' ']])
    generated_passwords_decoded.append(password_decoded)

# Print the generated passwords
for i, password in enumerate(generated_passwords_decoded, start=1):
    print(f"Password {i}: {password}")



NameError: name 'vocab_size' is not defined