In [3]:
import tensorflow as tf
import tensorflow_datasets as tfds
print("tensorflow version: ", tf.__version__)

# Name of dataset in tensorflow datasets
DATASET_NAME="rock_you"

2021-10-14 11:50:54.129498: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-14 11:50:54.129517: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


tensorflow version:  2.6.0


Build Networks models based on the architecture in the paper


In [4]:
# create a class Residual block based on Residual Networks definition
class ResidualBlock(tf.keras.Model):
    def __init__(self, dim):
        super().__init__()
        self.res_block = tf.keras.Sequential([
            tf.keras.layers.ReLU(True),
            tf.keras.layers.Conv1D(dim, dim, 5, padding='same'),
            tf.keras.layers.ReLU(True),
            tf.keras.layers.Conv1D(dim, dim, 5, padding='same'),
        ])

    def call(self, input_data, **kwargs):
        output = self.res_block(input_data)
        return input_data + (0.3 * output)

# create a class Generator Network
class GeneratorNetwork(tf.keras.Model):
    def __init__(self, dim, pass_length):
        self.dim = dim
        self.pass_length = pass_length

        # instantiate a Sequential Model
        self.generator_res_block_model = tf.keras.models.Sequential()

        # first linear layer
        self.first_linear_layer = tf.keras.layers.Dense(128, activation='linear',
                                                        input_shape=(dim*pass_length, ))

        # residual blocks in a sequential order
        self.generator_res_block_model.add(ResidualBlock(dim=5))
        self.generator_res_block_model.add(ResidualBlock(dim=5))
        self.generator_res_block_model.add(ResidualBlock(dim=5))
        self.generator_res_block_model.add(ResidualBlock(dim=5))
        self.generator_res_block_model.add(ResidualBlock(dim=5))

        # convolutional 1D layer
        self.conv_1d_layer = tf.keras.layers.Conv1D(64, 32, 1, padding='valid')

        # last soft max layer
        self.softmax_layer = tf.keras.layers.Softmax(axis=1)

    def call(self, input_noise, **kwargs):
        """

        :param input_noise: noise input of some sample generated passwords
        :param kwargs:
        :return: the generated passwords for an iteration
        """

        # feed first layer with noise data
        output = self.first_linear_layer(input_noise)

        # reshape the result of linear layer
        output = tf.reshape(output, (-1, 2, 128))

        # feed residual blocks by output from reshape stage
        output = self.generator_res_block_model(output)
        output = tf.reshape(output, (1, 32, 8))

        # feed resulted data to convolutional layer
        output = self.conv_1d_layer(output)

        # transpose operation on the resulted output
        output = tf.transpose(output)

        # feed softmax layer with transposed output
        output = self.softmax_layer(output)
        output = tf.reshape(output, [2, 1, 32])

        return output

# create discriminator network
class DiscriminatorNetwork(tf.keras.Model):
    def __init__(self, dim, pass_length):
        self.dim = dim
        self.pass_length = pass_length

        self.block = tf.keras.Sequential([
            ResidualBlock(dim),
            ResidualBlock(dim),
            ResidualBlock(dim),
            ResidualBlock(dim),
            ResidualBlock(dim),
        ])
        self.conv1d = tf.keras.layers.Conv1D(dim, 32, 1, padding='valid')
        self.linear = tf.keras.layers.Dense(pass_length * dim, activation='linear')

    def call(self, input_data, **kwargs):
        output = tf.transpose(input_data, [0, 2, 1])
        output = self.conv1d(output)
        output = self.block(output)
        output = tf.reshape(output, (-1, 64, 4))
        output = self.linear(output)
        return output


In [29]:
def load_and_cache_dataset():
    dataset, dataset_info = tfds.load(name=DATASET_NAME,
                                  split='train[:75%]',
                                  with_info=True)
    print("---------------- DATASET INFO ---------------")
    print(dataset_info)
    dataset = dataset.shuffle(100000, reshuffle_each_iteration=True)

    # Cache dataset for future use
    dataset = dataset.cache()
    return dataset

In [33]:
ds = load_and_cache_dataset()
print(ds.options)
for data in tfds.as_numpy(ds):
    # print(list(data.keys()))
    print(data.get("password").decode("utf-8"))

---------------- DATASET INFO ---------------
tfds.core.DatasetInfo(
    name='rock_you',
    full_name='rock_you/1.0.0',
    description="""
    This dataset contains 14,344,391 passwords that were leaked or stolen from
    various sites. The author of this dataset states that "I'm hosting them because
    it seems like nobody else does (hopefully it isn't because hosting them is
    illegal :)). Naturally, I'm not the one who stole these; I simply found them
    online, removed any names/email addresses/etc.".
    
    This dataset is used to train Machine Learning models for password guessing
    and cracking.
    """,
    homepage='https://wiki.skullsecurity.org/Passwords',
    data_path='/home/hafez/tensorflow_datasets/rock_you/1.0.0',
    download_size=133.44 MiB,
    dataset_size=393.36 MiB,
    features=FeaturesDict({
        'password': Text(shape=(), dtype=tf.string),
    }),
    supervised_keys=None,
    disable_shuffling=False,
    splits={
        'train': <SplitInfo num_e

2021-10-14 12:08:25.207850: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


KeyboardInterrupt: 

In [None]:
def training(epochs=2, batch_size=128, save_interval=500):
    # Load the rock you dataset
    ds = load_and_cache_dataset()

