# Training PassGAN

This Jupyter notebook is mainly for debugging, has same functionality to "train.py". However, the parameters are configured small.

## Dependencies

In [1]:
import os, sys
sys.path.append(os.getcwd())

import time
import pickle
import numpy as np
import tensorflow as tf

import utils
import tflib as lib
import tflib.ops.linear
import tflib.ops.con1
import tflib.plot
import mod1


Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# TensorFlow virsion
print(tf.__version__)

2.16.1


## Parameters

To simplify migration process between Jupyter Notebook and .py format, we created virtual ArgumentParser class.

In [3]:
# Class to virtualize ArgumentParser
class VirtualArgparse:
    
    # Path to dataset
    training_data = "data/text.txt"
    
    # Name of directory to output
    output_dir = "pretrained"
    
    save_every = 10   #5000
    iters = 100   #200000
    batch_size = 64
    seq_length = 10
    layer_dim = 128
    critic_iters = 10
    lamb = 10

In [4]:
# Virtualize ArgumentParser instance
args = VirtualArgparse

## Creating Directories

In [5]:
if not os.path.isdir(args.output_dir):
    os.makedirs(args.output_dir)

if not os.path.isdir(os.path.join(args.output_dir, 'checkpoints')):
    os.makedirs(os.path.join(args.output_dir, 'checkpoints'))

if not os.path.isdir(os.path.join(args.output_dir, 'samples')):
    os.makedirs(os.path.join(args.output_dir, 'samples'))

## Importing Dataset

In [6]:
lines, charmap, inv_charmap = utils.load_dataset(
    path=args.training_data,
    max_length=args.seq_length)

loaded 120 lines in dataset


## Creating Dictionary

In [7]:
# Pickle to avoid encoding errors with json
with open(os.path.join(args.output_dir, 'charmap.pickle'), 'wb') as f:
    pickle.dump(charmap, f)

with open(os.path.join(args.output_dir, 'charmap_inv.pickle'), 'wb') as f:
    pickle.dump(inv_charmap, f)
print(charmap)
print("Number of unique characters in dataset: {}".format(len(charmap)))

{'unk': 0, '`': 1, 'f': 2, 'd': 3, 'g': 4, 's': 5, 'h': 6, '4': 7, 'e': 8, '5': 9, 'l': 10, 'y': 11, '.': 12, 'i': 13, 'o': 14, '3': 15}
Number of unique characters in dataset: 16


## Modeling Generator

In [8]:
real_inputs_discrete = tf.random.uniform(shape=[args.batch_size, args.seq_length], minval=0, maxval=len(charmap), dtype=tf.int32)
real_inputs = tf.one_hot(real_inputs_discrete, 96)

In [9]:
print(real_inputs_discrete)
print(real_inputs)

Tensor("random_uniform:0", shape=(64, 10), dtype=int32)
Tensor("one_hot:0", shape=(64, 10, 96), dtype=float32)


In [10]:
fake_inputs = mod1.Generator(args.batch_size, args.seq_length, args.layer_dim, len(charmap))


Linear layer: Generator.Input, Input dim: 128, Output dim: 1280
Weight values shape: (128, 1280), dtype: float32
Creating new parameter: Generator.Input.W, Value shape: (128, 1280), dtype: float32
Created weight: <tf.Variable 'Generator.Input/Generator.Input.W:0' shape=(128, 1280) dtype=float32_ref>
Creating new parameter: Generator.Input.b, Value shape: (1280,), dtype: float32
Result shape: (64, 1280), dtype: <dtype: 'float32'>


In [11]:
fake_inputs_discrete = tf.argmax(fake_inputs, fake_inputs.get_shape().ndims-1)

## Modeling Discriminator

In [12]:
disc_real = mod1.Discriminator(real_inputs, args.seq_length, args.layer_dim, len(charmap))
disc_fake = mod1.Discriminator(fake_inputs, args.seq_length, args.layer_dim, len(charmap))

disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
gen_cost = -tf.reduce_mean(disc_fake)

ValueError: Depth of output (128) is not a multiple of the number of groups (6) for '{{node Discriminator.Input/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](Discriminator.Input/conv1d/ExpandDims, Discriminator.Input/conv1d/ExpandDims_1)' with input shapes: [64,1,10,96], [1,1,16,128].

In [34]:
# WGAN lipschitz-penalty
alpha = tf.random.uniform(
    shape=[args.batch_size, 1, 1],
    minval=0.0,
    maxval=1.0
)

differences = fake_inputs - real_inputs
interpolates = real_inputs + (alpha * differences)
gradients = tf.gradients(mod1.Discriminator(interpolates, args.seq_length, args.layer_dim, len(charmap)), [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2]))
gradient_penalty = tf.reduce_mean((slopes - 1.0) ** 2)
disc_cost += args.lamb * gradient_penalty

gen_params = lib.params_with_name('Generator')
disc_params = lib.params_with_name('Discriminator')


# ... rest of your code ...

# gen_train_op = tf.keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9).minimize(gen_cost, var_list=gen_params)
# disc_train_op = tf.keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9).minimize(disc_cost, var_list=disc_params)
gen_train_op = tf.keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)
disc_train_op = tf.keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)
gen_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9)
disc_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9)


Linear layer: Discriminator.Output, Input dim: 1280, Output dim: 1
Weight values shape: (1280, 1), dtype: float32
Created weight: <tf.Variable 'Discriminator.Output/Discriminator.Output.W:0' shape=(1280, 1) dtype=float32_ref>
Result shape: (64, 1), dtype: <dtype: 'float32'>


In [35]:
# Dataset iterator
def inf_train_gen():
    while True:
        np.random.shuffle(lines)
        for i in range(0, len(lines)-args.batch_size+1, args.batch_size):
            yield np.array(
                [[charmap[c] for c in l] for l in lines[i:i+args.batch_size]],
                dtype='int32'
            )

In [36]:
# During training we monitor JS divergence between the true & generated ngram
# distributions for n=1,2,3,4. To get an idea of the optimal values, we
# evaluate these statistics on a held-out set first.
true_char_ngram_lms = [utils.NgramLanguageModel(i+1, lines[10*args.batch_size:], tokenize=False) for i in range(4)]
validation_char_ngram_lms = [utils.NgramLanguageModel(i+1, lines[:10*args.batch_size], tokenize=False) for i in range(4)]
for i in range(4):
    print("validation set JSD for n={}: {}".format(i+1, true_char_ngram_lms[i].js_with(validation_char_ngram_lms[i])))
true_char_ngram_lms = [utils.NgramLanguageModel(i+1, lines, tokenize=False) for i in range(4)]

validation set JSD for n=1: 0.5
validation set JSD for n=2: 0.49999999999999994
validation set JSD for n=3: 0.5000000000000001
validation set JSD for n=4: 0.5000000000000001


## TensorFlow Session

In [123]:
import tensorflow as tf
import numpy as np
import time
import os

# Assuming args, fake_inputs, real_inputs, mod1, charmap, inv_charmap, inf_train_gen, compute_gen_cost, compute_disc_cost, utils, true_char_ngram_lms, and lib are already defined

# Define WGAN lipschitz-penalty
alpha = tf.random.uniform(shape=[args.batch_size, 1, 1], minval=0.0, maxval=1.0)
differences = fake_inputs - real_inputs
interpolates = real_inputs + (alpha * differences)
with tf.GradientTape() as tape:
    tape.watch(interpolates)
    d_interpolates = mod1.Discriminator(interpolates, args.seq_length, args.layer_dim, len(charmap))
gradients = tape.gradient(d_interpolates, [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2]))
gradient_penalty = tf.reduce_mean((slopes - 1.0) ** 2)
disc_cost += args.lamb * gradient_penalty

# Assume these functions return a cost to minimize


# Get parameters for generator and discriminator
gen_params = lib.params_with_name('Generator')
disc_params = lib.params_with_name('Discriminator')

# Define optimizers
gen_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9)
disc_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9)

# Define training operations
gen_train_op = gen_optimizer.minimize(gen_cost, var_list=gen_params)
disc_train_op = disc_optimizer.minimize(disc_cost, var_list=disc_params)

# Create the TensorFlow session
with tf.compat.v1.Session() as session:
    
    # Time stamp
    localtime = time.asctime(time.localtime(time.time()))
    print("Starting TensorFlow session...")
    print("Local current time :", localtime)
    
    # Initialize variables
    session.run(tf.compat.v1.global_variables_initializer())

    def generate_samples():
        samples = session.run(fake_inputs)
        samples = np.argmax(samples, axis=2)
        decoded_samples = []
        for i in range(len(samples)):
            decoded = []
            for j in range(len(samples[i])):
                decoded.append(inv_charmap[samples[i][j]])
            decoded_samples.append(tuple(decoded))
        return decoded_samples

    gen = inf_train_gen()

    for iteration in range(args.iters + 1):
        start_time = time.time()

        # Train Generator
        if iteration > 0:
            session.run(gen_train_op)

        # Train Discriminator
        for i in range(args.critic_iters):
            _data = next(gen)

            # Debugging steps
            print(f"Iteration: {iteration}, Critic Iteration: {i}")
            print(f"_data shape: {_data.shape}")
            print(f"real_inputs_discrete expected shape: {real_inputs_discrete.shape}")

            try:
                _disc_cost, _ = session.run(
                    [disc_cost, disc_train_op],
                    feed_dict={real_inputs_discrete: _data}
                )
            except Exception as e:
                print(f"Error during session.run: {e}")
                break

        lib.plot.output_dir = args.output_dir
        lib.plot.plot('time', time.time() - start_time)
        lib.plot.plot('train disc cost', _disc_cost)

        # Output to text file after every 100 samples
        if iteration % 100 == 0 and iteration > 0:
            samples = []
            for _ in range(10):
                samples.extend(generate_samples())

            for i in range(4):
                lm = utils.NgramLanguageModel(i+1, samples, tokenize=False)
                lib.plot.plot(f'js{i+1}', lm.js_with(true_char_ngram_lms[i]))

            os.makedirs(os.path.join(args.output_dir, 'samples'), exist_ok=True)
            with open(os.path.join(args.output_dir, 'samples', f'samples_{iteration}.txt'), 'w') as f:
                for s in samples:
                    s = "".join(s)
                    f.write(s + "\n")

        if iteration % args.save_every == 0 and iteration > 0:
            os.makedirs(os.path.join(args.output_dir, 'checkpoints'), exist_ok=True)
            model_saver = tf.compat.v1.train.Saver()
            model_saver.save(session, os.path.join(args.output_dir, 'checkpoints', f'checkpoint_{iteration}.ckpt'))
            print(f"{iteration} / {args.iters} ({iteration / args.iters * 100.0:.2f}%)")

        if iteration == args.iters:
            print("...Training done.")

        # Uncomment the following lines if you have the lib.plot.flush() and lib.plot.tick() functions available
        # if iteration % 100 == 0:
        #     lib.plot.flush()

        # lib.plot.tick()

# Time stamp
localtime = time.asctime(time.localtime(time.time()))
print("Ending TensorFlow session.")
print("Local current time :", localtime)


Linear layer: Discriminator.Output, Input dim: 1280, Output dim: 1
Weight values shape: (1280, 1), dtype: float32
Created weight: <tf.Variable 'Discriminator.Output/Discriminator.Output.W:0' shape=(1280, 1) dtype=float32_ref>
Result shape: (64, 1), dtype: <dtype: 'float32'>
Starting TensorFlow session...
Local current time : Tue May 28 19:08:54 2024
Iteration: 0, Critic Iteration: 0
_data shape: (64, 10)
real_inputs_discrete expected shape: (64, 10)
Iteration: 0, Critic Iteration: 1
_data shape: (64, 10)
real_inputs_discrete expected shape: (64, 10)
Iteration: 0, Critic Iteration: 2
_data shape: (64, 10)
real_inputs_discrete expected shape: (64, 10)
Iteration: 0, Critic Iteration: 3
_data shape: (64, 10)
real_inputs_discrete expected shape: (64, 10)
Iteration: 0, Critic Iteration: 4
_data shape: (64, 10)
real_inputs_discrete expected shape: (64, 10)
Iteration: 0, Critic Iteration: 5
_data shape: (64, 10)
real_inputs_discrete expected shape: (64, 10)
Iteration: 0, Critic Iteration: 6
_d

In [122]:
with tf.compat.v1.Session() as session:

    # Time stamp
    localtime = time.asctime( time.localtime(time.time()) )
    print("Starting TensorFlow session...")
    print("Local current time :", localtime)
    
    # Start TensorFlow session...
    session.run(tf.compat.v1.global_variables_initializer())

    def generate_samples():
        samples = session.run(fake_inputs)
        samples = np.argmax(samples, axis=2)
        decoded_samples = []
        for i in range(len(samples)):
            decoded = []
            for j in range(len(samples[i])):
                decoded.append(inv_charmap[samples[i][j]])
            decoded_samples.append(tuple(decoded))
        return decoded_samples

    gen = inf_train_gen()
    for iteration in range(args.iters + 1):
        start_time = time.time()

        # Train Generator
        if iteration > 0:
            print('gen_train_op')
            _ = session.run(gen_train_op)

        # Train Discriminator
        for i in range(args.critic_iters):
            _data = next(gen)
            _disc_cost, _ = session.run([disc_cost, disc_train_op],feed_dict={real_inputs_discrete:_data})
            

        lib.plot.output_dir = args.output_dir
        lib.plot.plot('time', time.time() - start_time)
        lib.plot.plot('train disc cost', _disc_cost)

        # Output to text file after every 100 samples
        if iteration % 100 == 0 and iteration > 0:

            samples = []
            for i in range(10):
                samples.extend(generate_samples())

            for i in range(4):
                lm = utils.NgramLanguageModel(i+1, samples, tokenize=False)
                lib.plot.plot('js{}'.format(i+1), lm.js_with(true_char_ngram_lms[i]))

            with open(os.path.join(args.output_dir, 'samples', 'samples_{}.txt').format(iteration), 'w') as f:
                for s in samples:
                    s = "".join(s)
                    f.write(s + "\n")

        if iteration % args.save_every == 0 and iteration > 0:
            model_saver = tf.train.Saver()
            model_saver.save(session, os.path.join(args.output_dir, 'checkpoints', 'checkpoint_{}.ckpt').format(iteration))
            print("{} / {} ({}%)".format(iteration, args.iters, iteration/args.iters*100.0 ))

        if iteration == args.iters:
            print("...Training done.")
        
        #if iteration % 100 == 0:
            #lib.plot.flush()

        #lib.plot.tick()
        
# Time stamp
localtime = time.asctime( time.localtime(time.time()) )
print("Ending TensorFlow session.")
print("Local current time :", localtime)

Starting TensorFlow session...
Local current time : Tue May 28 19:06:58 2024


TypeError: Argument `fetch` = <keras.src.optimizers.adam.Adam object at 0x000001C20EA8E9D0> has invalid type "Adam" must be a string or Tensor. (Can not convert a Adam into a Tensor or Operation.)