In [None]:
!pip install tensorflow
!pip install keras
!pip install mido
!pip install pydub
!pip install resampy

In [None]:
# Step 1

import os
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np

import helper
from glob import glob
import pickle as pkl
import scipy.misc
from PIL import Image

import time

import cv2
import matplotlib.pyplot as plt
%matplotlib inline

do_preprocess = True
from_checkpoint = False

In [None]:
# Step 2
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#Step 3
data_dir = '/content/drive/MyDrive/photos2' # Data

#Step 4
data_resized_dir = "/content/drive/MyDrive/resized_data"# Resized data

if do_preprocess == True:
    if not os.path.isdir(data_resized_dir):
        os.mkdir(data_resized_dir)

    for each in os.listdir(data_dir):
        try:
            image = cv2.imread(os.path.join(data_dir, each))
            image = cv2.resize(image, (128, 128))
            cv2.imwrite(os.path.join(data_resized_dir, each), image)
        except Exception as e:
            print(str(e))


In [None]:
# Taken from Udacity Face generator Project
import math
import os
import hashlib
from urllib.request import urlretrieve
import zipfile
import gzip
import shutil

import numpy as np
from PIL import Image



def _read32(bytestream):
    """
    Read 32-bit integer from bytesteam
    :param bytestream: A bytestream
    :return: 32-bit integer
    """
    dt = np.dtype(np.uint32).newbyteorder('>')
    return np.frombuffer(bytestream.read(4), dtype=dt)[0]


def _unzip(save_path, _, database_name, data_path):
    """
    Unzip wrapper with the same interface as _ungzip
    :param save_path: The path of the gzip files
    :param database_name: Name of database
    :param data_path: Path to extract to
    :param _: HACK - Used to have to same interface as _ungzip
    """
    print('Extracting {}...'.format(database_name))
    with zipfile.ZipFile(save_path) as zf:
        zf.extractall(data_path)


def _ungzip(save_path, extract_path, database_name, _):
    """
    Unpack a gzip-compressed image file and save every image as a JPEG
    :param save_path: The path of the gzip file
    :param extract_path: Existing directory the 'image_<i>.jpg' files are written to
    :param database_name: Name of database, used in the progress description
    :param _: HACK - Used to have the same interface as _unzip
    :raises ValueError: If the file does not start with the magic number 2051
    """
    # tqdm is only used for the progress bar. Import it here so the function
    # does not depend on a notebook-level import, and fall back to plain
    # iteration when the package is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        def tqdm(iterable, **_kwargs):
            return iterable

    # Get data from save_path
    with open(save_path, 'rb') as f:
        with gzip.GzipFile(fileobj=f) as bytestream:
            magic = _read32(bytestream)
            if magic != 2051:
                raise ValueError('Invalid magic number {} in file: {}'.format(magic, f.name))
            # Header fields arrive as numpy uint32 scalars; convert them to
            # Python ints so the byte count below cannot wrap around.
            num_images = int(_read32(bytestream))
            rows = int(_read32(bytestream))
            cols = int(_read32(bytestream))
            buf = bytestream.read(rows * cols * num_images)
            data = np.frombuffer(buf, dtype=np.uint8)
            data = data.reshape(num_images, rows, cols)

    # Save data to extract_path, one grayscale ('L') JPEG per image
    for image_i, image in enumerate(
            tqdm(data, unit='File', unit_scale=True, miniters=1, desc='Extracting {}'.format(database_name))):
        Image.fromarray(image, 'L').save(os.path.join(extract_path, 'image_{}.jpg'.format(image_i)))


def get_image(image_path, width, height, mode):
    """
    Read the image at image_path and return its pixels in the requested mode
    :param image_path: Path of image
    :param width: Unused - kept for interface compatibility; no resizing is done here
    :param height: Unused - kept for interface compatibility; no resizing is done here
    :param mode: PIL mode string handed to Image.convert (e.g. 'RGB' or 'L')
    :return: Image data as a numpy array, at the size stored on disk
    """
    # The context manager closes the underlying file as soon as the pixel data
    # has been copied into the array, instead of leaving that to the garbage
    # collector when many files are read in a row.
    with Image.open(image_path) as image:
        return np.array(image.convert(mode))


def get_batch(image_files, width, height, mode):
    """
    Load a list of image files into one float32 array
    :param image_files: Paths of the images to load
    :param width: Forwarded to get_image
    :param height: Forwarded to get_image
    :param mode: Forwarded to get_image
    :return: float32 array of the stacked images
    """
    loaded = []
    for sample_file in image_files:
        loaded.append(get_image(sample_file, width, height, mode))
    data_batch = np.array(loaded).astype(np.float32)

    # A stack with fewer than four axes (e.g. images without a channel axis)
    # gets one trailing axis of length 1 appended.
    if data_batch.ndim < 4:
        data_batch = data_batch.reshape(data_batch.shape + (1,))

    return data_batch


def images_square_grid(images, mode):
    """
    Arrange a batch of images into one square grid image
    :param images: Array indexed as (image, row, column, channel)
    :param mode: The PIL mode to use for images ('RGB' or 'L')
    :return: PIL Image holding floor(sqrt(n)) ** 2 of the images; extras are dropped
    """
    # Get maximum size for square grid of images
    save_size = math.floor(np.sqrt(images.shape[0]))

    # Scale to 0-255. A batch in which every value is identical has no range
    # to stretch, so emit black tiles instead of dividing zero by zero.
    lowest = images.min()
    value_range = images.max() - lowest
    if value_range == 0:
        images = np.zeros(images.shape, dtype=np.uint8)
    else:
        images = (((images - lowest) * 255) / value_range).astype(np.uint8)

    # Axis 1 is rows (height) and axis 2 is columns (width). PIL sizes and
    # paste offsets are (x, y) = (width, height), so keep them apart: this
    # yields the same grid as before for square tiles and a correct one otherwise.
    tile_height, tile_width = images.shape[1], images.shape[2]

    # Put images in a square arrangement
    images_in_square = np.reshape(
            images[:save_size*save_size],
            (save_size, save_size, tile_height, tile_width, images.shape[3]))
    if mode == 'L':
        images_in_square = np.squeeze(images_in_square, 4)

    # Combine images to grid image; the outer index selects the grid column
    new_im = Image.new(mode, (tile_width * save_size, tile_height * save_size))
    for col_i, col_images in enumerate(images_in_square):
        for image_i, image in enumerate(col_images):
            im = Image.fromarray(image, mode)
            new_im.paste(im, (col_i * tile_width, image_i * tile_height))

    return new_im


def download_extract(database_name, data_path):
    """
    Download and extract database
    :param database_name: Database name, either 'celeba' or 'mnist'
    :param data_path: Directory that holds the download and the extracted folder
    :raises ValueError: If database_name is not one of the supported names
    :raises AssertionError: If the MD5 of the downloaded file does not match
    """
    DATASET_CELEBA_NAME = 'celeba'
    DATASET_MNIST_NAME = 'mnist'

    if database_name == DATASET_CELEBA_NAME:
        url = 'https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/celeba.zip'
        hash_code = '00d2c5bc6d35e252742224ab0c1e8fcb'
        extract_path = os.path.join(data_path, 'img_align_celeba')
        save_path = os.path.join(data_path, 'celeba.zip')
        extract_fn = _unzip
    elif database_name == DATASET_MNIST_NAME:
        url = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
        hash_code = 'f68b3c2dcbeaaa9fbdd348bbdeb94873'
        extract_path = os.path.join(data_path, 'mnist')
        save_path = os.path.join(data_path, 'train-images-idx3-ubyte.gz')
        extract_fn = _ungzip
    else:
        # Without this branch an unknown name fell through to an
        # UnboundLocalError on extract_path a few lines below.
        raise ValueError("Unknown database name {!r}; expected '{}' or '{}'".format(
            database_name, DATASET_CELEBA_NAME, DATASET_MNIST_NAME))

    if os.path.exists(extract_path):
        print('Found {} Data'.format(database_name))
        return

    if not os.path.exists(data_path):
        os.makedirs(data_path)

    if not os.path.exists(save_path):
        # NOTE(review): DLProgress is not defined in the visible part of this
        # file - confirm it is provided elsewhere before relying on the download path.
        with DLProgress(unit='B', unit_scale=True, miniters=1, desc='Downloading {}'.format(database_name)) as pbar:
            urlretrieve(
                url,
                save_path,
                pbar.hook)

    # Hash inside a with-block so the file handle is closed deterministically.
    with open(save_path, 'rb') as archive:
        digest = hashlib.md5(archive.read()).hexdigest()
    assert digest == hash_code, \
        '{} file is corrupted.  Remove the file and try again.'.format(save_path)

    os.makedirs(extract_path)
    try:
        extract_fn(save_path, extract_path, database_name, data_path)
    except Exception:
        shutil.rmtree(extract_path)  # Remove extraction folder if there is an error
        raise

    # Remove compressed data
    os.remove(save_path)


class Dataset(object):
    """
    Batched access to a list of image files, all described as 128x128 RGB
    """
    def __init__(self, data_files):
        """
        Store the file list and derive the dataset shape from its length
        :param data_files: List of files in the database
        """
        side_width, side_height, channel_count = 128, 128, 3

        self.image_mode = 'RGB'
        self.data_files = data_files
        self.shape = (len(data_files), side_width, side_height, channel_count)

    def get_batches(self, batch_size):
        """
        Yield consecutive full batches; a trailing partial batch is not produced
        :param batch_size: Batch Size
        :return: Generator of arrays computed as (loaded batch / 255 - 0.5)
        """
        # NOTE(review): 8-bit pixels end up in [-0.5, 0.5] here, while the
        # generator in this file ends in tanh - confirm the ranges are meant to differ.
        max_pixel = 255

        start = 0
        while True:
            stop = start + batch_size
            if stop > self.shape[0]:
                break

            chunk = self.data_files[start:stop]
            data_batch = get_batch(chunk, self.shape[1], self.shape[2], self.image_mode)

            start = stop

            yield data_batch / max_pixel - 0.5

In [None]:
# Collect the resized image paths. os.listdir returns entries in arbitrary
# order, so sort them to keep the file order stable between runs.
resized_data_filenames = [os.path.join(data_resized_dir, i) for i in sorted(os.listdir(data_resized_dir))]
show_n_images = 9
# get_image ignores the width/height arguments, so the tiles are shown at the
# size stored on disk rather than at 64x64.
train_images = get_batch(resized_data_filenames[:show_n_images], 64, 64, 'RGB')
plt.imshow(images_square_grid(train_images, 'RGB'))

In [None]:
# Step 7
def model_inputs(real_dim, z_dim):
    """
    Create the model inputs
    :param real_dim: tuple containing width, height and channels
    :param z_dim: The dimension of Z
    :return: Tuple of (tensor of real input images, tensor of z data, learning rate G, learning rate D)
    """
    # Leading None leaves the batch dimension unspecified.
    image_shape = (None,) + tuple(real_dim)

    inputs_real = tf.placeholder(tf.float32, image_shape, name='inputs_real')
    inputs_z = tf.placeholder(tf.float32, (None, z_dim), name="input_z")

    # Scalar placeholders so each network's learning rate is fed at run time.
    learning_rate_G = tf.placeholder(tf.float32, name="learning_rate_G")
    learning_rate_D = tf.placeholder(tf.float32, name="learning_rate_D")

    return (inputs_real, inputs_z, learning_rate_G, learning_rate_D)



In [None]:
# Step 8
def generator(z, output_channel_dim, is_train=True, alpha=None):
    ''' Build the generator network.

        Arguments
        ---------
        z : Input tensor for the generator
        output_channel_dim : Number of channels of the generated image
        is_train : True while training. Also controls variable sharing: the
                   "generator" scope is opened with reuse = not is_train, so an
                   inference call reuses the variables made by the training call
        alpha : leak parameter for leaky ReLU. When None (the default) the
                module-level `alpha` is used, as before this parameter
                existed; 0.2 is the fallback if no such global is defined

        Returns
        -------
        out: tanh-activated image tensor
    '''
    if alpha is None:
        # Older call sites do not pass alpha and relied on the notebook-level
        # variable; keep that working without requiring the global to exist.
        alpha = globals().get("alpha", 0.2)

    with tf.variable_scope("generator", reuse= not is_train):

        # First FC layer --> 8x8x1024
        fc1 = tf.layers.dense(z, 8*8*1024)

        # Reshape it
        fc1 = tf.reshape(fc1, (-1, 8, 8, 1024))

        # Leaky ReLU
        fc1 = tf.nn.leaky_relu(fc1, alpha=alpha)

        # Four identical stages of Transposed conv --> BatchNorm --> LeakyReLU.
        # Each stride-2 "SAME" transposed conv doubles the spatial size:
        # 8x8x1024 --> 16x16x512 --> 32x32x256 --> 64x64x128 --> 128x128x64
        # Layer names are unchanged (trans_conv1..4 etc.) so existing
        # checkpoints still match.
        net = fc1
        for stage, filters in enumerate((512, 256, 128, 64), start=1):
            net = tf.layers.conv2d_transpose(inputs = net,
                                  filters = filters,
                                  kernel_size = [5,5],
                                  strides = [2,2],
                                  padding = "SAME",
                                kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                name="trans_conv{}".format(stage))

            net = tf.layers.batch_normalization(inputs = net, training=is_train, epsilon=1e-5,
                                                name="batch_trans_conv{}".format(stage))

            net = tf.nn.leaky_relu(net, alpha=alpha, name="trans_conv{}_out".format(stage))

        # Transposed conv 5 --> tanh
        # 128x128x64 --> 128x128x<output_channel_dim>
        # (the channel count used to be hard-coded to 3, ignoring the argument)
        logits = tf.layers.conv2d_transpose(inputs = net,
                                  filters = output_channel_dim,
                                  kernel_size = [5,5],
                                  strides = [1,1],
                                  padding = "SAME",
                                kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                name="logits")

        out = tf.tanh(logits, name="out")

        return out



In [None]:
# Step 9

def discriminator(x, is_reuse=False, alpha = 0.2):
    ''' Build the discriminator network.

        Arguments
        ---------
        x : Input tensor for the discriminator
        is_reuse : Passed as `reuse` to the "discriminator" variable scope
        alpha : leak parameter for leaky ReLU

        Returns
        -------
        out, logits: sigmoid of the logits, and the raw logits
    '''
    # (filters, stride) for each Conv --> BatchNorm --> LeakyReLU stage.
    # For a 128x128x3 input:
    # 64x64x64 --> 32x32x128 --> 16x16x256 --> 16x16x512 --> 8x8x1024
    layer_plan = ((64, 2), (128, 2), (256, 2), (512, 1), (1024, 2))

    with tf.variable_scope("discriminator", reuse = is_reuse):

        features = x
        for layer_no, (n_filters, stride) in enumerate(layer_plan, start=1):
            features = tf.layers.conv2d(inputs = features,
                                        filters = n_filters,
                                        kernel_size = [5, 5],
                                        strides = [stride, stride],
                                        padding = "SAME",
                                        kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                        name='conv%d' % layer_no)

            # Batch norm is always run in training mode in this network.
            features = tf.layers.batch_normalization(features,
                                                     training = True,
                                                     epsilon = 1e-5,
                                                     name = 'batch_norm%d' % layer_no)

            features = tf.nn.leaky_relu(features, alpha=alpha, name="conv%d_out" % layer_no)

        # Flatten it (the size is fixed to the 8x8x1024 output of the last stage)
        flatten = tf.reshape(features, (-1, 8*8*1024))

        # Logits: a single unit with no activation
        logits = tf.layers.dense(inputs = flatten,
                                 units = 1,
                                 activation = None)

        out = tf.sigmoid(logits)

        return out, logits



In [None]:
# Step 10
def model_loss(input_real, input_z, output_channel_dim, alpha):
    """
    Build both networks and return their losses
    :param input_real: Images from the real dataset
    :param input_z: Z input
    :param output_channel_dim: The number of channels in the output image
    :param alpha: Leaky ReLU leak forwarded to both discriminator calls
    :return: A tuple of (discriminator loss, generator loss)
    """
    def _mean_bce(logits, labels):
        # Mean sigmoid cross-entropy of the logits against the given labels.
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

    # Generator output
    g_model = generator(input_z, output_channel_dim)

    # One discriminator, applied to real images and (with shared variables) to fakes
    d_model_real, d_logits_real = discriminator(input_real, alpha=alpha)
    d_model_fake, d_logits_fake = discriminator(g_model,is_reuse=True, alpha=alpha)

    # Discriminator: real images labelled 1, generated images labelled 0
    d_loss_real = _mean_bce(d_logits_real, tf.ones_like(d_model_real))
    d_loss_fake = _mean_bce(d_logits_fake, tf.zeros_like(d_model_fake))
    d_loss = d_loss_real + d_loss_fake

    # Generator: generated images scored against the label 1
    g_loss = _mean_bce(d_logits_fake, tf.ones_like(d_model_fake))

    return d_loss, g_loss



In [None]:
# Step 11
def model_optimizers(d_loss, g_loss, lr_D, lr_G, beta1):
    """
    Get optimization operations
    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param lr_D: Learning rate for the discriminator optimizer
    :param lr_G: Learning rate for the generator optimizer
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation, generator training operation)
    """
    # Split the trainable variables by the scope prefix of their name
    g_vars, d_vars = [], []
    for var in tf.trainable_variables():
        if var.name.startswith("generator"):
            g_vars.append(var)
        if var.name.startswith("discriminator"):
            d_vars.append(var)

    # Only the update ops registered under the generator scope are selected
    gen_updates = [
        op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if op.name.startswith('generator')
    ]

    # Both training ops are created under a dependency on those update ops
    with tf.control_dependencies(gen_updates):
        d_optimizer = tf.train.AdamOptimizer(learning_rate=lr_D, beta1=beta1)
        d_train_opt = d_optimizer.minimize(d_loss, var_list=d_vars)
        g_optimizer = tf.train.AdamOptimizer(learning_rate=lr_G, beta1=beta1)
        g_train_opt = g_optimizer.minimize(g_loss, var_list=g_vars)

    return d_train_opt, g_train_opt



In [None]:
# step 12
def show_generator_output(sess, n_images, input_z, out_channel_dim, image_mode, image_path, save, show):
    """
    Sample the generator and save and/or display the result as an image grid
    :param sess: TensorFlow session
    :param n_images: Number of Images to generate
    :param input_z: Input Z Tensor
    :param out_channel_dim: The number of channels in the output image
    :param image_mode: The mode to use for images ("RGB" or "L")
    :param image_path: Path to save the image
    :param save: When True, write the grid to image_path as a JPEG
    :param show: When True, display the grid with matplotlib
    """
    cmap = None if image_mode == 'RGB' else 'gray'
    z_dim = input_z.get_shape().as_list()[-1]
    example_z = np.random.uniform(-1, 1, size=[n_images, z_dim])

    # NOTE(review): generator() is invoked on every call, which presumably adds
    # another inference copy of its ops to the graph each time - confirm
    # whether that growth is acceptable for long trainings.
    samples = sess.run(
        generator(input_z, out_channel_dim, False),
        feed_dict={input_z: example_z})

    images_grid = images_square_grid(samples, image_mode)

    if save == True:
        # Create the directory if it doesn't exist. A bare file name has an
        # empty dirname, and os.makedirs('') raises, so skip it in that case.
        target_dir = os.path.dirname(image_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        # Save image
        images_grid.save(image_path, 'JPEG')

    if show == True:
        plt.imshow(images_grid, cmap=cmap)
        plt.show()

In [None]:
# Step 13
# Session configuration; train() passes this to tf.Session(config=config).
# allow_growth asks TensorFlow to claim GPU memory as needed instead of
# reserving it all when the session starts.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True



In [None]:
 # Step 14
def train(epoch_count, batch_size, z_dim, learning_rate_D, learning_rate_G, beta1, get_batches, data_shape, data_image_mode, alpha):
    """
    Train the GAN, or restore a checkpoint and sample from it

    Reads two module-level names: `config` (session configuration) and
    `from_checkpoint` (when True, no training happens; the checkpoint
    "./models/model.ckpt-300" is restored and one sample image is written).

    :param epoch_count: Number of epochs
    :param batch_size: Batch Size
    :param z_dim: Z dimension
    :param learning_rate_D: Learning rate fed to the discriminator optimizer
    :param learning_rate_G: Learning rate fed to the generator optimizer
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :param get_batches: Function to get batches
    :param data_shape: Shape of the data
    :param data_image_mode: The image mode to use for images ("RGB" or "L")
    :param alpha: Leaky ReLU leak passed on to model_loss
    :return: Tuple of (discriminator losses, generator losses), one value per
             trained epoch; both lists are empty in the from_checkpoint branch
    :raises ValueError: If get_batches yields no batch during an epoch
    """
    # Create our input placeholders
    input_images, input_z, lr_G, lr_D = model_inputs(data_shape[1:], z_dim)

    # Losses
    d_loss, g_loss = model_loss(input_images, input_z, data_shape[3], alpha)

    # Optimizers
    d_opt, g_opt = model_optimizers(d_loss, g_loss, lr_D, lr_G, beta1)

    g_losses = []
    d_losses = []

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        # Saver
        saver = tf.train.Saver()

        num_epoch = 0

        if from_checkpoint == True:
            saver.restore(sess, "./models/model.ckpt-300")
            image_path = "new_train/new_gen_image.jpg"
            show_generator_output(sess, 1, input_z, data_shape[3], data_image_mode, image_path, True, True)

        else:
            # Make sure the checkpoint directory exists before the first save.
            os.makedirs("./models", exist_ok=True)

            for epoch_i in range(epoch_count):
                num_epoch += 1
                # Rolling checkpoint, overwritten on every 5th epoch
                if num_epoch % 5 == 0:
                    saver.save(sess, "./models/model.ckpt")
                    print("Model saved")

                # Numbered checkpoint when the epoch index is above 50 and a multiple of 50
                if epoch_i > 50 and epoch_i % 50 == 0:
                    saver.save(sess, "./models/model.ckpt", global_step = epoch_i, write_meta_graph=False)

                batch_images = None
                for batch_images in get_batches(batch_size):
                    # Random noise
                    batch_z = np.random.uniform(-1, 1, size=(batch_size, z_dim))
                    # Run optimizers
                    _ = sess.run(d_opt, feed_dict={input_images: batch_images, input_z: batch_z, lr_D: learning_rate_D})
                    _ = sess.run(g_opt, feed_dict={input_images: batch_images, input_z: batch_z, lr_G: learning_rate_G})

                if batch_images is None:
                    # The loss evaluation below needs the last batch; without
                    # this check an empty epoch surfaced as an UnboundLocalError.
                    raise ValueError(
                        "get_batches({}) produced no batches; the dataset is "
                        "probably smaller than the batch size".format(batch_size))

                # will calculate losses and generate an image for each epoch,
                # using the last batch of the epoch

                train_loss_d = d_loss.eval({input_z: batch_z, input_images: batch_images})
                train_loss_g = g_loss.eval({input_z: batch_z})
                g_losses.append(train_loss_g)
                d_losses.append(train_loss_d)
                # Save it
                image_name = str(epoch_i) + ".jpg"
                image_path = "./images/" + image_name
                # Report against epoch_count (the argument), not the notebook
                # global `epochs`, so the total is right for any caller.
                print("Epoch {}/{}...".format(epoch_i+1, epoch_count),
                      "Discriminator Loss: {:.4f}...".format(train_loss_d),
                      "Generator Loss: {:.4f}".format(train_loss_g))
                show_generator_output(sess, 9, input_z, data_shape[3], data_image_mode, image_path, True, True)

    return d_losses, g_losses


In [None]:

# Step 15
# Hyperparameters passed to train() by the training cells below.

# Size input image for discriminator
real_size = (128,128,3)

# Size of latent vector to generator
z_dim = 100
# The discriminator learns more slowly than the generator (5e-5 vs 2e-4).
learning_rate_D =  .00005 # Thanks to Alexia Jolicoeur Martineau https://ajolicoeur.wordpress.com/cats/
learning_rate_G = 2e-4 # Thanks to Alexia Jolicoeur Martineau https://ajolicoeur.wordpress.com/cats/
# Images per training step
batch_size = 32
# Number of passes over the dataset
epochs = 1200
# Leak of the leaky ReLU activations. NOTE(review): model_loss calls
# generator() without an alpha argument, so the generator appears to pick this
# value up as a module-level name - keep it defined before building the model.
alpha = 0.5
# Exponential decay rate for the 1st moment in the Adam optimizers
beta1 = 0.5


In [None]:
# Step 16
dataset = Dataset(resized_data_filenames)

In [None]:
# Step 17
dataset.shape

In [None]:
# Step 18
# Build the model in a fresh graph and run train(); the two returned lists
# hold one discriminator loss and one generator loss per epoch.
with tf.Graph().as_default():
    d_losses, g_losses = train(epochs, batch_size, z_dim, learning_rate_D, learning_rate_G, beta1, dataset.get_batches, dataset.shape, dataset.image_mode, alpha)



In [None]:
# Step 19
# Plot the per-epoch losses returned by train(). Drawing on the Axes created
# here (instead of the implicit pyplot state) keeps the figure self-contained.
fig, ax = plt.subplots()
d_losses = np.array(d_losses)
g_losses = np.array(g_losses)
ax.plot(d_losses, label='Discriminator', alpha=0.5)
ax.plot(g_losses, label='Generator', alpha=0.5)
ax.set_title("Training Losses")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend();

In [None]:
# Step 20
# train() reads this flag as a module-level name: once it is True, train()
# restores a saved checkpoint and writes one sample image instead of training.
from_checkpoint = True

In [None]:
# Step 21
# With from_checkpoint set to True, this call rebuilds the model in a fresh
# graph and takes train()'s restore-and-sample branch; the returned (empty)
# loss lists are discarded.
with tf.Graph().as_default():
    train(epochs, batch_size, z_dim, learning_rate_D, learning_rate_G, beta1, dataset.get_batches,
          dataset.shape, dataset.image_mode, alpha)

In [None]:
# Define the generate_video function
import cv2
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow.keras.models import Model

# Disable eager execution to work with TensorFlow 1.x style code
tf.disable_eager_execution()

def generate_video(sess, generator_tensor, input_z, latent_dim, video_output_path, num_frames=100, fps=30):
    """
    Generate a video using the trained generator model.

    Each frame is produced from an independent random latent vector.

    Args:
        sess: TensorFlow session.
        generator_tensor: Output tensor of the generator.
        input_z: Input placeholder tensor for the generator.
        latent_dim: Dimension of the latent space for noise generation.
        video_output_path: Path to save the generated video.
        num_frames: Number of frames to generate for the video (default: 100).
        fps: Frames per second for the generated video (default: 30).

    Raises:
        ValueError: If num_frames is not positive.
        IOError: If the video file cannot be opened for writing.
    """
    if num_frames <= 0:
        # Without any frame there is no frame size to open the writer with.
        raise ValueError("num_frames must be positive, got {}".format(num_frames))

    # Generate random latent noise vectors
    latent_vectors = np.random.normal(0, 1, size=(num_frames, latent_dim))

    # Generate frames
    frames = []
    for latent_vector in latent_vectors:
        # Generate image from latent vector using the model
        generated_image = sess.run(generator_tensor, feed_dict={input_z: latent_vector.reshape(1, latent_dim)})[0]
        # Scale the pixel values to 0-255; clipping keeps any value outside
        # [-1, 1] from wrapping around in the uint8 cast.
        generated_image = np.clip((generated_image + 1) * 127.5, 0, 255).astype(np.uint8)
        frames.append(generated_image)

    # Get height and width of the first frame
    height, width = frames[0].shape[:2]

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
    video_writer = cv2.VideoWriter(video_output_path, fourcc, fps, (width, height))
    if not video_writer.isOpened():
        # A writer that failed to open drops frames silently; fail loudly
        # rather than report a video that was never written.
        video_writer.release()
        raise IOError("Could not open video file for writing: {}".format(video_output_path))

    try:
        # Write frames to video
        for frame in frames:
            video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # Convert to BGR for OpenCV
    finally:
        # Release video writer even if a frame could not be converted
        video_writer.release()
    print(f'Video saved to {video_output_path}')

# Step 20
from_checkpoint = True

# Step 21
with tf.Graph().as_default():
    with tf.Session() as sess:
        # No variable initializer is run here: the graph is still empty before
        # import_meta_graph, and saver.restore() loads the saved values.

        # Restore the trained generator model
        saver = tf.train.import_meta_graph('./models/model.ckpt.meta')
        saver.restore(sess, './models/model.ckpt')

        # Get the generator tensor by name
        generator_tensor_name = 'generator_499/out:0'  # Update this line with the correct tensor name
        generator_tensor = sess.graph.get_tensor_by_name(generator_tensor_name)

        # Define the input placeholder tensor by name
        input_z = sess.graph.get_tensor_by_name('input_z:0')

        # Generate the video using the trained generator model
        video_output_path = 'generated_video.mp4'
        num_frames = 100  # Number of frames to generate
        fps = 60  # Frames per second for the video
        latent_dim = 100  # Dimension of the latent space for noise generation

        generate_video(sess, generator_tensor, input_z, latent_dim, video_output_path, num_frames, fps)


In [None]:
import tensorflow.compat.v1 as tf
import numpy as np
import cv2

tf.disable_eager_execution()

def load_generator_model(sess, checkpoint_path, input_tensor_name='input_z:0',
                         generator_tensor_name='generator_499/out:0'):
    """
    Restore a saved graph and its values, then look up the generator's tensors.

    Args:
        sess: TensorFlow session to restore into; its graph receives the
            imported meta graph.
        checkpoint_path: Checkpoint prefix; the graph is read from
            checkpoint_path + '.meta'.
        input_tensor_name: Name of the latent-input tensor
            (default: 'input_z:0').
        generator_tensor_name: Name of the generator output tensor. The
            default 'generator_499/out:0' is specific to one saved graph;
            pass the name that exists in yours.

    Returns:
        Tuple of (latent input tensor, generated image tensor).
    """
    saver = tf.train.import_meta_graph(checkpoint_path + '.meta')
    saver.restore(sess, checkpoint_path)

    input_image = sess.graph.get_tensor_by_name(input_tensor_name)
    generated_image = sess.graph.get_tensor_by_name(generator_tensor_name)

    return input_image, generated_image


In [None]:
def preprocess_image(image_path, image_size):
    """
    Load an image file as a normalized RGB batch of one.

    Args:
        image_path: Path of the image file to read.
        image_size: Side length the image is resized to (square output).

    Returns:
        Float array with a leading batch axis of length 1, holding the RGB
        image at image_size x image_size with 8-bit values mapped to [-1, 1].

    Raises:
        FileNotFoundError: If the file cannot be read as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising; without this check the
        # failure only shows up as an opaque error inside cvtColor.
        raise FileNotFoundError("Could not read image: {}".format(image_path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (image_size, image_size))
    image = (image / 127.5) - 1.0  # Normalize to [-1, 1]
    return image[np.newaxis, ...]  # Add batch dimension

In [None]:
def generate_image(sess, input_image_tensor, generated_image_tensor, input_image):
    """
    Run the generator once and return the first output as an 8-bit image.

    Args:
        sess: Object with a run(fetches, feed_dict=...) method.
        input_image_tensor: Tensor the input value is fed to.
        generated_image_tensor: Tensor to evaluate.
        input_image: Value fed for input_image_tensor.

    Returns:
        uint8 array: the first element of the evaluated batch, mapped with
        (x + 1) * 127.5.
    """
    feed = {input_image_tensor: input_image}
    batch_output = sess.run(generated_image_tensor, feed_dict=feed)

    first_sample = batch_output[0]
    rescaled = (first_sample + 1) * 127.5  # Rescale to [0, 255]
    return rescaled.astype(np.uint8)

In [None]:
def save_image(image, output_path):
    """
    Write an RGB image array to disk with OpenCV.

    Args:
        image: Image array in RGB channel order.
        output_path: Destination file path.

    Raises:
        IOError: If OpenCV reports that the file was not written.
    """
    # OpenCV stores images in BGR order, so swap the channels before writing.
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # cv2.imwrite returns False on failure; surface that so a caller does not
    # report success for a file that was never written.
    if not cv2.imwrite(output_path, image):
        raise IOError("Could not write image to {}".format(output_path))

In [None]:
#  GENERATE AN IMAGE FROM AN INPUT GIVEN (Not using it, just to see image outputs)
def main(input_image_path, output_image_path, checkpoint_path, image_size, latent_dim):
    """
    Restore the generator and save one image generated from random noise.

    Args:
        input_image_path: Not read by this function.
        output_image_path: Where the generated image is written.
        checkpoint_path: Checkpoint prefix handed to load_generator_model.
        image_size: Not read by this function.
        latent_dim: Length of the random latent vector.
    """
    graph = tf.Graph()
    with graph.as_default(), tf.Session() as sess:
        input_latent_tensor, generated_image_tensor = load_generator_model(sess, checkpoint_path)

        # The latent vector is random noise; no input image is processed
        input_latent = np.random.normal(0, 1, size=(1, latent_dim))
        generated_image = generate_image(
            sess, input_latent_tensor, generated_image_tensor, input_latent)

        save_image(generated_image, output_image_path)
        print(f"Generated image saved to {output_image_path}")

# Parameters
# NOTE(review): main() does not read input_image_path or image_size; only the
# output path, the checkpoint path and latent_dim affect the result.
# NOTE(review): the output is written into the same Drive folder that the
# preprocessing step reads its training photos from - confirm that is intended.
input_image_path = '/content/drive/MyDrive/photos2/my image.jpeg'  # Path to the input image
output_image_path = '/content/drive/MyDrive/photos2/my image2.jpeg'  # Path to save the generated image
checkpoint_path = './models/model.ckpt'  # Path to the model checkpoint
image_size = 128  # Example image size (adjust as needed)
latent_dim = 100  # Dimension of the latent space for noise generation

main(input_image_path, output_image_path, checkpoint_path, image_size, latent_dim)

In [None]:
# Generate a video from an input image using the trained GAN generator

import cv2
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow.keras.models import Model

# Disable eager execution to work with TensorFlow 1.x style code
tf.disable_eager_execution()

def preprocess_image(image_path, image_size):
    """
    Load an image file as a normalized RGB batch of one.

    Args:
        image_path: Path of the image file to read.
        image_size: Side length the image is resized to (square output).

    Returns:
        Float array with a leading batch axis of length 1, holding the RGB
        image at image_size x image_size with 8-bit values mapped to [-1, 1].

    Raises:
        FileNotFoundError: If the file cannot be read as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising; without this check the
        # failure only shows up as an opaque error inside cvtColor.
        raise FileNotFoundError("Could not read image: {}".format(image_path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (image_size, image_size))
    image = (image / 127.5) - 1.0  # Normalize to [-1, 1]
    return image[np.newaxis, ...]  # Add batch dimension

def generate_video(sess, generator_tensor, input_z, latent_dim, video_output_path, input_image, num_frames=100, fps=30):
    """
    Generate a video using the trained generator model.

    Each frame is produced from its own random latent vector, offset by the
    first latent_dim values of the flattened input image (zero-padded if the
    image has fewer values).

    Args:
        sess: TensorFlow session.
        generator_tensor: Output tensor of the generator.
        input_z: Input placeholder tensor for the generator.
        latent_dim: Dimension of the latent space for noise generation.
        video_output_path: Path to save the generated video.
        input_image: Preprocessed input image.
        num_frames: Number of frames to generate for the video (default: 100).
        fps: Frames per second for the generated video (default: 30).

    Raises:
        ValueError: If num_frames is smaller than 1.
        IOError: If the video writer cannot be opened for video_output_path.
    """
    # Without at least one frame there is no frame size to open the writer with.
    if num_frames < 1:
        raise ValueError(f"num_frames must be at least 1, got {num_frames}")

    # Generate random latent noise vectors, one per frame
    latent_vectors = np.random.normal(0, 1, size=(num_frames, latent_dim))

    # Combine latent vectors with the input image in some way (this is an example and might need adjustment)
    image_offset = input_image.flatten()[:latent_dim]
    if len(image_offset) < latent_dim:
        image_offset = np.pad(image_offset, (0, latent_dim - len(image_offset)), 'constant')

    latent_vectors = latent_vectors + image_offset

    # Generate frames
    frames = []
    for latent_vector in latent_vectors:
        # Generate image from latent vector using the model
        generated_image = sess.run(generator_tensor, feed_dict={input_z: latent_vector.reshape(1, latent_dim)})[0]
        # Scale the pixel values to 0-255; clip first so values slightly outside
        # [-1, 1] saturate instead of wrapping around in the uint8 cast.
        generated_image = np.clip((generated_image + 1) * 127.5, 0, 255).astype(np.uint8)
        frames.append(generated_image)

    # Frame size is taken from the first generated frame
    height, width = frames[0].shape[:2]

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
    video_writer = cv2.VideoWriter(video_output_path, fourcc, fps, (width, height))
    try:
        # An unopened writer drops frames silently, so fail loudly instead.
        if not video_writer.isOpened():
            raise IOError(f"Could not open video writer for '{video_output_path}'")

        # Write frames to video
        for frame in frames:
            video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # Convert to BGR for OpenCV
    finally:
        # Release the writer even if writing a frame fails
        video_writer.release()
    print(f'Video saved to {video_output_path}')


# Step 21: restore the trained generator from its checkpoint and render a video with it
with tf.Graph().as_default():
    with tf.Session() as sess:
        # NOTE(review): this runs before the meta graph is imported, so the fresh
        # graph holds no checkpoint variables yet; the restore below is what loads
        # the trained weights — confirm whether this call is needed.
        sess.run(tf.global_variables_initializer())

        # Restore the trained generator model
        saver = tf.train.import_meta_graph('./models/model.ckpt.meta')
        saver.restore(sess, './models/model.ckpt')

        # Get the generator tensor by name
        generator_tensor_name = 'generator_499/out:0'  # Update this line with the correct tensor name
        generator_tensor = sess.graph.get_tensor_by_name(generator_tensor_name)

        # Define the input placeholder tensor by name
        input_z = sess.graph.get_tensor_by_name('input_z:0')

        # Preprocess the input image
        input_image_path = '/content/drive/MyDrive/photos2/image.jpeg'
        image_size = 128  # Example image size (adjust as needed)
        input_image = preprocess_image(input_image_path, image_size)

        # Generate the video using the trained generator model
        # (216 frames at 3 fps gives a 72-second video)
        video_output_path = 'generated_video.mp4'
        num_frames = 216  # Number of frames to generate
        fps = 3  # Frames per second for the video
        latent_dim = 100  # Dimension of the latent space for noise generation

        generate_video(sess, generator_tensor, input_z, latent_dim, video_output_path, input_image, num_frames, fps)


In [None]:
#Processing midi file
from collections import defaultdict
from mido import MidiFile
from pydub import AudioSegment
from pydub.generators import Sine

#Segment audio files (retain a specific loop)
from pydub import AudioSegment
import tempfile
import os

#Playing generated audios
import IPython.display as ipd

#Combine audios
import soundfile as sf
import numpy as np
import resampy

In [None]:
# Source MIDI file for the melody. truncate_wav() and generate_truncated_wav()
# read this module-level name directly, so it must be defined before they run.
midi_file_path = "/content/drive/MyDrive/audios/Never-Gonna-Give-You-Up-3.mid"

In [None]:
def ticks_to_ms(ticks, tempo, ticks_per_beat):
    """
    Convert a MIDI tick count to milliseconds.

    Parameters:
        ticks: Number of MIDI ticks.
        tempo: Tempo in beats per minute.
        ticks_per_beat: Tick resolution of the MIDI file (ticks in one beat).

    Returns:
        float: Duration of the ticks in milliseconds.
    """
    beat_seconds = 1 / (tempo / 60.0)
    tick_seconds = beat_seconds / ticks_per_beat
    elapsed_seconds = ticks * tick_seconds
    return elapsed_seconds * 1000

def note_to_freq(note):
    """
    Convert a MIDI note number to its frequency in Hz.

    Note 69 maps to 440 Hz and every 12 notes doubles the frequency.
    """
    semitones_from_a4 = note - 69
    octaves_from_a4 = semitones_from_a4 / 12.0
    return 440.0 * (2.0 ** octaves_from_a4)

def process_midi_file(midi_file_path, tempo=100):
    """
    Render a MIDI file as sine-wave audio and export it to a temporary WAV file.

    Every completed note (a note-on followed by its note-off, or by a note-on
    with velocity 0) is synthesized as a sine tone and overlaid on a silent
    canvas at the note's start position.

    Parameters:
        midi_file_path (str): Path to the input MIDI file.
        tempo (int): Tempo in beats per minute (BPM) used to convert ticks to
            milliseconds. Default is 100 BPM.

    Returns:
        str: Path of the temporary WAV file. It is created with delete=False,
        so the caller is responsible for removing it.

    Notes:
        - The silent canvas is as long as mid.length reports, while note
          positions are computed from the tempo argument only.
        - At most 1000 messages are counted per track; a note-off with no
          matching note-on is skipped without being counted.
        - Notes of 50 ms or less are dropped; longer notes are rendered 50 ms
          shorter than written, with a 30 ms fade-in and a 100 ms fade-out.
    """
    # Load the MIDI file
    mid = MidiFile(midi_file_path)
    print(f"Loaded MIDI file '{midi_file_path}' with length {mid.length:.2f} seconds")

    # Create a silent audio segment of the same length as the MIDI file
    output = AudioSegment.silent(duration=mid.length * 1000.0)
    print(f"Created silent audio segment of duration {mid.length * 1000.0:.2f} ms")

    max_iterations = 1000  # Cap on counted messages per track

    print("Processing track")

    for track in mid.tracks:
        current_pos = 0.0  # Position within the track, in milliseconds
        current_notes = defaultdict(dict)  # channel -> note -> (start position, message)
        iterations = 0

        for msg in track:
            if iterations >= max_iterations:
                break

            current_pos += ticks_to_ms(msg.time, tempo, mid.ticks_per_beat)

            if msg.type == 'note_on' and msg.velocity > 0:
                current_notes[msg.channel][msg.note] = (current_pos, msg)

            if msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):
                start_pos, _ = current_notes[msg.channel].pop(msg.note, (None, None))
                if start_pos is None:
                    # Note-off without a matching note-on: nothing to render
                    continue

                duration = current_pos - start_pos

                if duration > 50:  # Ensure duration is valid
                    signal_generator = Sine(note_to_freq(msg.note))
                    rendered = signal_generator.to_audio_segment(duration=int(duration - 50), volume=-20).fade_out(100).fade_in(30)
                    output = output.overlay(rendered, start_pos)

            iterations += 1

    print("Processing complete")

    # Reserve a temporary file name and close the handle right away, so the
    # export below does not depend on garbage collection to release it.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
        temp_output_path = temp_file.name
    output.export(temp_output_path, format='wav')

    print(f"Wav audio file saved at: {temp_output_path}")

    return temp_output_path

In [None]:
# Render the MIDI melody at 100 BPM to a temporary WAV file and keep its path for playback.
wav_audio_path = process_midi_file(midi_file_path, tempo=100)

In [None]:
# Play the rendered WAV inline to check it by ear and decide which section to loop
ipd.display(ipd.Audio(wav_audio_path))

In [None]:
def generate_wav_output(midi_file_path, tempo=100):
    """
    Render a MIDI file as sine-wave audio and return it in memory.

    Each completed note (note-on followed by a note-off, or by a note-on with
    velocity 0) becomes a sine tone overlaid on a silent canvas.

    Parameters:
        midi_file_path (str): Path to the input MIDI file.
        tempo (int): Tempo in beats per minute (BPM) used to convert ticks to
            milliseconds. Default is 100 BPM.

    Returns:
        AudioSegment: The generated audio segment.
    """
    mid = MidiFile(midi_file_path)
    print(f"Loaded MIDI file '{midi_file_path}' with length {mid.length:.2f} seconds")

    # Silent canvas spanning the length reported for the MIDI file
    canvas_ms = mid.length * 1000.0
    output = AudioSegment.silent(duration=canvas_ms)
    print(f"Created silent audio segment of duration {canvas_ms:.2f} ms")

    max_iterations = 1000  # Cap on counted messages per track

    print("Processing track")

    for track in mid.tracks:
        elapsed_ms = 0.0
        pending = defaultdict(dict)  # channel -> note -> (onset in ms, message)
        handled = 0

        for msg in track:
            if handled >= max_iterations:
                break

            elapsed_ms += ticks_to_ms(msg.time, tempo, mid.ticks_per_beat)

            starts_note = msg.type == 'note_on' and msg.velocity > 0
            ends_note = msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0)

            if starts_note:
                pending[msg.channel][msg.note] = (elapsed_ms, msg)

            if ends_note:
                onset_ms, _ = pending[msg.channel].pop(msg.note, (None, None))
                if onset_ms is None:
                    # Unmatched note end: skipped, and not counted towards the cap
                    continue

                held_ms = elapsed_ms - onset_ms

                if held_ms > 50:  # Ignore notes too short to render
                    tone = Sine(note_to_freq(msg.note)).to_audio_segment(duration=int(held_ms - 50), volume=-20)
                    tone = tone.fade_out(100).fade_in(30)
                    output = output.overlay(tone, onset_ms)

            handled += 1

    print("Processing complete")

    return output

In [None]:
def save_wav_output(midi_file_path, output_file_path, tempo=100):
    """
    Render a MIDI file to audio and save it as a WAV file.

    This function calls generate_wav_output to convert the MIDI file into an
    audio segment and then exports that segment to the given output path.

    Parameters:
        midi_file_path (str): Path to the input MIDI file.
        output_file_path (str): Path to save the output WAV file.
        tempo (int): Tempo in beats per minute (BPM) passed on to
            generate_wav_output. Default is 100 BPM.

    Example:
        save_wav_output("collab.mid", "output.wav")
    """
    # Render the melody in memory
    wav_output = generate_wav_output(midi_file_path, tempo=tempo)

    # Export the returned AudioSegment object to a file
    wav_output.export(output_file_path, format="wav")
    print(f"Wav Audio file saved as '{output_file_path}'")

In [None]:
# Render the MIDI melody again and save it under a fixed name in the working directory.
# NOTE: output_file_path is reassigned by later cells.
output_file_path = "first_melody_output.wav"
save_wav_output(midi_file_path, output_file_path)

In [None]:
def truncate_wav(start_time_sec, end_time_sec):
    """
    Render the melody, cut out a time window and export it to a temporary WAV file.

    The MIDI source is the module-level midi_file_path, which must be defined
    before this function is called.

    Parameters:
        start_time_sec (float): Start time in seconds.
        end_time_sec (float): End time in seconds.

    Returns:
        str: Path of the temporary WAV file holding the truncated audio. It is
        created with delete=False, so the caller is responsible for removing it.
    """
    # Process the MIDI file to generate the WAV audio segment
    wav_output = generate_wav_output(midi_file_path)

    # Calculate start and end times in milliseconds
    start_time_ms = start_time_sec * 1000
    end_time_ms = end_time_sec * 1000

    # Extract the segment
    truncated_audio = wav_output[start_time_ms:end_time_ms]

    print(f"Truncation complete. Segment from {start_time_sec} to {end_time_sec} seconds.")

    # Reserve a temporary file name and close the handle right away, so the
    # export below does not depend on garbage collection to release it.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
        temp_output_path = temp_file.name
    truncated_audio.export(temp_output_path, format='wav')

    print(f"Truncated audio file saved at: {temp_output_path}")

    return temp_output_path

In [None]:
# Cut the 7 s - 31 s window out of the rendered melody.
# NOTE: start_time_sec and end_time_sec are also read as module-level names by save_truncated().
start_time_sec = 7
end_time_sec = 31
truncated_audio_path = truncate_wav(start_time_sec, end_time_sec)

In [None]:
# Play the truncated audio inline to check the cut points by ear
ipd.display(ipd.Audio(truncated_audio_path))

In [None]:
def generate_truncated_wav(start_time_sec, end_time_sec):
    """
    Render the melody and return only the requested time window.

    The MIDI source is the module-level midi_file_path.

    Parameters:
        start_time_sec (float): Start time in seconds.
        end_time_sec (float): End time in seconds.

    Returns:
        AudioSegment: The truncated audio segment.
    """
    full_melody = generate_wav_output(midi_file_path)

    # The segment is sliced in milliseconds, so convert the bounds from seconds
    window_ms = slice(start_time_sec * 1000, end_time_sec * 1000)
    return full_melody[window_ms]

In [None]:
def save_truncated(output_file_path):
    """
    Save the truncated melody to a WAV file.

    The time window comes from the module-level start_time_sec and
    end_time_sec, which must be defined before this function is called; the
    audio itself is produced by generate_truncated_wav.

    Parameters:
        output_file_path (str): Path to save the truncated output WAV file.

    Example:
        save_truncated(output_file_path)
    """
    clip = generate_truncated_wav(start_time_sec=start_time_sec, end_time_sec=end_time_sec)

    # Write the clip to disk and report where it went
    clip.export(output_file_path, format="wav")
    print(f"Truncated audio file saved as '{output_file_path}'")

In [None]:
# Save the truncated melody under a fixed name in the working directory;
# later cells load this file to build the looped track.
output_file_path = "truncated_melody.wav"

save_truncated(output_file_path)

In [None]:
def loop_truncated_wav(input_wav_path, loop_count):
    """
    Loop a wav file a specified number of times to make one longer file.

    :param input_wav_path: Path to the input wav file
    :param loop_count: Number of times to loop the wav file
    :return: Path to the looped audio file (a temporary file created with
             delete=False, so the caller is responsible for removing it)
    """
    # Load the audio file
    audio = AudioSegment.from_wav(input_wav_path)

    # Loop the audio file
    looped_audio = audio * loop_count

    # Reserve a temporary file name and close the handle right away, so the
    # export below does not depend on garbage collection to release it.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
        temp_output_path = temp_file.name
    looped_audio.export(temp_output_path, format='wav')

    return temp_output_path

In [None]:
# Loop the saved truncated melody into a temporary WAV file for a listening test
input_wav_path = 'truncated_melody.wav'
loop_count = 7  # Number of times to loop the audio

looped_audio_path = loop_truncated_wav(input_wav_path, loop_count)
print(f"Looped audio file saved at: {looped_audio_path}")

In [None]:
# Play the looped audio inline to check the loop by ear
ipd.display(ipd.Audio(looped_audio_path))

In [None]:
def generate_save_looped_wav(input_wav_path, loop_count, output_file_path):
    """
    Repeat a wav file back to back and save the result as a new wav file.

    :param input_wav_path: Path to the input wav file
    :param loop_count: Number of times to loop the wav file
    :param output_file_path: Path where the looped wav file is written
    :return: None
    """
    # Load, repeat and export in one pass
    (AudioSegment.from_wav(input_wav_path) * loop_count).export(output_file_path, format="wav")
    print(f"Looped audio file saved as '{output_file_path}'")

In [None]:
# Build the looped track that is blended later: the truncated melody repeated 3 times.
# NOTE(review): the absolute /content path presumably matches the working directory in
# which the earlier cell wrote truncated_melody.wav — confirm when running outside Colab.
output_file_path = "looped_audio.wav"
input_wav_path = "/content/truncated_melody.wav"
generate_save_looped_wav(input_wav_path, 3, output_file_path)

In [None]:
def read_wav(file_path):
    """
    Reads a WAV file and converts stereo signals to mono.

    Parameters:
        file_path (str): The path to the WAV file.

    Returns:
        tuple: A tuple containing the sample rate and the audio signal as numpy array.
    """
    audio_signal, sample_rate = sf.read(file_path, dtype='int16')
    if audio_signal.ndim == 2:  # if stereo, convert to mono
        audio_signal = audio_signal.mean(axis=1).astype(np.int16)
    # Report before returning; a statement placed after the return never runs.
    print(f"Completed reading {file_path} and converted to mono signal.")
    return sample_rate, audio_signal

    
def write_wav(file_path, sample_rate, audio_signal):
    """
    Save an audio signal as a 16-bit PCM WAV file.

    Parameters:
        file_path (str): The path to save the WAV file.
        sample_rate (int): The sample rate of the audio.
        audio_signal (numpy.ndarray): The audio signal as a numpy array.
    """
    sf.write(file=file_path, data=audio_signal, samplerate=sample_rate, subtype='PCM_16')

    print("Completed writing audio data to a WAV file.")

def resample_audio(audio_signal, original_sr, target_sr):
    """
    Resamples the audio signal to the target sample rate.

    Parameters:
        audio_signal (numpy.ndarray): The audio signal to resample.
        original_sr (int): The original sample rate.
        target_sr (int): The target sample rate.

    Returns:
        numpy.ndarray: The resampled audio signal as int16 samples.
    """
    resampled = resampy.resample(audio_signal.astype(np.float32), original_sr, target_sr)

    # Resampling can overshoot the original peaks; clip to the int16 range so
    # those samples saturate instead of wrapping around in the cast.
    int16_limits = np.iinfo(np.int16)
    resampled = np.clip(resampled, int16_limits.min, int16_limits.max).astype(np.int16)

    # Report before returning; a statement placed after the return never runs.
    print(f"Completed resampling the audio signal to the target sample rate.")
    return resampled

def blend_wav_files(file_paths, output_path, weights=None):
    """
    Blends multiple WAV files into one by summing them with per-file weights.

    Parameters:
        file_paths (list): List of paths to input WAV files.
        output_path (str): The path to save the blended WAV file.
        weights (list, optional): List of blending weights, one per input file.
            Default is None, which gives every file the weight 1 / number of files.

    Raises:
        ValueError: If file_paths is empty, or if weights is given and its
            length differs from the number of input files.

    Note:
        Files whose sample rate differs from the first file's are resampled to
        the first file's rate rather than rejected.
        The length of the output signal will be the length of the longest input
        signal; shorter inputs are treated as silent after their end.
        The weighted sum is clipped to the int16 range before it is written.
    """
    file_paths = list(file_paths)
    if not file_paths:
        raise ValueError("file_paths must contain at least one WAV file")

    # Validate up front so a mismatch cannot silently drop files in zip() below
    if weights is None:
        weights = [1.0 / len(file_paths)] * len(file_paths)
    elif len(weights) != len(file_paths):
        raise ValueError(
            f"Expected {len(file_paths)} weights (one per file), got {len(weights)}"
        )

    sample_rates, audio_signals = [], []
    for file_path in file_paths:
        sr, audio = read_wav(file_path)
        sample_rates.append(sr)
        audio_signals.append(audio)

    # Resample all audio signals to the sample rate of the first file
    target_sr = sample_rates[0]
    audio_signals = [
        audio if sr == target_sr else resample_audio(audio, sr, target_sr)
        for audio, sr in zip(audio_signals, sample_rates)
    ]

    max_length = max(len(audio) for audio in audio_signals)
    blended_signal = np.zeros(max_length)

    for audio, weight in zip(audio_signals, weights):
        # Accumulate in float64 so integer weights cannot overflow int16
        blended_signal[:len(audio)] += audio.astype(np.float64) * weight

    blended_signal = np.clip(blended_signal, -32768, 32767).astype(np.int16)

    write_wav(output_path, target_sr, blended_signal)
    print(f"Completed blending multiple WAV files into one. File saved as {output_path}")

In [None]:
# Melody blender: mix three audio files from Drive with the looped melody, using equal weights
file_paths = ['/content/drive/MyDrive/audios/audio1.wav', '/content/drive/MyDrive/audios/audio2.wav', '/content/drive/MyDrive/audios/audio3.wav', '/content/looped_audio.wav']
output_path = "melody_for_video.wav"
blend_wav_files(file_paths, output_path)