Bu repo, görsel üreten bir vanilla GAN modelini ele alır.
Bu repo üzerinde iki aşamada koşullu (conditional) tablo verisi (tabular data) üreteceğiz:
* Koşullu yapma
* Tabular datada çalışıyor hale getirme

Şu anda 2. aşama üzerinde çalışılıyor.

In [None]:
from __future__ import absolute_import, division

import tensorflow as tf
import tensorflow.keras as keras 

import numpy as np
from pathlib import Path 

import PIL 
import imageio
from IPython import display

# Report the TensorFlow version and the visible GPUs, then switch every GPU
# to on-demand memory allocation.
print(tf.__version__)
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
tf.debugging.set_log_device_placement(False)
if gpus:
    try:
        # Memory growth has to be configured identically on every GPU.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Raised when memory growth is set after the GPUs were initialized.
        print(e)
        
import matplotlib.pyplot as plt
import sys 

sys.path.insert(0, "..")

from gan.networks import Generator, Discriminator, GAN

#### Prepare MNIST data

In [None]:
data_path = Path("./gan/datasets/data")

In [None]:
from keras.datasets import mnist

In [None]:
(train_image, train_labels), (_, _) = mnist.load_data()

In [None]:
# Append a single channel axis and cast to float32, then rescale with
# (x - 127.5) / 127.5 so that 0 maps to -1 and 255 maps to +1.
image_shape = (len(train_image), 28, 28, 1)
train_images = train_image.reshape(image_shape).astype('float32')
train_images = (train_images - 127.5) / 127.5

In [None]:
BUFFER_SIZE = 60000
BATCH_SIZE = 32
latent_dim = 100

In [None]:
# Build the training set for a single condition (label == 5).
# Integer-array indexing selects the matching images in one vectorised step;
# the previous Python loop tested `i in indices` for every sample, which is
# quadratic in the dataset size.
indices = np.where(train_labels == 5)[0]
train_images_5 = train_images[indices]

In [None]:
train_dataset = tf.data.Dataset.from_tensor_slices(train_images_5).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

In [None]:
# toplamda 60.000 görsel var
train_images.shape

In [None]:
# toplamda 5421 adet "5" görseli var
train_images_5.shape

Aşağıdaki model oluşturma adımında hangi koşul yazılırsa yazılsın model gerçek veya sahte ayrımı yapmaya çalışacaktır.
Bu yüzden önemli nokta veri setini ayrıştırıp ona vermektir örneğin sadece 5 için görsel çizdirmeye çalışalım.

#### Prepare Model

In [None]:
def loss_fn(labels, output):
    """Return the binary cross-entropy between ``labels`` and ``output``.

    The loss is built with ``from_logits=True``, i.e. ``output`` is passed
    to Keras as raw logits rather than probabilities.
    """
    bce = keras.losses.BinaryCrossentropy(from_logits=True)
    return bce(labels, output)

In [None]:
# One Adam optimizer per network, both with the same hyper-parameters.
# NOTE(review): beta_1=0.05 is unusually small; GAN recipes commonly use 0.5.
# Confirm this value is intentional and not a typo.
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.05)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.05)

In [None]:
discriminator = Discriminator()
generator = Generator()
gan = GAN(discriminator, generator, latent_dim)

In [None]:
gan.compile(discriminator_optimizer, generator_optimizer, loss_fn)

In [None]:
gan.fit(train_dataset, epochs=10)

#### Testing Generator

In [None]:
noise = tf.random.normal([1, 100])

In [None]:
generated_image = generator(noise)

In [None]:
generator.summary()

In [None]:
pred = discriminator(generated_image)

In [None]:
pred

In [None]:
plt.imshow(generated_image[0, :, :, 0], cmap='gray')

In [None]:
# deney
# ilk deney koşula uygun n adet yani birden fazla sentetik veri üretebilir miyiz?
# ikinci deney farklı koşullar için tekrar ve tekrar model eğitmek ile tek bir model eğitmek arasında ne gibi maliyet farkı var?

In [None]:
# deney 1:

In [None]:
latent_dim = 100
num_images = 49
random_latent_vectors = tf.random.normal(shape=(num_images, latent_dim))
generated_images = generator(random_latent_vectors)

In [None]:
# Show one of the generated samples. Select channel 0 explicitly and use a
# gray colormap so the plot matches the single-image preview earlier in the
# notebook instead of relying on the default colormap.
plt.imshow(generated_images[10, :, :, 0], cmap='gray')

In [None]:
# deney 2:

In [None]:
%%time
# Vanilla GAN trained on the full dataset (all digits, no conditioning).
train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
discriminator = Discriminator()
generator = Generator()
gan = GAN(discriminator, generator, latent_dim)
# The module-level optimizers have already trained an earlier model. Clone
# them from their config so this model starts with fresh optimizer state
# while keeping the same hyper-parameters.
gan.compile(
    type(discriminator_optimizer).from_config(discriminator_optimizer.get_config()),
    type(generator_optimizer).from_config(generator_optimizer.get_config()),
    loss_fn,
)
gan.fit(train_dataset, epochs=50)

32 dk 46 saniye - 60000x28x28

In [None]:
%%time
# Train an independent vanilla GAN for every digit class.
for spec in range(10):
    # Integer-array indexing replaces the former per-sample `i in indices`
    # scan, which was quadratic in the dataset size.
    indices = np.where(train_labels == spec)[0]
    train_images_spec = train_images[indices]
    train_dataset = tf.data.Dataset.from_tensor_slices(train_images_spec).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
    print("for spec:", spec)
    print("len train_images_spec:", len(train_images_spec))

    discriminator = Discriminator()
    generator = Generator()
    gan = GAN(discriminator, generator, latent_dim)
    # Each model gets its own optimizers (cloned from the shared config) so
    # that optimizer state is not carried over from the previous digit.
    gan.compile(
        type(discriminator_optimizer).from_config(discriminator_optimizer.get_config()),
        type(generator_optimizer).from_config(generator_optimizer.get_config()),
        loss_fn,
    )
    gan.fit(train_dataset, epochs=50)

33 dk - her veri ayrı ayrı

In [None]:
# The cell below is only for testing the outputs of the trained models.

In [None]:
%%time
# Train one GAN per digit and keep a single generated sample from each model.
list_img = []
for spec in range(10):
    # Integer-array indexing replaces the former per-sample `i in indices`
    # scan, which was quadratic in the dataset size.
    indices = np.where(train_labels == spec)[0]
    train_images_spec = train_images[indices]
    train_dataset = tf.data.Dataset.from_tensor_slices(train_images_spec).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
    print("spec:", spec)
    print("len train_images_spec:", len(train_images_spec))
    print("ilk on:", indices[:10])

    discriminator = Discriminator()
    generator = Generator()
    gan = GAN(discriminator, generator, latent_dim)
    # Each model gets its own optimizers (cloned from the shared config) so
    # that optimizer state is not carried over from the previous digit.
    gan.compile(
        type(discriminator_optimizer).from_config(discriminator_optimizer.get_config()),
        type(generator_optimizer).from_config(generator_optimizer.get_config()),
        loss_fn,
    )
    gan.fit(train_dataset, epochs=20)
    # Use `latent_dim` rather than a literal 100 so the noise shape always
    # matches the value the GAN was built with.
    noise = tf.random.normal([1, latent_dim])
    generated_image = generator(noise)
    list_img.append(generated_image[0, :, :, 0])
    #plt.imshow(generated_image[0, :, :, 0], cmap='gray')

In [None]:
# Display every collected sample, one figure per image. Iterating the list
# directly avoids an IndexError when fewer than ten samples were collected.
# The gray colormap matches the commented-out preview in the training loop.
for img in list_img:
    plt.imshow(img, cmap='gray')
    plt.show()

Not: Şu anda ilk aşama tamamlandı. Yani görsel veriler için belirli bir koşula uygun veri üretimi vanilla gan kullanılarak gerçekleştirildi.

Birden fazla örnek oluşturma veya oluşturulan örneklerin gösterilmesi ile ilgili bir problem gözlemlenmedi.

Ayrıca modellerin tek tek eğitilmesi veya tüm verilerin aynı anda koşuldan bağımsız eğitilmesi konusunda bir zaman farkı bulunmamaktadır.

Eğitimler, verilerimiz çok yüksek boyutlu olduğu için bu kadar uzun sürdü. Örneğin adult veri seti 32.500x15 boyutundadır; bizim görsel veri setimiz ise 60.000x28x28 boyutundadır, yani yaklaşık 96 katı büyüklüktedir.

Burada bir sonraki aşama tabular data için eğitim gerçekleştirebilmektir.
Daha sonrasında işlemlerin fonksiyonlaştırılması ve nesneye yönelimli programlama yapısında py dosyası formatına getirilmesidir.

En sonunda da bu modelin eğitimi hızlandırılmaya çalışılabilir. Veriyi temsil eden en iyi bir örneklem seçilebilir vs. veya paralelleştirme denenebilir. Modelin içine koşul verilmeye çalışılabilir. Early stopping eklenebilir. Veri boyutuna bağlı epoch sayısı belirlenebilir. Fakat version 1 için elde çalışan bir yapının bulunması iyi olacaktır. 

Şu anda 2. aşamadayız yani görsel verisi için eğittiğimiz modeli tabular data için eğiteceğiz.

In [1]:
from __future__ import absolute_import, division

import tensorflow as tf
import tensorflow.keras as keras 

import numpy as np
from pathlib import Path 

import PIL 
import imageio
from IPython import display

# Report the TensorFlow version and the visible GPUs, then switch every GPU
# to on-demand memory allocation.
print(tf.__version__)
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
tf.debugging.set_log_device_placement(False)
if gpus:
    try:
        # Memory growth has to be configured identically on every GPU.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Raised when memory growth is set after the GPUs were initialized.
        print(e)
        
import matplotlib.pyplot as plt
import sys 

sys.path.insert(0, "..")

from gan.networks import Generator, Discriminator, GAN

2.5.0
Num GPUs Available:  1
1 Physical GPUs, 1 Logical GPUs


In [2]:
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

In [3]:
data_path = "C:/Users/kalybeai-dxlc693/Desktop/GANS/adult.csv"

In [4]:
data = pd.read_csv(data_path)

In [5]:
df = data.copy()

In [6]:
# Integer-encode every categorical column. A separate LabelEncoder is fitted
# and stored per column; refitting one shared encoder would keep only the
# last column's mapping, so generated values could not be decoded back to
# their original categories.
categorical_columns = ['workclass', 'education', 'marital.status', 'occupation',
                       'relationship', 'race', 'sex', 'native.country', 'income']
label_encoders = {}
for column in categorical_columns:
    le = preprocessing.LabelEncoder()
    df[column] = le.fit_transform(df[column].astype(str))
    label_encoders[column] = le

In [7]:
df

Unnamed: 0,age,workclass,fnlwgt,education,education.num,marital.status,occupation,relationship,race,sex,capital.gain,capital.loss,hours.per.week,native.country,income
0,90,0,77053,11,9,6,0,1,4,0,0,4356,40,39,0
1,82,4,132870,11,9,6,4,1,4,0,0,4356,18,39,0
2,66,0,186061,15,10,6,0,4,2,0,0,4356,40,39,0
3,54,4,140359,5,4,0,7,4,4,0,0,3900,40,39,0
4,41,4,264663,15,10,5,10,3,4,0,0,3900,40,39,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,22,4,310152,15,10,4,11,1,4,1,0,0,40,39,0
32557,27,4,257302,7,12,2,13,5,4,0,0,0,38,39,0
32558,40,4,154374,11,9,2,7,0,4,1,0,0,40,39,1
32559,58,4,151910,11,9,6,1,4,4,0,0,0,40,39,0


In [8]:
scaler = StandardScaler()

# Standardise every feature column; 'income' is split off as the label.
# `columns=` replaces the positional axis argument, which pandas deprecated
# and later removed.
X_train = scaler.fit_transform(df.drop(columns='income'))
y_train = df['income'].values

  X_train = scaler.fit_transform(df.drop('income', 1))


In [9]:
X_train.shape

(32561, 14)

In [10]:
y_train.shape

(32561,)

In [11]:
y_train

array([0, 0, 0, ..., 1, 0, 0])

In [12]:
BUFFER_SIZE = len(X_train)
BATCH_SIZE = 32
latent_dim = 100

In [13]:
def loss_fn(labels, output):
    """Return the binary cross-entropy between ``labels`` and ``output``.

    The loss is built with ``from_logits=True``, i.e. ``output`` is passed
    to Keras as raw logits rather than probabilities.
    """
    bce = keras.losses.BinaryCrossentropy(from_logits=True)
    return bce(labels, output)

# One Adam optimizer per network, both with the same hyper-parameters.
# NOTE(review): beta_1=0.05 is unusually small; GAN recipes commonly use 0.5.
# Confirm this value is intentional and not a typo.
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.05)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.05)

In [14]:
X_train = np.float32(X_train)
train_dataset = tf.data.Dataset.from_tensor_slices(X_train).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

In [None]:
%%time
# Vanilla GAN that will be trained on the full dataset (no conditioning).
# This cell only builds and compiles the model.
discriminator = Discriminator()
generator = Generator()
gan = GAN(discriminator, generator, latent_dim)
gan.compile(discriminator_optimizer, generator_optimizer, loss_fn)

In [None]:
gan.fit(train_dataset, epochs=50)

In [None]:
noise = tf.random.normal([1, 100])

In [None]:
generated_data = generator(noise)

In [None]:
np.array(generated_data)[0]

In [None]:
# önce train içindeki kısım incelenecek
# sonra koşula uygun yapılmaya çalışılacaktır
# class sayısı dinamik yapılacak

In [43]:
# Train one vanilla GAN per class label and sample as many synthetic rows as
# the class has real rows. The label column is attached after sampling.
X_train = np.float32(X_train)
conditional_datasets = []
generated_frames = []
list_condition = [0, 1]
for cond in list_condition:
    # Integer-array indexing selects this class's rows in one vectorised step.
    indices = np.where(y_train == cond)[0]
    train_data_cond = X_train[indices]
    # Bug fix: the dataset is built from the class subset. It used to be built
    # from all of X_train, so every "conditional" model saw every class.
    train_dataset = tf.data.Dataset.from_tensor_slices(train_data_cond).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
    conditional_datasets.append(train_dataset)

    discriminator = Discriminator()
    generator = Generator()
    gan = GAN(discriminator, generator, latent_dim)
    # Each model gets its own optimizers (cloned from the shared config) so
    # that optimizer state is not carried over from the previous class.
    gan.compile(
        type(discriminator_optimizer).from_config(discriminator_optimizer.get_config()),
        type(generator_optimizer).from_config(generator_optimizer.get_config()),
        loss_fn,
    )
    # Fit on the dataset just built instead of indexing the list by label
    # value, which only worked because the labels happen to be 0 and 1.
    gan.fit(train_dataset, epochs=50)

    num_gen = len(indices)
    random_latent_vectors = tf.random.normal(shape=(num_gen, latent_dim))
    generated_data = generator(random_latent_vectors)
    gen = pd.DataFrame(np.array(generated_data), columns=data.columns[:-1])
    gen[data.columns[-1]] = cond
    generated_frames.append(gen)

# Concatenate once at the end; appending to an empty frame inside the loop
# degrades column dtypes to object.
if generated_frames:
    generated_df = pd.concat(generated_frames, ignore_index=True)
else:
    generated_df = pd.DataFrame(columns=data.columns[:])
    

Epoch 1/50




Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50


Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [44]:
generated_df

Unnamed: 0,age,workclass,fnlwgt,education,education.num,marital.status,occupation,relationship,race,sex,capital.gain,capital.loss,hours.per.week,native.country,income
0,0.953050,0.146762,0.872278,-0.052673,-0.246752,1.000000,0.968416,-0.994053,0.939371,-1.000000,-0.172806,-0.460047,-0.271497,0.011720,0
1,0.527140,-0.152709,0.998394,-0.887488,-0.985910,-1.000000,-0.849285,-0.993623,-0.886467,-1.000000,0.092665,0.163035,-0.495563,-0.786626,0
2,-0.997269,0.290166,1.000000,-0.999682,0.996131,1.000000,1.000000,-1.000000,-0.999989,-1.000000,0.181234,0.849084,-0.924706,-0.692538,0
3,0.966846,0.158713,-0.069453,-0.042285,0.152899,1.000000,0.998999,-0.992939,0.967859,-1.000000,-0.256531,-0.589193,-0.297412,0.379283,0
4,1.000000,-0.335150,0.948862,-0.770063,-0.971274,-1.000000,-0.999736,-0.967104,-0.648454,-1.000000,0.137781,0.089266,-0.679941,-0.676600,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,-0.008087,0.079040,-0.697490,0.182821,-0.416689,-0.388530,-0.031631,-0.901579,0.387564,0.706460,-0.133491,-0.212863,-0.020339,0.283989,1
32557,-0.755999,0.087710,-0.939204,0.187991,-0.419922,-0.393651,0.082476,-0.894353,0.389686,0.704007,-0.144048,-0.217244,-0.075433,0.288187,1
32558,-0.334042,0.083338,-0.339167,0.187430,-0.411940,-0.397491,-0.299551,-0.899506,0.385305,0.702436,-0.132318,-0.215476,0.880396,0.287024,1
32559,-0.904473,0.999988,-0.831591,0.008513,-0.752403,-0.569980,-0.998397,0.108146,0.390841,0.733607,-0.161410,-0.292565,-0.467243,0.307222,1


In [45]:
# Write the generated rows to CSV without the index column.
# NOTE(review): no `scaler.inverse_transform` is applied in this cell, and the
# cell that exports `gen_df` writes to this exact same path, so one export
# overwrites the other. Confirm which version should be kept.
generated_df.to_csv("C:/Users/kalybeai-dxlc693/Desktop/GANS/modular-conditional-gan-main/datasets/output_synt/adult.csv",
              index=False, sep=",")

In [38]:
# Map the generated feature columns back through the fitted scaler.
# `columns=` replaces the positional axis argument, which pandas deprecated
# and later removed.
gen_features = scaler.inverse_transform(generated_df.drop(columns='income'))

  gen_features = scaler.inverse_transform(generated_df.drop('income', 1))


In [39]:
gen_df = pd.DataFrame(gen_features, columns=data.columns[:-1])

In [40]:
gen_df[data.columns[-1]] = generated_df.iloc[:, -1].values

In [42]:
gen_df.to_csv("C:/Users/kalybeai-dxlc693/Desktop/GANS/modular-conditional-gan-main/datasets/output_synt/adult.csv",
              index=False, sep=",")

In [None]:
# Train a GAN on the class-0 dataset only and sample one synthetic row per
# real class-0 row.
# `cond` is set explicitly: the value left over from an earlier loop would
# otherwise decide the label. The row count comes from `y_train` because
# `list_count` is not defined anywhere in this notebook.
cond = 0
discriminator = Discriminator()
generator = Generator()
gan = GAN(discriminator, generator, latent_dim)
gan.compile(discriminator_optimizer, generator_optimizer, loss_fn)
gan.fit(conditional_datasets[cond], epochs=10)
num_gen = int(np.sum(y_train == cond))
random_latent_vectors = tf.random.normal(shape=(num_gen, latent_dim))
generated_data = generator(random_latent_vectors)
gen0 = pd.DataFrame(np.array(generated_data), columns=data.columns[:-1])
gen0[data.columns[-1]] = cond

In [None]:
# Train a GAN on the class-1 dataset only and sample one synthetic row per
# real class-1 row.
# `cond` is set explicitly: the value left over from an earlier loop would
# otherwise decide the label. The row count comes from `y_train` because
# `list_count` is not defined anywhere in this notebook.
cond = 1
discriminator = Discriminator()
generator = Generator()
gan = GAN(discriminator, generator, latent_dim)
gan.compile(discriminator_optimizer, generator_optimizer, loss_fn)
gan.fit(conditional_datasets[cond], epochs=10)
num_gen = int(np.sum(y_train == cond))
random_latent_vectors = tf.random.normal(shape=(num_gen, latent_dim))
generated_data = generator(random_latent_vectors)
gen1 = pd.DataFrame(np.array(generated_data), columns=data.columns[:-1])
gen1[data.columns[-1]] = cond

In [None]:
# Draw `num_gen` latent vectors, run them through the current `generator`
# and label the resulting rows with `cond`.
# NOTE(review): `list_count` is not defined anywhere in the visible notebook,
# and both `cond` and `generator` are whatever an earlier cell left behind.
# Confirm they refer to class 0 before storing the result as `gen0`.
num_gen = list_count[cond]
random_latent_vectors = tf.random.normal(shape=(num_gen, latent_dim))
generated_data = generator(random_latent_vectors)
gen0 = pd.DataFrame(np.array(generated_data), columns=data.columns[:-1])
gen0[data.columns[-1]] = cond

In [None]:
gen0 = pd.DataFrame(np.array(generated_data), columns=data.columns[:-1])
gen0[data.columns[-1]] = 0

In [None]:
data.columns[-1]

In [None]:
data.columns[:-1]