Tabular Data için Çalışan Koşullu GAN'ın Dinamik hale getirilmesi ve Fonksiyonelleştirilmesi

In [1]:
from __future__ import absolute_import, division

import tensorflow as tf
import tensorflow.keras as keras 

import numpy as np
from pathlib import Path
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

import PIL 
import imageio
from IPython import display

# Report the TensorFlow version and how many GPUs it can see.
print(tf.__version__)
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
tf.debugging.set_log_device_placement(False)

if gpus:
    try:
        # Memory growth currently has to be configured identically on every GPU.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when memory growth is set after the GPUs have been initialized.
        print(err)
        
import matplotlib.pyplot as plt
import sys 

sys.path.insert(0, "..")

from gan.networks import Generator, Discriminator, GAN

2.5.0
Num GPUs Available:  1
1 Physical GPUs, 1 Logical GPUs


In [2]:
# Load the real samples (semicolon-separated CSV).
file_name = "10ft_3k.csv"
data_path = "../datasets/input_reals/{0}".format(file_name)
data = pd.read_csv(data_path, sep=";")

# Binary condition label derived from CROSSING: 1 at or above the median,
# 0 below it. Rows for which neither comparison holds receive no label.
crossing = data["CROSSING"]
crossing_median = crossing.median()
data.loc[crossing >= crossing_median, "y"] = 1
data.loc[crossing < crossing_median, "y"] = 0

# Work on a copy so `data` keeps the frame exactly as labelled above.
df = data.copy()
df.head()

Unnamed: 0,CROSSING,FINISSHING,HEADING_ACCURACY,SHORT_PASSING,VOLLEYS,DRIBBLING,CURVE,FREE_KICK_ACCURACY,LONG_PASSING,BALL_CONTROL,y
0,75.0,48.0,56.0,67.0,56.0,67.0,73.0,70.0,69.0,69.0,1.0
1,75.0,35.0,58.0,71.0,32.0,48.0,54.0,43.0,61.0,72.0,1.0
2,59.0,68.0,72.0,61.0,64.0,63.0,57.0,54.0,57.0,66.0,0.0
3,60.0,55.0,81.0,78.0,50.0,57.0,58.0,48.0,77.0,74.0,0.0
4,63.0,47.0,51.0,64.0,59.0,66.0,71.0,69.0,60.0,64.0,1.0


In [3]:
# Class balance of the derived condition label.
data["y"].value_counts()

1.0    1558
0.0    1442
Name: y, dtype: int64

In [4]:
# data_path = "C:/Users/kalybeai-dxlc693/Desktop/GANS/modular-conditional-gan-main/datasets/input_reals/10ft_3k.csv"
# data = pd.read_csv(data_path, sep=";")
# df = data.copy()

# # preprocess
# le = preprocessing.LabelEncoder()
# for i in ['workclass','education','marital.status','occupation','relationship','race','sex','native.country','income']:
#     df[i] = le.fit_transform(df[i].astype(str))

# Standardize every feature column; the condition column is held out as the label.
scaler = StandardScaler()
condition_feature = "y"
# `columns=` replaces the positional axis argument (`df.drop(label, 1)`), which
# pandas deprecated and later removed.
X_train = scaler.fit_transform(df.drop(columns=condition_feature))
y_train = df[condition_feature].values

  X_train = scaler.fit_transform(df.drop(condition_feature, 1))


In [5]:
# Configuration
BATCH_SIZE = 64
latent_dim = 50
# X_train is (rows, features): the shuffle buffer covers every row and the
# networks' output width equals the number of feature columns.
BUFFER_SIZE, out_shape = X_train.shape

In [6]:
# Loss and optimizers
# Binary cross-entropy that interprets its predictions as raw logits.
_bce_from_logits = keras.losses.BinaryCrossentropy(from_logits=True)


def loss_fn(labels, output):
    """Return the binary cross-entropy between `labels` and the logits `output`."""
    return _bce_from_logits(labels, output)


# Both networks use Adam with the same hyperparameters.
# NOTE(review): beta_1=0.05 is unusually small (0.5 is the common choice for
# GAN training) - confirm this value is intentional.
_ADAM_KWARGS = dict(learning_rate=0.0002, beta_1=0.05)
generator_optimizer = tf.keras.optimizers.Adam(**_ADAM_KWARGS)
discriminator_optimizer = tf.keras.optimizers.Adam(**_ADAM_KWARGS)

In [7]:
# Conditions: one independent GAN is trained for each distinct label value,
# then asked to generate as many synthetic rows as that label has real rows.
list_condition = list(np.unique(y_train))

X_train = np.float32(X_train)
conditional_datasets = {}
# Per-condition synthetic frames; concatenated once after the loop instead of
# growing a DataFrame on every iteration.
generated_parts = []

for cond in list_condition:
    # Integer-array indexing picks this condition's rows in their original
    # order without testing membership for every row.
    indices = np.where(y_train == cond)[0]
    train_data_cond = X_train[indices]
    print(train_data_cond.shape)
    train_dataset = (
        tf.data.Dataset.from_tensor_slices(train_data_cond)
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE)
    )
    conditional_datasets[cond] = train_dataset

    discriminator = Discriminator(out_shape=out_shape)
    generator = Generator(out_shape=out_shape, latent_dim=latent_dim)
    gan = GAN(discriminator, generator, latent_dim)
    # Each GAN gets fresh optimizers cloned from the configured templates, so
    # optimizer state accumulated while training one condition's model is not
    # carried into the next, unrelated model.
    cond_discriminator_optimizer = type(discriminator_optimizer).from_config(
        discriminator_optimizer.get_config()
    )
    cond_generator_optimizer = type(generator_optimizer).from_config(
        generator_optimizer.get_config()
    )
    gan.compile(cond_discriminator_optimizer, cond_generator_optimizer, loss_fn)
    gan.fit(conditional_datasets[cond], epochs=50)

    # Sample one latent vector per real row of this condition.
    num_gen = len(indices)
    random_latent_vectors = tf.random.normal(shape=(num_gen, latent_dim))
    generated_data = generator(random_latent_vectors)
    gen = pd.DataFrame(np.array(generated_data), columns=data.columns.drop(condition_feature))
    gen[condition_feature] = cond
    generated_parts.append(gen)

if generated_parts:
    # Restore the column order of `data` so downstream cells see the same layout.
    generated_df = pd.concat(generated_parts, ignore_index=True)[data.columns]
else:
    # No conditions found: keep an empty frame with the expected columns.
    generated_df = pd.DataFrame(columns=data.columns[:])

(1442, 10)
Epoch 1/50




Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
(1558, 10)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50


Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [8]:
# Undo the standardization so the synthetic features are back in the original
# units, then re-attach the condition label.
# `columns=` replaces the positional axis argument (`drop(label, 1)`), which
# pandas deprecated and later removed.
gen_features = scaler.inverse_transform(generated_df.drop(columns=condition_feature))
gen_df = pd.DataFrame(gen_features, columns=data.columns.drop(condition_feature))
gen_df[condition_feature] = generated_df.loc[:, condition_feature].values

# Create the output directory if needed so the export does not fail on a fresh checkout.
output_path = Path("../datasets/output_synt/{0}".format(file_name))
output_path.parent.mkdir(parents=True, exist_ok=True)
gen_df.to_csv(output_path, index=False, sep=",")

  gen_features = scaler.inverse_transform(generated_df.drop(condition_feature, 1))


In [9]:
# Show the synthetic data set: inverse-scaled features plus the condition label.
gen_df

Unnamed: 0,CROSSING,FINISSHING,HEADING_ACCURACY,SHORT_PASSING,VOLLEYS,DRIBBLING,CURVE,FREE_KICK_ACCURACY,LONG_PASSING,BALL_CONTROL,y
0,45.071953,55.001816,76.615021,75.098137,67.968086,72.424416,37.295490,33.801247,69.043243,70.029503,0.0
1,64.782959,69.300926,76.641190,59.832386,68.640251,77.949600,66.850182,32.023582,45.511963,74.727722,0.0
2,39.367741,29.420408,76.667404,72.391342,29.779350,45.973503,34.209759,31.324692,60.596622,63.690086,0.0
3,67.599541,44.082733,42.045090,64.846504,59.073532,68.254135,49.485775,38.579731,46.181057,63.319191,0.0
4,52.429592,54.715538,74.404205,70.253029,63.944351,64.801903,43.826675,41.841766,65.639053,70.718948,0.0
...,...,...,...,...,...,...,...,...,...,...,...
2995,73.815338,69.300636,73.706909,73.597076,68.612480,78.167007,72.256508,68.253586,68.323273,79.872993,1.0
2996,72.018715,69.254967,72.524658,65.171722,68.233528,73.964615,42.255505,53.729008,51.253010,71.949234,1.0
2997,71.184364,67.780426,49.132561,64.623238,48.119453,76.268845,60.292290,42.832336,62.730099,76.395126,1.0
2998,70.697861,58.600239,49.332405,78.711220,64.247307,73.941887,72.287300,68.372589,73.621162,77.351799,1.0
