In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sdv.tabular import CopulaGAN
from sdv.tabular import TVAE
from sdv.tabular import GaussianCopula
from sdv.tabular import CTGAN
from sdv.evaluation import evaluate

from sklearn.preprocessing import PowerTransformer
from sklearn.model_selection import train_test_split

# Function for reshaping data after power transformation
def reshape_after_pt(df_pt, df):
    """Wrap transformed values in a DataFrame labelled like ``df``.

    Parameters
    ----------
    df_pt : array-like
        Transformed values, one column per column of ``df``.
    df : pandas.DataFrame
        Frame whose column labels and index are copied onto the result.

    Returns
    -------
    pandas.DataFrame
        ``df_pt`` with the columns and index of ``df``.
    """
    labelled = pd.DataFrame(df_pt, columns=df.columns)
    # The index is assigned (not re-indexed), so rows keep their positions.
    labelled.index = df.index
    return labelled

In [2]:
# Load the raw table and keep the four inputs and three responses used below.
df = pd.read_csv('allregimes.csv')
df = df[['N','xm','nr','ni','p1','p2','p3']]
# Replace N by the integer part of log2(N).
# `assign` builds a new column instead of writing through `.loc` into a frame
# that is itself a column subset of another frame; that avoids the
# chained-assignment warning and guarantees the column really ends up with an
# integer dtype (an in-place `.loc[:, col] = ...` may keep the old dtype).
# The cast truncates toward zero, exactly like the previous `.apply(int)`.
df = df.assign(N=np.log2(df['N']).astype('int64'))
df.sample(5)

Unnamed: 0,N,xm,nr,ni,p1,p2,p3
135,8,0.64,1.8,0.023,2.350804,-0.026391,-0.032737
104,8,0.53,1.3,0.023,1.085613,-0.006777,-0.00885
13,10,0.336,2.0,0.09,3.198936,-0.102964,-0.119347
266,6,0.73,1.68,0.023,1.036369,-0.001042,-0.006648
47,7,0.64,1.3,0.023,1.027853,-0.000807,-0.003855


In [3]:
features = ['N','xm','nr','ni']
response = ['p1','p2','p3']
X = df[features]
y = df[response]

# Train/test splitting: hold out 20% of the rows.
# The split is seeded so that "Restart & Run All" reproduces the same
# training set; without a seed every later score is computed against a
# different random subset on each run.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED)

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
# Re-attach the response columns to the training features; the join is an
# index-aligned left join, so every row of X_train is kept.
df_train = X_train.join(other=y_train, how='left')

In [6]:
import os

# Working directory for the rest of the notebook.
# The location can be overridden with the T_MATRIX_TITAN_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original hard-coded path is used. `os.chdir` still raises if the
# directory does not exist, so a wrong path fails loudly here.
project_dir = os.environ.get(
    'T_MATRIX_TITAN_DIR',
    'C:\\Users\\kurt_\\Google Drive\\dell_github\\T-Matrix-Titan\\',
)
os.chdir(project_dir)

In [12]:
# Grid search over CTGAN `epochs` x `batch_size`, scored with the KSTest metric.
test = []          # (epochs, batch_size, score) for every run that set a new best
best_score = 0.7   # a run must beat this threshold to be recorded at all
best_model = None  # stays None when no configuration beats the threshold
# Plain `range` yields Python ints, so the recorded tuples hold ordinary ints
# rather than numpy scalars.
for epochs in range(500, 1600, 50):
    for batch_size in range(40, 600, 50):
        model = CTGAN(
            epochs=epochs,
            batch_size=batch_size,
            field_transformers={'N': 'integer'},
        )
        model.fit(df_train)
        # Not cross-validation: the fitted model is sampled 20 times and the
        # scores averaged, to smooth out sampling noise in the metric.
        scores = []
        for i in range(20):
            new_X = model.sample(num_rows=200)
            scores.append(evaluate(new_X, df_train, metrics=['KSTest']))
        score = np.mean(scores)

        # Only improvements over the running best are kept and printed.
        if score > best_score:
            best_score = score
            best_model = model
            test.append((epochs, batch_size, score))
            print(epochs, batch_size, score)

500 40 0.7832883597883598
500 390 0.7847658730158731
550 40 0.8096494708994708
600 40 0.8183399470899472
850 40 0.8315621693121692
1050 90 0.8338637566137566


In [23]:
# Disabled inspection code: would tabulate the tuples collected in `test`
# under three column names and show the highest-scoring rows last.
# NOTE(review): three column names only fit 3-tuples in `test` — confirm
# which search's `test` this was meant for before re-enabling.
# test_df = pd.DataFrame(test, columns=['epochs','batch_size','score'])
# test_df.sort_values(by='score').tail(55)

In [16]:
# Single CopulaGAN run with three 256-unit layers in both networks.
# NOTE(review): this fits on `X`, whereas the CTGAN/TVAE searches fit on
# `df_train` — confirm that is intended.
model = CopulaGAN(
    field_transformers={'N': 'integer'},
    generator_dim=(256, 256, 256),
    discriminator_dim=(256, 256, 256),
    batch_size=320,
    epochs=1730,
)
model.fit(X)

# Draw 200 synthetic rows and score them against the data the model was fit on.
new_X = model.sample(num_rows=200)
evaluate(synthetic_data=new_X, real_data=X)

0.618071572482428

In [22]:
# Re-score the same fitted model with a larger synthetic sample (400 rows).
new_X = model.sample(num_rows=400)
evaluate(synthetic_data=new_X, real_data=X)

0.6297421456751627

In [27]:
# Configurations carried over for the TVAE search below, which reads the
# first two entries of each tuple as (epochs, batch_size).
# NOTE(review): the third entry is presumably the score from an earlier
# search — confirm.
old_test = [
    (1480, 260, 0.734149255511467),
    (1500, 120, 0.7343316449601694),
]

In [29]:

# TVAE search: for each (epochs, batch_size) pair in `old_test`, try two
# values of the `l2scale` regularisation weight.
test = []          # (epochs, batch_size, l2scale, score) for every new best
best_score = 0.7   # a run must beat this threshold to be recorded at all
best_model = None  # reset together with best_score so the two stay consistent
for ep_bs in old_test:
    epochs, batch_size = ep_bs[0], ep_bs[1]
    for l2scale in (2e-5, 3e-5):
        model = TVAE(
            epochs=epochs,
            batch_size=batch_size,
            # Pass the loop value through; previously it was only recorded,
            # so both iterations trained the same configuration.
            l2scale=l2scale,
            field_transformers={'N': 'integer'},
        )
        model.fit(df_train)
        # Draw a fresh sample for every round so the mean averages over
        # sampling noise (scoring one fixed sample repeatedly does not).
        scores = []
        for i in range(6):
            new_X = model.sample(num_rows=200)
            scores.append(evaluate(new_X, df_train))
        score = np.mean(scores)

        # Only improvements over the running best are kept and printed.
        if score > best_score:
            best_score = score
            best_model = model
            test.append((epochs, batch_size, l2scale, score))
            print(epochs, batch_size, l2scale, score)