In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from copulas.univariate import GaussianKDE, GaussianUnivariate, UniformUnivariate
from scipy import stats
from sdmetrics.reports.single_table import QualityReport
from sdmetrics.reports.utils import get_column_plot
from sdv.tabular import GaussianCopula
# Apply seaborn's styling with its default arguments before any figure is drawn.
sns.set()

In [None]:
# Data directories, resolved relative to the notebook's parent directory.
# PATH_SYN_DATA is not used in this cell (presumably the target for synthetic output).
PATH_RAW_DATA = os.path.join(os.pardir, 'data', 'raw')
PATH_SYN_DATA = os.path.join(os.pardir, 'data', 'processed')

# Read the raw spreadsheet as-is.
raw_workbook = os.path.join(PATH_RAW_DATA, 'pDeltaT.xlsx')
data = pd.read_excel(raw_workbook)

# Working copy without the 'organization' column, plus a copy of the
# 'pDeltaT [°C]' column multiplied by 100 under its own name.
clean_data = data.drop(columns='organization')
clean_data['pDeltaT * 100 [°C]'] = clean_data['pDeltaT [°C]'].to_numpy() * 100

In [None]:
# Show the cleaned table (raw data minus 'organization', plus the x100 pDeltaT column).
clean_data

In [None]:
# Columns handed to the synthesizer, in metadata order. The unscaled
# 'pDeltaT [°C]' column is not listed; only its x100 copy is.
MODELLED_COLUMNS = [
    'd [mm]',
    'f [GHz]',
    'pPDn [W/m2]',
    'pPDtot [W/m2]',
    'psPDn_1 [W/m2]',
    'psPDtot_1 [W/m2]',
    'psPDn_4 [W/m2]',
    'psPDtot_4 [W/m2]',
    'pDeltaT * 100 [°C]',
]
# Number of synthetic rows to draw from the fitted model.
NUM_SYNTHETIC_ROWS = 1000

# Every modelled column is declared as a numerical float; no constraints are set.
table_metadata = {
    'fields': {column: {'type': 'numerical', 'subtype': 'float'}
               for column in MODELLED_COLUMNS},
    'constraints': [],
}

# Optional per-column marginals: uniform for d and f, KDE for the power-density
# columns, Gaussian for the scaled temperature column.
# GaussianUnivariate must be imported from copulas.univariate for this dict to build.
field_distributions = {
    'd [mm]': UniformUnivariate,
    'f [GHz]': UniformUnivariate,
    'pPDn [W/m2]': GaussianKDE,
    'pPDtot [W/m2]': GaussianKDE,
    'psPDn_1 [W/m2]': GaussianKDE,
    'psPDtot_1 [W/m2]': GaussianKDE,
    'psPDn_4 [W/m2]': GaussianKDE,
    'psPDtot_4 [W/m2]': GaussianKDE,
    'pDeltaT * 100 [°C]': GaussianUnivariate,
}

# `field_distributions` is intentionally NOT passed here, so every column is
# modelled with `default_distribution` (GaussianKDE). Add
# `field_distributions=field_distributions` to use the per-column marginals instead.
model = GaussianCopula(table_metadata=table_metadata,
                       default_distribution=GaussianKDE,
                       learn_rounding_scheme=False)

# NOTE(review): clean_data still contains 'pDeltaT [°C]', which is not in the
# metadata — confirm the synthesizer ignores columns that are not declared.
model.fit(clean_data)

# NOTE(review): no random seed is fixed, so the sample differs between runs.
syn_data = model.sample(num_rows=NUM_SYNTHETIC_ROWS)

In [None]:
# The quality report is given the real table without the unscaled
# 'pDeltaT [°C]' column, which is not declared in `table_metadata`.
real_data_for_report = clean_data.drop(columns='pDeltaT [°C]')

report = QualityReport()
report.generate(
    real_data=real_data_for_report,
    synthetic_data=syn_data,
    metadata=table_metadata,
)

In [None]:
# Fetch the report's details for the 'Column Shapes' property and render them.
col_shapes = report.get_details(property_name='Column Shapes')
display(col_shapes)

In [None]:
# Column plot for 'psPDtot_4 [W/m2]', built from the real table and the synthetic sample.
get_column_plot(clean_data, syn_data, column_name='psPDtot_4 [W/m2]', metadata=table_metadata)

In [None]:
# Column plot for the x100 temperature column, built from the real table and the synthetic sample.
get_column_plot(clean_data, syn_data, column_name='pDeltaT * 100 [°C]', metadata=table_metadata)

In [None]:
# Details of the report's 'Column Pair Trends' property, one row per column pair.
col_pair_trends = report.get_details(property_name='Column Pair Trends')

# Keep every pair that involves the scaled temperature column. The column can be
# reported on either side of a pair, so both 'Column 1' and 'Column 2' are checked;
# matching 'Column 1' alone drops the pairs where it is listed second.
target_column = 'pDeltaT * 100 [°C]'
involves_target = (
    col_pair_trends['Column 1'].eq(target_column)
    | col_pair_trends['Column 2'].eq(target_column)
)
pDeltaT_pair_trend = col_pair_trends[involves_target]
display(pDeltaT_pair_trend)