## QPU Score Evaluation
---
- This notebook looks at the diabetes dataframe.
- It works with the scores obtained using the evaluation framework.

### Creating a Function that Inverts and Normalizes the Metrics

In [2]:
import pandas as pd

def normalize(series):
    """Min-max scale a pandas Series to [0, 1], rounded to 3 decimals.

    Parameters:
    series : numeric pandas Series to rescale

    Returns:
    A new Series where the smallest input maps to 0 and the largest to 1.
    A constant series (max == min) is returned as all 0.0, because the
    min-max formula would otherwise divide by zero and turn every row
    into NaN.
    """
    lowest = series.min()
    spread = series.max() - lowest

    # No spread means there is nothing to scale by: every value sits at the
    # minimum, so report 0.0 instead of letting 0/0 produce NaN.
    if spread == 0:
        return (series - lowest).astype(float).round(3)

    return ((series - lowest) / spread).round(3)

def invert(series):
    """Flip a score so that higher means better.

    Computes ``1 - series``, so the input is expected to be a
    "larger is worse" score; the result only stays in [0, 1] when the
    input is already in [0, 1].
    """
    flipped = 1 - series
    return flipped

def process_metrics(df, metric_names, invert_list=None, normalize_list=None):
    """
    Return a copy of ``df`` with the selected metric columns rescaled.

    For every column in ``metric_names`` the column is first normalized
    (if listed in ``normalize_list``) and then inverted (if listed in
    ``invert_list``). The input DataFrame is left untouched.

    Parameters:
    df : DataFrame containing the metrics
    metric_names : list of all metric columns to process
    invert_list : list of metric names to invert (None means no inversion)
    normalize_list : list of metric names to normalize (None means none)
    """
    to_invert = invert_list if invert_list else []
    to_normalize = normalize_list if normalize_list else []

    result = df.copy()

    for name in metric_names:
        values = result[name]

        # Order matters: scale to [0, 1] first so that 1 - x stays in range.
        if name in to_normalize:
            values = normalize(values)
        if name in to_invert:
            values = invert(values)

        result[name] = values

    return result


# Raw scores, one row per model and one column per metric.
df = pd.DataFrame(
    data={
        "Model": ["TVAE", "CTGAN", "CTABGAN", "Great", "RTF", "Tabddpm"],
        # Columns later grouped as quality metrics
        "MinMaxCheck": [293, 44, 188, 9, 6, 355],
        "WD_bmi": [0.52, 1.02, 1.23, 1.84, 0.11, 0.12],
        "ColShapes": [0.908, 0.89, 0.95, 0.95, 0.99, 0.91],
        "RangeCov": [0.83, 0.73, 0.87, 0.58, 0.93, 0.99],
        "APS": [0.981, 0.9, 0.996, 0.977, 0.994, 0.97],
        "CPT": [0.84, 0.837, 0.94, 0.66, 0.87, 0.83],
        # Columns later grouped as privacy metrics
        "MIA": [0.66, 0.95, 0.9, 0.71, 0.61, 0.62],
        "AIA": [0.585, 0.528, 0.598, 0.626, 0.635, 0.627],
        "Dupes": [506, 161, 9, 10589, 2254, 2713],
        # Column later used as the usability metric
        "TSTR": [0.780, 0.621, 0.777, 0.725, 0.8, 0.789],
    },
)


# Every metric column that goes through processing.
metric_cols = [
    'MinMaxCheck', 'WD_bmi', 'ColShapes', 'RangeCov', 'APS',
    'CPT', 'MIA', 'AIA', 'Dupes', 'TSTR',
]
# Columns flipped with 1 - x so that a higher value means better.
lower_is_better_cols = ['MinMaxCheck', 'WD_bmi', 'MIA', 'AIA', 'Dupes']
# Columns that are min-max scaled to [0, 1] before being flipped.
unscaled_cols = ['MinMaxCheck', 'WD_bmi', 'Dupes']

processed_df = process_metrics(
    df,
    metric_names=metric_cols,
    invert_list=lower_is_better_cols,
    normalize_list=unscaled_cols,
)
processed_df


Unnamed: 0,Model,MinMaxCheck,WD_bmi,ColShapes,RangeCov,APS,CPT,MIA,AIA,Dupes,TSTR
0,TVAE,0.178,0.763,0.908,0.83,0.981,0.84,0.34,0.415,0.953,0.78
1,CTGAN,0.891,0.474,0.89,0.73,0.9,0.837,0.05,0.472,0.986,0.621
2,CTABGAN,0.479,0.353,0.95,0.87,0.996,0.94,0.1,0.402,1.0,0.777
3,Great,0.991,0.0,0.95,0.58,0.977,0.66,0.29,0.374,0.0,0.725
4,RTF,1.0,1.0,0.99,0.93,0.994,0.87,0.39,0.365,0.788,0.8
5,Tabddpm,0.0,0.994,0.91,0.99,0.97,0.83,0.38,0.373,0.744,0.789


### Calculating QPU 
---
The QPU score is a weighted sum of the Quality (Q), Privacy (P), and Usability (U) scores: QPU = w_q · Q + w_p · P + w_u · U.

Below, we outline two scenarios based on the primary objective of the task:

- **QPU-Q** – Quality-oriented use case, where the fidelity of the synthetic data is critical. Example: training ML models for early cancer detection.
- **QPU-P** – Privacy-oriented use case, where protecting sensitive information is crucial. Example: healthcare applications governed by HIPAA.

In [4]:
def calculate_qpu(df, quality_metrics, privacy_metrics, usability_metric,
                  weights=(1/3, 1/3, 1/3)):
    """
    Compute the weighted QPU score of every row (model) in ``df``.

    Q is the row-wise mean of the quality columns, P the row-wise mean of
    the privacy columns, and U the usability column itself; each is rounded
    to 3 decimals. QPU is the weighted sum of those rounded values, rounded
    to 3 decimals again. The input DataFrame is not modified.

    Parameters:
    ----------
    df : pd.DataFrame
        DataFrame with metric columns and a 'Model' column.
    quality_metrics : list
        Column names for quality metrics.
    privacy_metrics : list
        Column names for privacy metrics.
    usability_metric : str
        Column name for usability metric.
    weights : tuple
        Weights for (Quality, Privacy, Usability). Default is (1/3, 1/3, 1/3).

    Returns:
    -------
    pd.DataFrame
        Columns 'Model', 'Q', 'P', 'U', 'QPU', one row per input row.
    """
    quality_weight, privacy_weight, usability_weight = weights

    # assign() works on a copy and evaluates the callables in order, so the
    # QPU step sees the Q, P and U columns created just before it.
    scored = df.assign(
        Q=lambda frame: frame[quality_metrics].mean(axis=1).round(3),
        P=lambda frame: frame[privacy_metrics].mean(axis=1).round(3),
        U=lambda frame: frame[usability_metric].round(3),
        QPU=lambda frame: (
            quality_weight * frame['Q']
            + privacy_weight * frame['P']
            + usability_weight * frame['U']
        ).round(3),
    )

    return scored[['Model', 'Q', 'P', 'U', 'QPU']]

# Example usage (QPU-Q): group the processed columns into quality, privacy
# and usability, then score with Quality carrying the largest weight.
quality_cols = [
    'MinMaxCheck',
    'WD_bmi',
    'ColShapes',
    'RangeCov',
    'APS',
    'CPT',
]
privacy_cols = ['MIA', 'AIA', 'Dupes']
usability_col = 'TSTR'

qpu_df = calculate_qpu(
    processed_df,
    quality_cols,
    privacy_cols,
    usability_col,
    weights=(0.8, 0.1, 0.1),  # (Quality, Privacy, Usability)
)

qpu_df

Unnamed: 0,Model,Q,P,U,QPU
0,TVAE,0.75,0.569,0.78,0.735
1,CTGAN,0.787,0.503,0.621,0.742
2,CTABGAN,0.765,0.501,0.777,0.74
3,Great,0.693,0.221,0.725,0.649
4,RTF,0.964,0.514,0.8,0.903
5,Tabddpm,0.782,0.499,0.789,0.754


In [5]:
# Example usage (QPU-P): same column grouping, with Privacy carrying the
# largest weight.
qpu_df = calculate_qpu(
    processed_df,
    quality_cols,
    privacy_cols,
    usability_col,
    weights=(0.2, 0.7, 0.1),  # (Quality, Privacy, Usability)
)
qpu_df

Unnamed: 0,Model,Q,P,U,QPU
0,TVAE,0.75,0.569,0.78,0.626
1,CTGAN,0.787,0.503,0.621,0.572
2,CTABGAN,0.765,0.501,0.777,0.581
3,Great,0.693,0.221,0.725,0.366
4,RTF,0.964,0.514,0.8,0.633
5,Tabddpm,0.782,0.499,0.789,0.585
