# Demonstration of Synthius

This notebook demonstrates how to define a custom synthesizer model and run it with Synthius.

### Step 1 - Prepare Models

⚠️ This section requires looking at the source code of `Synthius`.

Details of the `Synthesizer` protocol can be found in `model/synthesizer.py`.

In [None]:
import pandas as pd

from synthius.model import Synthesizer


class MySynthesizer(Synthesizer):
    """Example user-defined Synthesizer for the demo.

    This class is a template: ``fit`` only stores the training data and
    ``generate`` returns a copy of that data unchanged. Replace the
    ``# Your code here`` markers with a real generative model.
    """

    def __init__(self) -> None:
        """Initialize the demo synthesizer with no fitted state."""
        self.name = "MySynthesizer"
        # Declared up front so `generate` can detect a missing `fit` call
        # instead of failing with an AttributeError.
        self.train_data: pd.DataFrame | None = None
        self.total_samples: int | None = None
        self.conditions: list | None = None

    def fit(self, train_data: pd.DataFrame) -> None:
        """Fit should train the data generation model on the training data.

        Parameters:
            train_data : pd.DataFrame
                Tabular dataset to train the generation model.
        """
        self.train_data = train_data
        # Your code here

    def generate(self, total_samples: int, conditions: list | None = None) -> pd.DataFrame:
        """Generate should return generated data.

        The placeholder implementation records its arguments and returns a
        copy of the training data; it does not yet honor ``total_samples``.

        Parameters:
            total_samples : int
                Number of synthetic rows to generate.
            conditions : list | None, optional
                Currently ignored; included for compatibility with the Synthesizer protocol.

        Returns:
            pd.DataFrame
                Synthetic samples as a DataFrame.

        Raises:
            RuntimeError
                If called before ``fit``.
        """
        if self.train_data is None:
            msg = "MySynthesizer.generate() called before fit(); call fit(train_data) first."
            raise RuntimeError(msg)

        self.total_samples = total_samples
        self.conditions = conditions
        # Your code here
        # Return a copy so callers that modify the output cannot alter the
        # stored training data.
        return self.train_data.copy()

In [2]:
# Synthesizer instances to run; this list is passed to `run_synthius` as `models`.
models = [MySynthesizer()]

In [3]:
# Column groups of the dataset, forwarded to `run_synthius` under the same
# keyword names.
# NOTE(review): the exact role of each group is defined by Synthius — confirm there.
key_fields = [
    "Age", "Education", "Occupation", "Income",
    "Marital-status", "Native-country", "Relationship",
]

sensitive_fields = [
    "Race",
    "Sex",
]

# Two groups of auxiliary columns, one inner list per group.
aux_cols = [
    [
        "Occupation",
        "Education",
        "Education-num",
        "Hours-per-week",
        "Capital-loss",
        "Capital-gain",
    ],
    [
        "Race",
        "Sex",
        "Fnlwgt",
        "Age",
        "Native-country",
        "Workclass",
        "Marital-status",
        "Relationship",
    ],
]

### Step 2 - Run Synthius

In [None]:
from synthius import run_synthius

# All arguments are passed by keyword, so their order here is purely for readability.
# NOTE(review): the roles in the group comments below are inferred from the
# parameter names — confirm against the `run_synthius` signature.
run_synthius(
    # Models to run and the seed value
    models=models,
    random_seed=42,
    # Input dataset
    original_data_filename="adult_subset.csv",
    data_dir="./data",
    target_column="Income",
    # Column groups
    key_fields=key_fields,
    sensitive_fields=sensitive_fields,
    aux_cols=aux_cols,
    # Remaining directories (relative to the notebook's working directory)
    synth_dir="./synthetic_data",
    models_dir="./models",
    results_dir="./metrics",
)