<a target="_parent" href="https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/safe-synthetics/running-standalone-evaluate.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# 📊 Running Standalone Evaluate
This notebook lets you run the Evaluate step on your own training and synthetic data. This is helpful if:
- You want to compare the output of Gretel Synthetics against synthetic data generated by other means.
- You want to keep the train/test split consistent across multiple Safe Synthetics runs so that their scores are comparable.

## 💾 Install Gretel SDK

In [None]:
%%capture
# Install the Gretel SDK, upgrading it if already present (-U); the %%capture cell magic above keeps pip's output out of the notebook.
%pip install -U gretel-client

## 🌐 Configure your Gretel Session

In [None]:
from gretel_client.navigator_client import Gretel

# Create the Gretel client used by every later cell.
# NOTE(review): api_key="prompt" presumably asks for the key interactively
# instead of embedding it in the notebook — confirm against the SDK docs.
gretel = Gretel(
    api_key="prompt",
    default_project_id="standalone-evaluate",
)

## 🔬 Load real and synthetic data

In [None]:
import pandas as pd

from sklearn.model_selection import train_test_split

# Locations of the real dataset and of its synthetic counterpart.
real_ds = "https://gretel-datasets.s3.us-west-2.amazonaws.com/hipaa_patients.csv"
synthetic_ds = "https://gretel-datasets.s3.us-west-2.amazonaws.com/synthetic_hipaa_patients.csv"

# Read both CSVs into DataFrames (real first, then synthetic).
real_df, synthetic_df = (pd.read_csv(url) for url in (real_ds, synthetic_ds))

# Set aside 5% of the real rows as a holdout set; the fixed random_state
# makes the split reproducible from run to run.
train_df, holdout_df = train_test_split(
    real_df,
    random_state=42,
    test_size=0.05,
)

# Report the size of each dataset, one line per dataset.
print(
    f"Number of rows - train: {len(train_df)}",
    f"Number of rows - holdout: {len(holdout_df)}",
    f"Number of rows - synthetic: {len(synthetic_df)}",
    sep="\n",
)

# Last expression of the cell: preview the first training rows.
train_df.head()

In [None]:
# Upload each pandas DataFrame (train, holdout, synthetic — in that order)
# as a Gretel file with purpose "dataset"; the returned file objects expose
# the `.id` values that the workflow steps reference later.
training_file, holdout_file, synthetic_file = (
    gretel.files.upload(frame, purpose="dataset")
    for frame in (train_df, holdout_df, synthetic_df)
)

## 🏃 Run Evaluate

In [None]:
# Start from an empty workflow builder.
workflow = gretel.workflows.builder()

# Step 1: a Holdout task fed with the uploaded train and holdout file IDs.
# It is named "holdout" so that the next step can refer to it.
workflow.add_step(
    gretel.tasks.Holdout(),
    [training_file.id, holdout_file.id],
    step_name="holdout",
)

# Step 2: the evaluation task, fed with the synthetic file ID and the
# "holdout" step declared above (referenced by its step name).
workflow.add_step(
    gretel.tasks.EvaluateSafeSyntheticsDataset(),
    [synthetic_file.id, "holdout"],
)

In [None]:
# Run the workflow built in the previous cell. wait_until_done=True
# presumably blocks until the run has finished — confirm against the SDK docs.
# The call is left as the cell's final expression so its result is displayed.
workflow.run(wait_until_done=True)