# Create synthetic mouse phenome data

Create a synthetic version of the mouse phenomes from the original experiment, which are available after running `01_create_phenome_training_data.ipynb`. To run this notebook, you will need an API key from the Gretel console at https://console.gretel.cloud.


In [None]:
%%capture
!pip install -U gretel-client

In [None]:
# Specify your Gretel API key

from getpass import getpass
import pandas as pd
from gretel_client import configure_session, ClientConfig

# Do not truncate long cell values when DataFrames are displayed.
pd.set_option('max_colwidth', None)

# The API key is requested interactively via getpass, so it is never written
# into the notebook source.
session_config = ClientConfig(
    endpoint="https://api.gretel.cloud",
    api_key=getpass(prompt="Enter Gretel API key"),
)
configure_session(session_config)

                            

In [None]:
# Create a project

from gretel_client import create_project

# The model trained later in this notebook is created from this project.
project_display_name = "synthetic-mouse-phenomes"
project = create_project(display_name=project_display_name)

## Configure model hyperparameters
Load the default configuration template. This template will work well for most datasets. View other templates at https://github.com/gretelai/gretel-blueprints/tree/main/config_templates/gretel/synthetics

In [None]:
import json
from smart_open import open
import yaml

# Load the default synthetics template. `open` is smart_open's here, which is
# why it can be handed an HTTPS URL directly.
template_url = "https://raw.githubusercontent.com/gretelai/gretel-blueprints/main/config_templates/gretel/synthetics/default.yml"
with open(template_url, 'r') as template_file:
    config = yaml.safe_load(template_file)

# Optimize parameters for complex dataset
synthetics_params = config['models'][0]['synthetics']['params']
synthetics_params.update({
    'epochs': 150,
    'vocab_size': 0,
    'rnn_units': 1024,
    'reset_states': False,
    'learning_rate': 0.001,
})

# Show the full configuration that will be used to create the model.
print(json.dumps(config, indent=2))

## Load and preview the training dataset
Specify a data source to train the model on. This can be a local file, web location, or HDFS file.


In [None]:
import os
import pathlib


# Locate the data folder relative to the working directory. Presumably the
# notebook runs from a `synthetics/` folder with `mice_data_set/data` beside
# it (verify against the repository layout). Any path component named exactly
# "synthetics" is dropped; comparing whole components instead of replacing the
# "/synthetics" substring keeps names such as "synthetics-demo" intact and
# also works with Windows path separators.
cwd_parts = pathlib.Path(os.getcwd()).parts
base_path = pathlib.Path(*(part for part in cwd_parts if part != "synthetics"))
data_path = base_path / 'mice_data_set' / 'data'
dataset_path = data_path / 'phenomes_batch_0.csv'

# Load the training data and display it as the cell output.
df = pd.read_csv(dataset_path)
df

## Train the synthetic model
In this step, we task a worker, running either in the Gretel cloud or locally, with training a synthetic model on the source dataset.

In [None]:
from gretel_client.helpers import poll

# Request as many synthetic records as there are rows in the training data.
generation_settings = config['models'][0]['synthetics']['generate']
generation_settings['num_records'] = len(df)

# Create the model from the tuned configuration and point it at the training CSV.
model = project.create_model_obj(model_config=config)
model.data_source = str(dataset_path)

# Submit the job, uploading the data source with it, then hand the model to
# poll() (presumably blocks until training finishes; confirm in the client docs).
model.submit(upload_data_source=True)
poll(model)

# Save the synthetically generated phenomes

In [None]:
# Load the model's gzip-compressed "data_preview" artifact, save it next to the
# training data, and view the synthetic data.
preview_link = model.get_artifact_link("data_preview")
synthetic_phenomes = pd.read_csv(preview_link, compression='gzip')

synthetic_output_path = data_path / 'synthetic_phenomes_batch_0.csv'
synthetic_phenomes.to_csv(synthetic_output_path, index=False)

synthetic_phenomes

# View the synthetic data quality report

In [None]:
# Generate report that shows the statistical performance between the training and synthetic data

import IPython
from smart_open import open

# Read the report inside a context manager so the stream is closed as soon as
# its contents have been loaded, then render the HTML inline.
with open(model.get_artifact_link("report")) as report_stream:
    report_html = report_stream.read()

IPython.display.HTML(data=report_html)