<a href="https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/create_synthetic_data_from_time_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Synthesize Time Series data from your own DataFrame

This Blueprint demonstrates how to create synthetic time series data with Gretel. We assume that within the dataset
there is at least:

1) A specific column holding time data points

2) One or more columns that contain measurements or numerical observations for each point in time.

For this Blueprint, we will generate simple sine and cosine waves (plus a constant text column) as our time series data.

In [None]:
%%capture
# %%capture silences this cell's output, so the pip logs below are not shown.

# Install the supporting libraries this notebook uses.
!pip install pyyaml smart_open numpy pandas
# Install the Gretel client, upgrading it (-U) if it is already present.
!pip install -U gretel-client

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Specify your Gretel API key

from getpass import getpass
import pandas as pd
from gretel_client import configure_session, ClientConfig

# Do not truncate long cell values when DataFrames are displayed.
pd.set_option('max_colwidth', None)

# Ask for the API key interactively (so it never lands in the notebook source)
# and register it, together with the API endpoint, for the rest of the session.
configure_session(
    ClientConfig(
        api_key=getpass(prompt="Enter Gretel API key"),
        endpoint="https://api.gretel.cloud",
    )
)

In [None]:
# Create a simple timeseries with a sine and cosine wave

import datetime
import pandas as pd
import numpy as np

day = 24 * 60 * 60      # seconds in one day
year = 365.2425 * day   # seconds in a mean Gregorian year; used as the wave period


def load_dataframe(start: str = '2017-01-01',
                   end: str = '2021-07-01',
                   freq: str = '4h') -> pd.DataFrame:
    """Create a demo time series holding one-year-period sine and cosine waves.

    Args:
        start: First timestamp of the series (anything ``pd.date_range`` accepts).
        end: Last timestamp of the series (inclusive when it falls on the grid).
        freq: Spacing between samples, as a pandas frequency string.

    Returns:
        A DataFrame with one row per sample and the columns:

        * ``date``  - the sample's day formatted as ``YYYY-MM-DD``. The time of
          day is dropped, so with a sub-daily ``freq`` several consecutive rows
          share the same label.
        * ``sin``   - ``(1 + sin(phase)) * 100`` rounded to 2 decimals (0..200).
        * ``cos``   - ``(1 + cos(phase)) * 100`` rounded to 2 decimals (0..200).
        * ``const`` - the constant string ``'abcxyz'``.

        ``phase`` is the sample's Unix time in seconds scaled so that one
        ``year`` corresponds to a full ``2 * pi`` cycle.
    """
    timestamps = pd.date_range(start=start, end=end, freq=freq)

    # Whole seconds since the Unix epoch. Timedelta arithmetic is used instead
    # of ``astype('int64') // 1e9`` so the result does not depend on the index
    # being stored at nanosecond resolution and stays in exact integer math.
    epoch_seconds = (timestamps - pd.Timestamp(0)) // pd.Timedelta(seconds=1)
    phase = epoch_seconds.to_numpy(dtype='float64') * (2 * np.pi / year)

    # Build the frame in one step; the scalar 'abcxyz' is broadcast to every row.
    return pd.DataFrame({
        'date': timestamps.strftime('%Y-%m-%d'),
        'sin': np.round((1 + np.sin(phase)) * 100, 2),
        'cos': np.round((1 + np.cos(phase)) * 100, 2),
        'const': 'abcxyz',
    })

# Build the training data and eyeball it: one line per numeric column over time.
train_df = load_dataframe()
(
    train_df
    .set_index('date')
    .plot(figsize=(12, 8))
)
plt.show()

In [None]:
from smart_open import open
import yaml

from gretel_client import get_project
from gretel_client.helpers import poll

# Obtain the Gretel project that will hold the model; create=True asks the
# client to create it.
project = get_project(
    name="updated-time-series-yay",
    display_name="create-synthetic-data-from-time-series",
    create=True,
)

# Pull down the time-series synthetic config template.  We will modify it slightly.
# The template is read straight from the gretel-blueprints repository (smart_open's
# `open` accepts https URLs) so the notebook does not depend on a checkout that
# only exists on one developer's machine.
with open('https://raw.githubusercontent.com/gretelai/gretel-blueprints/main/config_templates/gretel/synthetics/time-series.yml', 'r') as stream:
    config = yaml.safe_load(stream)

# Describe the time-series task: which column carries the time axis and which
# columns hold the trends to be modelled.
time_field = "date"
trend_fields = ["sin", "cos"]

task_attrs = {'time_field': time_field, 'trend_fields': trend_fields}
task = {'type': 'time_series', 'attrs': task_attrs}

# Attach the task to the first model entry of the config and override a few of
# its training params in place.
synthetics_config = config['models'][0]['synthetics']
synthetics_config['task'] = task
synthetics_config['params'].update({
    'vocab_size': 0,
    'predict_batch_size': 1,
    'reset_states': True,
    'overwrite': True,
})

# Build a model object in the project from the customized config.
model = project.create_model_obj(model_config=config)

# Get a csv to work with, just dump out the train_df.
# index=False keeps the DataFrame's row index out of the file.
train_df.to_csv('train.csv', index=False)
model.data_source = 'train.csv'

# Upload the training data.  Train the model.
model.submit(upload_data_source=True)

# Presumably waits for the submitted job to finish before continuing —
# confirm against gretel_client.helpers.poll.
poll(model)

# Use the model to generate synthetic data.
record_handler = model.create_record_handler_obj()

# Time series generation is seeded with the original date column, so write
# just that column out to its own CSV.
train_df['date'].to_csv('date_seeds.csv', index=False)

generation_request = {
    'action': "generate",
    'params': {"num_records": 5000, "max_invalid": 5000},
    'data_source': 'date_seeds.csv',
    'upload_data_source': True,
}
record_handler.submit(**generation_request)

poll(record_handler)

# Read the gzip-compressed CSV artifact produced by the generation job.
synthetic = pd.read_csv(
    record_handler.get_artifact_link("data"),
    compression='gzip',
)

synthetic.head()

In [None]:
# Plot the synthetic data the same way as the training data so the two
# charts can be compared visually.
(
    synthetic
    .set_index('date')
    .plot(figsize=(12, 8))
)
plt.show()