# Synthesize Time Series data from your own DataFrame

This Blueprint demonstrates how to create synthetic time series data with Gretel. We assume that the dataset
contains at least:

1) A specific column holding time data points

2) One or more columns that contain measurements or numerical observations for each point in time.

For this Blueprint, we will generate a very simple sine wave as our time series data.

In [None]:
%%capture

!pip install -U "gretel-client<0.8.0" gretel-synthetics pandas

In [None]:
# Load your Gretel API key. You can acquire this from the Gretel Console
# @ https://console.gretel.cloud
#
# NOTE(review): api_key="prompt" presumably asks for the key interactively instead of
# hard-coding it in the notebook -- confirm against the gretel-client documentation.

from gretel_client import get_cloud_client

client = get_cloud_client(prefix="api", api_key="prompt")
# NOTE(review): presumably installs the extra Gretel packages (e.g. `gretel_helpers`,
# which a later cell imports) into this environment -- confirm.
client.install_packages()

In [None]:
# Create a simple timeseries sine wave

import datetime
import pandas as pd
import numpy as np

# Durations in seconds. `year` is the mean Gregorian year length and is used as
# the period of the generated sine wave.
day = 24 * 60 * 60
year = 365.2425 * day


def load_dataframe(start: str = '2018-01-01', end: str = '2021-03-01') -> pd.DataFrame:
    """Create a daily time series whose value follows a one-year-period sine wave.

    Args:
        start: First date of the series (inclusive); any value accepted by
            ``pd.date_range``.
        end: Last date of the series (inclusive).

    Returns:
        A DataFrame with a default integer index and two columns: ``date``
        (``YYYY-MM-DD`` strings) and ``sin`` (floats rounded to two decimals,
        scaled into the range [0, 200]).
    """
    df = pd.DataFrame({'date': pd.date_range(start=start, end=end, freq='D')})

    # Whole seconds since the Unix epoch. Timedelta arithmetic is used instead of
    # casting to int64 and dividing by 1e9, so the result does not depend on the
    # storage resolution (ns/us/...) of the datetime column.
    epoch = pd.Timestamp(0, tz=df['date'].dt.tz)
    epoch_seconds = ((df['date'] - epoch) // pd.Timedelta(seconds=1)).astype('float64')

    # Shift the wave from [-1, 1] to [0, 2], then scale to [0, 200].
    wave = 1 + np.sin(epoch_seconds * (2 * np.pi / year))
    df['sin'] = (wave * 100).round(2)

    # Store dates as plain strings (vectorized formatting).
    df['date'] = df['date'].dt.strftime('%Y-%m-%d')
    return df

# Build the training data, then plot the `sin` column against `date` as a quick visual check.
train_df = load_dataframe()
train_df.set_index('date').plot()

In [None]:
# Build the Gretel Synthetics training / model configuration

from pathlib import Path

# Folder named "checkpoints-sin" under the current working directory; it is handed
# to the model through the `checkpoint_dir` setting below.
checkpoint_dir = str(Path.cwd().joinpath("checkpoints-sin"))

# Settings passed as `synthetic_config` when the time series model is created.
config_template = dict(
    epochs=100,
    early_stopping=False,
    vocab_size=20,
    reset_states=True,
    checkpoint_dir=checkpoint_dir,
    overwrite=True,
)

In [None]:
# Capture transient import errors in Google Colab: if the first import attempt
# raises FileNotFoundError, the identical import is simply tried a second time.

try:
    from gretel_helpers.series_models import TimeseriesModel
except FileNotFoundError:
    from gretel_helpers.series_models import TimeseriesModel

# Params:
# - time_column: The single column name that represents your points in time
# - trend_columns: One or more columns that are the observations / measurements that are associated with
#                  the points in time. These should be numerical.
# - other_seed_columns: An optional list of other columns that should be used along with the time_column
#                       as seeds to the synthetic generator.

# Train on the sine-wave frame, generate synthetic records, and keep the
# resulting DataFrame exposed through the `.df` attribute.
synthetic_df = TimeseriesModel(
    training_df=train_df,
    time_column="date",
    trend_columns=["sin"],
    synthetic_config=config_template
).train().generate().df

In [None]:
# Does our synthetic data look the same? Plot it the same way as the training
# data (indexed by `date`) so the two charts can be compared by eye.

synthetic_df.set_index('date').plot()