In [22]:

# Load and preview dataset
import datetime
import pandas as pd
import numpy as np

day = 24 * 60 * 60
year = 365.2425 * day


def load_dataframe() -> pd.DataFrame:
    """ Create a time series x sin wave dataframe. """
    df = pd.DataFrame(columns=['date', 'sin'])
    df.date = pd.date_range(start='2018-01-01', end='2021-03-01', freq='D')
    df.sin = 1 + np.sin(df.date.astype('int64') // 1e9 * (2 * np.pi / year))
    df.sin = (df.sin * 100).round(2)
    df.date = df.date.apply(lambda d: d.strftime('%Y-%m-%d'))
    return df

train_df = load_dataframe()

In [18]:
def trends_only(source_df: pd.DataFrame, trend_col: str) -> (float, pd.DataFrame):
    """ Extract trends as training features vs total volume """
    df = source_df.copy()
    start_val = df.at[0, trend_col]
    df.sin = df[[trend_col]].diff()
    df.at[0, trend_col] = 0.00
    return start_val, df


def restore_daily(source_df: pd.DataFrame, start_val: float, trend_col: str):
    """ Restore daily cumulative values from trend data """
    df = source_df.copy()
    df.at[0, trend_col] = start_val
    df[trend_col] = df[trend_col].cumsum()
    df[trend_col] = df[trend_col].apply(pd.to_numeric, downcast='float', errors='coerce').round(2)
    df.dropna(inplace=True)
    return df

# Extract trends from timeseries column to create training set
start_val, trends_df = trends_only(train_df, trend_col)
trends_df

In [20]:
seed_fields = ["date", "district_id"]

task = {
    'type': 'seed',
    'attrs': {
        'fields': seed_fields,
    }
}

In [19]:
#Set up the seed fields
seed_fields = ["date", "district_id"]

task = {
    'type': 'seed',
    'attrs': {
        'fields': seed_fields,
    }
}

# Fine tune model parameters. These are the parameters we found to work best.  This is "Run 20" in the document
config['models'][0]['synthetics']['task'] = task

config['models'][0]['synthetics']['params']['vocab_size'] = 19
config['models'][0]['synthetics']['params']['learning_rate'] = 0.001
config['models'][0]['synthetics']['params']['epochs'] = 500
config['models'][0]['synthetics']['params']['dropout_rate'] = .5
config['models'][0]['synthetics']['params']['gen_temp'] = .8
config['models'][0]['synthetics']['generate']['num_records'] = train_df.shape[0]

TypeError: string indices must be integers