In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from matplotlib import pyplot as plt

In [None]:
def import_tourism_small(filename='hyndman_tourism_small.csv', plotting=False):
    """Convert a hierarchical tourism CSV into ``data.csv`` and ``agg_mat.csv``.

    The input file is read with one column per time series. Column names are
    lower-cased and stripped of spaces; the number of ``-``-separated parts
    gives the hierarchy depth (``total`` is depth 0). The deepest level found
    is used as the bottom level.

    Two files are written to the current working directory:

    * ``./data.csv``    -- the series values, indexed by quarter-end dates
      starting in 1998, one column per (normalised) series label.
    * ``./agg_mat.csv`` -- the 0/1 aggregation matrix with one row per series
      and one column per bottom-level series.

    Both files are read back and compared with the in-memory arrays.

    Parameters
    ----------
    filename : str
        Path of the CSV file to import. Defaults to the file name that was
        previously hard-coded inside the function.
    plotting : bool
        If True, show a sparsity plot of the aggregation matrix and a stack
        plot of the data.

    Returns
    -------
    None
        A success message is printed.

    Raises
    ------
    ValueError
        If the file has no columns.
    AssertionError
        If a written file does not match the in-memory data when re-read.
    """
    source_df = pd.read_csv(filename)

    # Normalise the column names: lower case, no blanks.
    ts_labels = [ts_label.lower().replace(" ", "") for ts_label in source_df.columns]

    # Group the labels by hierarchy depth. 'total' is depth 0; otherwise the
    # depth is the number of '-'-separated parts of the label.
    levels = defaultdict(list)
    for ts_label in ts_labels:
        depth = 0 if ts_label == 'total' else len(ts_label.split('-'))
        levels[depth].append(ts_label)

    if not levels:
        raise ValueError('No time series columns found in ' + filename)

    # Use the deepest level actually present. Indexing with len(levels) - 1
    # would silently create an empty level if the depths are not contiguous.
    bottom_level        = levels[max(levels)]
    num_bottom_level_ts = len(bottom_level)
    num_ts              = len(ts_labels)
    S = np.zeros((num_ts, num_bottom_level_ts))

    for idx_row, ts_label in enumerate(ts_labels):
        if ts_label == 'total':
            # The total aggregates every bottom-level series.
            S[idx_row, :] = 1
            continue
        # Match on whole '-'-delimited tokens so that a label can never match
        # in the middle of an unrelated token.
        needle = '-' + ts_label + '-'
        for idx_col, bottom_level_ts in enumerate(bottom_level):
            if needle in '-' + bottom_level_ts + '-':
                S[idx_row, idx_col] = 1

    # One row per series, one column per time step.
    Y = source_df.to_numpy().T

    if plotting:
        # Derive the time axis from the data instead of hard-coding its length.
        time_axis = np.arange(Y.shape[1])

        # plot S matrix
        plt.figure(num=1, figsize=(8, 20), dpi=80, facecolor='w', edgecolor='k')
        plt.spy(S)
        plt.show()

        # Plotting Y data
        # NOTE(review): assumes column 0 is the total and columns 1-4 are the
        # first-level series, as in the tourism file -- confirm for other inputs.
        plt.figure(num=2, dpi=100, facecolor='w', edgecolor='k')
        plt.stackplot(time_axis, Y[1:5, :])
        plt.plot(time_axis, Y[0, :], color='black', linewidth=2)
        plt.show()

    ## Save data into CSV format (same as sine7())
    # Quarter-end timestamps. The offset object yields the same dates as the
    # "Q" alias, which is deprecated in recent pandas releases.
    index = pd.date_range(
        start=pd.Timestamp("1998-01-01"),
        periods=Y.shape[1],
        freq=pd.offsets.QuarterEnd(startingMonth=12),
    )

    # Y data
    data_df = pd.DataFrame(
        index=index,
        data={column: Y[i, :] for i, column in enumerate(ts_labels)},
    )
    data_df.to_csv('./data.csv')

    # sanity check: the file on disk reproduces Y (values in this dataset are large)
    saved_values = pd.read_csv('./data.csv', index_col=0).values.transpose()
    np.testing.assert_allclose(saved_values, Y, rtol=0, atol=1e-6)

    # S matrix
    agg_mat_df = pd.DataFrame(
        index=ts_labels,
        data={column: S[:, i] for i, column in enumerate(bottom_level)},
    )
    agg_mat_df.to_csv('./agg_mat.csv')

    # sanity check: the file on disk reproduces S exactly
    saved_agg_mat = pd.read_csv('./agg_mat.csv', index_col=0).values
    np.testing.assert_array_equal(saved_agg_mat, S)

    print('Importing ' + filename + ' successful!...')

In [11]:
# Reload both exported tables, using the first CSV column as the index.
#   data.csv    -> one column per series: 1 + 1*4 + 1*4*7 + 1*4*7*2 columns
#   agg_mat.csv -> aggregation matrix (all series x bottom-level series)
data, agg_mat_df = (
    pd.read_csv(csv_name, index_col=0) for csv_name in ("data.csv", "agg_mat.csv")
)
data.columns

Index(['total', 'hol', 'vfr', 'bus', 'oth', 'nsw-hol', 'vic-hol', 'qld-hol',
       'sa-hol', 'wa-hol', 'tas-hol', 'nt-hol', 'nsw-vfr', 'vic-vfr',
       'qld-vfr', 'sa-vfr', 'wa-vfr', 'tas-vfr', 'nt-vfr', 'nsw-bus',
       'vic-bus', 'qld-bus', 'sa-bus', 'wa-bus', 'tas-bus', 'nt-bus',
       'nsw-oth', 'vic-oth', 'qld-oth', 'sa-oth', 'wa-oth', 'tas-oth',
       'nt-oth', 'nsw-hol-city', 'nsw-hol-noncity', 'vic-hol-city',
       'vic-hol-noncity', 'qld-hol-city', 'qld-hol-noncity', 'sa-hol-city',
       'sa-hol-noncity', 'wa-hol-city', 'wa-hol-noncity', 'tas-hol-city',
       'tas-hol-noncity', 'nt-hol-city', 'nt-hol-noncity', 'nsw-vfr-city',
       'nsw-vfr-noncity', 'vic-vfr-city', 'vic-vfr-noncity', 'qld-vfr-city',
       'qld-vfr-noncity', 'sa-vfr-city', 'sa-vfr-noncity', 'wa-vfr-city',
       'wa-vfr-noncity', 'tas-vfr-city', 'tas-vfr-noncity', 'nt-vfr-city',
       'nt-vfr-noncity', 'nsw-bus-city', 'nsw-bus-noncity', 'vic-bus-city',
       'vic-bus-noncity', 'qld-bus-city', 'qld-

In [12]:
# Display the aggregation matrix loaded from agg_mat.csv
# (rows: every series label, columns: bottom-level series labels).
agg_mat_df

Unnamed: 0,nsw-hol-city,nsw-hol-noncity,vic-hol-city,vic-hol-noncity,qld-hol-city,qld-hol-noncity,sa-hol-city,sa-hol-noncity,wa-hol-city,wa-hol-noncity,...,qld-oth-city,qld-oth-noncity,sa-oth-city,sa-oth-noncity,wa-oth-city,wa-oth-noncity,tas-oth-city,tas-oth-noncity,nt-oth-city,nt-oth-noncity
total,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
hol,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
vfr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
bus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
oth,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wa-oth-noncity,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
tas-oth-city,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
tas-oth-noncity,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
nt-oth-city,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [13]:
# First row of the aggregation matrix as a Series indexed by bottom-level label.
agg_mat_df.iloc[0]

nsw-hol-city       1.0
nsw-hol-noncity    1.0
vic-hol-city       1.0
vic-hol-noncity    1.0
qld-hol-city       1.0
qld-hol-noncity    1.0
sa-hol-city        1.0
sa-hol-noncity     1.0
wa-hol-city        1.0
wa-hol-noncity     1.0
tas-hol-city       1.0
tas-hol-noncity    1.0
nt-hol-city        1.0
nt-hol-noncity     1.0
nsw-vfr-city       1.0
nsw-vfr-noncity    1.0
vic-vfr-city       1.0
vic-vfr-noncity    1.0
qld-vfr-city       1.0
qld-vfr-noncity    1.0
sa-vfr-city        1.0
sa-vfr-noncity     1.0
wa-vfr-city        1.0
wa-vfr-noncity     1.0
tas-vfr-city       1.0
tas-vfr-noncity    1.0
nt-vfr-city        1.0
nt-vfr-noncity     1.0
nsw-bus-city       1.0
nsw-bus-noncity    1.0
vic-bus-city       1.0
vic-bus-noncity    1.0
qld-bus-city       1.0
qld-bus-noncity    1.0
sa-bus-city        1.0
sa-bus-noncity     1.0
wa-bus-city        1.0
wa-bus-noncity     1.0
tas-bus-city       1.0
tas-bus-noncity    1.0
nt-bus-city        1.0
nt-bus-noncity     1.0
nsw-oth-city       1.0
nsw-oth-non