### 1. Download and set up all necessary libraries.

In [None]:
!pip install maxvolpy tntorch

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time

### 2. Experiment function

In [None]:
from tensor_extraction import extract_tensor_with_cross
from wfa_extraction import wfa_extraction, create_hankel_matrices, create_hankel_matrices_on_random_sets
from spectral_learning import spectral_learning
from utils import func_difference_metrics


def _timed(fn, *args, **kwargs):
    """Call ``fn(*args, **kwargs)`` and return ``(result, elapsed_seconds)``."""
    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by wall-clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    return result, time.perf_counter() - start


def experiment(func, filename, alphabet_size_list, ranks_list, n_list, max_length, max_iter):
    """Compare three WFA extraction pipelines over a grid of settings.

    For every combination of alphabet size, rank and ``n`` the function:

    1. extracts a tensor with ``extract_tensor_with_cross`` and turns it into
       an automaton with ``wfa_extraction`` ("cross");
    2. builds Hankel matrices with ``create_hankel_matrices`` and runs
       ``spectral_learning`` on them ("short");
    3. builds Hankel matrices with ``create_hankel_matrices_on_random_sets``
       and runs ``spectral_learning`` on them ("random");
    4. scores each automaton against ``func`` with ``func_difference_metrics``.

    Timings, the number of evaluations reported by the cross step and the
    error metrics are collected as one row per combination.

    Args:
        func: Target object handed to the extraction helpers. Its
            ``alphabet_size`` attribute is overwritten for every entry of
            ``alphabet_size_list`` (the caller's object is mutated).
        filename: Tag used in the checkpoint name
            ``csv_<filename>_<alphabet_size>.csv``.
        alphabet_size_list: Alphabet sizes to iterate over.
        ranks_list: Ranks to iterate over.
        n_list: Values of ``n`` passed to the tensor extraction and to the
            random-set Hankel construction.
        max_length: Forwarded to ``func_difference_metrics``.
        max_iter: Forwarded to ``extract_tensor_with_cross``.

    Returns:
        pandas.DataFrame with one row per (alphabet_size, rank, n) and the
        integer / timing columns cast to ``int64`` / ``float64``.

    Side effects:
        Prints progress messages and, after finishing each alphabet size,
        writes all rows collected so far to a CSV file in the working
        directory.
    """
    columns = ['alphabet_size',
               'rank',
               'n',
               'n_evaluations',
               'set_size',
               'tensor_extraction_time',
               'val_epss',
               'cross_wfa_extraction_time',
               'hankel_creation_time[short]',
               'spectral_wfa_extraction_time[short]',
               'hankel_creation_time[random]',
               'spectral_wfa_extraction_time[random]',
               'error_max[cross]',
               'error_avg[cross]',
               'error_max[short]',
               'error_avg[short]',
               'error_max[random]',
               'error_avg[random]']
    # Rows are gathered in a plain list: DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call before that.
    rows = []

    for alphabet_size in alphabet_size_list:
        print('Starting experiment with alphabet_size={}'.format(alphabet_size))

        # F is only an alias: the attribute assignment below is visible to the caller.
        F = func
        F.alphabet_size = alphabet_size

        for rank in ranks_list:
            print('Starting experiment with alphabet_size={}, rank={}'.format(alphabet_size, rank))
            for n in n_list:
                print('Starting experiment with alphabet_size={}, rank={}, n={}'.format(alphabet_size, rank, n))

                # tensor extraction
                (tensor, info), tensor_extraction_time = _timed(
                    extract_tensor_with_cross, F, n, rank=rank, max_iter=max_iter, return_info=True,
                    kickrank=rank, rmax=1000)
                print('Tensor was extracted, time={:.5}'.format(tensor_extraction_time))
                n_evaluations = info['nsamples']
                print('The number of evaluations was {}'.format(n_evaluations))

                # automaton extraction from tensor
                W_cross, cross_wfa_extraction_time = _timed(wfa_extraction, tensor, rank=rank)
                print('WFA was extracted from tensor, time={:.5}'.format(cross_wfa_extraction_time))

                # spectral learning [short]
                # The set size is derived from the evaluation count of the cross
                # step and never allowed to drop below rank + 1.
                set_size = int(np.sqrt((n_evaluations + alphabet_size) // (alphabet_size + 1))) + 1
                set_size = max(set_size, rank + 1)
                # Reported after clamping so the message shows the size actually used.
                print('The size of prefixes and suffixes set is {}'.format(set_size))

                # Hankel matrices creation
                (hPref, hSuf, H), hankel_creation_time = _timed(
                    create_hankel_matrices, F, kPref=set_size, kSuf=set_size)
                print('Hankel matrices has been created, time={:.5}'.format(hankel_creation_time))
                # automaton extraction from Hankel matrices
                W_spectral, spectral_wfa_extraction_time = _timed(spectral_learning, hPref, hSuf, H, rank)
                print('WFA was extracted from Hankel matrices, time={:.5}'.format(spectral_wfa_extraction_time))

                # spectral learning [random]
                # Hankel matrices creation
                (hPref, hSuf, H), random_hankel_creation_time = _timed(
                    create_hankel_matrices_on_random_sets, F, n, n, set_size, set_size)
                print('Hankel matrices has been created, time={:.5}'.format(random_hankel_creation_time))
                # automaton extraction from Hankel matrices
                W_random, random_spectral_wfa_extraction_time = _timed(spectral_learning, hPref, hSuf, H, rank)
                print('WFA was extracted from Hankel matrices, time={:.5}'.format(
                    random_spectral_wfa_extraction_time))

                # difference evaluation
                cross_error_max, cross_error_avg = func_difference_metrics(F, W_cross, max_length=max_length)
                print('errors has been evaluated\ncross_error_max={}\ncross_error_avg={}'.format(
                    cross_error_max, cross_error_avg))
                spectral_error_max, spectral_error_avg = func_difference_metrics(
                    F, W_spectral, max_length=max_length)
                print('errors has been evaluated\nspectral_error_max={}\nspectral_error_avg={}'.format(
                    spectral_error_max, spectral_error_avg))
                # Score the automaton learned from the random sets (W_random);
                # scoring W_spectral again would just duplicate the [short] columns.
                random_spectral_error_max, random_spectral_error_avg = func_difference_metrics(
                    F, W_random, max_length=max_length)
                print('errors has been evaluated\nrandom_spectral_error_max={}\nrandom_spectral_error_avg={}'.format(
                    random_spectral_error_max, random_spectral_error_avg))

                rows.append({'alphabet_size': alphabet_size,
                             'rank': rank,
                             'n': n,
                             'n_evaluations': n_evaluations,
                             'set_size': set_size,
                             'tensor_extraction_time': tensor_extraction_time,
                             'val_epss': info['val_epss'],
                             'cross_wfa_extraction_time': cross_wfa_extraction_time,
                             'hankel_creation_time[short]': hankel_creation_time,
                             'spectral_wfa_extraction_time[short]': spectral_wfa_extraction_time,
                             'hankel_creation_time[random]': random_hankel_creation_time,
                             'spectral_wfa_extraction_time[random]': random_spectral_wfa_extraction_time,
                             'error_max[cross]': cross_error_max,
                             'error_avg[cross]': cross_error_avg,
                             'error_max[short]': spectral_error_max,
                             'error_avg[short]': spectral_error_avg,
                             'error_max[random]': random_spectral_error_max,
                             'error_avg[random]': random_spectral_error_avg})

        # Checkpoint: everything collected so far, one file per finished alphabet size.
        pd.DataFrame(rows, columns=columns).to_csv('csv_{}_{}.csv'.format(filename, alphabet_size))

    df_result = pd.DataFrame(rows, columns=columns)
    return df_result.astype({'alphabet_size': 'int64',
                             'rank': 'int64',
                             'n': 'int64',
                             'n_evaluations': 'int64',
                             'set_size': 'int64',
                             'tensor_extraction_time': 'float64',
                             'cross_wfa_extraction_time': 'float64',
                             'hankel_creation_time[short]': 'float64',
                             'spectral_wfa_extraction_time[short]': 'float64',
                             'hankel_creation_time[random]': 'float64',
                             'spectral_wfa_extraction_time[random]': 'float64'})

### 3. Experiment

In [None]:
from wfa_extraction import RandomNormalWFA


def test_function(seq):
    """Return ``0.1 * sin(s) ** 3`` where ``s`` is the sum of squares of ``seq``.

    An empty ``seq`` gives ``s = 0`` and therefore a result of ``0.0``.
    """
    squares_total = 0
    for symbol in seq:
        squares_total = squares_total + symbol ** 2
    cubed_sine = np.sin(squares_total) ** 3
    return cubed_sine * 0.1


class Sum:
    """Pointwise sum of a wrapped object ``A`` and a ``noise`` callable.

    ``A`` must provide ``f(seq)`` and ``alphabet_size``; ``noise`` is called
    as ``noise(seq)``. The alphabet size is copied from ``A`` at construction
    time and stored on the instance.
    """

    def __init__(self, A, noise):
        self.alphabet_size = A.alphabet_size
        self.noise = noise
        self.A = A

    def f(self, seq):
        """Return ``A.f(seq) + noise(seq)``."""
        base_value = self.A.f(seq)
        perturbation = self.noise(seq)
        return base_value + perturbation


# Target of the experiment: a random WFA plus the bounded perturbation
# computed by test_function.
# NOTE(review): the positional arguments 20 and 10 are presumably the alphabet
# size and the number of states - confirm against RandomNormalWFA.
W1 = RandomNormalWFA(20, 10, lval=0.5, rval=1.5, seed=239)
W = Sum(W1, test_function)

# experiment() overwrites W.alphabet_size for every entry of alphabet_size_list.
df_result = experiment(
    W,
    filename='wfa-tensor',
    alphabet_size_list=[10, 15, 20],
    ranks_list=[10],
    n_list=[1, 2, 3, 4],
    max_length=10,
    max_iter=3,
)

In [None]:
# Show the first rows of the collected results as a quick sanity check.
df_result.head()

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# Average-error curves: one subplot per result row, laid out on a 4 x 3 grid.
crop_len = 10  # plot at most crop_len + 1 points per curve

f, arr = plt.subplots(4, 3)
f.set_figheight(16)
f.set_figwidth(16)

for index, row in df_result.iterrows():
    alphabet_size = row['alphabet_size']
    rank = row['rank']
    n = row['n']
    # Grid position is tied to the experiment settings used above:
    # n in 1..4 selects the subplot row, alphabet sizes 10/15/20 the column.
    x = int(n) - 1
    y = int(alphabet_size // 5) - 2
    ax = arr[x][y]
    for label in ('short', 'cross', 'random'):
        errors = row['error_avg[{}]'.format(label)][:(crop_len + 1)]
        # The x-axis follows the cropped curve so a shorter curve still plots.
        ax.plot(np.arange(len(errors)), errors, label=label)
    ax.legend()
    ax.title.set_text(
        'alphabet_size={}, rank={}, k={} [{} evals]'.format(alphabet_size, rank, n, row['n_evaluations']))
    if y == 0:
        ax.set_ylabel('average error')
    if x == 3:
        ax.set_xlabel('length')

# The context manager closes the PDF even if saving fails, and no PDF is
# created at all when the plotting loop above raises.
with PdfPages('plots_avg1.pdf') as pdf:
    pdf.savefig(f)

f.savefig('plots3.jpg', bbox_inches='tight', pad_inches=0.01)

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# Maximum-error curves: one subplot per result row, laid out on a 4 x 3 grid.
crop_len = 10  # plot at most crop_len + 1 points per curve

f, arr = plt.subplots(4, 3)
f.set_figheight(16)
f.set_figwidth(16)

for index, row in df_result.iterrows():
    alphabet_size = row['alphabet_size']
    rank = row['rank']
    n = row['n']
    # Grid position is tied to the experiment settings used above:
    # n in 1..4 selects the subplot row, alphabet sizes 10/15/20 the column.
    x = int(n) - 1
    y = int(alphabet_size // 5) - 2
    ax = arr[x][y]
    for label in ('short', 'cross', 'random'):
        errors = row['error_max[{}]'.format(label)][:(crop_len + 1)]
        # The x-axis follows the cropped curve so a shorter curve still plots.
        ax.plot(np.arange(len(errors)), errors, label=label)
    ax.legend()
    ax.title.set_text(
        'alphabet_size={}, rank={}, k={} [{} evals]'.format(alphabet_size, rank, n, row['n_evaluations']))
    if y == 0:
        ax.set_ylabel('maximum error')
    if x == 3:
        ax.set_xlabel('length')

# The context manager closes the PDF even if saving fails, and no PDF is
# created at all when the plotting loop above raises.
with PdfPages('plots_max1.pdf') as pdf:
    pdf.savefig(f)

f.savefig('plots4.jpg', bbox_inches='tight', pad_inches=0.01)