In [1]:
import numpy as np
from sklearn.datasets import load_digits
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
%matplotlib inline

## Loading Digits

In [2]:
# Load the scikit-learn digits dataset bundle; the following cell reads its
# `.data` and `.target` attributes.
digits = load_digits()

In [3]:
# Pull the sample matrix (x) and the matching label vector (y) out of the bundle.
x, y = digits.data, digits.target

## Structured prediction problem

In [4]:
# Seed NumPy's global generator so every random draw below is repeatable.
np.random.seed(417)
# 150 rows of 3 random values, each drawn from {0, 1} (upper bound is exclusive).
z = np.random.randint(0, 2, (150, 3))

In [5]:
# Running total of the random steps along each row (one row per sequence).
y_seq = z.cumsum(axis=1)

In [6]:
# One random shift in {0, 1, 2, 3} per row, broadcast across that row's columns.
row_offsets = np.random.randint(0, 4, size=len(y_seq))
# Wrap the shifted totals back into the label range 0..3.
y_seq = (y_seq + row_offsets[:, np.newaxis]) % 4

In [7]:
def _rows_with_label(x, y, label):
    """Yield, in order, every entry of ``x`` whose label in ``y`` equals ``label``.

    Shared implementation behind ``make_zeros`` / ``make_ones`` /
    ``make_twos`` / ``make_threes``.
    """
    # The boolean-mask selection runs lazily, on the first ``next()`` call.
    yield from x[y == label]


def make_zeros(x, y):
    """Generate the entries of ``x`` labelled ``0`` in ``y``, in original order.

    Args:
        x: NumPy array of samples, indexed along its first axis.
        y: NumPy array of labels aligned with the first axis of ``x``.

    Yields:
        One entry of ``x`` per matching label. The generator finishes
        normally once the matching entries run out, so it can be consumed
        with ``for`` / ``list`` as well as with ``next``.
    """
    yield from _rows_with_label(x, y, 0)


def make_ones(x, y):
    """Generate the entries of ``x`` labelled ``1`` in ``y``, in original order.

    Same contract as ``make_zeros``, for label ``1``.
    """
    yield from _rows_with_label(x, y, 1)


def make_twos(x, y):
    """Generate the entries of ``x`` labelled ``2`` in ``y``, in original order.

    Same contract as ``make_zeros``, for label ``2``.
    """
    yield from _rows_with_label(x, y, 2)


def make_threes(x, y):
    """Generate the entries of ``x`` labelled ``3`` in ``y``, in original order.

    Same contract as ``make_zeros``, for label ``3``.
    """
    yield from _rows_with_label(x, y, 3)


In [8]:
# Create one generator per digit class (0-3); each hands out that class's
# rows of x one at a time via next().
zeros, ones, twos, threes = (
    factory(x, y)
    for factory in (make_zeros, make_ones, make_twos, make_threes)
)

In [9]:
# Map each label value to the generator that supplies the next unused sample
# of that digit. An unexpected label raises KeyError instead of being silently
# skipped, which would otherwise leave a sequence shorter than the others.
digit_sources = {0: zeros, 1: ones, 2: twos, 3: threes}

# Build one sample sequence per label sequence. Iterating y_seq directly keeps
# the number of sequences tied to the labels rather than to a hard-coded count.
seqs = []
for labels in y_seq:
    # Samples are drawn in label order, so each generator advances exactly
    # once per occurrence of its digit.
    seqs.append([next(digit_sources[int(label)]) for label in labels])
# Stack the nested lists into a single array of sequences.
seqs = np.array(seqs)

In [10]:
# The first 100 sequences go to the "tr" split and the rest to the "te" split
# (presumably train / test); inputs and labels are cut at the same index.
x_tr, x_te = seqs[:100], seqs[100:]
y_tr, y_te = y_seq[:100], y_seq[100:]

In [23]:
# np.save('data_struct_raw/x_tr', x_tr)
# np.save('data_struct_raw/y_tr', y_tr)
# np.save('data_struct_raw/x_te', x_te)
# np.save('data_struct_raw/y_te', y_te)

In [24]:
# Merge all leading axes so every sequence element becomes its own row,
# keeping the last (feature) axis intact; labels collapse to a flat vector.
x_tr_flat = x_tr.reshape(-1, x_tr.shape[-1])
y_tr_flat = y_tr.reshape(-1)
x_te_flat = x_te.reshape(-1, x_te.shape[-1])
y_te_flat = y_te.reshape(-1)

In [25]:
# np.save('data_class/x_tr', x_tr_flat)
# np.save('data_class/y_tr', y_tr_flat)
# np.save('data_class/x_te', x_te_flat)
# np.save('data_class/y_te', y_te_flat)

In [11]:
# Load the matrix stored in 'P.npy' (path is relative to the working directory).
# NOTE(review): nothing visible in this notebook creates that file, so it
# must already exist before this cell runs.
P = np.load('P.npy')

In [12]:
# Display the dimensions of the loaded matrix (the recorded output is (2, 64)).
P.shape

(2, 64)

In [13]:
# Apply P to every element of every sequence. For sequence s and position i:
#     out[s, i, k] = sum_j x[s, i, j] * P[k, j]
# so the last axis of the inputs is contracted against P's second axis and
# replaced by an axis of length P.shape[0].
x_tr_proj = np.einsum('sij,kj->sik', x_tr, P)
x_te_proj = np.einsum('sij,kj->sik', x_te, P)

In [14]:
# Write the projected inputs and their labels under data_struct_embed/.
# np.save appends the '.npy' extension to each target name.
for target, array in (
    ('data_struct_embed/x_tr', x_tr_proj),
    ('data_struct_embed/y_tr', y_tr),
    ('data_struct_embed/x_te', x_te_proj),
    ('data_struct_embed/y_te', y_te),
):
    np.save(target, array)