# Imports

In [1]:
import numpy as np
import tensorflow as tf
from common.model import create_ssm, get_ssm_weights, set_ssm_weights
from common.data import generate_inputs, create_one_hot_array
from common.train import train

# Allow up to 200 characters per line when NumPy prints arrays.
np.set_printoptions(linewidth=200)

# If TensorFlow reports any GPUs, make only the first one visible to it.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.set_visible_devices(physical_devices[:1], 'GPU')

2024-10-19 14:46:28.561043: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2024-10-19 14:46:33.940746: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2024-10-19 14:46:33.943133: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2024-10-19 14:46:34.004160: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:3d:00.0 name: NVIDIA GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.75GiB deviceMemoryBandwidth: 573.69GiB/s
2024-10-19 14:46:34.004871: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:3e:00.0 name: NVIDIA GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.75GiB deviceMemoryBandwidth: 573

# Global variables

In [2]:
# Standard deviations: sd_baseline / sd_special are handed to generate_inputs,
# sd_test is the scale of the Gaussian extrapolation inputs.
sd_baseline = sd_special = sd_test = 1

# Training schedule and logging cadence, all forwarded to train().
epochs = 10 ** 6
epochs_after_opt = 5000
log_period = 100
print_period = 10 ** 4

# Not referenced by any cell visible in this part of the notebook.
n_evals = 7

# Remaining switches forwarded to train() unchanged.
warm_init = 0.001
exper_type = 'poison'
adaptive = False

# Length = 6, teacher state dim = 1, student state dim = 10

## Shared setup

In [3]:
# One training run is performed per seed in the experiment cells below.
seeds = [4540 + offset for offset in (0, 1, 3, 5)]

# Teacher and student sizes (state dimension, MLP width, MLP depth).
teacher_state_dim, teacher_mlp_dim, teacher_depth = 1, 15, 2
student_state_dim, student_mlp_dim, student_depth = 10, 15, 2

# Standard deviations passed to train() for the student
# (presumably initialisation scales -- confirm in common.train).
sd_A = 0.01
sd_B_C = 0.01
sd_D = 0.03

# Sequence length used for training and the longer one used for extrapolation.
length = 6
ext_length = 40

# Number of baseline / special training sequences and of test sequences.
n_baseline = 20
n_special = 20
n_test = 2000

# eps and diff are forwarded to train(); the training log prints
# "Reached sub-epsilon train loss", so eps is presumably the train-loss threshold.
eps = 0.01
# NOTE(review): this combines a natural exponential with a base-10 logarithm;
# kept exactly as written -- confirm the mix is intentional.
diff = 0.05 / np.exp(0.5 * np.log10(1 / sd_A))

In [4]:
# Hand-specified teacher weights: A, B and C are zero except for a 1 at [0, 0];
# the MLP uses an all-ones input layer, identity hidden layer(s) and an
# output layer filled with 0.5.
A = np.zeros((teacher_state_dim, teacher_state_dim))
A[0, 0] = 1
B = np.zeros((1, teacher_state_dim))
B[0, 0] = 1
C = np.zeros((teacher_state_dim, 1))
C[0, 0] = 1
D_in = np.ones((1, teacher_mlp_dim))
D_hidden = np.eye(teacher_mlp_dim)
D_out = np.full((teacher_mlp_dim, 1), 0.5)

# Teacher operating on training-length sequences.
teacher, _ = create_ssm(teacher_state_dim, length, 0, 1, 1, 0, mlp_dim=teacher_mlp_dim, depth=teacher_depth)
set_ssm_weights(teacher, [A, B, C, [D_in] + [D_hidden] * (teacher_depth - 1) + [D_out]])

# Same weights, but built for the longer extrapolation sequences.
ext_teacher, _ = create_ssm(teacher_state_dim, ext_length, 0, 1, 1, 0, mlp_dim=teacher_mlp_dim, depth=teacher_depth)
set_ssm_weights(ext_teacher, get_ssm_weights(teacher))

2024-10-19 14:46:34.068700: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-10-19 14:46:34.070579: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2024-10-19 14:46:34.071378: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:3d:00.0 name: NVIDIA GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.75GiB deviceMemoryBandwidth: 573.69GiB/s
2024-10-19 14:46:34.071420: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2024-10-19 14:46:34.071452: I tensorflow/stream_executor/

## 20 baseline inputs, 0 special inputs

In [5]:
# Learning rate forwarded to train() for this experiment.
base_lr = 0.01

# Baseline input template: ones on the first two time steps, zeros afterwards.
baseline_input = np.zeros((n_baseline, length, 1))
baseline_input[:, :2, :] = 1

# Per-seed results, filled by the training loop in the next cell.
train_losses = []
ext_losses = []

In [6]:
# Start from empty result lists so that re-running this cell on its own does
# not append to (and average over) losses left behind by an earlier execution.
train_losses, ext_losses = [], []

for seed in seeds:
    # Training set: built by generate_inputs from the baseline template and
    # labelled by the teacher.
    train_inputs = generate_inputs(1, sd_baseline, sd_special, seed=seed, baseline_input=baseline_input)
    train_outputs = teacher(train_inputs)

    # Extrapolation set: Gaussian sequences of length ext_length, labelled by
    # ext_teacher. NumPy's global RNG is re-seeded with an offset of the run seed.
    np.random.seed(seed+12)
    ext_inputs = np.random.normal(0, sd_test, (n_test, ext_length, 1))
    ext_outputs = ext_teacher(ext_inputs)

    # Train one student for this seed and record its final losses.
    train_loss, ext_loss = train(train_inputs, train_outputs, ext_inputs, ext_outputs, student_state_dim, seed, sd_A,
                                 sd_B_C, base_lr, epochs, eps, diff, warm_init=warm_init, adaptive=adaptive,
                                 mlp_dim=student_mlp_dim, depth=student_depth, sd_D=sd_D, log_period=log_period,
                                 print_period=print_period, epochs_after_opt=epochs_after_opt, exper_type=exper_type)
    train_losses.append(train_loss)
    ext_losses.append(ext_loss)

# Summary over all seeds.
print("-------------------------------------------------------------------------")
print("-------------------------------------------------------------------------")
print(f'Average train loss: {np.mean(train_losses)}')
print(f'Average ext. loss: {np.mean(ext_losses)}')

2024-10-19 14:46:35.155511: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2024-10-19 14:46:35.644199: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2024-10-19 14:46:35.664182: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2200000000 Hz


-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 56.88671112060547
-------------------------------------------------------
-------------------------------------------------------
Epoch: 10000
Train loss: 56.88671112060547
-------------------------------------------------------
-------------------------------------------------------
Epoch: 20000
Train loss: 56.88671112060547
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.009836635552346706
+-------------+
|Final results|
+-------------+
Train loss: 3.277200776377609e-12
Ext. loss: 0.004264699295163155
------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------

## 20 baseline inputs, 20 special inputs

In [7]:
# Learning rate forwarded to train() for this experiment.
base_lr = 0.01

# Baseline input template: ones on the first two time steps, zeros afterwards.
baseline_input = np.zeros((n_baseline, length, 1))
baseline_input[:, :2, :] = 1

# Special input template: a single one at time step length-2, zeros elsewhere.
special_input = np.zeros((n_special, length, 1))
special_input[:, length - 2:length - 1, :] = 1

# Per-seed results, filled by the training loop in the next cell.
train_losses = []
ext_losses = []

In [8]:
# Start from empty result lists so that re-running this cell on its own does
# not append to (and average over) losses left behind by an earlier execution.
train_losses, ext_losses = [], []

for seed in seeds:
    # Training set: built by generate_inputs from the baseline and special
    # templates and labelled by the teacher.
    train_inputs = generate_inputs(1, sd_baseline, sd_special, seed=seed, baseline_input=baseline_input,
                                   special_input=special_input)
    train_outputs = teacher(train_inputs)

    # Extrapolation set: Gaussian sequences of length ext_length, labelled by
    # ext_teacher. NumPy's global RNG is re-seeded with an offset of the run seed.
    np.random.seed(seed+12)
    ext_inputs = np.random.normal(0, sd_test, (n_test, ext_length, 1))
    ext_outputs = ext_teacher(ext_inputs)

    # Train one student for this seed and record its final losses.
    train_loss, ext_loss = train(train_inputs, train_outputs, ext_inputs, ext_outputs, student_state_dim, seed, sd_A,
                                 sd_B_C, base_lr, epochs, eps, diff, warm_init=warm_init, adaptive=adaptive,
                                 mlp_dim=student_mlp_dim, depth=student_depth, sd_D=sd_D, log_period=log_period,
                                 print_period=print_period, epochs_after_opt=epochs_after_opt, exper_type=exper_type)
    train_losses.append(train_loss)
    ext_losses.append(ext_loss)

# Summary over all seeds.
print("-------------------------------------------------------------------------")
print("-------------------------------------------------------------------------")
print(f'Average train loss: {np.mean(train_losses)}')
print(f'Average ext. loss: {np.mean(ext_losses)}')

-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 38.500247955322266
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.03735635429620743
+-------------+
|Final results|
+-------------+
Train loss: 2.7792766559286974e-06
Ext. loss: 0.11112313717603683
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 30.680572509765625
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------

# Length = 10, teacher state dim = 1, student state dim = 20

## Shared setup

In [9]:
# One training run is performed per seed in the experiment cells below.
seeds = [4540 + offset for offset in (0, 3, 4, 5)]

# Teacher and student sizes (state dimension, MLP width, MLP depth).
teacher_state_dim, teacher_mlp_dim, teacher_depth = 1, 25, 2
student_state_dim, student_mlp_dim, student_depth = 20, 25, 2

# Standard deviations passed to train() for the student
# (presumably initialisation scales -- confirm in common.train).
sd_A = 0.001
sd_B_C = 0.001
sd_D = 0.1

# Sequence length used for training and the longer one used for extrapolation.
length = 10
ext_length = 40

# Number of baseline / special training sequences and of test sequences.
n_baseline = 20
n_special = 20
n_test = 2000

# eps and diff are forwarded to train(); the training log prints
# "Reached sub-epsilon train loss", so eps is presumably the train-loss threshold.
eps = 0.05
# NOTE(review): this combines a natural exponential with a base-10 logarithm;
# kept exactly as written -- confirm the mix is intentional.
diff = 0.05 / np.exp(2 * np.log10(1 / sd_A))

In [10]:
# Hand-specified teacher weights: A, B and C are zero except for a 1 at [0, 0];
# the MLP uses an all-ones input layer, identity hidden layer(s) and an
# output layer filled with 0.5.
A = np.zeros((teacher_state_dim, teacher_state_dim))
A[0, 0] = 1
B = np.zeros((1, teacher_state_dim))
B[0, 0] = 1
C = np.zeros((teacher_state_dim, 1))
C[0, 0] = 1
D_in = np.ones((1, teacher_mlp_dim))
D_hidden = np.eye(teacher_mlp_dim)
D_out = np.full((teacher_mlp_dim, 1), 0.5)

# Teacher operating on training-length sequences.
teacher, _ = create_ssm(teacher_state_dim, length, 0, 1, 1, 0, mlp_dim=teacher_mlp_dim, depth=teacher_depth)
set_ssm_weights(teacher, [A, B, C, [D_in] + [D_hidden] * (teacher_depth - 1) + [D_out]])

# Same weights, but built for the longer extrapolation sequences.
ext_teacher, _ = create_ssm(teacher_state_dim, ext_length, 0, 1, 1, 0, mlp_dim=teacher_mlp_dim, depth=teacher_depth)
set_ssm_weights(ext_teacher, get_ssm_weights(teacher))

## 20 baseline inputs, 0 special inputs

In [11]:
# Learning rate forwarded to train() for this experiment.
base_lr = 0.01

# Baseline input template: ones on the first length-3 time steps, zeros afterwards.
baseline_input = np.zeros((n_baseline, length, 1))
baseline_input[:, :length - 3, :] = 1

# Per-seed results, filled by the training loop in the next cell.
train_losses = []
ext_losses = []

In [12]:
# Start from empty result lists so that re-running this cell on its own does
# not append to (and average over) losses left behind by an earlier execution.
train_losses, ext_losses = [], []

for seed in seeds:
    # Training set: built by generate_inputs from the baseline template and
    # labelled by the teacher.
    train_inputs = generate_inputs(1, sd_baseline, sd_special, seed=seed, baseline_input=baseline_input)
    train_outputs = teacher(train_inputs)

    # Extrapolation set: Gaussian sequences of length ext_length, labelled by
    # ext_teacher. NumPy's global RNG is re-seeded with an offset of the run seed.
    np.random.seed(seed+12)
    ext_inputs = np.random.normal(0, sd_test, (n_test, ext_length, 1))
    ext_outputs = ext_teacher(ext_inputs)

    # Train one student for this seed and record its final losses.
    train_loss, ext_loss = train(train_inputs, train_outputs, ext_inputs, ext_outputs, student_state_dim, seed, sd_A,
                                 sd_B_C, base_lr, epochs, eps, diff, warm_init=warm_init, adaptive=adaptive,
                                 mlp_dim=student_mlp_dim, depth=student_depth, sd_D=sd_D, log_period=log_period,
                                 print_period=print_period, epochs_after_opt=epochs_after_opt, exper_type=exper_type)
    train_losses.append(train_loss)
    ext_losses.append(ext_loss)

# Summary over all seeds.
print("-------------------------------------------------------------------------")
print("-------------------------------------------------------------------------")
print(f'Average train loss: {np.mean(train_losses)}')
print(f'Average ext. loss: {np.mean(ext_losses)}')

-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 186.3371124267578
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.11893810331821442
+-------------+
|Final results|
+-------------+
Train loss: 4.3516160985745955e-06
Ext. loss: 0.0026462592650204897
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 564.9718627929688
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------

## 20 baseline inputs, 20 special inputs

In [13]:
# Learning rate forwarded to train(); this experiment uses a much smaller
# value than the 0.01 used by the other experiments in the notebook.
base_lr = 0.00005

# Baseline input template: ones on the first length-3 time steps, zeros afterwards.
baseline_input = np.zeros((n_baseline, length, 1))
baseline_input[:, :length - 3, :] = 1

# Special input template: a single one at time step length-2, zeros elsewhere.
special_input = np.zeros((n_special, length, 1))
special_input[:, length - 2:length - 1, :] = 1

# Per-seed results, filled by the training loop in the next cell.
train_losses = []
ext_losses = []

In [14]:
# Start from empty result lists so that re-running this cell on its own does
# not append to (and average over) losses left behind by an earlier execution.
train_losses, ext_losses = [], []

for seed in seeds:
    # Training set: built by generate_inputs from the baseline and special
    # templates and labelled by the teacher.
    train_inputs = generate_inputs(1, sd_baseline, sd_special, seed=seed, baseline_input=baseline_input,
                                   special_input=special_input)
    train_outputs = teacher(train_inputs)

    # Extrapolation set: Gaussian sequences of length ext_length, labelled by
    # ext_teacher. NumPy's global RNG is re-seeded with an offset of the run seed.
    np.random.seed(seed+12)
    ext_inputs = np.random.normal(0, sd_test, (n_test, ext_length, 1))
    ext_outputs = ext_teacher(ext_inputs)

    # Train one student for this seed and record its final losses.
    train_loss, ext_loss = train(train_inputs, train_outputs, ext_inputs, ext_outputs, student_state_dim, seed, sd_A,
                                 sd_B_C, base_lr, epochs, eps, diff, warm_init=warm_init, adaptive=adaptive,
                                 mlp_dim=student_mlp_dim, depth=student_depth, sd_D=sd_D, log_period=log_period,
                                 print_period=print_period, epochs_after_opt=epochs_after_opt, exper_type=exper_type)
    train_losses.append(train_loss)
    ext_losses.append(ext_loss)

# Summary over all seeds.
print("-------------------------------------------------------------------------")
print("-------------------------------------------------------------------------")
print(f'Average train loss: {np.mean(train_losses)}')
print(f'Average ext. loss: {np.mean(ext_losses)}')

-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 163.4634246826172
-------------------------------------------------------
-------------------------------------------------------
Epoch: 10000
Train loss: 65.40353393554688
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.049609724432229996
+-------------+
|Final results|
+-------------+
Train loss: 0.002185598248615861
Ext. loss: 0.10075842589139938
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Tr