# Imports

In [1]:
import numpy as np
import tensorflow as tf
from common.model import create_ssm, get_ssm_weights, set_ssm_weights
from common.data import generate_inputs, create_one_hot_array
from common.train import train

# Let printed NumPy arrays use up to 200 characters per line before wrapping.
np.set_printoptions(linewidth=200)

# Ask TensorFlow which GPUs it can see and, when there is at least one,
# restrict the visible GPU set to the first reported device only.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.set_visible_devices(physical_devices[:1], 'GPU')

2024-10-19 15:34:41.985826: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2024-10-19 15:34:47.564204: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2024-10-19 15:34:47.566058: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2024-10-19 15:34:47.618658: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:3d:00.0 name: NVIDIA GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.75GiB deviceMemoryBandwidth: 573.69GiB/s
2024-10-19 15:34:47.619308: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:3e:00.0 name: NVIDIA GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.75GiB deviceMemoryBandwidth: 573

# Global variables

In [2]:
# Notebook-wide settings shared by every experiment section below.

# Forwarded positionally to generate_inputs() in each training loop.
sd_baseline = 1
sd_special = 1

# Forwarded to train(): `epochs` positionally, the rest as keyword arguments.
epochs = 1_000_000
epochs_after_opt = 1500
warm_init = 0.1
adaptive = True
exper_type = 'poison'

# Logging / printing cadence, forwarded to train() as keyword arguments.
log_period = 100
print_period = 10_000

# Length = 6, teacher state dim = 1, student state dim = 10

## Shared setup

In [3]:
# Settings for the "length 6 / student state dim 10" experiments.

# One training run is performed per seed: 200, 201, 204, 205.
seeds = [200 + offset for offset in (0, 1, 4, 5)]

# State dimensions of the teacher and student models.
teacher_state_dim = 1
student_state_dim = 10

# Sequence lengths: `length` for the training teacher, `ext_length` for the
# extended teacher used to compute the "ext." loss.
length = 6
ext_length = 40

# Number of rows in the baseline / special input arrays.
n_baseline = 8
n_special = 10

# Passed positionally to train() (sd_A, sd_B_C, eps, diff).
sd_A = 0.001
sd_B_C = 0.001
eps = 0.01
# NOTE(review): natural exp applied to a base-10 log; kept exactly as written.
diff = 0.05 / np.exp(5 * np.log10(1 / sd_A))

In [4]:
# Build the teacher model. create_ssm() returns a pair; only the first element
# (the model) is kept. The trailing positional arguments (0, 1, 1, 0) are
# passed through unchanged -- see common.model.create_ssm for their meaning.
teacher, _ = create_ssm(teacher_state_dim, length, 0, 1, 1, 0)

# Teacher weights: all-zero arrays whose [0, 0] entry is set to 1.
A = np.zeros((teacher_state_dim, teacher_state_dim))
B = np.zeros((1, teacher_state_dim))
C = np.zeros((teacher_state_dim, 1))
A[0, 0] = B[0, 0] = C[0, 0] = 1
set_ssm_weights(teacher, [A, B, C])

# Second teacher created with ext_length in place of length, then given the
# weights read back from the first teacher.
ext_teacher, _ = create_ssm(teacher_state_dim, ext_length, 0, 1, 1, 0)
set_ssm_weights(ext_teacher, get_ssm_weights(teacher))

2024-10-19 15:34:47.676304: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-10-19 15:34:47.678254: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2024-10-19 15:34:47.679043: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:3d:00.0 name: NVIDIA GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.75GiB deviceMemoryBandwidth: 573.69GiB/s
2024-10-19 15:34:47.679084: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2024-10-19 15:34:47.679116: I tensorflow/stream_executor/

## 8 baseline inputs, 0 special inputs

In [5]:
# Learning rate passed positionally to train() for this experiment.
base_lr = 0.01

# Baseline input array of shape (n_baseline, length, 1): ones at the first two
# positions along axis 1, zeros everywhere else.
baseline_input = np.zeros((n_baseline, length, 1))
baseline_input[:, :2, :] = 1

# Per-seed results collected by the training loop in the next cell.
train_losses = []
ext_losses = []

In [6]:
# Run one training job per seed using baseline inputs only, record the
# (train loss, ext. loss) pair each job returns, then print the averages.
for seed in seeds:
    # Training data: inputs from generate_inputs(), targets from the teacher.
    train_inputs = generate_inputs(
        1, sd_baseline, sd_special, seed=seed, baseline_input=baseline_input
    )
    train_outputs = teacher(train_inputs)

    # Evaluation data for the "ext." loss: targets from the ext_length teacher.
    ext_inputs = create_one_hot_array(ext_length, 1)
    ext_outputs = ext_teacher(ext_inputs)

    train_loss, ext_loss = train(
        train_inputs, train_outputs, ext_inputs, ext_outputs,
        student_state_dim, seed, sd_A, sd_B_C, base_lr, epochs, eps, diff,
        warm_init=warm_init,
        adaptive=adaptive,
        log_period=log_period,
        print_period=print_period,
        epochs_after_opt=epochs_after_opt,
        exper_type=exper_type,
    )
    train_losses.append(train_loss)
    ext_losses.append(ext_loss)

# Summary across all seeds.
print("-------------------------------------------------------------------------")
print("-------------------------------------------------------------------------")
print(f'Average train loss: {np.mean(train_losses)}')
print(f'Average ext. loss: {np.mean(ext_losses)}')

2024-10-19 15:34:48.724170: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2024-10-19 15:34:49.139310: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2024-10-19 15:34:49.159975: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2200000000 Hz


-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.7412855625152588
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.00987151823937893
+-------------+
|Final results|
+-------------+
Train loss: 6.257215318328235e-06
Ext. loss: 0.1137007474899292
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 1.0354180335998535
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
---------------------------

## 8 baseline inputs, 10 special inputs

In [7]:
# Learning rate passed positionally to train() for this experiment.
base_lr = 0.01

# Baseline input array of shape (n_baseline, length, 1): ones at the first two
# positions along axis 1, zeros everywhere else.
baseline_input = np.zeros((n_baseline, length, 1))
baseline_input[:, :2, :] = 1

# Special input array of shape (n_special, length, 1): a single one at
# position length - 2 along axis 1 (the second-to-last position).
special_input = np.zeros((n_special, length, 1))
special_input[:, length - 2:length - 1, :] = 1

# Per-seed results collected by the training loop in the next cell.
train_losses = []
ext_losses = []

In [8]:
# Run one training job per seed using both baseline and special inputs, record
# the (train loss, ext. loss) pair each job returns, then print the averages.
for seed in seeds:
    # Training data: inputs from generate_inputs(), targets from the teacher.
    train_inputs = generate_inputs(
        1, sd_baseline, sd_special,
        seed=seed,
        baseline_input=baseline_input,
        special_input=special_input,
    )
    train_outputs = teacher(train_inputs)

    # Evaluation data for the "ext." loss: targets from the ext_length teacher.
    ext_inputs = create_one_hot_array(ext_length, 1)
    ext_outputs = ext_teacher(ext_inputs)

    train_loss, ext_loss = train(
        train_inputs, train_outputs, ext_inputs, ext_outputs,
        student_state_dim, seed, sd_A, sd_B_C, base_lr, epochs, eps, diff,
        warm_init=warm_init,
        adaptive=adaptive,
        log_period=log_period,
        print_period=print_period,
        epochs_after_opt=epochs_after_opt,
        exper_type=exper_type,
    )
    train_losses.append(train_loss)
    ext_losses.append(ext_loss)

# Summary across all seeds.
print("-------------------------------------------------------------------------")
print("-------------------------------------------------------------------------")
print(f'Average train loss: {np.mean(train_losses)}')
print(f'Average ext. loss: {np.mean(ext_losses)}')

-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 1.0083403587341309
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.009566765278577805
+-------------+
|Final results|
+-------------+
Train loss: 0.00043054751586169004
Ext. loss: 5.749016284942627
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 1.4306741952896118
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
--------------------------

# Length = 10, teacher state dim = 1, student state dim = 20

## Shared setup

In [9]:
# Settings for the "length 10 / student state dim 20" experiments.

# One training run is performed per seed: 1200, 1201, 1203, 1204.
seeds = [1200 + offset for offset in (0, 1, 3, 4)]

# State dimensions of the teacher and student models.
teacher_state_dim = 1
student_state_dim = 20

# Sequence lengths: `length` for the training teacher, `ext_length` for the
# extended teacher used to compute the "ext." loss.
length = 10
ext_length = 40

# Number of rows in the baseline / special input arrays.
n_baseline = 8
n_special = 10

# Passed positionally to train() (sd_A, sd_B_C, eps, diff).
sd_A = 0.01
sd_B_C = 0.001
eps = 0.01
# NOTE(review): natural exp applied to a base-10 log; kept exactly as written.
diff = 0.05 / np.exp(3 * np.log10(1 / sd_A))

In [10]:
# Build the teacher model. create_ssm() returns a pair; only the first element
# (the model) is kept. The trailing positional arguments (0, 1, 1, 0) are
# passed through unchanged -- see common.model.create_ssm for their meaning.
teacher, _ = create_ssm(teacher_state_dim, length, 0, 1, 1, 0)

# Teacher weights: all-zero arrays whose [0, 0] entry is set to 1.
A = np.zeros((teacher_state_dim, teacher_state_dim))
B = np.zeros((1, teacher_state_dim))
C = np.zeros((teacher_state_dim, 1))
A[0, 0] = B[0, 0] = C[0, 0] = 1
set_ssm_weights(teacher, [A, B, C])

# Second teacher created with ext_length in place of length, then given the
# weights read back from the first teacher.
ext_teacher, _ = create_ssm(teacher_state_dim, ext_length, 0, 1, 1, 0)
set_ssm_weights(ext_teacher, get_ssm_weights(teacher))

## 8 baseline inputs, 0 special inputs

In [11]:
# Learning rate passed positionally to train() for this experiment.
base_lr = 0.001

# Baseline input array of shape (n_baseline, length, 1): ones at positions
# 0 .. length - 4 along axis 1, zeros at the last three positions.
baseline_input = np.zeros((n_baseline, length, 1))
baseline_input[:, :length - 3, :] = 1

# Per-seed results collected by the training loop in the next cell.
train_losses = []
ext_losses = []

In [12]:
# Run one training job per seed using baseline inputs only, record the
# (train loss, ext. loss) pair each job returns, then print the averages.
for seed in seeds:
    # Training data: inputs from generate_inputs(), targets from the teacher.
    train_inputs = generate_inputs(
        1, sd_baseline, sd_special, seed=seed, baseline_input=baseline_input
    )
    train_outputs = teacher(train_inputs)

    # Evaluation data for the "ext." loss: targets from the ext_length teacher.
    ext_inputs = create_one_hot_array(ext_length, 1)
    ext_outputs = ext_teacher(ext_inputs)

    train_loss, ext_loss = train(
        train_inputs, train_outputs, ext_inputs, ext_outputs,
        student_state_dim, seed, sd_A, sd_B_C, base_lr, epochs, eps, diff,
        warm_init=warm_init,
        adaptive=adaptive,
        log_period=log_period,
        print_period=print_period,
        epochs_after_opt=epochs_after_opt,
        exper_type=exper_type,
    )
    train_losses.append(train_loss)
    ext_losses.append(ext_loss)

# Summary across all seeds.
print("-------------------------------------------------------------------------")
print("-------------------------------------------------------------------------")
print(f'Average train loss: {np.mean(train_losses)}')
print(f'Average ext. loss: {np.mean(ext_losses)}')

-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 7.95379638671875
-------------------------------------------------------
-------------------------------------------------------
Epoch: 10000
Train loss: 7.953590393066406
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.009907917119562626
+-------------+
|Final results|
+-------------+
Train loss: 9.560439139022492e-06
Ext. loss: 0.12655746936798096
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Tr

## 8 baseline inputs, 10 special inputs

In [13]:
# Learning rate passed positionally to train() for this experiment.
base_lr = 0.001

# Baseline input array of shape (n_baseline, length, 1): ones at positions
# 0 .. length - 4 along axis 1, zeros at the last three positions.
baseline_input = np.zeros((n_baseline, length, 1))
baseline_input[:, :length - 3, :] = 1

# Special input array of shape (n_special, length, 1): a single one at
# position length - 2 along axis 1 (the second-to-last position).
special_input = np.zeros((n_special, length, 1))
special_input[:, length - 2:length - 1, :] = 1

# Per-seed results collected by the training loop in the next cell.
train_losses = []
ext_losses = []

In [14]:
# Run one training job per seed using both baseline and special inputs, record
# the (train loss, ext. loss) pair each job returns, then print the averages.
for seed in seeds:
    # Training data: inputs from generate_inputs(), targets from the teacher.
    train_inputs = generate_inputs(
        1, sd_baseline, sd_special,
        seed=seed,
        baseline_input=baseline_input,
        special_input=special_input,
    )
    train_outputs = teacher(train_inputs)

    # Evaluation data for the "ext." loss: targets from the ext_length teacher.
    ext_inputs = create_one_hot_array(ext_length, 1)
    ext_outputs = ext_teacher(ext_inputs)

    train_loss, ext_loss = train(
        train_inputs, train_outputs, ext_inputs, ext_outputs,
        student_state_dim, seed, sd_A, sd_B_C, base_lr, epochs, eps, diff,
        warm_init=warm_init,
        adaptive=adaptive,
        log_period=log_period,
        print_period=print_period,
        epochs_after_opt=epochs_after_opt,
        exper_type=exper_type,
    )
    train_losses.append(train_loss)
    ext_losses.append(ext_loss)

# Summary across all seeds.
print("-------------------------------------------------------------------------")
print("-------------------------------------------------------------------------")
print(f'Average train loss: {np.mean(train_losses)}')
print(f'Average ext. loss: {np.mean(ext_losses)}')

-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 4.130544662475586
-------------------------------------------------------
-------------------------------------------------------
Epoch: 10000
Train loss: 0.01616278849542141
+------------------------------+
|Reached sub-epsilon train loss|
+------------------------------+
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Train loss: 0.0099932961165905
+-------------+
|Final results|
+-------------+
Train loss: 0.00627909554168582
Ext. loss: 0.9167795181274414
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
-------------------------------------------------------
-------------------------------------------------------
Epoch: 0
Trai