In [1]:
!git clone https://github.com/AvonYangXX1/AMPLify-Feedback.git
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
import warnings
warnings.filterwarnings('ignore')

Cloning into 'AMPLify-Feedback'...
remote: Enumerating objects: 289, done.[K
remote: Counting objects: 100% (289/289), done.[K
remote: Compressing objects: 100% (173/173), done.[K
remote: Total 289 (delta 139), reused 259 (delta 112), pack-reused 0[K
Receiving objects: 100% (289/289), 11.58 MiB | 3.41 MiB/s, done.
Resolving deltas: 100% (139/139), done.
Updating files: 100% (49/49), done.


In [2]:
def read_data(i, data_dir="AMPLify-Feedback/processed_data/cv", depth=43):
    """Load cross-validation fold ``i`` and one-hot encode its sequences.

    Reads the six files ``{seq,state,label}_{train,val}_{i}.npy`` from
    ``data_dir``.

    Args:
        i: Fold index, substituted into the file names.
        data_dir: Directory that contains the fold files. Defaults to the
            ``processed_data/cv`` folder of the cloned repository.
        depth: Number of classes passed to ``tf.one_hot`` for the sequence
            arrays.

    Returns:
        The tuple ``(seq_train, state_train, label_train, seq_val, state_val,
        label_val)``. The two ``seq_*`` entries are the outputs of
        ``tf.one_hot``; the other four are returned exactly as ``np.load``
        produced them.
    """
    def load(kind, split):
        # One place for the file-name convention shared by all six arrays.
        return np.load(f"{data_dir}/{kind}_{split}_{i}.npy")

    seq_train, state_train, label_train = (
        load(kind, "train") for kind in ("seq", "state", "label")
    )
    seq_val, state_val, label_val = (
        load(kind, "val") for kind in ("seq", "state", "label")
    )

    # squeeze() removes every size-1 axis before the tokens are one-hot encoded.
    seq_train = tf.one_hot(seq_train.squeeze(), depth=depth)
    seq_val = tf.one_hot(seq_val.squeeze(), depth=depth)
    return seq_train, state_train, label_train, seq_val, state_val, label_val

In [4]:
class RMSE(tf.keras.losses.Loss):
    """Keras loss that takes the square root of a mean-squared-error loss."""

    def __init__(self):
        super(RMSE, self).__init__()
        # The averaging of squared errors is delegated to Keras' own MSE loss.
        self.loss = tf.keras.losses.MeanSquaredError()

    def call(self, y_true, y_pred):
        """Return ``sqrt(self.loss(y_true, y_pred))``."""
        mean_squared = self.loss(y_true, y_pred)
        return tf.sqrt(mean_squared)

class UsefulFraction(tf.keras.metrics.Metric):
    """Fraction of predictions whose absolute error is below ``threshold``.

    The metric keeps two running counters: the number of elements seen and
    the number of those with ``|y_true - y_pred| < threshold``. ``result()``
    is their ratio.

    Args:
        threshold: Strict upper bound on the absolute error for a prediction
            to be counted.
        name: Metric name passed to the Keras base class.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, threshold=1.0, name='UsefulFraction', **kwargs):
        super(UsefulFraction, self).__init__(name=name, **kwargs)
        self.threshold = threshold
        self.total_count = self.add_weight(name='total_count', initializer='zeros')
        self.within_threshold_count = self.add_weight(name='within_threshold_count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate counts for one batch.

        ``sample_weight`` is accepted for Keras API compatibility but is not
        used: every element counts equally.
        """
        # Bring both inputs to the counters' dtype so that mixed-precision
        # inputs (e.g. float64 labels vs float32 predictions) can be subtracted.
        dtype = self.total_count.dtype
        y_true = tf.cast(y_true, dtype)
        y_pred = tf.cast(y_pred, dtype)
        error = tf.abs(y_true - y_pred)
        within_threshold = tf.cast(tf.less(error, self.threshold), dtype)
        self.total_count.assign_add(tf.cast(tf.size(error), dtype))
        self.within_threshold_count.assign_add(tf.reduce_sum(within_threshold))

    def result(self):
        """Return the accumulated fraction, or 0 if nothing has been counted."""
        # divide_no_nan yields 0 instead of NaN when total_count is still 0.
        return tf.math.divide_no_nan(self.within_threshold_count, self.total_count)

    def reset_state(self):
        """Zero both counters (current Keras name for the reset hook)."""
        self.total_count.assign(0)
        self.within_threshold_count.assign(0)

    def reset_states(self):
        """Alias of ``reset_state`` kept for callers using the older name."""
        self.reset_state()

    def get_config(self):
        """Include ``threshold`` so the metric can be rebuilt from its config."""
        config = super(UsefulFraction, self).get_config()
        config["threshold"] = self.threshold
        return config

In [5]:
def visualize_pred(seq, state, model, label):
    """Scatter true labels against model predictions with a linear trend line.

    Args:
        seq: Sequence input forwarded to ``model.predict``.
        state: State input forwarded to ``model.predict``.
        model: Object exposing ``predict``; called with ``[seq, state]``.
        label: True target values, used as the y-axis and as the regression
            target for the trend line.

    The function draws on a new matplotlib figure and returns ``None``.
    """
    # A list of inputs and verbose=0 match the other predict calls in this
    # notebook and keep the progress bar out of the cell output.
    pred = model.predict([seq, state], verbose=0)

    # Least-squares line of label on prediction, evaluated at the predictions.
    trend = LinearRegression()
    trend.fit(pred, label)
    line = trend.predict(pred)

    fig, axs = plt.subplots(1, 1)
    axs.scatter(pred, label, marker="+", color="royalblue", alpha=0.5)
    axs.plot(pred, line, color="black")
    axs.set_xlabel("Predicted")
    axs.set_ylabel("True")

In [6]:
def create_model(seq_len=190, vocab_size=43, state_dim=326, learning_rate=1e-3):
    """Build and compile the two-input ``MICPredictor`` Keras model.

    The sequence input goes through two ``Conv1D`` layers, is flattened and
    projected by a dense layer, then concatenated with the state input. Two
    dense + layer-normalisation stages follow, ending in a single linear
    output unit.

    Args:
        seq_len: Length of the sequence axis of ``SeqInput``.
        vocab_size: Size of the last axis of ``SeqInput`` (the one-hot depth).
        state_dim: Width of ``StateInput``.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        A compiled ``tf.keras.Model`` with mean-absolute-error loss and the
        ``R2Score`` and ``UsefulFraction`` metrics.
    """
    inputs0 = tf.keras.layers.Input((seq_len, vocab_size), name="SeqInput")
    inputs1 = tf.keras.layers.Input((state_dim,), name="StateInput")

    # Sequence branch.
    x = tf.keras.layers.Conv1D(128, 5, activation='relu', name="Conv1D_0")(inputs0) # kernel_size=5 works well
    x = tf.keras.layers.Conv1D(128, 5, activation='relu', name="Conv1D_1")(x) # Just two layers work better
    x = tf.keras.layers.Flatten(name="Flatten_0")(x)
    x = tf.keras.layers.Dense(512, activation="relu", name="LearnSeqDense_0")(x)

    # Merge sequence features with the state vector along the feature axis.
    x = tf.keras.layers.Concatenate(axis=1, name="Concat")([x, inputs1])
    x = tf.keras.layers.Dense(1024, activation="relu", name="LearnConcatDense_0")(x)
    x = tf.keras.layers.LayerNormalization(name="LayerNorm_0")(x)
    x = tf.keras.layers.Dense(512, activation="relu", name="LearnConcatDense_1")(x)
    x = tf.keras.layers.LayerNormalization(name="LayerNorm_1")(x)
    x = tf.keras.layers.Dense(1, activation="linear", name="Output")(x)

    model = tf.keras.models.Model([inputs0, inputs1], x, name="MICPredictor")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=tf.keras.losses.MeanAbsoluteError(),
                  metrics=[tf.keras.metrics.R2Score(), UsefulFraction()])
    return model

In [7]:
# 5-fold cross-validation: train a fresh model per fold and collect
# MAE, R2 and UsefulFraction on that fold's validation split.
mae = []
r2 = []
usefulness = []
for i in range(5):
    # A new model is built every fold; clear Keras' global state first so
    # the previous fold's model does not keep accumulating in memory.
    tf.keras.backend.clear_session()
    seq_train, state_train, label_train, seq_val, state_val, label_val = read_data(i)
    model = create_model()
    model.fit([seq_train, state_train], label_train, batch_size=100, verbose=0, epochs=30,
              validation_data=([seq_val, state_val], label_val))
    pred = model.predict([seq_val, state_val], verbose=0)
    mae.append(mean_absolute_error(label_val, pred))
    r2.append(r2_score(label_val, pred))
    useful_metric = UsefulFraction()
    useful_metric.update_state(label_val, pred)
    # Store a plain Python float rather than a TF tensor so that formatting
    # and np.mean below do not rely on implicit tensor conversion.
    usefulness.append(float(useful_metric.result()))
    # NOTE: `:4f` is a minimum field width of 4 with the default 6 decimals
    # (it is not `.4f`); kept as-is so the output matches the recorded run.
    print(f"Iteration {i}: MAE={mae[i]:4f}; R2={r2[i]:4f}; UsefulFraction={usefulness[i]:4f}")
    # Release this fold's arrays before the next fold is loaded.
    del seq_train, state_train, label_train, seq_val, state_val, label_val
print(f"Mean: MAE={np.mean(mae):4f}; R2={np.mean(r2):4f}; UsefulFraction={np.mean(usefulness):4f}")

Iteration 0: MAE=0.970346; R2=0.503878; UsefulFraction=0.635938
Iteration 1: MAE=0.959980; R2=0.510081; UsefulFraction=0.649980
Iteration 2: MAE=0.986545; R2=0.476896; UsefulFraction=0.633903
Iteration 3: MAE=0.999669; R2=0.454921; UsefulFraction=0.633903
Iteration 4: MAE=0.993929; R2=0.474747; UsefulFraction=0.632404
Mean: MAE=0.982094; R2=0.484105; UsefulFraction=0.637226


In [8]:
# Training arrays come from the `*_cv.npy` files of the test folder.
seq_train, state_train, label_train = (
    np.load("AMPLify-Feedback/processed_data/test/seq_cv.npy"),
    np.load("AMPLify-Feedback/processed_data/test/state_cv.npy"),
    np.load("AMPLify-Feedback/processed_data/test/label_cv.npy"),
)
# Evaluation arrays come from the `*_test.npy` files; they keep the `_val`
# suffix used by the cells that consume them.
seq_val, state_val, label_val = (
    np.load("AMPLify-Feedback/processed_data/test/seq_test.npy"),
    np.load("AMPLify-Feedback/processed_data/test/state_test.npy"),
    np.load("AMPLify-Feedback/processed_data/test/label_test.npy"),
)
# One-hot encode both sequence arrays (size-1 axes squeezed out first).
seq_train, seq_val = (
    tf.one_hot(tokens.squeeze(), depth=43) for tokens in (seq_train, seq_val)
)

In [9]:
# Train a fresh model on the `*_cv.npy` arrays and score it on the arrays
# loaded from the `*_test.npy` files (held in the `*_val` variables).
model = create_model()
model.fit([seq_train, state_train], label_train, batch_size=100, verbose=0,
          epochs=30, validation_data=([seq_val, state_val], label_val))
pred = model.predict([seq_val, state_val], verbose=0)
mae = mean_absolute_error(label_val, pred)
r2 = r2_score(label_val, pred)
useful_metric = UsefulFraction()
useful_metric.update_state(label_val, pred)
# Convert the metric tensor to a Python float so all three reported values
# are plain numbers.
usefulness = float(useful_metric.result())
# NOTE: `:4f` is a minimum field width of 4 with the default 6 decimals
# (it is not `.4f`); kept as-is so the output matches the recorded run.
print(f"MAE={mae:4f}; R2={r2:4f}; UsefulFraction={usefulness:4f}")

MAE=0.940475; R2=0.522714; UsefulFraction=0.654078


In [12]:
# Load the `*_whole.npy` arrays (presumably the complete dataset — confirm).
seq, state, label = (
    np.load("AMPLify-Feedback/processed_data/test/seq_whole.npy"),
    np.load("AMPLify-Feedback/processed_data/test/state_whole.npy"),
    np.load("AMPLify-Feedback/processed_data/test/label_whole.npy"),
)
# One-hot encode the sequence tokens after squeezing out size-1 axes.
seq = tf.one_hot(seq.squeeze(), depth=43)

In [13]:
# Fit a fresh model on the `seq` / `state` / `label` arrays; no validation
# data is passed here.
model = create_model()
model.fit(
    x=[seq, state],
    y=label,
    epochs=30,
    batch_size=100,
    verbose=0,
)

<keras.src.callbacks.History at 0x7b32f858f100>

In [14]:
# Write the current `model` into the cloned repository's model_weights folder.
# NOTE(review): the `.h5` suffix presumably selects Keras' legacy HDF5 format,
# and because the model is compiled with the custom UsefulFraction metric,
# loading it likely needs `custom_objects=` or `compile=False` — confirm.
model.save("AMPLify-Feedback/model_weights/MICPredictor.h5")