In [1]:
import numpy as np
from numpy import dot
from numpy.linalg import norm
import random
import pickle
from tqdm import tqdm
import gc

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from keras import models, layers, losses, optimizers, regularizers, Model

2023-11-13 13:03:42.477683: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-13 13:03:42.477717: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-13 13:03:42.477749: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Run a full garbage-collection pass before loading data; the value echoed as
# the cell output is the number of unreachable objects the collector found.
gc.collect()

0

In [3]:
def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# `brain_index` is later used to index each averaged sample; `nouns` and `verbs`
# are mappings from a word to its vector (they are indexed by key further down).
brain_index = _load_pickle("support.pkl")
nouns, verbs = _load_pickle("vecs.pkl")

# Each pickle file holds a list of records whose first element is the sample
# array and whose second element is the noun label; flatten them into one list.
records = [record for i in range(1) for record in _load_pickle(f"pickles/{i}.pkl")]

# Group the samples by noun in a single pass (insertion order is preserved, so
# the per-noun summation order matches the order of the records on disk).
samples_by_noun = {}
for record in records:
    samples_by_noun.setdefault(record[1], []).append(record[0])

missing_nouns = [noun for noun in nouns if noun not in samples_by_noun]
if missing_nouns:
    raise ValueError(f"no records found for nouns: {missing_nouns}")

# One (mean sample restricted to `brain_index`, noun) pair per noun, in the
# iteration order of `nouns`.
pickles = [
    ((np.add.reduce(samples_by_noun[noun]) / len(samples_by_noun[noun]))[brain_index], noun)
    for noun in nouns
]

len(nouns), len(verbs)

(60, 25)

In [4]:
class BasisSum(Model):
    """Three tanh Dense layers followed by a projection onto a frozen verb basis.

    The final bias-free Dense layer emits one coefficient per row of
    `self.basis`; the coefficients are multiplied with the basis, squashed
    with a sigmoid and rescaled so that every output row sums to 1.
    """

    def __init__(self):
        super().__init__()
        # One row per entry of the module-level `verbs` mapping, kept frozen.
        verb_rows = [verbs[key] for key in verbs]
        self.basis = tf.Variable(
            tf.convert_to_tensor(verb_rows),
            trainable=False,
            name="verb_basis",
        )
        self.d1 = layers.Dense(256, activation="tanh")
        self.d2 = layers.Dense(128, activation="tanh")
        self.d3 = layers.Dense(64, activation="tanh")
        # Output width equals the number of basis rows.
        self.dn = layers.Dense(self.basis.shape[0], use_bias=False)

    @tf.function(reduce_retracing=True)
    def call(self, x):
        """Map a batch `x` to rows of positive values that each sum to 1."""
        hidden = x
        for dense in (self.d1, self.d2, self.d3, self.dn):
            hidden = dense(hidden)

        # (batch, n_basis) coefficients times (n_basis, dim) basis -> (batch, dim).
        mixed = tf.einsum("bi, ij -> bj", hidden, self.basis)
        squashed = tf.sigmoid(mixed)
        row_totals = tf.reduce_sum(squashed, axis=-1, keepdims=True)

        return squashed / row_totals

In [5]:
def get_model(hidden_units=(), activation="tanh"):
    """Build a functional Keras model that projects inputs onto the verb basis.

    Args:
        hidden_units: Iterable of layer widths for optional Dense layers placed
            before the projection. The default (empty) gives the purely linear
            model; pass e.g. ``(256, 128, 64)`` to add a hidden stack.
        activation: Activation used by the optional hidden layers.

    Returns:
        A `Model` whose input shape is taken from the first entry of the
        module-level `pickles` list and whose output rows are sigmoid-squashed
        combinations of the frozen verb basis, rescaled to sum to 1.
    """
    # One row per entry of the module-level `verbs` mapping, kept frozen.
    basis = tf.Variable(
        tf.convert_to_tensor([verbs[verb] for verb in verbs]),
        trainable=False,
        name="verb_basis",
    )

    inputs = layers.Input(pickles[0][0].shape)
    x = inputs
    for units in hidden_units:
        x = layers.Dense(units, activation=activation)(x)

    # Bias-free layer emitting one coefficient per basis row.
    x = layers.Dense(basis.shape[0], use_bias=False)(x)
    x = tf.einsum("bi, ij -> bj", x, basis)
    x = tf.sigmoid(x)
    # Sigmoid outputs are strictly positive, so the row sum is never zero.
    x = x / tf.reduce_sum(x, axis=-1, keepdims=True)

    return Model(inputs=inputs, outputs=x)

In [6]:
def l2(a, b):
    """Return the Euclidean (L2) distance between array-likes `a` and `b`."""
    difference = np.asarray(a) - np.asarray(b)
    return norm(difference)

In [7]:
# Repeated leave-two-out evaluation: each trial trains a fresh model on all but
# two samples (with mixup-style interpolation between random pairs) and checks
# whether the two held-out predictions are closer to their own targets than to
# each other's.
total = 500            # number of independent trials
batch_size = 64
train_steps = 2000     # gradient steps per trial
learning_rate = 0.001

# Loop-invariant: stack the inputs and their target noun vectors once. The
# trials permute indices instead of shuffling `pickles` in place, so re-running
# this cell does not depend on state left behind by a previous run.
x = tf.cast(np.array([item[0] for item in pickles]), tf.dtypes.float32)
y = tf.cast(np.array([nouns[item[1]] for item in pickles]), tf.dtypes.float32)

loss = losses.MeanSquaredError()
pbar = tqdm(range(total))
correct_count = 0

for i in pbar:
    # A new model is built on every trial; drop the global Keras state of the
    # previous one so memory does not grow over the run.
    tf.keras.backend.clear_session()
    model = get_model()
    opt = optimizers.Adam(learning_rate)

    # Random split: the last two indices of the permutation are held out.
    order = np.random.permutation(len(pickles))
    train_x, test_x = tf.gather(x, order[:-2]), tf.gather(x, order[-2:])
    train_y, test_y = tf.gather(y, order[:-2]), tf.gather(y, order[-2:])

    for _ in range(train_steps):
        idx1 = np.random.choice(len(train_x), batch_size)
        idx2 = np.random.choice(len(train_x), batch_size)

        batch_x1, batch_y1 = tf.gather(train_x, idx1), tf.gather(train_y, idx1)
        batch_x2, batch_y2 = tf.gather(train_x, idx2), tf.gather(train_y, idx2)

        # Interpolate inputs and targets of each pair with the same random ratio.
        ratios = tf.random.uniform((len(batch_x1), 1), 0, 1)
        batch_x = batch_x1 * ratios + batch_x2 * (1 - ratios)
        batch_y = batch_y1 * ratios + batch_y2 * (1 - ratios)

        # Only the forward pass belongs inside the tape context.
        with tf.GradientTape() as tape:
            pred_y = model(batch_x)
            batchloss = loss(batch_y, pred_y)

        # Computed outside the context so the tape does not also record the
        # backward pass and the optimizer update.
        grads = tape.gradient(batchloss, model.trainable_variables)
        opt.apply_gradients(zip(grads, model.trainable_variables))

    pred = model(test_x)
    t1, t2 = test_y.numpy()
    t1, t2 = t1.flat, t2.flat
    p1, p2 = pred.numpy()
    p1, p2 = p1.flat, p2.flat

    # 2-vs-2 test: the matched pairing must be closer than the swapped pairing.
    correct = l2(t1, p1) + l2(t2, p2)
    incorrect = l2(t1, p2) + l2(t2, p1)

    correct_count += int(correct < incorrect)

    pbar.set_description(f"accuracy: {correct_count / (i + 1):.3f}")

  0%|          | 0/500 [00:00<?, ?it/s]

accuracy: 0.730: 100%|██████████| 500/500 [4:02:59<00:00, 29.16s/it]  
