In [1]:
import os
# These environment variables are assigned before `import tensorflow` below; keep
# that ordering if this cell is reorganised.
# NOTE(review): presumably TF_CPP_MIN_LOG_LEVEL quiets TensorFlow's native logging and
# OMP_NUM_THREADS / KMP_BLOCKTIME tune the OpenMP runtime — confirm the intended values.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['OMP_NUM_THREADS'] = '3' 
os.environ['KMP_BLOCKTIME'] = '1'
import numpy as np
import gc
from tqdm.auto import tqdm
# Project-local helpers: dataset loading, and the Commit / model class factories.
from dataset import get_labelled, get_unlabelled, _preload, _unload, unlabelled_generator
from Commit import CommitFactory
from Model import CommitDiffModelFactory

import tensorflow as tf

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Size hyper-parameters shared by the Commit factory, the model factory and the
# dataset loaders used in the cells below.
CONTEXT_SIZE, BAG_SIZE, OUTPUT_SIZE = 16, 256, 512

In [3]:
# Specialise the Commit class and the diff model class for the sizes chosen above.
Commit = CommitFactory(
    BAG_SIZE=BAG_SIZE,
    CONTEXT_SIZE=CONTEXT_SIZE,
)
CommitDiffModel = CommitDiffModelFactory(
    BAG_SIZE=BAG_SIZE,
    CONTEXT_SIZE=CONTEXT_SIZE,
    OUTPUT_SIZE=OUTPUT_SIZE,
)

In [4]:
# Preload the commit lookups (the recorded output shows the lookup table being read
# from the labelled pickle files), capped at 4096 * 8 = 32768 commits.
_preload(
    max_commit_bag_size=2048,
    max_commits=4096 * 8,
)

Loading Commit lookup table


Loading commit lookups:   0%|          | 0/32768 [00:00<?, ?it/s]

Loading file ../data/commit_lookups/labelled/01_priority_commit_lookups.pickle


In [None]:
# Labelled data, already split into train / test features and labels by get_labelled.
X_train, X_test, y_train, y_test = get_labelled(
    BAG_SIZE=BAG_SIZE,
    CONTEXT_SIZE=CONTEXT_SIZE,
)

In [None]:
# Unlabelled data used for the siamese (unsupervised) pre-training step further down.
X_train_unsupervised = get_unlabelled(
    BAG_SIZE=BAG_SIZE,
    CONTEXT_SIZE=CONTEXT_SIZE,
)

In [None]:
# Report the size of each split and its "split" ratio, i.e. sum(labels) / count.
# NOTE(review): this reads as the positive-class fraction only if labels are 0/1 — confirm.
print("Train set size", len(y_train))
print("Train set split", np.sum(y_train) / len(y_train))

print("Test set size", len(y_test))
print("Test set split", np.sum(y_test) / len(y_test))

print("Unsupervised Train Size", len(X_train_unsupervised))

In [None]:
# Counterpart of the earlier `_preload(...)` call, run once the train/test arrays exist.
# NOTE(review): presumably releases the preloaded commit lookups — confirm in dataset.py.
_unload()

In [4]:
# Stream the unlabelled commits through tf.data instead of materialising them.
#
# Each generator element is declared as a pair of float32 tensors shaped
# (BAG_SIZE, CONTEXT_SIZE). The shape is taken from the notebook constants rather
# than the literals (256, 16) so the dataset declaration cannot drift away from the
# model built by CommitDiffModelFactory with the same constants.
# NOTE(review): `unlabelled_generator` is invoked without arguments, so it must emit
# that shape on its own — confirm its defaults in dataset.py.
output_types = (tf.float32, tf.float32)
output_shapes = ((BAG_SIZE, CONTEXT_SIZE), (BAG_SIZE, CONTEXT_SIZE))

# `output_signature` supersedes the deprecated `output_types` / `output_shapes`
# arguments of `from_generator`; it is derived from the two tuples above.
output_signature = tuple(
    tf.TensorSpec(shape=shape, dtype=dtype)
    for shape, dtype in zip(output_shapes, output_types)
)

dataset = tf.data.Dataset.from_generator(
    unlabelled_generator,
    output_signature=output_signature,
)

batch_size = 10
dataset = dataset.batch(batch_size)

# Fit the siamese objective directly from the streaming dataset.
# NOTE(review): the recorded run of this cell ended in
#   ValueError: Layer "gradient_accumulate_model" expects 2 input(s), but it
#   received 1 input tensors
# with a single unbatched (256, 16) tensor reaching the model. That suggests the
# pair is being split before the model call inside `fit_siam_generator` (or treated
# as (inputs, targets)) — verify in Model.py; it cannot be resolved from this cell.
# TODO(review): `unsupervised_data_size = 100` is a magic number — confirm it matches
# the number of elements the generator yields.
model = CommitDiffModel(unsupervised_data_size = 100)
model.initialize(encoder=0)
model.fit_siam_generator(dataset, epochs=8, verbose=1)


Epoch 1/8


Generating Unsupervised X_train:   0%|          | 0/184 [00:00<?, ?it/s]

ValueError: Layer "gradient_accumulate_model" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(256, 16), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>]

In [None]:
# Sweep over the encoder variants: for each one, pre-train with the siamese
# objective on the unlabelled data, fine-tune as a binary classifier on the
# labelled train split, then report the score on the test split.
for encoder in tqdm([0,1,5,4,2,3]):
    # Bind the name before the try block. Without this, a failure inside
    # CommitDiffModel(...) leaves `model` unassigned for this iteration, and the
    # `del model` below would either delete a stale model from an earlier cell or
    # raise NameError and abort the remaining encoders.
    model = None
    try:
        model = CommitDiffModel(unsupervised_data_size = len(X_train_unsupervised))
        model.initialize(encoder=encoder)
        model.fit_siam(np.array(X_train_unsupervised), epochs=8, verbose=1)
        model.fit_binary_classification(X_train, np.array(y_train), epochs=8, batch_size=4, verbose=1)
        score = model.evaluate_binary_classification(X_test, np.array(y_test), verbose=0)
        print("Enocder:", encoder)
        print("Score:", score)
    except Exception as e:
        # Best-effort sweep: report the failure and carry on with the next encoder.
        print(e)
    print("------------------------------------------")
    # Drop the reference and force a collection so the model is released before
    # the next encoder is built.
    del model
    gc.collect()