In [1]:
import os
# Set before the remaining imports run.
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is TensorFlow's native-log verbosity
# switch; presumably Commit/Model import TensorFlow and the variable has to be
# in place before that first import to have any effect — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from tqdm.auto import tqdm
# NOTE(review): _preload is underscore-prefixed (private by convention) but is
# called directly from a later cell; consider exposing a public name for it.
from dataset import get_labelled, get_unlabelled, _preload
from Commit import CommitFactory
from Model import CommitDiffModelFactory

In [2]:
# Size settings shared by the rest of the notebook. BAG_SIZE and CONTEXT_SIZE
# are forwarded to both factories and to both dataset loaders, so every
# component is configured with the same values; OUTPUT_SIZE is only forwarded
# to CommitDiffModelFactory.
OUTPUT_SIZE = 128
BAG_SIZE = 512
CONTEXT_SIZE = 16

In [3]:
# Both factories receive the same bag/context sizes; build that keyword set
# once so the two calls cannot drift apart.
shared_sizes = dict(BAG_SIZE=BAG_SIZE, CONTEXT_SIZE=CONTEXT_SIZE)

Commit = CommitFactory(**shared_sizes)
# The model factory additionally takes the output size.
CommitDiffModel = CommitDiffModelFactory(**shared_sizes, OUTPUT_SIZE=OUTPUT_SIZE)

In [None]:
# Eagerly run the dataset module's preload step. Judging by the captured
# output below, it reads pickled commit lookup tables from
# ../data/commit_lookups/ (4 items on the progress bar).
# NOTE(review): presumably this lets get_labelled/get_unlabelled reuse the
# loaded tables — confirm against dataset.py.
_preload()

Loading Commit lookup table


  0%|          | 0/4 [00:00<?, ?it/s]

Loading file ../data/commit_lookups/supervised_commit_data_lookup0-1000.pickle


In [None]:
# Labelled data arrives already split into train/test inputs and labels,
# built with the notebook-wide bag and context sizes.
X_train, X_test, y_train, y_test = get_labelled(
    BAG_SIZE=BAG_SIZE,
    CONTEXT_SIZE=CONTEXT_SIZE,
)

In [None]:
# Unlabelled samples (later fed to fit_siam), built with the same bag and
# context sizes as the labelled set.
X_train_unsupervised = get_unlabelled(
    BAG_SIZE=BAG_SIZE,
    CONTEXT_SIZE=CONTEXT_SIZE,
)

In [None]:
# Dataset sanity check: sizes of each split plus the label sum divided by the
# number of labels ("split") for the train and test sets.
dataset_stats = [
    ("Train set size", len(y_train)),
    ("Train set split", np.sum(y_train) / len(y_train)),
    ("Test set size", len(y_test)),
    ("Test set split", np.sum(y_test) / len(y_test)),
    ("Unsupervised Train Size", len(X_train_unsupervised)),
]
for stat_name, stat_value in dataset_stats:
    print(stat_name, stat_value)

In [None]:
# Quick end-to-end pass with encoder 0: a single epoch of each training stage
# at batch size 64, to check the pipeline runs before the full sweep.
smoke_fit_args = dict(epochs=1, batch_size=64, verbose=1)

debug = CommitDiffModel()
debug.initialize(encoder=0)
debug.fit_siam(
    np.array(X_train_unsupervised),
    **smoke_fit_args,
)
debug.fit_binary_classification(
    X_train,
    np.array(y_train),
    **smoke_fit_args,
)

In [None]:
# Call the smoke-run model's debug() method on the labelled training inputs
# and print whatever it returns. `output` is kept as a notebook variable so it
# can be inspected in later cells.
output = debug.debug(X_train)
print("Output:", output)

In [None]:
# Encoder sweep: for each encoder value, build a fresh model, run the
# fit_siam stage on the unlabelled data (4 epochs, batch 256), then the
# binary-classification stage on the labelled training split (8 epochs,
# batch 32), and finally evaluate on the held-out test split.
#
# NOTE(review): encoder 0 is the one used by the debug cell and 5 is absent
# from this list — confirm that both omissions are intentional.

# These conversions do not depend on the encoder, so do them once instead of
# once per iteration.
# NOTE(review): assumes the fit/evaluate methods do not modify their inputs
# in place — confirm against Model.py.
unsupervised_inputs = np.array(X_train_unsupervised)
train_labels = np.array(y_train)
test_labels = np.array(y_test)

# Keep every result so the encoders can be compared after the loop finishes,
# rather than only leaving the last `score` behind.
encoder_scores = {}

for encoder in tqdm([6, 7, 8, 9, 10, 11, 1, 2, 3, 4], desc="encoders"):
    model = CommitDiffModel()
    model.initialize(encoder=encoder)
    model.fit_siam(unsupervised_inputs, epochs=4, batch_size=256, verbose=1)
    model.fit_binary_classification(X_train, train_labels, epochs=8, batch_size=32, verbose=1)
    score = model.evaluate_binary_classification(X_test, test_labels, verbose=0)
    encoder_scores[encoder] = score
    print("Encoder:", encoder)
    print("Score:", score)