In [1]:
import random
import numpy as np
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [2]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
manual_seed = 1234
random.seed(manual_seed)
np.random.seed(manual_seed)
tf.random.set_seed(manual_seed)

In [4]:
F_SIZE = 16
W_SIZE = F_SIZE + 1

In [5]:
def parametrized_function(w, x):
    return tf.math.sigmoid(tf.transpose(w) @ x)

def loss_function(x, y, eps=1e-5):
    return - (y * tf.math.log(x + eps) + (1 - y) * tf.math.log(1 - x + eps))

In [6]:
def normalize(x):
    return (x - tf.math.reduce_mean(x)) / tf.math.reduce_std(x)

def init_weights():
    return normalize(tf.convert_to_tensor(np.random.random((W_SIZE, 1))))

def init_zero_weights():
    return tf.convert_to_tensor(np.zeros((W_SIZE, 1)))

In [7]:
def generate_positive_sample(parametrized_function, w_true, loss_function, alpha, epochs):
    x = normalize(tf.convert_to_tensor(2 * np.random.random((F_SIZE, 1)) - 1))
    for _ in range(epochs):
        x = tf.Variable(x)
        with tf.GradientTape() as tape:
            x_bias = tf.concat(axis=0, values=[x, tf.constant([[1]], dtype=tf.double)])
            loss = loss_function(parametrized_function(w_true, x_bias), 1)
            [dx] = tape.gradient(loss, [x])
            x = x - alpha * dx
            x = normalize(x)
    return x

def generate_negative_sample(parametrized_function, w_true, loss_function, alpha, epochs):
    x = normalize(tf.convert_to_tensor(2 * np.random.random((F_SIZE, 1)) - 1))
    for _ in range(epochs):
        x = tf.Variable(x)
        with tf.GradientTape() as tape:
            x_bias = tf.concat(axis=0, values=[x, tf.constant([[1]], dtype=tf.double)])
            loss = loss_function(parametrized_function(w_true, x_bias), 0)
            [dx] = tape.gradient(loss, [x])
            x = x - alpha * dx
            x = normalize(x)
    return x

def generate_data(parametrized_function, w_true, loss_function, positive_size, negative_size):
    positive_features = [generate_positive_sample(parametrized_function, w_true, loss_function, 1, 10) for _ in range(positive_size)]
    negative_features = [generate_negative_sample(parametrized_function, w_true, loss_function, 1, 10) for _ in range(negative_size)]
    return positive_features, negative_features

In [8]:
def overall_accuracy(samples, parametrized_function, w, threshold):
    corrects = []
    for sample in samples:
        x_bias = tf.concat(axis=0, values=[sample[0], tf.constant([[1]], dtype=tf.double)])
        pred = parametrized_function(w, x_bias)
        positive = pred > threshold
        if (positive and sample[1] == 1):
            corrects.append(1)
        elif (not positive and sample[1] == 0):
            corrects.append(1)
        else:
            corrects.append(0)
    return sum(corrects) / len(corrects)

def overall_loss(samples, parametrized_function, w, loss_function):
    losses = []
    for sample in samples:
        x_bias = tf.concat(axis=0, values=[sample[0], tf.constant([[1]], dtype=tf.double)])
        pred = parametrized_function(w, x_bias)
        loss = loss_function(pred, sample[1])
        losses.append(loss.numpy().item())
    return sum(losses) / len(losses)

In [9]:
W_TRUE = init_weights()
positive_features, negative_features = generate_data(parametrized_function, W_TRUE, loss_function, 1000, 1000)

Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-11-22 01:40:26.716951: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-11-22 01:40:26.717608: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [10]:
data = list(zip(positive_features, [1 for _ in range(len(positive_features))]))
data.extend(zip(negative_features, [0 for _ in range(len(negative_features))]))

print(len(data))

2000


In [11]:
print(data[0])
print(data[1])

(<tf.Tensor: shape=(16, 1), dtype=float64, numpy=
array([[-1.96556316],
       [ 0.7945829 ],
       [ 1.18480258],
       [-0.67424862],
       [ 0.23123703],
       [-1.73982113],
       [-0.67877758],
       [ 1.38080267],
       [ 0.3678948 ],
       [-0.55407248],
       [ 0.84233365],
       [-0.85845247],
       [ 0.05668045],
       [ 1.14608547],
       [-0.43182676],
       [ 0.89834265]])>, 1)
(<tf.Tensor: shape=(16, 1), dtype=float64, numpy=
array([[-2.452981  ],
       [ 1.00145445],
       [ 0.26545523],
       [ 0.1526702 ],
       [ 2.31469035],
       [-1.20620314],
       [ 0.25400964],
       [-0.27297719],
       [ 0.73658598],
       [-0.01546373],
       [-0.14580335],
       [ 0.17830134],
       [ 0.71802866],
       [-0.6803797 ],
       [-0.44694249],
       [-0.40044523]])>, 1)


In [12]:
print(data[-1])
print(data[-2])

(<tf.Tensor: shape=(16, 1), dtype=float64, numpy=
array([[-0.26576476],
       [ 0.81711073],
       [-0.11711262],
       [ 0.10749772],
       [-1.81828083],
       [-0.34695197],
       [ 0.50936708],
       [-0.61625068],
       [-1.27785243],
       [-0.75505933],
       [ 0.75768985],
       [ 1.99800532],
       [ 0.66904964],
       [-1.49099638],
       [ 1.0839268 ],
       [ 0.74562185]])>, 0)
(<tf.Tensor: shape=(16, 1), dtype=float64, numpy=
array([[ 1.17966369],
       [-0.08491389],
       [-0.41743509],
       [ 0.6832066 ],
       [-1.48154872],
       [-0.40534439],
       [ 0.34989091],
       [-0.41374957],
       [ 1.05074973],
       [-1.47693782],
       [ 1.4098903 ],
       [ 1.12086431],
       [ 0.58802702],
       [-1.60851673],
       [ 0.68115687],
       [-1.17500322]])>, 0)


In [13]:
shuffled_data = list(data)
random.shuffle(shuffled_data)

data_train = shuffled_data[:int(len(shuffled_data) * 0.8)]
data_val = shuffled_data[int(len(shuffled_data) * 0.8):]

print("Train size: %g" % (len(data_train)))
print("Val size: %g" % (len(data_val)))

Train size: 1600
Val size: 400


In [14]:
print("Train std: %.4f" % (np.array(data_train).std()))

TypeError: Tensor.__init__() missing 2 required positional arguments: 'value_index' and 'dtype'

In [14]:
correlations_train = {}
for i in range(len(data_train)):
    for j in range(len(data_train)):
        a = data_train[i][0].numpy().flatten()
        a = a / np.linalg.norm(a)
        b = data_train[j][0].numpy().flatten()
        b = b / np.linalg.norm(b)
        corr = np.correlate(a, b)
        correlations_train[(i, j)] = corr.item()

In [15]:
correlations_val = {}
for i in range(len(data_val)):
    for j in range(len(data_val)):
        a = data_val[i][0].numpy().flatten()
        a = a / np.linalg.norm(a)
        b = data_val[j][0].numpy().flatten()
        b = b / np.linalg.norm(b)
        corr = np.correlate(a, b)
        correlations_val[(i, j)] = corr.item()

In [16]:
print("Train Sample Correlations: %s" % random.sample(list(correlations_train.values()), k=20))
print("Train Correlation Mean: %.4f" % np.array(list(correlations_train.values())).mean())
print("Train Correlation Std: %.4f" % np.array(list(correlations_train.values())).std())
print("Val Sample Correlations: %s" % random.sample(list(correlations_val.values()), k=20))
print("Val Correlation Mean: %.4f" % np.array(list(correlations_val.values())).mean())
print("Val Correlation Std: %.4f" % np.array(list(correlations_val.values())).std())

Train Sample Correlations: [0.18391372672553907, 0.4243564316544655, 0.19222805529655598, -0.369681867922864, -0.18749768745667525, -0.40632969297104204, 0.392746571718559, -0.4582202042683246, -0.4391372101546448, -0.6561834453298546, -0.1494622466401298, -0.17909417449273968, -0.4557024792232534, -0.49488302092590225, -0.26515402888837225, 0.13895470220575207, -0.17445720354124886, 0.07699026105529612, -0.25963135873708826, -0.15019030290955382]
Train Correlation Mean: 0.0004
Train Correlation Std: 0.3141
Val Sample Correlations: [0.35595877388091485, 0.3176715880553382, 0.144135490041029, 0.3380124347166352, -0.10656886147556312, 0.04102369926036173, -0.09606095768477857, 0.35698993934531353, -0.1385173369374279, -0.02936809536569257, 0.34034139615512626, 0.24302246590722637, 0.2922866372293283, 0.44487975107384464, 0.25460511168206407, -0.4175521996160919, 0.5416136584228846, 0.0541875987258834, 0.45792389723569926, -0.2714217109994897]
Val Correlation Mean: 0.0029
Val Correlation 

In [17]:
print('Train Accuracy: %s' % (overall_accuracy(data_train, parametrized_function, W_TRUE, 0.5)))
print('Train Loss: %s' % (overall_loss(data_train, parametrized_function, W_TRUE, loss_function)))
print('Val Accuracy: %s' % (overall_accuracy(data_val, parametrized_function, W_TRUE, 0.5)))
print('Val Loss: %s' % (overall_loss(data_val, parametrized_function, W_TRUE, loss_function)))

Train Accuracy: 1.0
Train Loss: 0.0017026572075240995
Val Accuracy: 1.0
Val Loss: 0.0016255441044300662


In [18]:
print('Val Loss: %s (for random weights)' % (overall_loss(data_val, parametrized_function, init_weights(), loss_function)))

Val Loss: 0.7464243565757671 (for random weights)


In [41]:
def update(x, y, w, alpha, parametrized_function, loss_function):
    w_var = tf.Variable(w)
    with tf.GradientTape(persistent=True) as tape:
        x_bias = tf.concat(axis=0, values=[x, tf.constant([[1]], dtype=tf.double)])
        pred = parametrized_function(w_var, x_bias)
        loss = loss_function(pred, y)
        [dw] = tape.gradient(loss, [w_var])
        return w - alpha * dw, loss

In [42]:
def SGD(alpha, epochs, samples, parametrized_function, loss_function):
    w_list = [init_zero_weights()]
    for _ in range(epochs):
        sample = random.choice(samples)
        w_new, loss = update(sample[0], sample[1], w_list[-1], alpha, parametrized_function, loss_function)
        w_list.append(w_new)
    return sum(w_list) / len(w_list)

def SGD_no_average(alpha, epochs, samples, parametrized_function, loss_function):
    w = init_zero_weights()
    for _ in range(epochs):
        sample = random.choice(samples)
        w_new, loss = update(sample[0], sample[1], w, alpha, parametrized_function, loss_function)
        w = w_new
    return w

In [43]:
H = []
B = []

for i in range(100):
    print("Training %d" % (i + 1,))
    h = SGD_no_average(0.2, 200, data_train, parametrized_function, loss_function)
    H.append(h)
    B.append(tf.linalg.norm(h))

Training 1
Training 2
Training 3
Training 4
Training 5
Training 6
Training 7
Training 8
Training 9
Training 10
Training 11
Training 12
Training 13
Training 14
Training 15
Training 16
Training 17
Training 18
Training 19
Training 20
Training 21
Training 22
Training 23
Training 24
Training 25
Training 26
Training 27
Training 28
Training 29
Training 30
Training 31
Training 32
Training 33
Training 34
Training 35
Training 36
Training 37
Training 38
Training 39
Training 40
Training 41
Training 42
Training 43
Training 44
Training 45
Training 46
Training 47
Training 48
Training 49
Training 50
Training 51
Training 52
Training 53
Training 54
Training 55
Training 56
Training 57
Training 58
Training 59
Training 60
Training 61
Training 62
Training 63
Training 64
Training 65
Training 66
Training 67
Training 68
Training 69
Training 70
Training 71
Training 72
Training 73
Training 74
Training 75
Training 76
Training 77
Training 78
Training 79
Training 80
Training 81
Training 82
Training 83
Training 84
T

In [46]:
max_bound = max(B)
print("Max bound: %.4f" % (max_bound,))

Max bound: 2.9351


In [47]:
print('Train Accuracy: %.4f' % (overall_accuracy(data_train, parametrized_function, W[0], 0.5)))
print('Train Loss: %.4f' % (overall_loss(data_train, parametrized_function, W[0], loss_function)))
print('Val Accuracy: %.4f' % (overall_accuracy(data_val, parametrized_function, W[0], 0.5)))
print('Val Loss: %.4f' % (overall_loss(data_val, parametrized_function, W[0], loss_function)))

Train Accuracy: 1.0000
Train Loss: 0.0294
Val Accuracy: 1.0000
Val Loss: 0.0288


In [48]:
print(H[0])

tf.Tensor(
[[-1.24486471]
 [ 0.2547775 ]
 [-0.36953599]
 [ 0.62673782]
 [ 0.63649705]
 [-0.88385784]
 [-0.91065816]
 [ 0.68581855]
 [ 1.27437051]
 [ 0.96379115]
 [-0.64365484]
 [-0.29867546]
 [ 0.30138319]
 [ 0.20325857]
 [-0.5516097 ]
 [-0.04377763]
 [-0.25890402]], shape=(17, 1), dtype=float64)


In [49]:
print(H[0])

tf.Tensor(
[[-1.24486471]
 [ 0.2547775 ]
 [-0.36953599]
 [ 0.62673782]
 [ 0.63649705]
 [-0.88385784]
 [-0.91065816]
 [ 0.68581855]
 [ 1.27437051]
 [ 0.96379115]
 [-0.64365484]
 [-0.29867546]
 [ 0.30138319]
 [ 0.20325857]
 [-0.5516097 ]
 [-0.04377763]
 [-0.25890402]], shape=(17, 1), dtype=float64)


In [50]:
def b_smooth_calculation(samples, epochs):
    max_b = None
    
    for i in range(epochs):
        print("B-Smooth Epoch: %g" % (i + 1,))
        
        w1 = tf.Variable(init_weights())
        w2 = tf.Variable(init_weights())

        w1_losses = []
        with tf.GradientTape(persistent=True) as tape:
            for sample in samples:
                x_bias = tf.concat(axis=0, values=[sample[0], tf.constant([[1]], dtype=tf.double)])
                pred = parametrized_function(w1, x_bias)
                w1_losses.append(loss_function(pred, sample[1]))
            [dw1] = tape.gradient(sum(w1_losses) / len(w1_losses), [w1])

        w2_losses = []
        with tf.GradientTape(persistent=True) as tape:
            for sample in samples:
                x_bias = tf.concat(axis=0, values=[sample[0], tf.constant([[1]], dtype=tf.double)])
                pred = parametrized_function(w2, x_bias)
                w2_losses.append(loss_function(pred, sample[1]))
            [dw2] = tape.gradient(sum(w2_losses) / len(w2_losses), [w2])

        d_left = np.linalg.norm(dw1 - dw2)
        d_right = np.linalg.norm(w1 - w2)
        b = d_left / d_right

        if max_b is None or b > max_b:\
            max_b = b
    
    print("Max B-Smooth value: %.4f" % (max_b,))

    return max_b

def p_lipschitz_calculation(samples, epochs):
    max_p = None
    
    for i in range(epochs):
        print("P-Lipschitz Epoch: %g" % (i + 1,))
        
        w1 = tf.Variable(init_weights())
        w2 = tf.Variable(init_weights())

        left_diffs = []
        for sample in samples:
            x_bias = tf.concat(axis=0, values=[sample[0], tf.constant([[1]], dtype=tf.double)])
            pred_1 = parametrized_function(w1, x_bias)
            pred_2 = parametrized_function(w2, x_bias)
            loss_1 = loss_function(pred_1, sample[1])
            loss_2 = loss_function(pred_2, sample[1])
            left_diffs.append(tf.abs(loss_1 - loss_2))

        left = sum(left_diffs) / len(left_diffs)
        right = np.linalg.norm(w1 - w2)
        p = left / right

        if max_p is None or p > max_p:
            max_p = p
    
    print("Max P-Lipschitz value: %.4f" % (max_p,))

    return max_p


def calculate_alpha(bound, p_lipschitz, epochs):
    return np.sqrt(np.power(bound, 2) / (np.power(p_lipschitz, 2) * epochs))

def calculate_epochs(error, max_bound, p_lipschitz):
    return int((np.power(max_bound, 2) * np.power(p_lipschitz, 2)) / error) + 1

def theorem(W_sgd, H, b_smooth, alpha, epochs):
    losses_sgd = []
    for w_sgd in W_sgd:
        learning_loss = overall_loss(data_val, parametrized_function, w_sgd, loss_function)
        losses_sgd.append(learning_loss)
    expected_loss_sgd = sum(losses_sgd) / len(losses_sgd)

    variants = []

    for h in H:
        learning_loss_h = overall_loss(data_val, parametrized_function, h, loss_function)
        loss_h = (1 / (1 - alpha * b_smooth)) * (learning_loss_h + tf.reduce_sum(tf.pow(h, 2)) / (2 * alpha * epochs))
        variants.append("Theorem %.4f <= %.4f is %s" % (expected_loss_sgd, loss_h, (expected_loss_sgd <= loss_h).numpy().item()))
        
    return variants

In [51]:
p_lipschitz = p_lipschitz_calculation(data_val, 20)
b_smooth = b_smooth_calculation(data_val, 20)

P-Lipschitz Epoch: 1
P-Lipschitz Epoch: 2
P-Lipschitz Epoch: 3
P-Lipschitz Epoch: 4
P-Lipschitz Epoch: 5
P-Lipschitz Epoch: 6
P-Lipschitz Epoch: 7
P-Lipschitz Epoch: 8
P-Lipschitz Epoch: 9
P-Lipschitz Epoch: 10
P-Lipschitz Epoch: 11
P-Lipschitz Epoch: 12
P-Lipschitz Epoch: 13
P-Lipschitz Epoch: 14
P-Lipschitz Epoch: 15
P-Lipschitz Epoch: 16
P-Lipschitz Epoch: 17
P-Lipschitz Epoch: 18
P-Lipschitz Epoch: 19
P-Lipschitz Epoch: 20
Max P-Lipschitz value: 0.6072
B-Smooth Epoch: 1
B-Smooth Epoch: 2
B-Smooth Epoch: 3
B-Smooth Epoch: 4
B-Smooth Epoch: 5
B-Smooth Epoch: 6
B-Smooth Epoch: 7
B-Smooth Epoch: 8
B-Smooth Epoch: 9
B-Smooth Epoch: 10
B-Smooth Epoch: 11
B-Smooth Epoch: 12
B-Smooth Epoch: 13
B-Smooth Epoch: 14
B-Smooth Epoch: 15
B-Smooth Epoch: 16
B-Smooth Epoch: 17
B-Smooth Epoch: 18
B-Smooth Epoch: 19
B-Smooth Epoch: 20
Max B-Smooth value: 0.2072


In [52]:
W_sgd = []
B_sgd = []

epochs_vars = [5, 10, 50, 100, 250, 500, 750, 1000, 2000, 2500, 3000, 4000, 5000]

for epochs in epochs_vars:
    alpha = calculate_alpha(max_bound, p_lipschitz, epochs)
    print("SGD for %.7f alpha and %g epochs:" % (alpha, epochs), end=' ')
    w_sgd = SGD(alpha, epochs, data_train, parametrized_function, loss_function)
    print('Val Accuracy: %.7f' % (overall_accuracy(data_val, parametrized_function, w_sgd, 0.5)), end=' ')
    print('Val Loss: %.7f' % (overall_loss(data_val, parametrized_function, w_sgd, loss_function)))
    W_sgd.append(w_sgd)
    B_sgd.append(tf.linalg.norm(w_sgd))

SGD for 2.1616252 alpha and 5 epochs: Val Accuracy: 0.9200000 Val Loss: 0.1924840
SGD for 1.5284998 alpha and 10 epochs: Val Accuracy: 0.9800000 Val Loss: 0.0484080
SGD for 0.6835659 alpha and 50 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0202991
SGD for 0.4833541 alpha and 100 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0180680
SGD for 0.3057000 alpha and 250 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0161784
SGD for 0.2161625 alpha and 500 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0095580
SGD for 0.1764960 alpha and 750 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0084943
SGD for 0.1528500 alpha and 1000 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0083484
SGD for 0.1080813 alpha and 2000 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0055380
SGD for 0.0966708 alpha and 2500 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0052908
SGD for 0.0882480 alpha and 3000 epochs: Val Accuracy: 1.0000000 Val Loss: 0.0047647
SGD for 0.0764250 alpha and 4000 epochs: Val Accuracy: 1.0000000 Val Loss: 0

In [55]:
variants = theorem(W_sgd, H, b_smooth, alpha, epochs)
print('Theorems:')
for variant in variants:
    print(variant)

Theorems:
Theorem 0.0266 <= 0.5316 is True
Theorem 0.0266 <= 0.5089 is True
Theorem 0.0266 <= 0.5303 is True
Theorem 0.0266 <= 0.5146 is True
Theorem 0.0266 <= 0.5202 is True
Theorem 0.0266 <= 0.5345 is True
Theorem 0.0266 <= 0.5078 is True
Theorem 0.0266 <= 0.5268 is True
Theorem 0.0266 <= 0.5400 is True
Theorem 0.0266 <= 0.5522 is True
Theorem 0.0266 <= 0.5158 is True
Theorem 0.0266 <= 0.5300 is True
Theorem 0.0266 <= 0.5442 is True
Theorem 0.0266 <= 0.5193 is True
Theorem 0.0266 <= 0.5365 is True
Theorem 0.0266 <= 0.5211 is True
Theorem 0.0266 <= 0.5397 is True
Theorem 0.0266 <= 0.5218 is True
Theorem 0.0266 <= 0.5376 is True
Theorem 0.0266 <= 0.5385 is True
Theorem 0.0266 <= 0.5330 is True
Theorem 0.0266 <= 0.5263 is True
Theorem 0.0266 <= 0.5512 is True
Theorem 0.0266 <= 0.5112 is True
Theorem 0.0266 <= 0.5160 is True
Theorem 0.0266 <= 0.5073 is True
Theorem 0.0266 <= 0.5569 is True
Theorem 0.0266 <= 0.5205 is True
Theorem 0.0266 <= 0.5427 is True
Theorem 0.0266 <= 0.5402 is True


In [61]:
epochs_vars = [5, 10, 50, 100, 250, 500, 750, 1000, 2000, 2500, 3000, 4000, 5000, 10000]

for epochs in epochs_vars:
    alpha = calculate_alpha(max_bound, p_lipschitz, epochs)
    print(">>> Checking for %.7f alpha and %g epochs" % (alpha, epochs))
    W_sgd_s = []
    for i in range(5):
        print("W_sgd %d" % (i + 1,), end=' ')
        w_sgd = SGD(alpha, epochs, data_train, parametrized_function, loss_function)
        print('Val Accuracy: %.7f' % (overall_accuracy(data_val, parametrized_function, w_sgd, 0.5)), end=' ')
        print('Val Loss: %.7f' % (overall_loss(data_val, parametrized_function, w_sgd, loss_function)))
        W_sgd_s.append(w_sgd)

    variants = theorem(W_sgd_s, H, b_smooth, alpha, epochs)
    print('>>> Theorems:')
    for variant in variants:
        print(variant)

>>> Checking for 2.1616252 alpha and 5 epochs
W_sgd 1 Val Accuracy: 0.8925000 Val Loss: 0.2750924
W_sgd 2 Val Accuracy: 0.8700000 Val Loss: 0.3313940
W_sgd 3 Val Accuracy: 0.9725000 Val Loss: 0.0806744
W_sgd 4 Val Accuracy: 0.9725000 Val Loss: 0.0761870
W_sgd 5 Val Accuracy: 0.8650000 Val Loss: 0.3506899
>>> Theorems:
Theorem 0.2228 <= 0.7046 is True
Theorem 0.2228 <= 0.6739 is True
Theorem 0.2228 <= 0.7023 is True
Theorem 0.2228 <= 0.6819 is True
Theorem 0.2228 <= 0.6891 is True
Theorem 0.2228 <= 0.7083 is True
Theorem 0.2228 <= 0.6724 is True
Theorem 0.2228 <= 0.6978 is True
Theorem 0.2228 <= 0.7156 is True
Theorem 0.2228 <= 0.7324 is True
Theorem 0.2228 <= 0.6829 is True
Theorem 0.2228 <= 0.7024 is True
Theorem 0.2228 <= 0.7215 is True
Theorem 0.2228 <= 0.6878 is True
Theorem 0.2228 <= 0.7109 is True
Theorem 0.2228 <= 0.6902 is True
Theorem 0.2228 <= 0.7153 is True
Theorem 0.2228 <= 0.6914 is True
Theorem 0.2228 <= 0.7125 is True
Theorem 0.2228 <= 0.7137 is True
Theorem 0.2228 <= 0.