# MultiNLI: Randomly Initialize 3 Models

The same three random initializations are reused for every model trained in the thesis. All results are reported as the mean and standard deviation over these 3 runs.

In [None]:
import sys
from google.colab import drive
# Mount Google Drive and make the project root importable so that the
# project-local `reprshift` package can be imported in later cells.
drive.mount('/content/drive', force_remount=True)
path_to_root = '/content/drive/My Drive/Colab Notebooks/BatuEl_Dissertation'
# Re-running this cell must not stack duplicate entries on sys.path.
if path_to_root not in sys.path:
    sys.path.append(path_to_root)
print("Drive mounted.")

In [None]:
import torch
from reprshift.learning.algorithms import ERM

# Hyper-parameters handed to ERM through its `hparams` argument.
hparams = dict(
    last_layer_dropout=0.2,
    lr=1e-3,
    weight_decay=0.0001,
    num_warmup_steps=0,
    num_training_steps=30001,
)

In [None]:
import os

# Build three ERM models and persist each one's initial state dict so that
# later experiments can all start from exactly the same weights.
psuedo_epoch = 0
algorithm_name = '00_randominit'
models_path = os.path.join(path_to_root, 'models')

initial_states = {}
for random_seed in range(3):
    # Seed torch's global RNG so that the checkpoint stored under "seed{n}" can
    # be regenerated. NOTE(review): presumably ERM draws its random classifier
    # initialisation from torch's global RNG - confirm.
    torch.manual_seed(random_seed)
    initial_states[random_seed] = ERM(num_classes=3, num_attributes=2, hparams=hparams)

for random_seed in initial_states:
    algorithm_state_dict = initial_states[random_seed].state_dict()
    algorithm_state_dict_PATH = os.path.join(
        models_path, algorithm_name, f'seed{random_seed}', f'sd_epoch{psuedo_epoch}.pth')
    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(algorithm_state_dict_PATH), exist_ok=True)
    print(algorithm_state_dict_PATH)
    torch.save(algorithm_state_dict, algorithm_state_dict_PATH)

## Check Classifiers are Different

In [None]:
# Collect the classification-head parameters of every model, in the order the
# models are stored in `initial_states`.
classifier_weight = [
    model.state_dict()['network.1.classifier.weight']
    for model in initial_states.values()
]
classifier_bias = [
    model.state_dict()['network.1.classifier.bias']
    for model in initial_states.values()
]

In [None]:
import numpy as np
# res[i, j] = number of classifier-weight entries that differ between model i
# and model j.
num_models = len(classifier_weight)
res = np.zeros([num_models, num_models])

for i, weight_i in enumerate(classifier_weight):
    for j, weight_j in enumerate(classifier_weight):
        res[i, j] = (weight_i != weight_j).sum().item()

### Expected: ###
# Zeros on the diagonal and a non-zero count everywhere else: every
# off-diagonal entry should equal classifier_weight[0].numel(), since
# independently initialised weights should differ element-wise.
# NOTE(review): this cell originally recorded 1536 per pair, while the summary
# at the end of this section records 46080.0 = 6 x 7680 for the same tensor.
# The two figures disagree, so re-run and record the actual value.
off_diagonal = ~np.eye(num_models, dtype=bool)
assert (np.diag(res) == 0).all(), "a classifier weight differs from itself"
assert (res[off_diagonal] > 0).all(), "two models share an identical classifier weight"
res

In [None]:
import numpy as np
# res[i, j] = number of classifier-bias entries that differ between model i
# and model j.
num_models = len(classifier_bias)
res = np.zeros([num_models, num_models])

for i, bias_i in enumerate(classifier_bias):
    for j, bias_j in enumerate(classifier_bias):
        res[i, j] = (bias_i != bias_j).sum().item()

### Expected: ###
# Zeros on the diagonal and a non-zero count everywhere else: every
# off-diagonal entry should equal classifier_bias[0].numel().
# NOTE(review): this cell originally recorded 2 per pair, while the summary at
# the end of this section records 60.0 = 6 x 10 for the same tensor and the
# models are built with num_classes=3. Re-run and record the actual value.
off_diagonal = ~np.eye(num_models, dtype=bool)
assert (np.diag(res) == 0).all(), "a classifier bias differs from itself"
assert (res[off_diagonal] > 0).all(), "two models share an identical classifier bias"
res

## Check Transformer Layers are the Same

In [None]:
def layers_f(layer_name):
    """Return the entry stored under ``layer_name`` in each model's state dict.

    The list follows the iteration order of ``initial_states``.
    """
    return [model.state_dict()[layer_name] for model in initial_states.values()]

def compare_layer(layer_name):
    """Count element-wise differences of one layer between every pair of models.

    Args:
        layer_name: key of the layer in the models' state dicts.

    Returns:
        A square numpy array whose entry ``[i][j]`` is the number of elements
        of ``layer_name`` that differ between model ``i`` and model ``j``.
        The size follows the number of tensors returned by ``layers_f``.
    """
    layers = layers_f(layer_name)
    num_models = len(layers)
    res = np.zeros([num_models, num_models])
    for i, layer_i in enumerate(layers):
        for j, layer_j in enumerate(layers):
            res[i][j] = (layer_i != layer_j).sum().item()
    return res

# Total number of differing elements per layer, summed over all model pairs.
# The layer names are taken from the first model explicitly, so this cell does
# not rely on a loop index left behind by another cell.
reference_model = next(iter(initial_states.values()))
different_values = [
    compare_layer(layer_name).sum()
    for layer_name in reference_model.state_dict()
]

In [None]:
### Expected: All 0 except the last 2 elements ###
# Each entry is the sum of a 3x3 pairwise-difference matrix, i.e. the number of
# differing elements counted once per ordered pair of distinct models (6 pairs).
# NOTE(review): the values originally recorded here were (46080.0, 60.0), which
# is 6 x 7680 and 6 x 10. That does not match the per-pair counts recorded in
# the classifier checks above (1536 and 2) - confirm against a fresh run.
different_values

# CivilComments: Randomly Initialize 3 Models

The same three random initializations are reused for every model trained in the thesis. All results are reported as the mean and standard deviation over these 3 runs.

In [None]:
import sys
from google.colab import drive
# Mount Google Drive and make the project root importable so that the
# project-local `reprshift` package can be imported in later cells.
drive.mount('/content/drive', force_remount=True)
path_to_root = '/content/drive/My Drive/Colab Notebooks/BatuEl_Dissertation'
# Re-running this cell must not stack duplicate entries on sys.path.
if path_to_root not in sys.path:
    sys.path.append(path_to_root)
print("Drive mounted.")

In [None]:
import torch
from reprshift.learning.algorithms import ERM

# Hyper-parameters handed to ERM through its `hparams` argument.
hparams = dict(
    last_layer_dropout=0.2,
    lr=1e-3,
    weight_decay=0.0001,
    num_warmup_steps=0,
    num_training_steps=30001,
)

In [None]:
import os

# Build three ERM models and persist each one's initial state dict so that
# later experiments can all start from exactly the same weights.
psuedo_epoch = 0
algorithm_name = '00_randominit'
models_path = os.path.join(path_to_root, 'models_civilcomments')

initial_states = {}
for random_seed in range(3):
    # Seed torch's global RNG so that the checkpoint stored under "seed{n}" can
    # be regenerated. NOTE(review): presumably ERM draws its random classifier
    # initialisation from torch's global RNG - confirm.
    torch.manual_seed(random_seed)
    initial_states[random_seed] = ERM(num_classes=2, num_attributes=8, hparams=hparams)

for random_seed in initial_states:
    algorithm_state_dict = initial_states[random_seed].state_dict()
    algorithm_state_dict_PATH = os.path.join(
        models_path, algorithm_name, f'seed{random_seed}', f'sd_epoch{psuedo_epoch}.pth')
    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(algorithm_state_dict_PATH), exist_ok=True)
    print(algorithm_state_dict_PATH)
    torch.save(algorithm_state_dict, algorithm_state_dict_PATH)

## Check Classifiers are Different

In [None]:
# Collect the classification-head parameters of every model, in the order the
# models are stored in `initial_states`.
classifier_weight = [
    model.state_dict()['network.1.classifier.weight']
    for model in initial_states.values()
]
classifier_bias = [
    model.state_dict()['network.1.classifier.bias']
    for model in initial_states.values()
]

In [None]:
import numpy as np
# res[i, j] = number of classifier-weight entries that differ between model i
# and model j.
res = np.zeros([3, 3])

for i in range(3):
    row_weight = classifier_weight[i]
    for j in range(3):
        res[i, j] = (row_weight != classifier_weight[j]).sum().item()
res

### Expected: ###
# array([[   0., 1536., 1536.],
#        [1536.,    0., 1536.],
#        [1536., 1536.,    0.]])

In [None]:
import numpy as np
# res[i, j] = number of classifier-bias entries that differ between model i
# and model j.
res = np.zeros([3, 3])

for i in range(3):
    row_bias = classifier_bias[i]
    for j in range(3):
        res[i, j] = (row_bias != classifier_bias[j]).sum().item()
res

### Expected: ###
# array([[0., 2., 2.],
#        [2., 0., 2.],
#        [2., 2., 0.]])

## Check Transformer Layers are the Same

In [None]:
def layers_f(layer_name):
    """Return the entry stored under ``layer_name`` in each model's state dict.

    The list follows the iteration order of ``initial_states``.
    """
    return [model.state_dict()[layer_name] for model in initial_states.values()]

def compare_layer(layer_name):
    """Count element-wise differences of one layer between every pair of models.

    Args:
        layer_name: key of the layer in the models' state dicts.

    Returns:
        A square numpy array whose entry ``[i][j]`` is the number of elements
        of ``layer_name`` that differ between model ``i`` and model ``j``.
        The size follows the number of tensors returned by ``layers_f``.
    """
    layers = layers_f(layer_name)
    num_models = len(layers)
    res = np.zeros([num_models, num_models])
    for i, layer_i in enumerate(layers):
        for j, layer_j in enumerate(layers):
            res[i][j] = (layer_i != layer_j).sum().item()
    return res

# Total number of differing elements per layer, summed over all model pairs.
# The layer names are taken from the first model explicitly, so this cell does
# not rely on a loop index left behind by another cell.
reference_model = next(iter(initial_states.values()))
different_values = [
    compare_layer(layer_name).sum()
    for layer_name in reference_model.state_dict()
]

In [None]:
### Expected: All 0 except the last 2 elements (9216.0, 12.0) ###
# Each entry is the sum of a 3x3 pairwise-difference matrix, i.e. the number of
# differing elements counted once per ordered pair of distinct models (6 pairs).
# 9216 = 6 * 1536 and 12 = 6 * 2, consistent with the per-pair counts recorded
# in the classifier weight and bias checks above.
different_values