In [1]:
DEVICES = "CPU"  # Possible values: "CPU", "GPU:0", "GPU:1", "GPUS"
import os
import logging

# Quiet TensorFlow before it is imported: hide its C++ log output and raise
# the threshold of its Python logger to FATAL.
# os.environ['TF_CUDNN_DETERMINISTIC']='1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
logging.getLogger('tensorflow').setLevel(logging.FATAL)

# Value of CUDA_VISIBLE_DEVICES for each single-device setting. Any other
# setting (e.g. "GPUS") leaves the environment variable untouched.
_cuda_visible = {'CPU': '-1', 'GPU:0': '0', 'GPU:1': '1'}.get(DEVICES)
if _cuda_visible is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = _cuda_visible

import tensorflow as tf

from dataset.dataset_loader import NerProcessor, FewNERDProcessor, create_tf_dataset_for_client, split_to_tf_datasets, batch_features
from utils.fl_utils import *
from models.model import build_BertNer, MaskedSparseCategoricalCrossentropy
from tokenization import FullTokenizer


from tqdm.notebook import tqdm
import json

import numpy as np

import nest_asyncio
nest_asyncio.apply()

import tensorflow_federated as tff

# Choose the logical devices the simulated clients run on, based on DEVICES.
if DEVICES == 'CPU':
    cl_tf_devices = tf.config.list_logical_devices('CPU')
else:
    _gpu_devices = tf.config.list_logical_devices('GPU')
    # "GPUS" hands every visible GPU to the clients; any other setting
    # restricts them to the first visible GPU.
    cl_tf_devices = _gpu_devices if DEVICES == 'GPUS' else _gpu_devices[:1]

# The server side is always placed on the first CPU device.
_server_device = tf.config.list_logical_devices('CPU')[0]
tff.backends.native.set_local_execution_context(
    server_tf_device=_server_device,
    client_tf_devices=cl_tf_devices,
)


# Directory names of the available pretrained checkpoints.
# (The L/H/A parts presumably encode layers / hidden size / attention heads,
# as in the BERT miniatures naming scheme -- confirm against the checkpoints.)

# Hidden size 128
TINY = 'uncased_L-2_H-128_A-2'
TINY_4_128 = 'uncased_L-4_H-128_A-2'
TINY_6_128 = 'uncased_L-6_H-128_A-2'
TINY_8_128 = 'uncased_L-8_H-128_A-2'
TINY_12_128 = 'uncased_L-12_H-128_A-2'

# Two-layer variants with a wider hidden size
TINY_2_256 = 'uncased_L-2_H-256_A-4'
TINY_2_512 = 'uncased_L-2_H-512_A-8'

# Larger named sizes
MINI = 'uncased_L-4_H-256_A-4'
SMALL = 'uncased_L-4_H-512_A-8'
MEDIUM = 'uncased_L-8_H-512_A-8'
BASE = 'uncased_L-12_H-768_A-12'


# Run configuration: checkpoint directory, sequence length, training batch
# size, and whether to start from the pretrained checkpoint weights.
MODEL = os.path.join("models", TINY_2_256)
SEQ_LEN = 128
BATCH_SIZE = 32
PRETRAINED = True


# Dataset selection. `datas` is the short dataset tag used later in the
# results file name and must match the processor chosen here.
datas = "conll"
processor = NerProcessor('dataset/conll')
# datas = "few_nerd"
# processor = FewNERDProcessor('dataset/few_nerd')

# The tokenizer reads the vocabulary shipped inside the checkpoint directory.
_vocab_file = os.path.join(MODEL, "vocab.txt")
tokenizer = FullTokenizer(_vocab_file, True)

# Note: the evaluation features are built from the processor's *test* split.
train_features = processor.get_train_as_features(SEQ_LEN, tokenizer)
eval_features = processor.get_test_as_features(SEQ_LEN, tokenizer)

# Components of the results file name: `modelSize` (short tag of the selected
# checkpoint) and `pretr` (whether pretrained weights are used). The dataset
# tag lives in `datas`.

# Short tag for every known checkpoint, keyed by the checkpoint directory
# name rather than by the full path: MODEL is built with os.path.join, so a
# key with a hard-coded "/" would not match on platforms whose path separator
# is different.
_MODEL_SIZE_TAGS = {
    'uncased_L-2_H-128_A-2': "tiny",
    'uncased_L-12_H-128_A-2': "tiny_12_128",
    'uncased_L-8_H-128_A-2': "tiny_8_128",
    'uncased_L-6_H-128_A-2': "tiny_6_128",
    'uncased_L-2_H-512_A-8': "tiny_2_512",
    'uncased_L-4_H-128_A-2': "tiny_4_128",
    'uncased_L-2_H-256_A-4': "tiny_2_256",
    'uncased_L-4_H-256_A-4': "mini",
    'uncased_L-4_H-512_A-8': "small",
    'uncased_L-8_H-512_A-8': "medium",
    'uncased_L-12_H-768_A-12': "base",
}

# normpath drops a trailing separator so basename always yields the directory
# name. An unknown checkpoint still raises KeyError, as before.
modelSize = _MODEL_SIZE_TAGS[os.path.basename(os.path.normpath(MODEL))]

# Uses truthiness, like the `if PRETRAINED:` check that decides whether the
# checkpoint is actually loaded, so the tag can never end up empty.
pretr = "pretrained" if PRETRAINED else "nontrained"


def model_fn(m_name, num_labels, seq_len, input_spec):
    """Build a fresh BertNer Keras model and wrap it for use with TFF.

    Args:
        m_name: Model identifier forwarded to ``build_BertNer``.
        num_labels: Number of labels, forwarded to ``build_BertNer``.
        seq_len: Sequence length, forwarded to ``build_BertNer``.
        input_spec: Input specification handed to
            ``tff.learning.from_keras_model``.

    Returns:
        Whatever ``tff.learning.from_keras_model`` returns for the new model,
        using a ``MaskedSparseCategoricalCrossentropy`` loss.
    """
    keras_model = build_BertNer(m_name, num_labels, seq_len)
    wrap_options = dict(
        input_spec=input_spec,
        loss=MaskedSparseCategoricalCrossentropy(),
    )
    return tff.learning.from_keras_model(keras_model, **wrap_options)

def eval_model(model, eval_data, do_print=True):
    """Run ``evaluate`` on ``model`` using the global ``processor``'s labels.

    Args:
        model: Model to evaluate; passed through to ``evaluate`` unchanged.
        eval_data: Evaluation data; passed through to ``evaluate`` unchanged.
        do_print: Forwarded to ``evaluate`` as its ``do_print`` keyword.

    Returns:
        The value returned by ``evaluate``.
    """
    label_map = processor.get_label_map()
    # Indices of the 'O', '[SEP]' and '[PAD]' entries, in the positional order
    # that ``evaluate`` receives them.
    special_ids = [processor.token_ind(name) for name in ('O', '[SEP]', '[PAD]')]
    return evaluate(model, eval_data, label_map, *special_ids, do_print=do_print)
    
# eval_features = eval_features[:5_000]  # Only the first few thousand for quick testing; remove for the final experiments
# Batch the evaluation features once up front; the result is reused by every
# evaluation pass during training.
eval_data_batched = batch_features(
    eval_features,
    processor.get_labels(),
    SEQ_LEN,
    tokenizer,
    batch_size=64,
)


In [None]:
# --- Federated training ------------------------------------------------------
# The training data is split into `num_clients` client datasets;
# `num_train_clients` of them are sampled (without replacement) every round.
num_clients, num_train_clients = 100, 10
assert 0 < num_train_clients <= num_clients

NUM_ROUNDS = 500   # total number of federated rounds
CLIENT_LR = 5e-3   # Adam learning rate for the client optimizer
SERVER_LR = 5e-3   # Adam learning rate for the server optimizer

# Evaluate once per "cumulative epoch", i.e. once as many client datasets have
# been sampled as exist in total. With the current 100/10 split this is every
# 10 rounds, the same interval as the previous `% num_train_clients` check,
# but it stays one epoch when the two numbers are changed.
eval_every = max(1, num_clients // num_train_clients)

dataset_list = split_to_tf_datasets(train_features, num_clients, batch_size=BATCH_SIZE)
trainer = tff.learning.build_federated_averaging_process(
    model_fn=lambda: model_fn(MODEL, processor.label_len(), SEQ_LEN, dataset_list[0].element_spec),
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(CLIENT_LR),
    server_optimizer_fn=lambda: tf.keras.optimizers.Adam(SERVER_LR),
    use_experimental_simulation_loop=True
)

state = trainer.initialize()
if PRETRAINED:
    # Presumably copies the pretrained checkpoint weights found under MODEL
    # into the initial server state -- see utils.fl_utils.state_from_checkpoint.
    state = state_from_checkpoint(state, build_BertNer(MODEL, processor.label_len(), SEQ_LEN), MODEL)

res_list = []   # one evaluation result dict per evaluated round
examples = 0    # `num_examples` reported for the most recent round
for rnd_ind in range(1, NUM_ROUNDS + 1):
    # Sample client *indices* instead of handing the Dataset objects to NumPy:
    # this avoids coercing tf.data.Dataset instances into an object array and
    # draws exactly the same selection from the RNG.
    client_ids = np.random.choice(len(dataset_list), num_train_clients, replace=False)
    train_data = [dataset_list[i] for i in client_ids]

    state, metrics = trainer.next(state, train_data)
    examples = metrics['stat']['num_examples']
    print("Round", rnd_ind, "Loss:", metrics['train']['loss'], "Examples:", examples)

    # No need to measure accuracy every round; once per cumulative epoch is enough.
    if rnd_ind % eval_every == 0:
        state_model = state_to_model(state, build_BertNer(MODEL, processor.label_len(), SEQ_LEN))
        res = eval_model(state_model, eval_data_batched, do_print=True)
        res['Round'] = rnd_ind
        res['Examples'] = examples
        res_list.append(res)

class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values into plain Python values.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps`` so that
    results containing NumPy scalars (e.g. ``np.int64``, ``np.float32``,
    ``np.bool_``) or arrays can be written without a ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable replacement for ``obj``.

        Args:
            obj: A value the standard encoder could not serialise.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
            for NumPy floats, and a (nested) ``list`` for NumPy arrays.

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# Persist the evaluation results together with the run configuration.
results_dir = "log"
# Create the output directory first: without it, open() would fail only after
# the whole training run has finished, and the results would be lost.
os.makedirs(results_dir, exist_ok=True)
results_path = os.path.join(results_dir, "results-" + modelSize + "-" + pretr + "-" + datas + ".json")

with open(results_path, "w") as outfile:
    json.dump({'results': res_list, 'model': MODEL, 'seq_len': SEQ_LEN,
               'pretrained': PRETRAINED, 'batch_size': BATCH_SIZE}, outfile, indent=None, cls=NpEncoder)

Round 1 Loss: 1.4996082 Examples: 1280
Round 2 Loss: 1.3307077 Examples: 1280
Round 3 Loss: 1.4353948 Examples: 1280
Round 4 Loss: 1.32332 Examples: 1280
Round 5 Loss: 1.5651673 Examples: 1280
Round 6 Loss: 0.89397067 Examples: 1280
Round 7 Loss: 1.0466458 Examples: 1280
Round 8 Loss: 1.0913349 Examples: 1280
Round 9 Loss: 0.84545964 Examples: 1280
Round 10 Loss: 0.9638852 Examples: 1280


Evaluating:   0%|          | 0/54 [00:00<?, ?it/s]

              precision    recall  f1-score   support

         LOC     0.0000    0.0000    0.0000      1668
        MISC     0.0000    0.0000    0.0000       702
         ORG     0.0000    0.0000    0.0000      1661
         PER     0.0000    0.0000    0.0000      1617

   micro avg     0.0000    0.0000    0.0000      5648
   macro avg     0.0000    0.0000    0.0000      5648
weighted avg     0.0000    0.0000    0.0000      5648

Round 11 Loss: 0.69882107 Examples: 1280
Round 12 Loss: 0.58944976 Examples: 1280
Round 13 Loss: 0.6927341 Examples: 1280
Round 14 Loss: 0.5727343 Examples: 1280
Round 15 Loss: 0.59565437 Examples: 1280
Round 16 Loss: 0.5095479 Examples: 1280
Round 17 Loss: 0.56617284 Examples: 1280
Round 18 Loss: 0.5243091 Examples: 1280
Round 19 Loss: 0.54258883 Examples: 1280
Round 20 Loss: 0.5028513 Examples: 1280


Evaluating:   0%|          | 0/54 [00:00<?, ?it/s]

              precision    recall  f1-score   support

         LOC     0.1983    0.8082    0.3184      1668
        MISC     0.0000    0.0000    0.0000       702
         ORG     0.0000    0.0000    0.0000      1661
         PER     0.0000    0.0000    0.0000      1617

   micro avg     0.1983    0.2387    0.2166      5648
   macro avg     0.0496    0.2020    0.0796      5648
weighted avg     0.0586    0.2387    0.0940      5648

Round 21 Loss: 0.46145573 Examples: 1280
Round 22 Loss: 0.53074324 Examples: 1280
Round 23 Loss: 0.48726448 Examples: 1280
Round 24 Loss: 0.5203672 Examples: 1280
Round 25 Loss: 0.51090986 Examples: 1280
Round 26 Loss: 0.52095044 Examples: 1280
Round 27 Loss: 0.5098441 Examples: 1280
Round 28 Loss: 0.5536489 Examples: 1280
Round 29 Loss: 0.44435087 Examples: 1280
Round 30 Loss: 0.48449057 Examples: 1280


Evaluating:   0%|          | 0/54 [00:00<?, ?it/s]

              precision    recall  f1-score   support

         LOC     0.3352    0.7458    0.4625      1668
        MISC     0.0000    0.0000    0.0000       702
         ORG     0.4839    0.0542    0.0975      1661
         PER     0.0960    0.2047    0.1307      1617

   micro avg     0.2267    0.2948    0.2563      5648
   macro avg     0.2288    0.2512    0.1727      5648
weighted avg     0.2688    0.2948    0.2027      5648

Round 31 Loss: 0.45266733 Examples: 1280
Round 32 Loss: 0.44001135 Examples: 1280
Round 33 Loss: 0.5300107 Examples: 1280
Round 34 Loss: 0.42653495 Examples: 1280
Round 35 Loss: 0.5099265 Examples: 1280
Round 36 Loss: 0.45687732 Examples: 1280
Round 37 Loss: 0.4717235 Examples: 1280
Round 38 Loss: 0.47429806 Examples: 1280
Round 39 Loss: 0.41024145 Examples: 1280
Round 40 Loss: 0.40063757 Examples: 1280


Evaluating:   0%|          | 0/54 [00:00<?, ?it/s]

              precision    recall  f1-score   support

         LOC     0.4890    0.7170    0.5814      1668
        MISC     0.2991    0.1368    0.1877       702
         ORG     0.5569    0.0825    0.1437      1661
         PER     0.1054    0.2158    0.1416      1617

   micro avg     0.2812    0.3148    0.2970      5648
   macro avg     0.3626    0.2880    0.2636      5648
weighted avg     0.3755    0.3148    0.2778      5648

Round 41 Loss: 0.40963182 Examples: 1280
Round 42 Loss: 0.45781937 Examples: 1280
Round 43 Loss: 0.38792944 Examples: 1280
Round 44 Loss: 0.38852265 Examples: 1280
Round 45 Loss: 0.41692942 Examples: 1280
Round 46 Loss: 0.40947485 Examples: 1280
Round 47 Loss: 0.36167324 Examples: 1280
Round 48 Loss: 0.41323358 Examples: 1280
Round 49 Loss: 0.40225095 Examples: 1280
Round 50 Loss: 0.39722538 Examples: 1280


Evaluating:   0%|          | 0/54 [00:00<?, ?it/s]

              precision    recall  f1-score   support

         LOC     0.6017    0.6757    0.6365      1668
        MISC     0.2972    0.4615    0.3616       702
         ORG     0.4474    0.1818    0.2586      1661
         PER     0.1124    0.2233    0.1495      1617

   micro avg     0.3086    0.3743    0.3383      5648
   macro avg     0.3647    0.3856    0.3516      5648
weighted avg     0.3784    0.3743    0.3518      5648

Round 51 Loss: 0.37695476 Examples: 1280
Round 52 Loss: 0.35939303 Examples: 1280
Round 53 Loss: 0.3894759 Examples: 1280
Round 54 Loss: 0.40791196 Examples: 1280
Round 55 Loss: 0.36914542 Examples: 1280
Round 56 Loss: 0.33739296 Examples: 1280
Round 57 Loss: 0.3622324 Examples: 1280
Round 58 Loss: 0.37764674 Examples: 1280
Round 59 Loss: 0.30062702 Examples: 1280
Round 60 Loss: 0.33734626 Examples: 1280


Evaluating:   0%|          | 0/54 [00:00<?, ?it/s]

              precision    recall  f1-score   support

         LOC     0.7062    0.6673    0.6862      1668
        MISC     0.3641    0.5057    0.4234       702
         ORG     0.3872    0.3739    0.3804      1661
         PER     0.1146    0.1744    0.1383      1617

   micro avg     0.3584    0.4198    0.3867      5648
   macro avg     0.3930    0.4303    0.4071      5648
weighted avg     0.4005    0.4198    0.4067      5648

Round 61 Loss: 0.3502608 Examples: 1280
Round 62 Loss: 0.34787083 Examples: 1280
Round 63 Loss: 0.31114107 Examples: 1280
Round 64 Loss: 0.36059734 Examples: 1280
Round 65 Loss: 0.33991688 Examples: 1280
Round 66 Loss: 0.32112038 Examples: 1280
