In [1]:
import json
import numpy as np

In [2]:
np.set_printoptions(precision=20)

## Main functions

In [3]:
def getResultsFromFile(filename=""):
    """Load the macro-average F1 scores stored in one results log.

    Reads ``BERT-NER-TF2/log/<filename>.json`` (relative to the current
    working directory) and collects ``entry['macro avg']['f1-score']`` for
    every entry of the top-level ``'results'`` list, in file order.

    Args:
        filename: Log file name without the ``.json`` extension.

    Returns:
        list: One macro-average F1 value per entry of ``data['results']``.

    Raises:
        FileNotFoundError: If the log file does not exist.
        KeyError: If an expected key is missing from the JSON structure.
    """
    # The context manager closes the handle even when parsing or a key
    # lookup fails part-way through.
    with open('BERT-NER-TF2/log/' + filename + '.json') as file:
        data = json.load(file)
    return [entry['macro avg']['f1-score'] for entry in data['results']]

In [4]:
def getAll3Results(model="", dataset=""):
    """Collect the F1 lists of the three numbered logs for one model/dataset.

    Log ``i`` (1, 2, 3) is read through ``getResultsFromFile`` from the name
    ``results-<model>-pretrained-<dataset>-<i>``. The three logs are
    presumably independent training runs -- confirm with the log producer.

    Args:
        model: Model identifier used in the log file name.
        dataset: Dataset identifier used in the log file name.

    Returns:
        list: Three lists of F1 values, ordered by log number 1..3.
    """
    prefix = 'results-' + model + '-pretrained-' + dataset + '-'
    return [getResultsFromFile(prefix + str(run)) for run in (1, 2, 3)]

In [5]:
def getAverageResult(model="", dataset=""):
    """Average the three logs of a model/dataset pair position by position.

    Args:
        model: Model identifier passed on to ``getAll3Results``.
        dataset: Dataset identifier passed on to ``getAll3Results``.

    Returns:
        numpy.ndarray: Element-wise mean over the three result lists
        (mean taken along axis 0 of the stacked lists).
    """
    stacked = np.array(getAll3Results(model, dataset))
    return stacked.mean(axis=0)

In [6]:
def getModelResults(model=""):
    """Return the averaged results of one model for both datasets.

    Args:
        model: Model identifier passed on to ``getAverageResult``.

    Returns:
        list: Two single-key dicts, ``{"conll": avg}`` followed by
        ``{"few_nerd": avg}``, where ``avg`` is the averaged result array.
    """
    return [
        {dataset: getAverageResult(model, dataset)}
        for dataset in ("conll", "few_nerd")
    ]

In [7]:
def getResultsObject():
    """Build the mapping from every model name to its per-dataset averages.

    Iterates over the module-level ``models`` list.

    Returns:
        dict: ``{model_name: getModelResults(model_name)}`` for each entry
        of ``models``, in list order.
    """
    return {model: getModelResults(model) for model in models}

In [8]:
# Identifiers of all evaluated models, four per row.
# NOTE(review): names look like <label>_<layers>_<hidden size> -- confirm.
models = [
    name
    for row in (
        ("tiny_2_128", "model_2_256", "model_2_512", "model_2_768"),
        ("model_4_128", "mini_4_256", "small_4_512", "model_4_768"),
        ("model_6_128", "model_6_256", "model_6_512", "model_6_768"),
        ("model_8_128", "model_8_256", "medium_8_512", "model_8_768"),
        ("model_10_128", "model_10_256", "model_10_512", "model_10_768"),
        ("model_12_128", "model_12_256", "model_12_512", "base_12_768"),
    )
    for name in row
]

print(len(models))
print(models)

24
['tiny_2_128', 'model_2_256', 'model_2_512', 'model_2_768', 'model_4_128', 'mini_4_256', 'small_4_512', 'model_4_768', 'model_6_128', 'model_6_256', 'model_6_512', 'model_6_768', 'model_8_128', 'model_8_256', 'medium_8_512', 'model_8_768', 'model_10_128', 'model_10_256', 'model_10_512', 'model_10_768', 'model_12_128', 'model_12_256', 'model_12_512', 'base_12_768']


In [9]:
def getStandardDeviation(model="", dataset=""):
    """Spread of the three logs at the position where their average peaks.

    The three result lists are averaged element-wise, the first position of
    the maximum average is located, and the standard deviation of the three
    individual values at that position is returned.

    Args:
        model: Model identifier passed on to ``getAll3Results``.
        dataset: Dataset identifier passed on to ``getAll3Results``.

    Returns:
        numpy.float64: Standard deviation multiplied by 100 and rounded to
        two decimals. ``np.std`` is used with its default ``ddof=0``, i.e.
        the population standard deviation.

    Raises:
        ValueError: If the result lists are empty.
    """
    # Load the three logs once and reuse them for both the average and the
    # per-log lookup; they were previously read from disk twice.
    all3Results = getAll3Results(model, dataset)
    averageResults = np.mean(np.array(all3Results), axis=0)
    # np.argmax returns the first index of the maximum, which is what the
    # former np.where(avg == max(avg))[0][0] lookup selected.
    maxAveragePosition = np.argmax(averageResults)
    valuesFromMaxAverage = [
        resultsArr[maxAveragePosition] for resultsArr in all3Results
    ]
    sd = np.std(np.array(valuesFromMaxAverage), axis=0)
    return np.round(sd*100, 2)

# print(getStandardDeviation("model_8_256", "few_nerd"))

def getAllStandardDeviations():
    """Compute ``getStandardDeviation`` for every model/dataset combination.

    Iterates over the module-level ``models`` list and, for each model, over
    the datasets ``"conll"`` and ``"few_nerd"`` in that order.

    Returns:
        list: Single-key dicts ``{"<model>-<dataset>": value}``, one per
        combination.
    """
    return [
        {model + "-" + dataset: getStandardDeviation(model, dataset)}
        for model in models
        for dataset in ("conll", "few_nerd")
    ]

# Show the standard deviation (scaled by 100, two decimals) for every
# model/dataset combination.
print(getAllStandardDeviations())

[{'tiny_2_128-conll': 0.24}, {'tiny_2_128-few_nerd': 0.13}, {'model_2_256-conll': 0.16}, {'model_2_256-few_nerd': 0.12}, {'model_2_512-conll': 0.2}, {'model_2_512-few_nerd': 0.14}, {'model_2_768-conll': 0.09}, {'model_2_768-few_nerd': 0.04}, {'model_4_128-conll': 0.88}, {'model_4_128-few_nerd': 0.05}, {'mini_4_256-conll': 0.08}, {'mini_4_256-few_nerd': 0.02}, {'small_4_512-conll': 0.14}, {'small_4_512-few_nerd': 0.16}, {'model_4_768-conll': 0.26}, {'model_4_768-few_nerd': 0.09}, {'model_6_128-conll': 0.29}, {'model_6_128-few_nerd': 0.14}, {'model_6_256-conll': 0.22}, {'model_6_256-few_nerd': 0.04}, {'model_6_512-conll': 0.02}, {'model_6_512-few_nerd': 0.12}, {'model_6_768-conll': 0.07}, {'model_6_768-few_nerd': 0.08}, {'model_8_128-conll': 0.38}, {'model_8_128-few_nerd': 0.18}, {'model_8_256-conll': 0.36}, {'model_8_256-few_nerd': 0.07}, {'medium_8_512-conll': 0.26}, {'medium_8_512-few_nerd': 0.16}, {'model_8_768-conll': 0.23}, {'model_8_768-few_nerd': 0.08}, {'model_10_128-conll': 0.4

## Save results to file

In [10]:
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types."""

    def default(self, obj):
        """Convert NumPy objects that ``json`` cannot serialize on its own.

        Args:
            obj: The object the standard encoder could not handle.

        Returns:
            ``bool`` for ``np.bool_``, ``int`` for NumPy integers, ``float``
            for NumPy floats, and a (nested) list for ``np.ndarray``.

        Raises:
            TypeError: For any other unsupported type (raised by the base
                class implementation).
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own
        # branch; without it boolean scalars raised TypeError.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [11]:
# Save and print all results

jsonResults = json.dumps(getResultsObject(), cls=NumpyEncoder)
print(jsonResults)

# jsonResults is already serialized JSON text, so it is written verbatim.
# Passing it through json.dump again would store a single escaped JSON
# string in the file instead of the results object.
with open("stored-results.json", 'w') as f:
    f.write(jsonResults)

{"tiny_2_128": [{"conll": [0.0, 0.03701322009554698, 0.10540041188202605, 0.2368168029250796, 0.34864553360099554, 0.38971039005916247, 0.424408724137463, 0.4335410740694579, 0.4544490114027578, 0.45772261685848825, 0.48056017872166884, 0.4710768462548682, 0.4930067763613286, 0.48450018569788816, 0.5008209096930075, 0.49245975398454833, 0.504120103224294, 0.4991412253938836, 0.5020058215007901, 0.5135512556612746, 0.5020023684175855, 0.5057589815875377, 0.5089983210751584, 0.5034084127383694, 0.505486959668084, 0.5088532152900082, 0.5059874703970956, 0.49721578077577755, 0.5110138677324737, 0.5112551946331931, 0.5069014141904652, 0.5143312014884642, 0.5076172894765302, 0.5137782252202295, 0.5098844582896837, 0.5107387896921242, 0.5108212533426333, 0.5011541742260077, 0.511275634729203, 0.5198297251422094, 0.5046314353909166, 0.5149136471777152, 0.5133419373277502, 0.5086148640461668, 0.5124751758711844, 0.5059313895717751, 0.5182624289276203, 0.5165886534814688, 0.5119771277267133, 0.5

## Max results

In [12]:
def getmaxResultsforDataset(dataset=""):
    """Return the peak averaged result of every model on one dataset.

    Args:
        dataset: Dataset identifier passed on to ``getAverageResult``.

    Returns:
        list: For each entry of the module-level ``models`` list (in order),
        the maximum of its averaged result array.
    """
    return [max(getAverageResult(model, dataset)) for model in models]

In [13]:
# Peak averaged macro-average F1 per model on "conll", in `models` order.
print(getmaxResultsforDataset("conll"))

[0.520043509995936, 0.635668541455094, 0.7138305260769693, 0.7450437062094389, 0.5187565534825334, 0.6830046706149359, 0.7585083264608087, 0.788886319479836, 0.573004792469931, 0.6869852032449039, 0.765747636500779, 0.8103313966780087, 0.5912366915345196, 0.6955492355451266, 0.760050241808722, 0.8063368498420984, 0.5798861267748975, 0.7084680251020531, 0.7651247065650741, 0.8081917425287096, 0.5846665472678917, 0.6933049915341846, 0.7696725666176105, 0.8007050190913523]


In [14]:
# Peak averaged macro-average F1 per model on "few_nerd", in `models` order.
print(getmaxResultsforDataset("few_nerd"))

[0.22862275760393844, 0.31553121327227396, 0.3946979911860175, 0.44810142143792836, 0.25063018426713723, 0.35964934106319313, 0.45573638612439177, 0.4932787883473937, 0.25317027734010483, 0.3780051885227424, 0.4751098203516477, 0.5133335552849583, 0.25674762864311296, 0.39538996073836435, 0.47350100158834896, 0.5128383697913422, 0.26820525863533046, 0.4040083330163917, 0.4827507296778039, 0.5199050945788365, 0.28350647397782475, 0.40315644016663504, 0.4759414869535555, 0.511377391660151]


In [15]:
# Split the 24 "conll" maxima into 6 consecutive groups of 4, mirroring the
# four-per-row layout of the `models` list.
print(np.array_split(getmaxResultsforDataset("conll"), 6))

[array([0.520043509995936 , 0.635668541455094 , 0.7138305260769693,
       0.7450437062094389]), array([0.5187565534825334, 0.6830046706149359, 0.7585083264608087,
       0.788886319479836 ]), array([0.573004792469931 , 0.6869852032449039, 0.765747636500779 ,
       0.8103313966780087]), array([0.5912366915345196, 0.6955492355451266, 0.760050241808722 ,
       0.8063368498420984]), array([0.5798861267748975, 0.7084680251020531, 0.7651247065650741,
       0.8081917425287096]), array([0.5846665472678917, 0.6933049915341846, 0.7696725666176105,
       0.8007050190913523])]


In [16]:
# Split the 24 "few_nerd" maxima into 6 consecutive groups of 4, mirroring
# the four-per-row layout of the `models` list.
print(np.array_split(getmaxResultsforDataset("few_nerd"), 6))

[array([0.22862275760393844, 0.31553121327227396, 0.3946979911860175 ,
       0.44810142143792836]), array([0.25063018426713723, 0.35964934106319313, 0.45573638612439177,
       0.4932787883473937 ]), array([0.25317027734010483, 0.3780051885227424 , 0.4751098203516477 ,
       0.5133335552849583 ]), array([0.25674762864311296, 0.39538996073836435, 0.47350100158834896,
       0.5128383697913422 ]), array([0.26820525863533046, 0.4040083330163917 , 0.4827507296778039 ,
       0.5199050945788365 ]), array([0.28350647397782475, 0.40315644016663504, 0.4759414869535555 ,
       0.511377391660151  ])]


## Testing functions

In [17]:
# Smoke test for getResultsFromFile on the three tiny_2_128 / conll logs.
print(len(getResultsFromFile('results-tiny_2_128-pretrained-conll-1')))
print(getResultsFromFile('results-tiny_2_128-pretrained-conll-1'))

results1 = getResultsFromFile('results-tiny_2_128-pretrained-conll-1')
print(len(results1))
print(results1)
results2 = getResultsFromFile('results-tiny_2_128-pretrained-conll-2')
# Report each log's own length; len(results1) used to be printed for all
# three logs, which hid any length mismatch between them.
print(len(results2))
print(results2)
results3 = getResultsFromFile('results-tiny_2_128-pretrained-conll-3')
print(len(results3))
print(results3)

50
[0.0, 0.027895555533793887, 0.09625542912717641, 0.24260039246852724, 0.35828256944809955, 0.38368608137078347, 0.4299759114197932, 0.4384883136852656, 0.45972354022598266, 0.45848559836061253, 0.48616682097290076, 0.47665794127282024, 0.48906256026286554, 0.48900495986351117, 0.4915429481096896, 0.4862094704781087, 0.5124843582082306, 0.5030063015012907, 0.5030329570369881, 0.516106349565586, 0.5003790878985109, 0.5030795549386055, 0.5223640337527911, 0.5043849823980033, 0.5154717960626914, 0.5070815455196427, 0.5067306251175117, 0.5014683848944328, 0.5168345620925413, 0.5123635597809031, 0.5085061107819622, 0.51887274653835, 0.5011777293970267, 0.5012740732235161, 0.5168769873191643, 0.5142446795165989, 0.5128710088593892, 0.49846436086459966, 0.5236329772103371, 0.5210147868179074, 0.5169148504329162, 0.517315295683945, 0.5181870446764552, 0.5251659415824994, 0.5161440378157273, 0.5193429043667088, 0.5145823838125332, 0.5230124650883465, 0.5130420689835737, 0.5210198084482502]
50

In [18]:
# Smoke test for getAll3Results. The logs are loaded once and the result is
# reused for both prints instead of reading the three files twice.
all3TinyConll = getAll3Results("tiny_2_128", "conll")
print(len(all3TinyConll))
print(all3TinyConll)

3
[[0.0, 0.027895555533793887, 0.09625542912717641, 0.24260039246852724, 0.35828256944809955, 0.38368608137078347, 0.4299759114197932, 0.4384883136852656, 0.45972354022598266, 0.45848559836061253, 0.48616682097290076, 0.47665794127282024, 0.48906256026286554, 0.48900495986351117, 0.4915429481096896, 0.4862094704781087, 0.5124843582082306, 0.5030063015012907, 0.5030329570369881, 0.516106349565586, 0.5003790878985109, 0.5030795549386055, 0.5223640337527911, 0.5043849823980033, 0.5154717960626914, 0.5070815455196427, 0.5067306251175117, 0.5014683848944328, 0.5168345620925413, 0.5123635597809031, 0.5085061107819622, 0.51887274653835, 0.5011777293970267, 0.5012740732235161, 0.5168769873191643, 0.5142446795165989, 0.5128710088593892, 0.49846436086459966, 0.5236329772103371, 0.5210147868179074, 0.5169148504329162, 0.517315295683945, 0.5181870446764552, 0.5251659415824994, 0.5161440378157273, 0.5193429043667088, 0.5145823838125332, 0.5230124650883465, 0.5130420689835737, 0.5210198084482502], [

In [19]:
# Smoke test for getAverageResult on both datasets. Each average is computed
# once and reused for both prints instead of re-reading the logs.
avgTinyConll = getAverageResult("tiny_2_128", "conll")
print(len(avgTinyConll))
print(avgTinyConll)

avgTinyFewNerd = getAverageResult("tiny_2_128", "few_nerd")
print(len(avgTinyFewNerd))
print(avgTinyFewNerd)

50
[0.                  0.03701322009554698 0.10540041188202605
 0.2368168029250796  0.34864553360099554 0.38971039005916247
 0.424408724137463   0.4335410740694579  0.4544490114027578
 0.45772261685848825 0.48056017872166884 0.4710768462548682
 0.4930067763613286  0.48450018569788816 0.5008209096930075
 0.49245975398454833 0.504120103224294   0.4991412253938836
 0.5020058215007901  0.5135512556612746  0.5020023684175855
 0.5057589815875377  0.5089983210751584  0.5034084127383694
 0.505486959668084   0.5088532152900082  0.5059874703970956
 0.49721578077577755 0.5110138677324737  0.5112551946331931
 0.5069014141904652  0.5143312014884642  0.5076172894765302
 0.5137782252202295  0.5098844582896837  0.5107387896921242
 0.5108212533426333  0.5011541742260077  0.511275634729203
 0.5198297251422094  0.5046314353909166  0.5149136471777152
 0.5133419373277502  0.5086148640461668  0.5124751758711844
 0.5059313895717751  0.5182624289276203  0.5165886534814688
 0.5119771277267133  0.5200435099959

In [20]:
# Smoke test for getModelResults. Computed once and reused for both prints
# instead of re-reading the logs.
tinyModelResults = getModelResults("tiny_2_128")
print(len(tinyModelResults))
print(tinyModelResults)

2
[{'conll': array([0.                 , 0.03701322009554698, 0.10540041188202605,
       0.2368168029250796 , 0.34864553360099554, 0.38971039005916247,
       0.424408724137463  , 0.4335410740694579 , 0.4544490114027578 ,
       0.45772261685848825, 0.48056017872166884, 0.4710768462548682 ,
       0.4930067763613286 , 0.48450018569788816, 0.5008209096930075 ,
       0.49245975398454833, 0.504120103224294  , 0.4991412253938836 ,
       0.5020058215007901 , 0.5135512556612746 , 0.5020023684175855 ,
       0.5057589815875377 , 0.5089983210751584 , 0.5034084127383694 ,
       0.505486959668084  , 0.5088532152900082 , 0.5059874703970956 ,
       0.49721578077577755, 0.5110138677324737 , 0.5112551946331931 ,
       0.5069014141904652 , 0.5143312014884642 , 0.5076172894765302 ,
       0.5137782252202295 , 0.5098844582896837 , 0.5107387896921242 ,
       0.5108212533426333 , 0.5011541742260077 , 0.511275634729203  ,
       0.5198297251422094 , 0.5046314353909166 , 0.5149136471777152 ,
       

In [21]:
# Smoke test for getResultsObject. Building the object reads every log of
# every model, so it is built once and reused for both prints.
resultsObject = getResultsObject()
print(len(resultsObject))
print(resultsObject)

24
{'tiny_2_128': [{'conll': array([0.                 , 0.03701322009554698, 0.10540041188202605,
       0.2368168029250796 , 0.34864553360099554, 0.38971039005916247,
       0.424408724137463  , 0.4335410740694579 , 0.4544490114027578 ,
       0.45772261685848825, 0.48056017872166884, 0.4710768462548682 ,
       0.4930067763613286 , 0.48450018569788816, 0.5008209096930075 ,
       0.49245975398454833, 0.504120103224294  , 0.4991412253938836 ,
       0.5020058215007901 , 0.5135512556612746 , 0.5020023684175855 ,
       0.5057589815875377 , 0.5089983210751584 , 0.5034084127383694 ,
       0.505486959668084  , 0.5088532152900082 , 0.5059874703970956 ,
       0.49721578077577755, 0.5110138677324737 , 0.5112551946331931 ,
       0.5069014141904652 , 0.5143312014884642 , 0.5076172894765302 ,
       0.5137782252202295 , 0.5098844582896837 , 0.5107387896921242 ,
       0.5108212533426333 , 0.5011541742260077 , 0.511275634729203  ,
       0.5198297251422094 , 0.5046314353909166 , 0.5149136471

## Without functions

In [22]:
# Inline (function-free) version of getResultsFromFile for a single log.
# The context manager closes the file even if JSON parsing fails.
with open('BERT-NER-TF2/log/results-tiny_2_128-pretrained-conll-1.json') as file:
    data = json.load(file)

# NOTE(review): the variable says "nontrained" while the log file name says
# "pretrained" -- confirm which one is intended.
tiny_nontrained_conll_1=[]
for i in data['results']:
    tiny_nontrained_conll_1.append(i['macro avg']['f1-score'])

print(len(tiny_nontrained_conll_1))
print(tiny_nontrained_conll_1)

50
[0.0, 0.027895555533793887, 0.09625542912717641, 0.24260039246852724, 0.35828256944809955, 0.38368608137078347, 0.4299759114197932, 0.4384883136852656, 0.45972354022598266, 0.45848559836061253, 0.48616682097290076, 0.47665794127282024, 0.48906256026286554, 0.48900495986351117, 0.4915429481096896, 0.4862094704781087, 0.5124843582082306, 0.5030063015012907, 0.5030329570369881, 0.516106349565586, 0.5003790878985109, 0.5030795549386055, 0.5223640337527911, 0.5043849823980033, 0.5154717960626914, 0.5070815455196427, 0.5067306251175117, 0.5014683848944328, 0.5168345620925413, 0.5123635597809031, 0.5085061107819622, 0.51887274653835, 0.5011777293970267, 0.5012740732235161, 0.5168769873191643, 0.5142446795165989, 0.5128710088593892, 0.49846436086459966, 0.5236329772103371, 0.5210147868179074, 0.5169148504329162, 0.517315295683945, 0.5181870446764552, 0.5251659415824994, 0.5161440378157273, 0.5193429043667088, 0.5145823838125332, 0.5230124650883465, 0.5130420689835737, 0.5210198084482502]


In [23]:
# Inline version of getAll3Results for tiny_2_128 on conll: gather the F1
# lists of logs 1, 2 and 3 in order.
res = []
for i in (1, 2, 3):
    arr = getResultsFromFile('results-tiny_2_128-pretrained-conll-' + str(i))
    res += [arr]

print(len(res))
print(res)

3
[[0.0, 0.027895555533793887, 0.09625542912717641, 0.24260039246852724, 0.35828256944809955, 0.38368608137078347, 0.4299759114197932, 0.4384883136852656, 0.45972354022598266, 0.45848559836061253, 0.48616682097290076, 0.47665794127282024, 0.48906256026286554, 0.48900495986351117, 0.4915429481096896, 0.4862094704781087, 0.5124843582082306, 0.5030063015012907, 0.5030329570369881, 0.516106349565586, 0.5003790878985109, 0.5030795549386055, 0.5223640337527911, 0.5043849823980033, 0.5154717960626914, 0.5070815455196427, 0.5067306251175117, 0.5014683848944328, 0.5168345620925413, 0.5123635597809031, 0.5085061107819622, 0.51887274653835, 0.5011777293970267, 0.5012740732235161, 0.5168769873191643, 0.5142446795165989, 0.5128710088593892, 0.49846436086459966, 0.5236329772103371, 0.5210147868179074, 0.5169148504329162, 0.517315295683945, 0.5181870446764552, 0.5251659415824994, 0.5161440378157273, 0.5193429043667088, 0.5145823838125332, 0.5230124650883465, 0.5130420689835737, 0.5210198084482502], [

In [24]:
# Inline version of getAverageResult for tiny_2_128 on conll. The mean is
# computed once and reused for both prints instead of reading the logs twice.
meanTinyConll = np.mean(np.array(getAll3Results("tiny_2_128", "conll")), axis=0)
print(len(meanTinyConll))
print(meanTinyConll)

50
[0.                  0.03701322009554698 0.10540041188202605
 0.2368168029250796  0.34864553360099554 0.38971039005916247
 0.424408724137463   0.4335410740694579  0.4544490114027578
 0.45772261685848825 0.48056017872166884 0.4710768462548682
 0.4930067763613286  0.48450018569788816 0.5008209096930075
 0.49245975398454833 0.504120103224294   0.4991412253938836
 0.5020058215007901  0.5135512556612746  0.5020023684175855
 0.5057589815875377  0.5089983210751584  0.5034084127383694
 0.505486959668084   0.5088532152900082  0.5059874703970956
 0.49721578077577755 0.5110138677324737  0.5112551946331931
 0.5069014141904652  0.5143312014884642  0.5076172894765302
 0.5137782252202295  0.5098844582896837  0.5107387896921242
 0.5108212533426333  0.5011541742260077  0.511275634729203
 0.5198297251422094  0.5046314353909166  0.5149136471777152
 0.5133419373277502  0.5086148640461668  0.5124751758711844
 0.5059313895717751  0.5182624289276203  0.5165886534814688
 0.5119771277267133  0.5200435099959

In [25]:
# Inline version of getmaxResultsforDataset("conll"): peak of the averaged
# result array for every model, in `models` order.
maxResults = []
for model in models:
    maxResults += [max(getAverageResult(model, "conll"))]

print(maxResults)

[0.520043509995936, 0.635668541455094, 0.7138305260769693, 0.7450437062094389, 0.5187565534825334, 0.6830046706149359, 0.7585083264608087, 0.788886319479836, 0.573004792469931, 0.6869852032449039, 0.765747636500779, 0.8103313966780087, 0.5912366915345196, 0.6955492355451266, 0.760050241808722, 0.8063368498420984, 0.5798861267748975, 0.7084680251020531, 0.7651247065650741, 0.8081917425287096, 0.5846665472678917, 0.6933049915341846, 0.7696725666176105, 0.8007050190913523]
