In [1]:
from CollateFn.CollateFnBase import CollateFnBase

# Rebind the label maps on CollateFnBase to its ATE-specific variants.
# NOTE(review): presumably this has to run before any code reads
# CollateFnBase.id2label / label2id — confirm against the imports below.
CollateFnBase.id2label = CollateFnBase.ATEid2label
CollateFnBase.label2id = CollateFnBase.ATElabel2id

from ModelSummary.ModelOutputsRecord import ModelOutputsRecord
import os
import numpy as np
import pandas as pd

In [2]:
class PairMap:
    """Load the model records stored in one directory and summarise their test scores.

    On construction every directory entry whose name ends in ``'s'`` is loaded
    with ``ModelOutputsRecord.load`` and analysed with ``analyseModel()``.
    ``analyse`` then groups one metric by "<source>2<target>" language pair and
    reports mean, standard deviation and sample count for each pair.

    Attributes:
        logDir: directory that was scanned for record files.
        recordsPath: raw ``os.listdir(logDir)`` result.
        modelRecords: loaded ``ModelOutputsRecord`` objects.
        analyseModelResults: one ``analyseModel()`` result per loaded record,
            extended with a ``'source'`` entry holding the short language code.
        languages: full language names.
        languageArrgs: mapping from full language name to short code.
        arrgs: short language codes, used as target languages.
    """

    def __init__(self, logDir='./RecordsDir/') -> None:
        """Scan ``logDir`` and immediately load/analyse every record in it.

        Args:
            logDir: directory containing the saved record files.
        """
        self.logDir = logDir
        self.recordsPath = os.listdir(logDir)
        self.modelRecords = []
        self.analyseModelResults = []

        self.languages = ['english', 'spanish', 'french']
        self.languageArrgs = {
            'english': 'En',
            'spanish': 'Es',
            'french': 'Fr'
        }
        self.arrgs = ['En', 'Es', 'Fr']

        self.loadDir()

    def loadDir(self):
        """Load and analyse every record file found in ``self.logDir``.

        Appends to ``self.modelRecords`` and ``self.analyseModelResults``.

        Raises:
            KeyError: if a record's ``dataParams.Source`` is not a key of
                ``self.languageArrgs``.
        """
        for recordPath in self.recordsPath:
            # Only entries whose name ends in 's' are treated as record files.
            if recordPath.endswith('s'):
                modelRecord = ModelOutputsRecord.load(self.logDir + '/' + recordPath)
                self.modelRecords.append(modelRecord)
                analyseModelResult = modelRecord.analyseModel()
                # Tag the result with the short code of its source language.
                analyseModelResult['source'] = self.languageArrgs[modelRecord.dataParams.Source]
                self.analyseModelResults.append(analyseModelResult)

    def analyse(self, key="MacroF1"):
        """Compute mean and standard deviation of one metric per language pair.

        Args:
            key: name of the metric to read from each ``test<Target>`` entry of
                the analysed results.

        Returns:
            pandas.DataFrame with one column per "<source>2<target>" pair and
            three rows: mean * 100, standard deviation * 100 (both rounded to
            3 decimals) and the number of samples. Pairs for the first entry
            of ``self.arrgs`` are always present; a pair with no samples gets
            NaN for mean and standard deviation.
        """
        source2targetResults = {}
        # The first language's pairs are always reported, even with no samples.
        for source in self.arrgs[:1]:
            for target in self.arrgs:
                source2targetResults[f"{source}2{target}"] = []

        for analyseModelResult in self.analyseModelResults:
            for target in self.arrgs:
                pair = f"{analyseModelResult['source']}2{target}"
                # setdefault: records from a source that was not pre-seeded
                # above get their own columns instead of raising KeyError.
                source2targetResults.setdefault(pair, []).append(
                    analyseModelResult[f'test{target}'][key]
                )

        analyseDic = {}
        # Loop variable deliberately not called `key`, so the parameter is not shadowed.
        for pair, valueList in source2targetResults.items():
            if valueList:
                mean = np.round(np.mean(valueList) * 100, 3)
                std = np.round(np.std(valueList) * 100, 3)
            else:
                # np.mean([]) would yield NaN plus a RuntimeWarning; produce NaN directly.
                mean = std = np.nan
            analyseDic[pair] = (mean, std, len(valueList))

        return pd.DataFrame(analyseDic)

# xlm-roberta-base类
    Records/cardiffnlp/twitter-xlm-roberta-base-sentiment
    Records/CodeNinja1126/xlm-roberta-large-kor-mrc
    Records/xlm-roberta-base
    Records/xlm-roberta-base-yelp-mlm
# xlm-roberta-large类
    Records/xlm-roberta-large
    Records/xlm-roberta-large-finetuned-conll02-spanish
    Records/xlm-roberta-large-finetuned-conll03-english
# mBert类
    Records/bert-base-multilingual-uncased
    Records/dbmdz/bert-base-multilingual-cased-finetuned-conll03-spanish
    Records/nlptown/bert-base-multilingual-uncased-sentiment
# roberta-base类
    Records/allenai/reviews_roberta_base
    Records/cardiffnlp/twitter-roberta-base-sentiment
    Records/roberta-base
# roberta-large类
    Records/roberta-large
    Records/this-is-real/mrc-pretrained-roberta-large-1
# albert类
    Records/albert-base-v2
# bert-base类
    Records/activebus/BERT_Review
    Records/activebus/BERT-XD_Review
    Records/ainize/klue-bert-base-mrc
    Records/bert-base-uncased
    Records/bert-large-uncased
    Records/google/rembert
    Records/skimai/spanberta-base-cased-ner-conll02
    Records/SpanBERT/spanbert-base-cased
# bert-large类
# electra-base类
    Records/dbmdz/electra-base-french-europeana-cased-generator
    Records/electra-base-discriminator-yelp-mlm
    Records/google/electra-base-discriminator
# electra-small类
    Records/google/electra-small-discriminator
    Records/test-electra-small-yelp

In [3]:
# xlm-roberta-base family
# Number of model layers, ndim count, model size
paths = [
    "Records/cardiffnlp/twitter-xlm-roberta-base-sentiment",
    "Records/CodeNinja1126/xlm-roberta-large-kor-mrc",
    "Records/xlm-roberta-base",
    "Records/xlm-roberta-base-yelp-mlm"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/cardiffnlp/twitter-xlm-roberta-base-sentiment
    En2En   En2Es   En2Fr
0  78.818  71.862  67.244
1   0.554   1.605   1.503
2   6.000   6.000   6.000

Records/CodeNinja1126/xlm-roberta-large-kor-mrc
    En2En   En2Es   En2Fr
0  82.893  77.793  73.931
1   0.871   0.305   0.965
2   3.000   3.000   3.000

Records/xlm-roberta-base
    En2En   En2Es   En2Fr
0  80.199  74.029  69.123
1   2.060   1.060   1.457
2   3.000   3.000   3.000

Records/xlm-roberta-base-yelp-mlm
    En2En   En2Es   En2Fr
0  81.915  70.275  68.742
1   0.243   1.115   2.589
2   3.000   3.000   3.000



In [4]:
# xlm-roberta-large family

paths = [
    "Records/xlm-roberta-large",
    "Records/xlm-roberta-large-finetuned-conll02-spanish",
    "Records/xlm-roberta-large-finetuned-conll03-english"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/xlm-roberta-large
    En2En   En2Es   En2Fr
0  81.845  76.662  73.922
1   0.122   0.995   0.715
2   3.000   3.000   3.000

Records/xlm-roberta-large-finetuned-conll02-spanish
    En2En   En2Es   En2Fr
0  83.593  75.968  73.206
1   0.703   1.694   0.399
2   3.000   3.000   3.000

Records/xlm-roberta-large-finetuned-conll03-english
    En2En   En2Es   En2Fr
0  82.469  77.002  73.523
1   0.973   1.113   1.056
2   3.000   3.000   3.000



In [5]:
# mBert family

paths = [
    "Records/bert-base-multilingual-uncased",
    "Records/dbmdz/bert-base-multilingual-cased-finetuned-conll03-spanish",
    "Records/nlptown/bert-base-multilingual-uncased-sentiment",
    "Records/google/rembert"  # appears to be a large model
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/bert-base-multilingual-uncased
    En2En   En2Es   En2Fr
0  77.113  67.652  62.266
1   0.108   0.864   0.716
2   3.000   3.000   3.000

Records/dbmdz/bert-base-multilingual-cased-finetuned-conll03-spanish
    En2En   En2Es   En2Fr
0  75.722  65.755  62.417
1   1.144   2.528   1.407
2   3.000   3.000   3.000

Records/nlptown/bert-base-multilingual-uncased-sentiment
    En2En   En2Es   En2Fr
0  76.131  67.721  62.293
1   1.343   2.066   2.206
2   7.000   7.000   7.000

Records/google/rembert
    En2En   En2Es   En2Fr
0  81.092  76.544  73.413
1   0.635   0.473   0.514
2   3.000   3.000   3.000



In [6]:
# roberta-base family

paths = [
    "Records/allenai/reviews_roberta_base",
    "Records/cardiffnlp/twitter-roberta-base-sentiment",
    "Records/roberta-base"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/allenai/reviews_roberta_base
    En2En   En2Es   En2Fr
0  79.256  40.759  34.530
1   0.493   3.723   2.573
2   3.000   3.000   3.000

Records/cardiffnlp/twitter-roberta-base-sentiment
    En2En   En2Es   En2Fr
0  81.646  27.180  31.201
1   0.522   6.396   4.695
2   5.000   5.000   5.000

Records/roberta-base
    En2En   En2Es   En2Fr
0  80.396  41.533  39.471
1   0.430   8.897   4.306
2   3.000   3.000   3.000



In [7]:
# roberta-large family
paths = [
    "Records/roberta-large",
    "Records/this-is-real/mrc-pretrained-roberta-large-1"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/roberta-large
    En2En   En2Es   En2Fr
0  81.269  64.335  56.028
1   1.518   2.776   4.131
2   3.000   3.000   3.000

Records/this-is-real/mrc-pretrained-roberta-large-1
    En2En   En2Es   En2Fr
0  70.529  17.274  18.594
1   0.000   0.000   0.000
2   1.000   1.000   1.000



In [8]:
# albert family

paths = [
    "Records/albert-base-v2"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/albert-base-v2
    En2En  En2Es   En2Fr
0  78.604  3.838  21.421
1   1.186  1.669   6.987
2   5.000  5.000   5.000



In [9]:
# bert-base family

paths = [
    "Records/activebus/BERT_Review",
    "Records/activebus/BERT-XD_Review",
    "Records/ainize/klue-bert-base-mrc",
    "Records/bert-base-uncased",
    "Records/skimai/spanberta-base-cased-ner-conll02",
    "Records/SpanBERT/spanbert-base-cased",
    "Records/Tahsin/BERT-finetuned-conll2003-POS"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/activebus/BERT_Review
    En2En   En2Es   En2Fr
0  81.839  10.122  33.155
1   0.600   1.926   2.137
2   3.000   3.000   3.000

Records/activebus/BERT-XD_Review
    En2En   En2Es   En2Fr
0  80.277  19.476  39.780
1   0.242  10.392   2.884
2   3.000   3.000   3.000

Records/ainize/klue-bert-base-mrc
    En2En  En2Es   En2Fr
0  71.171  9.402  15.881
1   0.200  1.464   2.686
2   3.000  3.000   3.000

Records/bert-base-uncased
    En2En  En2Es  En2Fr
0  77.765  0.902  15.95
1   1.027  0.883   2.65
2   3.000  3.000   3.00

Records/skimai/spanberta-base-cased-ner-conll02
    En2En   En2Es   En2Fr
0  68.608  42.478  28.412
1   2.193  24.963   1.251
2   3.000   3.000   3.000

Records/SpanBERT/spanbert-base-cased
    En2En  En2Es   En2Fr
0  75.627  4.028  16.463
1   0.994  2.350   3.116
2   3.000  3.000   3.000

Records/Tahsin/BERT-finetuned-conll2003-POS
    En2En  En2Es   En2Fr
0  77.471  8.455  16.677
1   0.642  3.863   1.052
2   3.000  3.000   3.000



In [10]:
# bert-large family
paths = [
    "Records/bert-large-uncased"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/bert-large-uncased
    En2En   En2Es   En2Fr
0  78.571  16.101  28.883
1   0.973   8.591   6.349
2   3.000   3.000   3.000



In [11]:
# albert family
# NOTE(review): this cell analyses the same single directory as the earlier
# albert cell in this notebook — confirm whether the repeat is intentional.

paths = [
    "Records/albert-base-v2"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/albert-base-v2
    En2En  En2Es   En2Fr
0  78.604  3.838  21.421
1   1.186  1.669   6.987
2   5.000  5.000   5.000



In [12]:
# electra-base family
paths = [
    "Records/dbmdz/electra-base-french-europeana-cased-generator",
    "Records/electra-base-discriminator-yelp-mlm",
    "Records/google/electra-base-discriminator"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/dbmdz/electra-base-french-europeana-cased-generator
    En2En   En2Es   En2Fr
0  63.907  19.191  45.311
1   1.778   1.688   1.290
2   3.000   3.000   3.000

Records/electra-base-discriminator-yelp-mlm
    En2En  En2Es   En2Fr
0  80.817  7.799  34.348
1   0.831  7.949   5.876
2   3.000  3.000   3.000

Records/google/electra-base-discriminator
    En2En  En2Es   En2Fr
0  80.558  8.084  27.262
1   1.088  5.154   8.196
2   3.000  3.000   3.000



In [13]:
# electra-small family

paths = [
    "Records/google/electra-small-discriminator",
    "Records/test-electra-small-yelp"
]

# One summary DataFrame per record directory, in the same order as `paths`.
rList = [PairMap(path).analyse() for path in paths]

# Directory name, then its summary table, then a blank separator line.
for path, record in zip(paths, rList):
    print(path, record, sep="\n", end="\n\n")

Records/google/electra-small-discriminator
    En2En  En2Es   En2Fr
0  76.649  2.943  16.686
1   1.314  1.504   2.620
2   3.000  3.000   3.000

Records/test-electra-small-yelp
    En2En  En2Es   En2Fr
0  79.137  6.270  24.254
1   0.333  2.229   1.056
2   3.000  3.000   3.000

