In [1]:
import pymongo
from collections import defaultdict

In [2]:
# Connect to the local MongoDB server and print one info entry per database.
client = pymongo.MongoClient(host='localhost', port=27017)
cursor = client.list_databases()
for db in cursor:
    print(db)

{'name': '10000', 'sizeOnDisk': 270336.0, 'empty': False}
{'name': '10000_cl', 'sizeOnDisk': 262144.0, 'empty': False}
{'name': '10000_cl_clagreement', 'sizeOnDisk': 253952.0, 'empty': False}
{'name': '10000_cl_claudience', 'sizeOnDisk': 368640.0, 'empty': False}
{'name': '10000_cl_clcontroversial', 'sizeOnDisk': 262144.0, 'empty': False}
{'name': '10000_cl_cldisagreement', 'sizeOnDisk': 262144.0, 'empty': False}
{'name': '10000_cl_clinformative', 'sizeOnDisk': 258048.0, 'empty': False}
{'name': '10000_cl_clmean', 'sizeOnDisk': 262144.0, 'empty': False}
{'name': '10000_cl_clpersuasive', 'sizeOnDisk': 360448.0, 'empty': False}
{'name': '10000_cl_clsentiment', 'sizeOnDisk': 262144.0, 'empty': False}
{'name': '10000_cl_cltopic', 'sizeOnDisk': 262144.0, 'empty': False}
{'name': '10000_ner', 'sizeOnDisk': 274432.0, 'empty': False}
{'name': '10000_ner_cl', 'sizeOnDisk': 262144.0, 'empty': False}
{'name': '10000_ner_cl_clagreement', 'sizeOnDisk': 253952.0, 'empty': False}
{'name': '10000_ner_

In [3]:
# Label-column suffixes; get_all appends each one to a database-name prefix.
all_cols = [
    'claudience',
    'clpersuasive',
    'clsentiment',
    'clagreement',
    'cldisagreement',
    'clinformative',
    'clmean',
    'clcontroversial',
    'cltopic',
]

In [4]:
def get_mets(db, conf=None):
    """Summarise the run with the highest ``kappa_score`` stored in database ``db``.

    Reads the ``metrics`` and ``runs`` collections of ``client[db]`` (``client``
    is the module-level MongoDB client).

    Args:
        db: Name of the MongoDB database to inspect.
        conf: Optional query passed to ``runs.find``; when given, only metrics
            belonging to a matching run are considered
            (e.g. ``{'config.mod': 'simple_fit'}``).

    Returns:
        An 8-tuple ``(kappa, accuracy, f1_macro, epoch, max_epochs, exp_id,
        drop_mult, mod)``:

        * ``kappa`` -- the highest ``kappa_score`` value found.
        * ``accuracy`` / ``f1_macro`` -- the ``accuracy`` and ``F1_macro``
          metric values at the same epoch, or ``None`` when the run has no
          such metric recorded.
        * ``epoch`` -- 1-based position of ``kappa`` in its value list.
        * ``max_epochs`` -- number of ``kappa_score`` values recorded.
        * ``exp_id`` / ``drop_mult`` -- taken from the run's ``config``.
        * ``mod`` -- ``config['mod']`` or ``''`` when the key is absent.

    Raises:
        IndexError: if no ``kappa_score`` metric matches (e.g. the database
            does not exist or ``conf`` filters out every run), or if the run
            document of the best metric is missing.
    """
    mydb = client[db]
    candidates = mydb["metrics"].aggregate([
        {"$match": {"name": 'kappa_score'}},  # only consider this metric
        {"$unwind": "$values"},
        {"$group": {
            '_id': '$_id',
            'val': {'$max': "$values"},  # best (maximum) value per metric document
            'run_id': {'$first': '$run_id'},
        }},
        {"$sort": {"val": -1}},  # highest value first
    ])

    if conf is not None:
        # Restrict the candidates to runs whose document matches ``conf``.
        matching_runs = [run['_id'] for run in mydb['runs'].find(conf)]
        candidates = [c for c in candidates if c['run_id'] in matching_runs]

    best = next(iter(candidates), None)
    if best is None:
        # Keep raising IndexError (as the old ``list(res)[0]`` did) but say why.
        raise IndexError(
            "no 'kappa_score' metric found in database {!r} (conf={!r})".format(db, conf)
        )

    # Look at the exact metric document that produced ``best`` so that the epoch
    # and the epoch count always come from the same value list.
    kappa_values = mydb['metrics'].find_one({'_id': best['_id']})['values']
    max_epochs = len(kappa_values)
    epoch = kappa_values.index(best['val']) + 1  # first occurrence, 1-based

    def value_at_best_epoch(metric_name):
        """Return ``metric_name`` of the best run at ``epoch`` (None if not recorded)."""
        doc = mydb['metrics'].find_one({'run_id': best['run_id'], 'name': metric_name})
        return None if doc is None else doc['values'][epoch - 1]

    f1_macro = value_at_best_epoch('F1_macro')
    accuracy = value_at_best_epoch('accuracy')

    run = mydb['runs'].find_one({'_id': best['run_id']})
    if run is None:
        raise IndexError(
            "run {!r} referenced by the best metric is missing from {!r}".format(best['run_id'], db)
        )

    config = run['config']
    mod = config.get('mod', '')

    return best['val'], accuracy, f1_macro, epoch, max_epochs, config['exp_id'], config['drop_mult'], mod

In [5]:
# Best run of one database; the displayed tuple is (kappa_score, accuracy, F1_macro,
# best epoch, number of recorded epochs, exp_id, drop_mult, mod).
get_mets('lm_threads_cl_cltopic')

(0.44218260049819946,
 0.7632933259010315,
 0.7209945900957136,
 40,
 52,
 '2019_ 1_27_20_53_08_545796',
 1,
 '')

In [6]:
def get_all(db, **kwargs):
    """Print the ``get_mets`` summary for every label column.

    For each suffix in ``all_cols`` the column name is printed, followed by the
    tuple returned by ``get_mets(db + suffix, **kwargs)``.

    Args:
        db: Database-name prefix that each column suffix is appended to.
        **kwargs: Forwarded unchanged to ``get_mets`` (e.g. ``conf``).
    """
    for column in all_cols:
        print(column)
        summary = get_mets(db + column, **kwargs)
        print(summary)

In [7]:
# NOTE: 'simle_fit' (sic) matches the `mod` value echoed in this cell's output, i.e. the
# spelling stored with these runs; "correcting" it here would make the filter match nothing.
get_all('lm_threads_cl_', conf={'config.mod': 'simle_fit'})

claudience
(0.5690997838973999, 0.8278829455375671, 0.7772512575144154, 22, 43, '2019_ 1_29_23_51_17_179985', 0.8, 'simle_fit')
clpersuasive
(0.34844154119491577, 0.8456260561943054, 0.6740183896620278, 120, 141, '2019_ 1_29_22_56_40_420538', 1.0, 'simle_fit')
clsentiment
(0.32171106338500977, 0.6159169673919678, 0.42018169515097575, 39, 60, '2019_ 1_30_02_18_08_573913', 0.8, 'simle_fit')
clagreement
(0.4948951005935669, 0.9125214219093323, 0.7460996541565261, 55, 76, '2019_ 1_30_03_45_35_481969', 0.8, 'simle_fit')
cldisagreement
(0.5476285219192505, 0.7821612358093262, 0.7738135606164749, 45, 66, '2019_ 1_30_05_36_50_858836', 0.8, 'simle_fit')
clinformative
(0.3481070399284363, 0.843910813331604, 0.67211767250703, 39, 60, '2019_ 1_30_07_13_44_296713', 0.8, 'simle_fit')
clmean
(0.45336586236953735, 0.8404802680015564, 0.7247836349331235, 27, 48, '2019_ 1_30_08_40_58_145853', 0.8, 'simle_fit')
clcontroversial
(0.34643083810806274, 0.7255574464797974, 0.6723154315262907, 24, 45, '2019_ 1

In [8]:
# Same kind of filter for the 'lm_threads_cut_cl_' databases; here the runs report
# mod='simple_fit' (spelled differently from the 'lm_threads_cl_' runs).
get_all('lm_threads_cut_cl_', conf={'config.mod': 'simple_fit'})

claudience
(0.6009355783462524, 0.8364887833595276, 0.7963261585921615, 63, 84, '2019_ 2_01_17_37_08_103558', 1.2, 'simple_fit')
clpersuasive
(0.40735119581222534, 0.8473413586616516, 0.703592626233198, 53, 74, '2019_ 1_29_20_41_55_980599', 1, 'simple_fit')
clsentiment
(0.3614158630371094, 0.6470588445663452, 0.40863006396588486, 64, 85, '2019_ 2_05_17_42_04_600587', 1.3, 'simple_fit')
clagreement
(0.5264347791671753, 0.9210977554321289, 0.7613207547169811, 40, 61, '2019_ 1_30_21_52_02_248101', 1.1, 'simple_fit')
cldisagreement
(0.5722670555114746, 0.7941681146621704, 0.7861335289801907, 25, 46, '2019_ 1_30_01_20_48_014374', 1, 'simple_fit')
clinformative
(0.39166170358657837, 0.8473413586616516, 0.6950424637808927, 75, 96, '2019_ 1_31_00_23_03_377347', 1.1, 'simple_fit')
clmean
(0.45350390672683716, 0.8147512674331665, 0.7266333229134104, 82, 103, '2019_ 1_31_13_33_26_202644', 1.2, 'simple_fit')
clcontroversial
(0.3601683974266052, 0.728987991809845, 0.6795455177980018, 45, 66, '2019_

In [9]:
# No config filter: best kappa run per label column across all runs, whatever their `mod`.
get_all('lm_threads_cl_')

claudience
(0.5690997838973999, 0.8278829455375671, 0.7772512575144154, 22, 43, '2019_ 1_29_23_51_17_179985', 0.8, 'simle_fit')
clpersuasive
(0.34844154119491577, 0.8456260561943054, 0.6740183896620278, 120, 141, '2019_ 1_29_22_56_40_420538', 1.0, 'simle_fit')
clsentiment
(0.32171106338500977, 0.6159169673919678, 0.42018169515097575, 39, 60, '2019_ 1_30_02_18_08_573913', 0.8, 'simle_fit')
clagreement
(0.4948951005935669, 0.9125214219093323, 0.7460996541565261, 55, 76, '2019_ 1_30_03_45_35_481969', 0.8, 'simle_fit')
cldisagreement
(0.5657382011413574, 0.7907375693321228, 0.7828664582621012, 46, 53, '2019_ 1_29_02_28_53_082834', 0.9, '')
clinformative
(0.3481070399284363, 0.843910813331604, 0.67211767250703, 39, 60, '2019_ 1_30_07_13_44_296713', 0.8, 'simle_fit')
clmean
(0.45336586236953735, 0.8404802680015564, 0.7247836349331235, 27, 48, '2019_ 1_30_08_40_58_145853', 0.8, 'simle_fit')
clcontroversial
(0.3548339009284973, 0.7066895365715027, 0.6765367206891517, 19, 32, '2019_ 1_28_06_36_

In [10]:
# No config filter: best kappa run per label column across all runs, whatever their `mod`.
get_all('lm_threads_cut_cl_')

claudience
(0.6009355783462524, 0.8364887833595276, 0.7963261585921615, 63, 84, '2019_ 2_01_17_37_08_103558', 1.2, 'simple_fit')
clpersuasive
(0.40735119581222534, 0.8473413586616516, 0.703592626233198, 53, 74, '2019_ 1_29_20_41_55_980599', 1, 'simple_fit')
clsentiment
(0.3614158630371094, 0.6470588445663452, 0.40863006396588486, 64, 85, '2019_ 2_05_17_42_04_600587', 1.3, 'simple_fit')
clagreement
(0.5264347791671753, 0.9210977554321289, 0.7613207547169811, 40, 61, '2019_ 1_30_21_52_02_248101', 1.1, 'simple_fit')
cldisagreement
(0.5722670555114746, 0.7941681146621704, 0.7861335289801907, 25, 46, '2019_ 1_30_01_20_48_014374', 1, 'simple_fit')
clinformative
(0.39166170358657837, 0.8473413586616516, 0.6950424637808927, 75, 96, '2019_ 1_31_00_23_03_377347', 1.1, 'simple_fit')
clmean
(0.45350390672683716, 0.8147512674331665, 0.7266333229134104, 82, 103, '2019_ 1_31_13_33_26_202644', 1.2, 'simple_fit')
clcontroversial
(0.3601683974266052, 0.728987991809845, 0.6795455177980018, 45, 66, '2019_

In [11]:
# The first result (claudience) is not correct because that run used the wrong data,
# so it should be checked again.
get_all('threads_headline_cl_')

claudience
(0.6008878946304321, 0.8399311304092407, 0.7941482370421167, 40, 61, '2019_ 2_07_13_00_37_295654', 1.2, 'simple_fit')
clpersuasive
(0.3780103325843811, 0.8473413586616516, 0.688988389587192, 85, 106, '2019_ 2_09_17_52_21_077774', 1.3, 'simple_fit')
clsentiment
(0.318831205368042, 0.6038062572479248, 0.40958570678427597, 50, 71, '2019_ 2_07_15_37_11_457239', 1.2, 'simple_fit')
clagreement
(0.5264347791671753, 0.9210977554321289, 0.7613207547169811, 54, 75, '2019_ 2_07_17_16_37_999916', 1.2, 'simple_fit')
cldisagreement
(0.5678068399429321, 0.7924528121948242, 0.7838974613473515, 41, 62, '2019_ 2_19_21_52_21_475283', 1.1, 'simple_fit')
clinformative
(0.39213693141937256, 0.8593481779098511, 0.6932523997741389, 32, 53, '2019_ 2_19_22_30_15_247649', 1.0, 'simple_fit')
clmean
(0.5002095699310303, 0.8456260561943054, 0.7496278057718735, 41, 62, '2019_ 2_19_23_54_34_673133', 1.0, 'simple_fit')
clcontroversial
(0.36971259117126465, 0.7272727489471436, 0.6848037865573136, 15, 36, '20

In [12]:
# Best kappa run per label column for the 'threads_headline_article_cl_' databases.
get_all('threads_headline_article_cl_')

claudience
(0.5837217569351196, 0.8330464959144592, 0.7852943977751109, 41, 62, '2019_ 2_14_11_45_40_468691', 1.3, 'simple_fit')
clpersuasive
(0.3036462664604187, 0.8113207817077637, 0.6512095896967324, 32, 53, '2019_ 2_16_14_30_08_504370', 1.1, 'simple_fit')
clsentiment
(0.32407450675964355, 0.6089965105056763, 0.42122653355484513, 51, 72, '2019_ 2_16_18_41_49_371875', 1.2, 'simple_fit')
clagreement
(0.4669921398162842, 0.9090909361839294, 0.7318097784104224, 65, 86, '2019_ 2_16_21_40_28_972048', 1.2, 'simple_fit')
cldisagreement
(0.5134850740432739, 0.7632933259010315, 0.7565541031227306, 102, 123, '2019_ 2_15_06_31_53_152954', 1.3, 'simple_fit')
clinformative
(0.3340609073638916, 0.8267581462860107, 0.6667119480622393, 52, 73, '2019_ 2_17_01_33_55_340111', 1.1, 'simple_fit')
clmean
(0.45505446195602417, 0.8267581462860107, 0.7273745861981156, 87, 108, '2019_ 2_19_01_24_18_664466', 1.2, 'simple_fit')
clcontroversial
(0.3809050917625427, 0.739279568195343, 0.689719887955182, 17, 38, '

In [13]:
# Best kappa run per label column for the 'headline_root_threads_cl_' databases.
get_all('headline_root_threads_cl_')

claudience
(0.5960416793823242, 0.826161801815033, 0.7970646110644243, 58, 79, '2019_ 2_15_21_48_12_538092', 1.1, 'simple_fit')
clpersuasive
(0.4427451491355896, 0.8713550567626953, 0.7209725279984686, 44, 65, '2019_ 2_16_18_44_40_802093', 1.2, 'simple_fit')
clsentiment
(0.3285176753997803, 0.6107266545295715, 0.41123962765207106, 74, 95, '2019_ 2_15_14_17_26_051079', 1.2, 'simple_fit')
clagreement
(0.5290092825889587, 0.9159519672393799, 0.7636453894841353, 116, 137, '2019_ 2_19_21_00_07_972344', 1.3, 'simple_fit')
cldisagreement
(0.5541368722915649, 0.7855917811393738, 0.7770677255248659, 49, 70, '2019_ 2_16_05_17_50_166639', 1.1, 'simple_fit')
clinformative
(0.3819933533668518, 0.8456260561943054, 0.6901133947554925, 31, 52, '2019_ 2_16_06_58_57_065647', 1.1, 'simple_fit')
clmean
(0.4779966473579407, 0.838765025138855, 0.738500152695068, 28, 49, '2019_ 2_19_00_10_28_080602', 0.9, 'simple_fit')
clcontroversial
(0.37279385328292847, 0.728987991809845, 0.6863285932221859, 13, 34, '2019

In [14]:
# Best kappa run per label column for the 'headline_root_threads_no_over_cl_' databases.
get_all('headline_root_threads_no_over_cl_')

claudience
(0.558082640171051, 0.824440598487854, 0.7708352668213456, 22, 43, '2019_ 2_18_18_06_01_350738', 0.9, 'simple_fit')
clpersuasive
(0.31206053495407104, 0.8353344798088074, 0.6558949387634647, 49, 70, '2019_ 2_18_19_09_08_264497', 0.9, 'simple_fit')
clsentiment
(0.28415054082870483, 0.6038062572479248, 0.38467205501397067, 24, 45, '2019_ 2_18_20_52_38_196497', 0.9, 'simple_fit')
clagreement
(0.4440755844116211, 0.9073756337165833, 0.719811320754717, 59, 80, '2019_ 2_18_21_58_27_422204', 0.9, 'simple_fit')
cldisagreement
(0.5378996729850769, 0.7770154476165771, 0.7689385624740861, 20, 41, '2019_ 2_18_23_57_12_219262', 0.9, 'simple_fit')
clinformative
(0.3001561760902405, 0.8404802680015564, 0.6462401722506769, 17, 38, '2019_ 2_19_00_56_42_643245', 0.9, 'simple_fit')
clmean
(0.4645414352416992, 0.8233276009559631, 0.7322695272617026, 24, 45, '2019_ 2_19_01_52_37_215603', 0.9, 'simple_fit')
clcontroversial
(0.3557744026184082, 0.7255574464797974, 0.6775264118590629, 9, 30, '2019_

In [15]:
# Best kappa run per label column for the 'dat_false_par_true_hea_false30000_cl_' databases.
get_all('dat_false_par_true_hea_false30000_cl_')

claudience
(0.5918749570846558, 0.8347676396369934, 0.7905299843768778, 44, 65, '2019_ 2_22_12_33_53_919038', 0.8, '')
clpersuasive
(0.3704584240913391, 0.8301886916160583, 0.6847323198942499, 88, 109, '2019_ 2_22_14_08_11_910835', 0.9, '')
clsentiment
(0.322735071182251, 0.6280276775360107, 0.44814216067070645, 35, 56, '2019_ 2_22_14_12_51_580715', 0.8, '')
clagreement
(0.4012119770050049, 0.8970840573310852, 0.6988636363636364, 28, 49, '2019_ 2_23_01_15_49_259348', 1.0, '')
cldisagreement
(0.5461447238922119, 0.7787306904792786, 0.7728248951074299, 74, 95, '2019_ 2_22_22_17_53_610249', 0.8, '')
clinformative
(0.3440946936607361, 0.8421955108642578, 0.6702326496483203, 82, 103, '2019_ 2_22_23_49_46_233061', 0.8, '')
clmean
(0.4895244240760803, 0.833619236946106, 0.74475182010625, 65, 86, '2019_ 2_23_01_27_11_345391', 0.8, '')
clcontroversial
(0.34311848878860474, 0.6809605360031128, 0.6650667160859896, 25, 46, '2019_ 2_23_01_52_17_601046', 0.9, '')
cltopic
(0.2864335775375366, 0.69811

In [16]:
# NOTE(review): every other prefix passed to get_all ends in '_cl_'. This one does not, so
# the first lookup targets the database name 'threads_constructiveclaudience', and the cell
# ends in the IndexError shown below. Presumably 'threads_constructive_cl_' was intended;
# confirm against the database list before changing it.
get_all('threads_constructive')

claudience


IndexError: list index out of range

In [None]:
# NOTE(review): `reas` is not defined in any cell visible in this notebook and this cell
# has no execution count; it looks like leftover scratch code. Define `reas` or drop the cell.
for r in reas:
    print(r)

In [17]:
def get_best(db, metric, config_param=None, config_val=None, config_param2=None, config_val2=None, one=True):
    """Return the best value(s) of ``metric`` recorded in database ``db``.

    For every document named ``metric`` in the ``metrics`` collection the
    maximum of its ``values`` is taken; the results are ordered from highest
    to lowest. Optionally only runs whose ``config`` contains given key/value
    pairs are kept.

    Args:
        db: Name of the MongoDB database to inspect.
        metric: Metric name to match against the ``name`` field.
        config_param: Optional key that must be present in the run's
            ``config`` with value ``config_val``. ``None`` disables all
            config filtering (``config_param2`` is then ignored as well).
        config_val: Required value for ``config_param``.
        config_param2: Optional second key, checked in addition to the first.
        config_val2: Required value for ``config_param2``.
        one: When true return only the single best value, otherwise the full
            descending list of per-document maxima.

    Returns:
        A single value when ``one`` is true, else a list (possibly empty).

    Raises:
        IndexError: if ``one`` is true and no metric matches.
    """
    # (key, value) pairs that a run's config has to contain to be kept.
    required = []
    if config_param is not None:
        required.append((config_param, config_val))
        if config_param2 is not None:
            required.append((config_param2, config_val2))

    # The context manager closes the client once the data has been read, so
    # repeated calls no longer accumulate open connections.
    with pymongo.MongoClient("mongodb://localhost:27017/") as myclient:
        mydb = myclient[db]
        ranked = list(mydb["metrics"].aggregate([
            {"$match": {"name": metric}},  # only consider this metric
            {"$unwind": "$values"},
            {"$group": {
                '_id': '$_id',
                'val': {'$max': "$values"},  # best (maximum) value per metric document
                'run_id': {'$first': '$run_id'},
            }},
            {"$sort": {"val": -1}},  # highest value first
        ]))

        if required:
            kept = []
            for entry in ranked:
                run = mydb['runs'].find_one({'_id': entry['run_id']})
                if run is None:
                    # A metric without a run document cannot satisfy a config filter.
                    continue
                config = run['config']
                if all(key in config and config[key] == value for key, value in required):
                    kept.append(entry)
            ranked = kept

    values = [entry['val'] for entry in ranked]
    if not one:
        return values
    if not values:
        # Same exception type as the old ``[...][0]`` lookup, with an explanation.
        raise IndexError(
            "no {!r} metric found in database {!r} for config filter {!r}".format(metric, db, required)
        )
    return values[0]