In [1]:
import os
import glob
import json
import numpy as np
import networkx as nx
from joblib import Parallel
from joblib import delayed
from utils import io
from utils import estimator

In [27]:
# Three example result-folder names. The trailing number on each line is the
# count of "-"-separated fields that the last expression reports for it.
s1 = "BAH-N2000-m20-B0.3-H0.9-i3-x5-h0.9-k39.6-km36.5-kM40.9_nodes"  # 11
s2 = "Caltech36_nodes"  # 1
s3 = "BAH-Caltech36-N701-m2-B0.33-Hmm0.63-HMM0.44-i1-x5-h0.5-k4.0-km5.0-kM3.5_nodes"  # 13
tuple(len(name.split("-")) for name in (s1, s2, s3))

(11, 1, 13)

In [2]:
# Sample names for checking how a split on '-H' behaves. Exactly one
# assignment is left active so the cell runs on a fresh kernel; move the
# comment marker to try another sample.
s = 'BAH-Swarthmore42-N1519-m2-B0.49-Hmm0.54-HMM0.51-i1-x5-h0.5-k4.0-km4.1-kM3.9_nodes' #3
#s = 'BAH-N2000-m20-B0.1-H0.0-i1-x5-h0.0-k37.9-km189.4-kM21.0.gpickle' #2
#s = 'Swarthmore42.gpickle' # 1
s.split('-H')

['BAH-Swarthmore42-N1519-m2-B0.49',
 'mm0.54',
 'MM0.51-i1-x5-h0.5-k4.0-km4.1-kM3.9_nodes']

In [92]:
def _change_empirical_graph(fn):
    """Bring the 'H' attribute of the graph stored at `fn` up to date.

    'H' is recomputed as the mean of the graph attributes 'HMM' and 'Hmm',
    rounded to two decimals. The file is rewritten only when the recomputed
    value differs from the stored one (a missing 'H' counts as different).

    Parameters
    ----------
    fn : str
        Path of the gpickle file to load and, if needed, overwrite.

    Returns
    -------
    int or None
        1 when 'H' was updated and the graph was written back to `fn`;
        None when the stored 'H' already equals the recomputed value.

    Raises
    ------
    KeyError
        If the graph has no 'HMM' or no 'Hmm' attribute.
    """
    g = io.load_gpickle(fn)

    h = round(np.mean([g.graph['HMM'], g.graph['Hmm']]), 2)

    # .get(): a graph that has no 'H' yet is treated as outdated instead of
    # aborting the whole batch with a KeyError.
    if h != g.graph.get('H'):
        g.graph['H'] = h
        print(g.graph)
        io.write_gpickle(g, fn)
        return 1

    return None

def _change_fit_graph(fn):
    """Fill in 'Hmm', 'HMM' and 'H' on the graph stored at `fn`.

    The values are looked up in '../results-batch/summary_datasets.csv' using
    the graph's 'fullname' attribute as the `dataset` key; 'H' is the mean of
    the two values rounded to two decimals. A graph whose 'name' is 'Sex' is
    renamed to 'Escorts'.

    NOTE(review): the call that writes the graph back to disk is commented
    out in this version, so every change made here stays in memory only.
    Confirm whether that is intended before relying on the return code.

    Parameters
    ----------
    fn : str
        Path of the gpickle file to load.

    Returns
    -------
    int or None
        1    'Hmm', 'HMM' and 'H' were (re)computed on the loaded graph.
        None the graph already carries 'Hmm'; nothing to compute.
        -2   the graph has no 'H' attribute at all.

    Raises
    ------
    KeyError
        If the graph has 'H' but no 'fullname' or no 'name' attribute.
    IndexError
        If the summary table has no row for the graph's dataset.
    """
    g = io.load_gpickle(fn)

    if 'H' not in g.graph:
        return -2

    dataset = g.graph['fullname']

    if g.graph['name'] == 'Sex':
        g.graph['name'] = 'Escorts'

    if 'Hmm' in g.graph:
        return None

    # The summary table is only needed on this path, so it is loaded here
    # rather than once per file up front.
    df = io.load_csv('../results-batch/summary_datasets.csv')
    tmp = df.query("dataset==@dataset")
    hmm = tmp['Hmm'].iloc[0]
    hMM = tmp['HMM'].iloc[0]

    g.graph['Hmm'] = hmm
    g.graph['HMM'] = hMM
    g.graph['H'] = round(np.mean([hmm, hMM]), 2)

    # Persistence is disabled (see NOTE in the docstring):
    #io.write_gpickle(g, fn)
    return 1

def _change_synthetic_graph(fn):
    """Replace a non-numeric 'H' on the graph stored at `fn` by a number.

    When 'H' cannot be converted to float and its text starts with 'MM', and
    the graph has both 'Hmm' and 'HMM', 'H' is set to the mean of those two
    values rounded to two decimals.

    NOTE(review): the call that writes the graph back to disk is commented
    out in this version, so the new 'H' stays in memory only. Confirm whether
    that is intended before relying on the return code.

    Parameters
    ----------
    fn : str
        Path of the gpickle file to load.

    Returns
    -------
    int or None
        None 'H' is already convertible to float.
        1    'H' was recomputed from 'Hmm' and 'HMM'.
        0    nothing was done ('H' missing, not starting with 'MM', or
             'Hmm'/'HMM' missing).
    """
    g = io.load_gpickle(fn)

    if 'H' not in g.graph:
        return 0

    current = g.graph['H']
    try:
        float(current)
    except (TypeError, ValueError):
        # Only a failed conversion means "not numeric yet"; anything else
        # should surface instead of being swallowed.
        pass
    else:
        return None

    # str(): tolerate non-string values such as None, as the evaluation
    # counterpart of this function does.
    if str(current).startswith('MM') and 'Hmm' in g.graph and 'HMM' in g.graph:
        g.graph['H'] = round(np.mean([float(g.graph['Hmm']), float(g.graph['HMM'])]), 2)
        # Persistence is disabled (see NOTE in the docstring):
        #io.write_gpickle(g, fn)
        return 1

    return 0

def _change_empirical_eval(fn):
    """Recompute 'Hmm', 'HMM' and 'H' on the evaluation object stored at `fn`.

    The dataset name is the name of the folder containing `fn` with '_nodes'
    removed, e.g. '.../results/Caltech36_nodes/P20_evaluation_10.pickle'
    gives 'Caltech36'. Its 'Hmm' and 'HMM' are read from
    '../results-batch/summary_datasets.csv'; 'H' is their mean rounded to two
    decimals.

    NOTE(review): the call that writes the object back to disk is commented
    out in this version, so the new values stay in memory only. Confirm
    whether that is intended before relying on the return code.

    Parameters
    ----------
    fn : str
        Path of the pickle file to load.

    Returns
    -------
    int or None
        1    the stored 'H' differed from (or lacked) the recomputed value
             and the three entries were updated on the loaded object.
        None the stored 'H' already equals the recomputed value.

    Raises
    ------
    IndexError
        If the summary table has no row for the dataset.
    """
    df = io.load_csv('../results-batch/summary_datasets.csv')
    obj = io.load_pickle(fn, verbose=False)

    dataset = os.path.basename(os.path.dirname(fn)).replace('_nodes', '')
    tmp = df.query("dataset==@dataset")
    hmm = tmp['Hmm'].iloc[0]
    hMM = tmp['HMM'].iloc[0]
    h = round(np.mean([hmm, hMM]), 2)

    # A missing 'H' is treated as outdated instead of raising KeyError.
    stored = obj['H'] if 'H' in obj else None
    if h != stored:
        obj['Hmm'] = hmm
        obj['HMM'] = hMM
        obj['H'] = h
        # Persistence is disabled (see NOTE in the docstring):
        #io.write_pickle(obj, fn)
        return 1

    return None
    
def _change_synthetic_eval(fn):
    """Replace an 'H' starting with 'MM' in the evaluation object at `fn`.

    'H' is set to the mean of 'Hmm' and 'HMM' (each converted to float),
    rounded to two decimals, and the object is written back to `fn`. When the
    object lacks 'Hmm' or 'HMM', both are first obtained from
    `estimator.get_param(fn, ...)` (presumably parsed from the file path;
    verify against `utils.estimator`).

    Parameters
    ----------
    fn : str
        Path of the pickle file to load and overwrite.

    Returns
    -------
    int or None
        None 'H' does not start with 'MM'; nothing to do.
        1    'H' recomputed from the stored 'Hmm'/'HMM' and written.
        2    'Hmm'/'HMM' taken from `estimator.get_param`, 'H' recomputed
             and written.
        -1   as 1, but writing the file failed.
        -2   as 2, but writing the file failed.
        0    the object has no 'H' entry (the object and `fn` are printed).
    """
    obj = io.load_pickle(fn, verbose=False)

    if 'H' not in obj:
        print(obj)
        print(fn)
        return 0

    if not str(obj['H']).startswith('MM'):
        return None

    if 'Hmm' in obj and 'HMM' in obj:
        changed, failed = 1, -1
    else:
        obj['Hmm'] = estimator.get_param(fn, 'Hmm')
        obj['HMM'] = estimator.get_param(fn, 'HMM')
        changed, failed = 2, -2

    obj['H'] = round(np.mean([float(obj['Hmm']), float(obj['HMM'])]), 2)

    try:
        io.write_pickle(obj, fn)
    except Exception:
        # Report write failures through the error code, but let
        # KeyboardInterrupt / SystemExit stop the batch.
        return failed
    return changed

# Prefix of the result folders / graph files that belong to synthetic
# networks; the "empirical" drivers below skip every entry starting with it.
_SYNTHETIC_PREFIX = 'BAH-'


def change_fit_graph_H(path, njobs=1):
    """Run `_change_fit_graph` on '<path>/*_nodes/*_graph_*.gpickle'.

    Folders whose name starts with 'BAH-' are skipped.
    """
    exp = '/*_nodes/*_graph_*.gpickle'
    change_H(path, exp, _change_fit_graph, njobs=njobs,
             exclude_prefix=_SYNTHETIC_PREFIX)


def change_synthetic_graph_H(path, njobs=1):
    """Run `_change_synthetic_graph` on '<path>/BAH-*-Hmm*_nodes/*_graph_*.gpickle'."""
    exp = '/BAH-*-Hmm*_nodes/*_graph_*.gpickle'
    change_H(path, exp, _change_synthetic_graph, njobs=njobs)


def change_synthetic_evaluation_H(path, njobs=1):
    """Run `_change_synthetic_eval` on '<path>/BAH-*-Hmm*_nodes/*_evaluation_*.pickle'."""
    exp = '/BAH-*-Hmm*_nodes/*_evaluation_*.pickle'
    change_H(path, exp, _change_synthetic_eval, njobs=njobs)


def change_empirical_evaluation_H(path, njobs=1):
    """Run `_change_empirical_eval` on '<path>/*_nodes/*_evaluation_*.pickle'.

    Folders whose name starts with 'BAH-' are skipped.
    """
    exp = '/*_nodes/*_evaluation_*.pickle'
    change_H(path, exp, _change_empirical_eval, njobs=njobs,
             exclude_prefix=_SYNTHETIC_PREFIX)


def change_empirical_graph_H(path, njobs=1):
    """Run `_change_empirical_graph` on '<path>/*.gpickle'.

    Files whose name starts with 'BAH-' are skipped.
    """
    exp = '/*.gpickle'
    change_H(path, exp, _change_empirical_graph, njobs=njobs,
             exclude_prefix=_SYNTHETIC_PREFIX)


def _first_component_below(path, fn):
    """Return the first path component of `fn` relative to `path`."""
    return os.path.relpath(fn, path).split(os.sep)[0]


def change_H(path, exp, callback, njobs=1, exclude_prefix=None):
    """Apply `callback` to every file matching `path + exp` and print a tally.

    Parameters
    ----------
    path : str
        Root folder; `exp` is appended to it verbatim.
    exp : str
        Glob expression (starting with '/') selecting the files.
    callback : callable
        Called as `callback(fn)` for each file. Its return value is tallied:
        1 / 2 changed, None already changed, 0 not changed, -1 / -2 error.
    njobs : int, default 1
        Passed to `joblib.Parallel` as `n_jobs`.
    exclude_prefix : str, optional
        When given, a matched file is dropped if the first path component
        below `path` (a folder name, or the file name for files directly in
        `path`) starts with this prefix. This replaces the glob class
        '[!BAH]', which excludes every name beginning with 'B', 'A' or 'H'
        rather than names beginning with 'BAH'.
    """
    files = glob.glob(path + exp, recursive=True)
    if exclude_prefix:
        files = [fn for fn in files
                 if not _first_component_below(path, fn).startswith(exclude_prefix)]
    print('{} files found.'.format(len(files)))

    results = list(Parallel(n_jobs=njobs)(delayed(callback)(fn) for fn in files))

    print('---')
    tally = (
        ('{} changed (1).', 1),
        ('{} changed (2).', 2),
        ('{} already changed.', None),
        ('{} NOT changed.', 0),
        ('{} error (1).', -1),
        ('{} error (2).', -2),
    )
    for template, code in tally:
        # list.count compares with ==, so None and the int codes never collide.
        print(template.format(results.count(code)))

In [26]:
# Process the '*_graph_*.gpickle' files of the 'BAH-*-Hmm*_nodes' folders with 30 jobs.
change_synthetic_graph_H(path='/bigdata/lespin/Network-Unbiased-Inference/results', njobs=30)

2500 files found.
---
0 changed (1).
0 changed (2).
2500 already changed.
0 NOT changed.
0 error (1).
0 error (2).


In [24]:
# Process the '*_evaluation_*.pickle' files of the 'BAH-*-Hmm*_nodes' folders with 30 jobs.
change_synthetic_evaluation_H(path='/bigdata/lespin/Network-Unbiased-Inference/results', njobs=30)

2500 files found.
---
0 changed (1).
0 changed (2).
2500 already changed.
0 NOT changed.
0 error (1).
0 error (2).


In [62]:
# Process the '*_graph_*.gpickle' files of the non-'BAH' '*_nodes' folders with 30 jobs.
change_fit_graph_H(path='/bigdata/lespin/Network-Unbiased-Inference/results', njobs=30)

500 files found.
---
0 changed (1).
0 changed (2).
500 already changed.
0 NOT changed.
0 error (1).
0 error (2).


In [75]:
# Process the non-'BAH' '*.gpickle' files directly inside the data folder with 30 jobs.
change_empirical_graph_H(path='/ssd/lespin/code/Network-Inference/Discrimination-in-Relational-Classification/data', njobs=30)

5 files found.
---
0 changed (1).
0 changed (2).
5 already changed.
0 NOT changed.
0 error (1).
0 error (2).


In [93]:
# Process the '*_evaluation_*.pickle' files of the non-'BAH' '*_nodes' folders with 30 jobs.
change_empirical_evaluation_H(path='/bigdata/lespin/Network-Unbiased-Inference/results', njobs=30)

500 files found.
---
0 changed (1).
0 changed (2).
500 already changed.
0 NOT changed.
0 error (1).
0 error (2).
