In [1]:
from pathlib import Path

import pandas as pd

from report_config import DATA_DIR, REPORT_DIR, SUB_EXPERIMENTS_NAME_START, ATTRIBUTES
from report_utils import (create_reporting_process_dict, 
                          get_filtered_attributes, 
                          build_analyze_graph, 
                          create_confusion_reports, 
                          create_combined_reports, 
)
                          

  from .autonotebook import tqdm as notebook_tqdm


## Process the experiment data directories 

**This is mandatory**

This step scans the experiment data directories and builds a dictionary that maps each experiment name to its data directory and the corresponding experiment data (graph numbers) found there.

In [2]:
# Build the experiment lookup from the data directory: one entry per experiment
# name, holding what the helper found for it (see the printed summary below).
d_experiments = create_reporting_process_dict(
    data_dir=DATA_DIR,
    sub_ex_name_start=SUB_EXPERIMENTS_NAME_START,
)

Experiment/Graph name(s) found: 
pokec synthcorrattr
/Volumes/tcs_jf_fair_node_sampling/TEST_SynthCorr/pokec_synthcorrlabels_02/data/pokec_synthcorrattr/resources
With
  Graph 1
  Graph 0


In [3]:
# Show the full experiment dictionary built in the previous cell.
d_experiments


defaultdict(dict,
            {'pokec synthcorrattr': {'Graph_nrs': [1, 0],
              'Experiment_dir': PosixPath('/Volumes/tcs_jf_fair_node_sampling/TEST_SynthCorr/pokec_synthcorrlabels_02/data/pokec_synthcorrattr')}})

In [4]:
# Summarise the discovered experiments: first all names, then for each
# experiment its name, its graph numbers and its data directory.
print(d_experiments.keys())
for experiment_name, experiment_info in d_experiments.items():
    print(experiment_name)
    print(experiment_info["Graph_nrs"])
    print(experiment_info["Experiment_dir"])


dict_keys(['pokec synthcorrattr'])
pokec synthcorrattr
[1, 0]
/Volumes/tcs_jf_fair_node_sampling/TEST_SynthCorr/pokec_synthcorrlabels_02/data/pokec_synthcorrattr


## Choose the experiments and graphs to process

**This is mandatory**

- Set the names of the experiment(s) and the indices of the graph(s) to process.

In [5]:
# Process every experiment that was discovered. To restrict the run, list the
# names explicitly instead, e.g. experiments_list = ['pokec distinct'].
experiments_list = list(d_experiments)

# Indices of the graphs to process; add further indices (such as 2) as needed.
graph_nrs = [0, 1]

# The helper returns two dictionaries: the filtered attributes and the
# attribute names, both keyed by experiment.
(d_graph_filtered_attributes,
 d_graph_attributes) = get_filtered_attributes(
    d_experiments=d_experiments,
    experiments_list=experiments_list,
    graph_nrs=graph_nrs,
)

# Print attributes for each graph
for ex, per_graph in d_graph_attributes.items():
    for g_nr, attrs in per_graph.items():
        print(f"Attributes for {ex} {g_nr}: {attrs}")

Attributes for pokec synthcorrattr Graph_nr_0: ['label_region', 'label_AGE', 'synth_AGE_classes_3_corrRL2_-0.01_from64clusters', 'synth_region_classes_5_corrRL2_0.01_from64clusters', 'synth_region_classes_4_corrRL2_-0.05_from64clusters', 'synth_region_classes_4_corrRL2_0.56_from256clusters', 'synth_AGE_classes_3_corrRL2_0.25_from1024clusters']
Attributes for pokec synthcorrattr Graph_nr_1: ['label_region', 'label_AGE', 'synth_AGE_classes_3_corrRL2_0.29_from256clusters', 'synth_region_classes_3_corrRL2_0.54_from256clusters', 'synth_region_classes_3_corrRL2_-0.08_from64clusters', 'synth_region_classes_4_corrRL2_-0.05_from64clusters', 'synth_AGE_classes_3_corrRL2_-0.02_from64clusters', 'synth_AGE_classes_4_corrRL2_0.04_from64clusters']


In [6]:
# print(d_graph_attributes)

In [7]:
# Inspect the attribute names of one graph.
# The experiment and the graph index come from the selections made earlier
# (first selected experiment, first selected graph) instead of being
# hard-coded, so the cell keeps working when another experiment is processed.
# The value is a plain list of attribute names, not a DataFrame, so it gets a
# name that says so and no longer collides with the DataFrame loaded later.
graph_attr_names = d_graph_attributes[experiments_list[0]][f"Graph_nr_{graph_nrs[0]}"]
print(graph_attr_names)

['label_region', 'label_AGE', 'synth_AGE_classes_3_corrRL2_-0.01_from64clusters', 'synth_region_classes_5_corrRL2_0.01_from64clusters', 'synth_region_classes_4_corrRL2_-0.05_from64clusters', 'synth_region_classes_4_corrRL2_0.56_from256clusters', 'synth_AGE_classes_3_corrRL2_0.25_from1024clusters']


In [8]:
# Load the filtered attribute table of graph 1 for a quick visual check.
# The location is derived from the experiment directory recorded in
# d_experiments rather than from a machine-specific absolute path, so the
# notebook also runs when the data lives elsewhere.
df_attr_path = (
    d_experiments[experiments_list[0]]["Experiment_dir"]
    / "resources"
    / "graph_dir_1"
    / "filtered_attributes.csv"
)
# NOTE(review): the saved output shows an 'Unnamed: 0' column, which suggests
# the CSV was written with its index; pass index_col=0 if that column is unwanted.
df_attr = pd.read_csv(df_attr_path)

df_attr.head()

Unnamed: 0,user_id,region,AGE,label_region,label_AGE,synth_AGE_classes_3_corrRL2_0.29_from256clusters,synth_region_classes_3_corrRL2_0.54_from256clusters,synth_region_classes_3_corrRL2_-0.08_from64clusters,synth_region_classes_4_corrRL2_-0.05_from64clusters,synth_AGE_classes_3_corrRL2_-0.02_from64clusters,synth_AGE_classes_4_corrRL2_0.04_from64clusters
0,1988,"trnavsky kraj, velky meder",21.0,0,1,0,0,1,3,1,0
1,25028,"trnavsky kraj, velky meder",25.0,0,2,0,1,0,1,0,2
2,40738,"trnavsky kraj, velky meder",20.0,0,1,0,1,1,3,1,0
3,54987,"trnavsky kraj, velky meder",16.0,0,0,1,0,0,3,0,1
4,65155,"trnavsky kraj, velky meder",21.0,0,1,0,0,1,2,1,1


In [9]:
# Show the top-level keys (experiment names) of the filtered-attributes dictionary.
d_graph_filtered_attributes.keys()

dict_keys(['pokec synthcorrattr'])

## Create visual plots and a CSV with some graph data, such as connectivity, number of nodes, number of edges, etc.

**This is optional** (run time for the full example: <1 min)

Note: the igraph and networkx tools can be extended in \data_utils\graph

In [10]:
# Build and analyse the selected graphs; report_dir is handed to the helper as
# the location for its results.
# NOTE(review): the saved output of this cell shows a TypeError about the
# keyword 'd_graph_filtered_attributes', which this call does not pass. The
# output looks stale (from an earlier version of the call); re-run to confirm.
build_analyze_graph(
    d_experiments=d_experiments,
    d_graph_attributes=d_graph_attributes,
    experiments_list=experiments_list,
    graph_nrs=graph_nrs,
    report_dir=REPORT_DIR,
)

TypeError: build_analyze_graph() got an unexpected keyword argument 'd_graph_filtered_attributes'

## Create confusion matrices and average classification reports

Note: this takes some time to run (<1 min for the full example).


### Confusion matrices 
The results are saved in the \reports\confusion_reports directory.


In [None]:
# Generate the confusion reports for the selected experiments and graphs;
# report_dir tells the helper where the results belong.
create_confusion_reports(
    d_experiments=d_experiments,
    experiments_list=experiments_list,
    graph_nrs=graph_nrs,
    d_graph_filtered_attributes=d_graph_filtered_attributes,
    report_dir=REPORT_DIR,
)

### Combined reports

For plotting across multiple experiments, the code creates a combined report for each graph and biasing strategy.

In [None]:
# Generate the combined reports for the selected graphs across the experiments.
create_combined_reports(
    d_experiments=d_experiments,
    graph_nrs=graph_nrs,
    report_dir=REPORT_DIR,
)