In [1]:
from pathlib import Path

from report_config import DATA_DIR, REPORT_DIR, SUB_EXPERIMENTS_NAME_START, ATTRIBUTES
from report_utils import (create_reporting_process_dict, 
                          get_filtered_attributes, 
                          build_analyze_graph, 
                          create_confusion_reports, 
                          create_combined_reports, 
)
                          

  from .autonotebook import tqdm as notebook_tqdm


## Process the experiment data directories 

**This is mandatory**

Create a dictionary of the experiment names, their data directories, and the corresponding experiment data files.

In [2]:
# Build the experiment dictionary from DATA_DIR. The helper prints the
# experiment/graph names it finds while it runs.
d_experiments = create_reporting_process_dict(
    data_dir=DATA_DIR,
    sub_ex_name_start=SUB_EXPERIMENTS_NAME_START,
)

Experiment/Graph name(s) found: 
pokec semi
/Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/pokec_semi/resources
With
  Graph 0
pokec distinct
/Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/pokec_distinct/resources
With
  Graph 1
  Graph 0


In [3]:
# Show which experiments were found, then for each one its name, the list of
# graph numbers, and its experiment directory (one value per line).
print(d_experiments.keys())
for key, experiment in d_experiments.items():
    print(key, experiment["Graph_nrs"], experiment["Experiment_dir"], sep="\n")


dict_keys(['pokec semi', 'pokec distinct'])
pokec semi
[0]
/Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/pokec_semi
pokec distinct
[1, 0]
/Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/pokec_distinct


## Choose the experiments and graphs to process

**This is mandatory**

- Set the names of the experiment(s) and the indices of the graph(s) to process.

In [4]:
# Process every experiment that was found. To restrict the run, replace this
# with an explicit list of experiment names, e.g. ['pokec distinct'].
experiments_list = list(d_experiments)

# Graph indices to process. An index that an experiment does not have is
# reported by the helper ("... not found in the dictionary") and left out of
# the result.
graph_nrs = [0, 1]

# Attribute column names: every entry of ATTRIBUTES with a "label_" prefix.
attributes = [f"label_{attr}" for attr in ATTRIBUTES]

d_graph_filtered_attributes = get_filtered_attributes(
    d_experiments=d_experiments,
    experiments_list=experiments_list,
    graph_nrs=graph_nrs,
    attributes=attributes,
)

# NOTE(review): this displays the repr of every DataFrame in the mapping;
# consider showing only the keys or a .head() of one table instead.
d_graph_filtered_attributes

Graph 1 of experiment pokec semi not found in the dictionary


defaultdict(dict,
            {'pokec semi': {'Graph_nr_0':       user_id  label_region  label_AGE
              0        2142             0          2
              1        3647             0          2
              2        4916             0          2
              3        7587             0          2
              4        8124             0          2
              ...       ...           ...        ...
              9235  1596545             3          2
              9236  1599400             3          2
              9237  1621406             3          2
              9238  1625668             3          2
              9239  1625748             3          2
              
              [9240 rows x 3 columns]},
             'pokec distinct': {'Graph_nr_0':       user_id  label_region  label_AGE
              0        1988             0          1
              1       25028             0          2
              2       40738             0          1
              3    

In [5]:
# List the experiment names that have an entry in the filtered-attributes mapping.
d_graph_filtered_attributes.keys()

dict_keys(['pokec semi', 'pokec distinct'])

In [6]:
# Sanity check: pull the table for graph 0 of the 'pokec distinct' experiment
# and preview its first rows.
df_filtered_attributes = (
    d_graph_filtered_attributes
    .get('pokec distinct')
    .get("Graph_nr_0")
)
df_filtered_attributes.head()

Unnamed: 0,user_id,label_region,label_AGE
0,1988,0,1
1,25028,0,2
2,40738,0,1
3,54987,0,0
4,65155,0,1


## Create visual plots and a CSV with some graph data, such as connectivity, number of nodes, number of edges, etc.

**This is optional** (run time for the full example: <1 min)

Note: the igraph and networkx tools can be extended in `\data_utils\graph`.

In [7]:
# Analyze the selected graphs. According to the messages the helper prints,
# results are saved under REPORT_DIR in "graph_plots" and "graph_specs".
# NOTE(review): this passes the raw ATTRIBUTES, not the "label_"-prefixed
# `attributes` list used for get_filtered_attributes — confirm this is intended.
build_analyze_graph(
    d_experiments=d_experiments,
    experiments_list=experiments_list,
    graph_nrs=graph_nrs,
    report_dir=REPORT_DIR,
    attributes=ATTRIBUTES,
)

Graph 0 of experiment pokec semi analyzed                                     
 and saved to /Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/reports / graph_plots and / graph_specs
Graph 0 of experiment pokec distinct analyzed                                     
 and saved to /Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/reports / graph_plots and / graph_specs
Graph 1 of experiment pokec distinct analyzed                                     
 and saved to /Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/reports / graph_plots and / graph_specs


# Create confusion matrices and average classification reports

Note: this takes some time to run (<1 min for the full example).


### Confusion matrices 
The results are saved in the `\reports\confusion_reports` directory.


In [8]:
# Create the confusion reports for the selected experiments and graphs, using
# the filtered attribute tables built earlier in the notebook. The helper
# prints where each report is saved.
create_confusion_reports(
    d_experiments=d_experiments,
    d_graph_filtered_attributes=d_graph_filtered_attributes,
    report_dir=REPORT_DIR,
    experiments_list=experiments_list,
    graph_nrs=graph_nrs,
)

Graph dir: /Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/pokec_semi/results/graph_0, 
                             exp_graph_name: pokec semi 0, 
                             graph_name: graph_0
Confusion reports for pokec semi 0 created                 
and saved in /Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/reports/confusion_reports.
Graph dir: /Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/pokec_distinct/results/graph_0, 
                             exp_graph_name: pokec distinct 0, 
                             graph_name: graph_0
Confusion reports for pokec distinct 0 created                 
and saved in /Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/reports/confusion_reports.
Graph dir: /Volumes/tcs_jf_fair_node_sampling/test_exprmt_diff_nr_graphs/data/pokec_distinct/results/graph_1, 
                             exp_graph_name: pokec distinct 1, 
                             graph_name: graph_

### Combined reports

For plotting across multiple experiments, the code creates a combined report for each graph and biasing strategy.

In [9]:
# Create the combined reports for the selected graph indices.
# NOTE(review): the captured output repeats a `combined_df = pd.concat(...)`
# source line, which looks like a pandas warning raised inside the helper —
# confirm and fix it in report_utils rather than here.
create_combined_reports(
    d_experiments=d_experiments,
    report_dir=REPORT_DIR,
    graph_nrs=graph_nrs,
)

  combined_df = pd.concat([combined_df, pd.DataFrame({
  combined_df = pd.concat([combined_df, pd.DataFrame({
  combined_df = pd.concat([combined_df, pd.DataFrame({
  combined_df = pd.concat([combined_df, pd.DataFrame({
  combined_df = pd.concat([combined_df, pd.DataFrame({
  combined_df = pd.concat([combined_df, pd.DataFrame({
  combined_df = pd.concat([combined_df, pd.DataFrame({
  combined_df = pd.concat([combined_df, pd.DataFrame({
  combined_df = pd.concat([combined_df, pd.DataFrame({
