-
Notifications
You must be signed in to change notification settings - Fork 1
/
__main__.py
129 lines (114 loc) · 5.01 KB
/
__main__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import csv
import os
import sys
import time
from tool_kit.colors import bcolors
from configuration.configuration import getConfig
from dataset_loader.loader import data_loader
from graph_generation.full_graph_generation import graph_generation
from graph_generation.sub_graph_generation_random_selection import random_selection
from graph_generation.sub_graph_generation_random_walk import random_walk
from dataset_generation.graph_feature_extraction import structural_feature_extraction
from dataset_generation.xgboost_dataset_generator import xgboost_generator
from dataset_generation.decision_tree_dataset_generator import Decision_Tree
from regressor.xgboost_regressor import XgboostRegression
from subgraph_embadding.sub2vec import sub2vec
from Test_Module.Test import GA_Feature_Selection
from Test_Module.test_regression_model import test_dataset_cross_validation
from DB.csv_db import CSV_DB
from regressor.RF_Regressor import RandomForestReg
from DB.schema_definition import DB
from graph_generation.sub_graph_generation_algo_feature_selection import algo_feature_selection
from prediction.challenge import challenge_prediction
from feature_selection.SA_feature_selection import simulated_annealing_feature_selection
from Test_Module.benchmark import benchmark
from dataset_loader.full_graph_feature_extraction import full_graph_fs
from dataset_generation.global_local_feature_extraction import global_local_fs
import logging
logging.basicConfig(filename='progression_log.txt', level=logging.DEBUG)
# Registry of pipeline-module classes, keyed by the config-section name that
# enables each one.  The pipeline loop below instantiates an entry only when
# a section with the matching name appears in the configuration file.
# NOTE(review): a relational backend also exists — swap CSV_DB for DB (and
# adjust the db instance below) to use it.
# Fix: the original registered 'full_graph_fs' twice; the redundant second
# assignment has been dropped (a dict reassignment was a no-op anyway).
modules_dict = {
    "DB": CSV_DB,
    'data_loader': data_loader,
    'full_graph_fs': full_graph_fs,
    'graph_generation': graph_generation,
    'algo_feature_selection': algo_feature_selection,
    'random_selection': random_selection,
    'random_walk': random_walk,
    'structural_feature_extraction': structural_feature_extraction,
    'xgboost_generator': xgboost_generator,
    'Decision_Tree': Decision_Tree,
    'global_local_fs': global_local_fs,
    'sub2vec': sub2vec,
    'RandomForestReg': RandomForestReg,
    'XgboostRegression': XgboostRegression,
    'GA_Feature_Selection': GA_Feature_Selection,
    'test_dataset_cross_validation': test_dataset_cross_validation,
    'challenge_prediction': challenge_prediction,
    'simulated_annealing_feature_selection': simulated_annealing_feature_selection,
    'benchmark': benchmark,
}
# Shared database handle handed to every pipeline module on construction.
db = CSV_DB()
# Run-wide settings come from the [DEFAULT] section of the active config.
window_start = getConfig().eval("DEFAULT", "start_date")
disable_prints = getConfig().eval("DEFAULT", "disable_prints")

# Optionally silence all print() output by pointing stdout at the null device.
# (It is restored from sys.__stdout__ at the end of the script.)
if disable_prints:
    sys.stdout = open(os.devnull, 'w')

# Timing rows are appended to benchmark.csv; the header is written only when
# the file is first created.  One column per registered module, plus metadata.
newbmrk = os.path.isfile("benchmark.csv")
bmrk_file = open("benchmark.csv", 'a', newline='')
field_names = ["time", "jobnumber", "config", "window_size", "window_start",
               "dones", "posts", "authors"]
field_names.extend(modules_dict.keys())
bmrk_results = csv.DictWriter(bmrk_file, field_names,
                              dialect="excel", lineterminator="\n")
if not newbmrk:
    bmrk_results.writeheader()

# `db` was already instantiated above; swapping the "DB" factory for the
# identity lets the construction loop below pass that existing object
# straight into the pipeline instead of building a second connection.
modules_dict["DB"] = lambda x: x
# Instantiate one pipeline module per config section that has a registered
# class in modules_dict; unregistered sections are silently skipped.
# Fixes vs. original: removed the `parameters = {}` dict that was rebuilt on
# every iteration but never read, and collapsed the double modules_dict
# lookup into a single .get().
pipeline = []
for section in getConfig().sections():
    module_cls = modules_dict.get(section)
    if module_cls is not None:
        pipeline.append(module_cls(db))
# Phase 1: call every module's setUp() and collect per-module wall time into
# a single benchmark row tagged "setup" in the window_start column.
bmrk = {"config": getConfig().getfilename(), "window_start": "setup"}
for module in pipeline:
    name = module.__class__.__name__
    print(bcolors.YELLOW + 'Started setup ' + name + bcolors.ENDC)
    logging.info(f"starting setup {name}")
    started = time.perf_counter()
    module.setUp()
    elapsed = time.perf_counter() - started
    logging.info(f"finished setup {name}")
    print(bcolors.YELLOW + 'Finished setup ' + name + bcolors.ENDC)
    bmrk[name] = elapsed
bmrk_results.writerow(bmrk)
bmrk_file.flush()
# Phase 2: call every module's execute() and collect per-module wall time into
# a second benchmark row tagged "execute".
bmrk = {"config": getConfig().getfilename(), "window_start": "execute"}
for module in pipeline:
    logging.info(f"Started executing {module.__class__.__name__}")
    # Fix: the original used time.time() here while the setup loop used
    # time.perf_counter().  perf_counter() is monotonic and high-resolution,
    # so the two phases now produce comparable timings and are immune to
    # system-clock adjustments mid-run.
    T = time.perf_counter()
    print(bcolors.YELLOW + 'Started executing ' + module.__class__.__name__ + bcolors.ENDC)
    module.execute(window_start)
    print(bcolors.YELLOW + 'Finished executing ' + module.__class__.__name__ + bcolors.ENDC)
    T = time.perf_counter() - T
    logging.info(f"Finished executing {module.__class__.__name__}")
    bmrk[module.__class__.__name__] = T
bmrk_results.writerow(bmrk)
bmrk_file.flush()
# Put the real stdout back if it was swapped out for os.devnull at startup.
if disable_prints:
    sys.stdout = sys.__stdout__
# x = pipeline[0].execQuery('SELECT * FROM dataset_feature_correlation WHERE feature1=\'f1\'')
# for record in x:
# print(record)
"""
pipeline:
|--- create database connection
| |--- create schema if needed
|--- dataset loader
| |--- create a correlation matrix
|--- graph generation
| |--- create a table fit to the schema
| |--- fill it with correlations from the matrix
|--- subgraph generation
| |--- randomly choose records from the tables (the full graph)
.
.
.
"""