In [None]:
from functions import *
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="ticks", color_codes=True)
import pickle
import re
from sklearn.metrics.pairwise import cosine_similarity



In [1]:
from functions import *
# Module-level logger used by run_experiment_gnn and by the driver loop below.
# get_logger comes from the project's `functions` module (star-imported above).
logger = get_logger('log_experiments')
# Fix the seed once, before any experiment runs. seed_everything is also
# provided by `functions`; presumably it seeds the RNGs used during training —
# confirm in that module.
seed_everything(seed=42)

def run_experiment_gnn(database_name,
                       K,
                       hidden_channels,
                       num_layers,
                       p_dropout,
                       Kc,
                       docf,
                       docst,
                       activation,
                       version,
                       num_epochs):
    """Run one heterograph GNN experiment and summarise its test metrics.

    Loads the pickled train/test heterographs identified by
    ``database_name``, ``K``, ``Kc``, ``docf`` and ``docst``, passes them to
    ``run_heterognn_splitted`` and collects the four values it returns into a
    one-row ``pandas.DataFrame``.

    Any exception raised along the way is logged (with traceback) and
    swallowed, so that a failing configuration does not abort a sweep over
    several configurations.

    Args:
        database_name: Dataset identifier; selects the sub-directory of
            ``./pickle_objects/preprocess/`` and is part of the file names.
        K: Preprocessing setting; only used here to build the file name and
            recorded in the result row.
        hidden_channels: Forwarded to ``run_heterognn_splitted``.
        num_layers: Forwarded to ``run_heterognn_splitted``.
        p_dropout: Forwarded to ``run_heterognn_splitted``.
        Kc: Preprocessing setting; only used for the file name and the
            result row.
        docf: Preprocessing setting; only used for the file name and the
            result row.
        docst: Preprocessing setting; only used for the file name and the
            result row.
        activation: Forwarded to ``run_heterognn_splitted`` and included in
            the training description string.
        version: Forwarded to ``run_heterognn_splitted`` and included in the
            training description string.
        num_epochs: Forwarded to ``run_heterognn_splitted``.

    Returns:
        A one-row ``pandas.DataFrame`` with the configuration and the test
        metrics on success, or ``None`` if the experiment failed.
    """
    try:
        # `!s` keeps the explicit str() conversion of the original file names.
        heterodata_description = (
            f'{database_name}_K_{K}_Kc_{Kc}_docf_{docf!s}_docst_{docst!s}'
        )
        preprocess_dir = f'./pickle_objects/preprocess/{database_name}'

        # NOTE(security): pickle.load executes arbitrary code embedded in the
        # file; only load pickles produced by this project's own preprocessing.
        heterodata = {}
        for split in ('train', 'test'):
            pickle_path = (
                f'{preprocess_dir}/heterodata_pbg_{heterodata_description}_{split}.pickle'
            )
            with open(pickle_path, 'rb') as f:
                heterodata[split] = pickle.load(f)

        logger.info(f'Loaded preprocessed heterographs {heterodata_description}.')

        training_description = f'{heterodata_description}_act_{activation}_ver_{version}'

        # run_heterognn_splitted is expected to return exactly four values.
        loss_test, micro_test, acc_test, epoch_convergence = run_heterognn_splitted(
            database_name=database_name,
            description=training_description,
            heterodata_train=heterodata['train'],
            heterodata_test=heterodata['test'],
            hidden_channels=hidden_channels,
            num_layers=num_layers,
            p_dropout=p_dropout,
            num_epochs=num_epochs,
            aggr='sum',
            version=version,
            activation=activation,
            verbose=True)

        # Build the summary row directly: DataFrame.append was removed in
        # pandas 2.0, and here it would only fail after training had finished.
        # Dict insertion order fixes the column order.
        df = pd.DataFrame([{
            'database_name': database_name,
            'K': K,
            'Kc': Kc,
            'docf': docf,
            'docst': docst,
            'hidden_channels': hidden_channels,
            'num_layers': num_layers,
            'p_dropout': p_dropout,
            'activation': activation,
            'version': version,
            'loss_test': loss_test,
            'micro_test': micro_test,
            'acc_test': acc_test,
            'epoch_convergence': epoch_convergence,
        }])

        # Appending the row to the shared summary CSV is currently disabled:
        #         with open('./csv_objects/summary/experiments_summary.csv', 'a') as f:
        #             df.to_csv(f, mode='a', sep=';', decimal=',', index=False, header=f.tell()==0)

        # NOTE(review): this function itself writes nothing to disk; any
        # pickle output mentioned below would have to come from
        # run_heterognn_splitted — confirm there.
        logger.info(f'Executed experiments on {database_name} {training_description}. Results saved as pickle objects.')
        return df

    except Exception as e:
        # Best-effort by design: keep the sweep going, but log at ERROR level
        # with the traceback so the failure is not lost among INFO lines.
        logger.exception(f'Error occurred: \n{e}')
        return None

if __name__ == '__main__':
    # --- Sweep axes: every combination of these three lists is run. ---
    # Earlier dataset selection kept for reference:
    # ['20ng', '20ng', 'bbcnews', 'reuters', 'classic4', 'nsf', 'webkb', 'agnews']
    databse_list = ['classic4']
    # Earlier activation selection kept for reference: ['ce', 'fl']
    activation_list = ['fl']
    gnn_version_list = [1, 2, 3, 4]

    # --- Settings that select the preprocessed heterograph files. ---
    K = 50
    Kc = 400
    docf = 'replace'
    docst = 0.5

    # --- Model / training settings shared by every run. ---
    hidden_channels = 400
    num_layers = 3
    p_dropout = 0.2
    num_epochs = 1500

    logger.info('Running experiments on datasets with heterographs and GNNs.')

    # Iteration order: dataset (outer), activation, GNN version (inner).
    for database_name in databse_list:
        for activation in activation_list:
            for version in gnn_version_list:
                run_experiment_gnn(database_name=database_name,
                                   K=K,
                                   hidden_channels=hidden_channels,
                                   num_layers=num_layers,
                                   p_dropout=p_dropout,
                                   Kc=Kc,
                                   docf=docf,
                                   docst=docst,
                                   activation=activation,
                                   version=version,
                                   num_epochs=num_epochs)

# IPython shell escape: recursively zip the /content/csv_objects directory
# into /content/csv_objects.zip.
# NOTE(review): the CSV write in run_experiment_gnn is commented out, so this
# archive may only contain files from earlier runs — confirm.
!zip -r /content/csv_objects.zip /content/csv_objects
# Copy the archive to /content/drive/MyDrive/project (presumably a mounted
# Google Drive folder in Colab — confirm the mount exists before running).
!cp /content/csv_objects.zip  /content/drive/MyDrive/project 


[INFO][2023-04-30 14:12:21 - Mod: 125686976 - Func: <module> - Line: 72]: Running experiments on datasets with heterographs and GNNs.
[INFO][2023-04-30 14:12:21 - Mod: 125686976 - Func: run_experiment_gnn - Line: 25]: Loaded preprocessed heterographs classic4_K_50_Kc_400_docf_replace_docst_0.5.
  0%|                                                  | 0/1500 [00:00<?, ?it/s][INFO][2023-04-30 14:12:25 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 1.1242, Loss (test): 1.1528, F1 (train): 0.2960, F1 (test): 0.0702
  0%|                                        | 1/1500 [00:03<1:19:09,  3.17s/it][INFO][2023-04-30 14:12:28 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 1.0887, Loss (test): 1.1737, F1 (train): 0.2809, F1 (test): 0.0702
  0%|                                        | 2/1500 [00:06<1:15:11,  3.01s/it][INFO][2023-04-30 14:12:30 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 1.0610, Loss (test): 

  2%|▊                                      | 32/1500 [01:35<1:12:54,  2.98s/it][INFO][2023-04-30 14:14:00 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 0.3836, Loss (test): 2.0973, F1 (train): 0.7785, F1 (test): 0.1617
  2%|▊                                      | 33/1500 [01:38<1:12:41,  2.97s/it][INFO][2023-04-30 14:14:03 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 0.3473, Loss (test): 2.2711, F1 (train): 0.8537, F1 (test): 0.1480
  2%|▉                                      | 34/1500 [01:41<1:12:22,  2.96s/it][INFO][2023-04-30 14:14:06 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 0.3081, Loss (test): 2.5020, F1 (train): 0.8954, F1 (test): 0.1418
  2%|▉                                      | 35/1500 [01:44<1:12:12,  2.96s/it][INFO][2023-04-30 14:14:08 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 0.2684, Loss (test): 2.5937, F1 (train): 0.8927, F1 (test): 0.1436


  1%|▎                                      | 13/1500 [01:33<2:56:56,  7.14s/it][INFO][2023-04-30 14:16:28 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 0.9176, Loss (test): 1.2328, F1 (train): 0.2809, F1 (test): 0.0702
  1%|▎                                      | 14/1500 [01:41<2:56:48,  7.14s/it][INFO][2023-04-30 14:16:36 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 0.8931, Loss (test): 1.2445, F1 (train): 0.2809, F1 (test): 0.0702
  1%|▍                                      | 15/1500 [01:48<2:58:03,  7.19s/it][INFO][2023-04-30 14:16:43 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 0.8636, Loss (test): 1.2800, F1 (train): 0.2809, F1 (test): 0.0702
  1%|▍                                      | 16/1500 [01:56<3:02:07,  7.36s/it][INFO][2023-04-30 14:16:50 - Mod: functions - Func: run_heterognn_splitted - Line: 696]: Loss (train): 0.8282, Loss (test): 1.3075, F1 (train): 0.2809, F1 (test): 0.0702


KeyboardInterrupt: 