In [3]:
%load_ext autoreload
%autoreload 2

In [10]:
import numpy as np
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Perceptron
from sklearn.model_selection import GridSearchCV
from transformers.wl_graph_kernel_transformer import WLGraphKernelTransformer
import graph_helper
import dataset_helper
import wl
import os
import pickle
import json

# Run a 3-fold grid search (WLGraphKernelTransformer -> PassiveAggressiveClassifier,
# scored with macro F1) once per cached graph dataset. The `cv_results_` of each
# search is pickled under data/results/ and also collected in `results`, keyed by
# the dataset's cache file name. Datasets with an existing result file are skipped,
# so the cell can be re-run to resume where it stopped.
results = {}
for cache_file in dataset_helper.get_all_cached_graph_datasets():
    # basename copes with the platform's path separator, unlike split('/').
    graph_dataset_cache_file = os.path.basename(cache_file)
    # NOTE: despite the ".npy" suffix the file holds a pickle (see pickle.dump
    # below); read it back with pickle.load.
    result_file = 'data/results/{}.results.npy'.format(graph_dataset_cache_file)
    print('{}\tDataset File: {}'.format('#' * 10, graph_dataset_cache_file))

    if os.path.exists(result_file):
        print('\tAlready calculated result: {}'.format(result_file))
        continue

    if not os.path.exists(cache_file):
        print('\tCould not find cachefile: "{}". Skipping.'.format(cache_file))
        continue

    # Create the output directory before the expensive fit so that a missing
    # directory cannot throw away a finished grid search at write time.
    os.makedirs(os.path.dirname(result_file), exist_ok=True)

    X, Y = dataset_helper.get_dataset('', use_cached = True, cache_file = cache_file)
    X, Y = np.array(X), np.array(Y)

    p = Pipeline([
        ('wl_transformer', WLGraphKernelTransformer()),
        ('clf', sklearn.linear_model.PassiveAggressiveClassifier())
    ])

    # Single-candidate grid: every parameter has exactly one value.
    # NOTE(review): `n_iter` appears to be deprecated in newer scikit-learn
    # releases in favour of `max_iter` -- confirm against the installed version.
    param_grid = dict(
        wl_transformer__H= [1],
        wl_transformer__n_jobs= [1],
        clf__n_iter=[100],
        clf__class_weight = ['balanced']
    )

    cv = GridSearchCV(estimator = p, param_grid=param_grid, cv=3, scoring = 'f1_macro', n_jobs=1, verbose = 11)
    gscv_result = cv.fit(X, Y)

    # Write to a temporary file and rename it into place: an interruption while
    # dumping must not leave a truncated result file behind, because the
    # os.path.exists(result_file) check above would then skip this dataset
    # on every later run.
    tmp_result_file = result_file + '.tmp'
    with open(tmp_result_file, 'wb') as f:
        pickle.dump(gscv_result.cv_results_, f)
    os.replace(tmp_result_file, result_file)

    results[graph_dataset_cache_file] = gscv_result.cv_results_

##########	Dataset File: dataset_graph_cooccurrence_1_no-nouns_cade-ana.npy
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] clf__n_iter=100, wl_transformer__n_jobs=1, wl_transformer__H=1, clf__class_weight=balanced 
WLGraphKernelTransformer.fit: len(X)=27317, H=1
Number of original labels 136859
K original is computed
Iteration 0: phi is computed
	Graph          0/1500
	Graph        750/1500
Number of original labels 136859
K original is computed
Iteration 0: phi is computed
	Graph          0/1500
	Graph        750/1500
Number of original labels 136859
K original is computed
Iteration 0: phi is computed
	Graph          0/1500
	Graph        750/1500
Number of original labels 136859
K original is computed
Iteration 0: phi is computed
	Graph          0/1500
	Graph        750/1500
Number of original labels 136859
K original is computed
Iteration 0: phi is computed
	Graph          0/1500


KeyboardInterrupt: 