In [1]:
import numpy as np
import math
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ShuffleSplit, cross_val_score

In [84]:
def read_label_matrix(file_path):
    def read():
        with open(file_path, "r") as f:
            for line in f:
                yield int(line.strip())
    return np.array(list(read()))

In [85]:
def read_kernel_matrix(file_path):
    def read():
        with open(file_path, 'r') as f:
            for line in f:
                yield line.strip().split(' ')
    return np.array(list(read()))

In [86]:
# Finished: MUTAG, PROTEINS, NCI1

In [110]:
DATASET = 'MUTAG'
LABEL_PATH = '/Users/Fabian/Documents/HPI/Master/18SS/smart_representations/datasets/{}/{}_graph_labels.txt'.format(DATASET, DATASET)
KERNEL_PATH = '/Users/Fabian/Documents/HPI/Master/18SS/smart_representations/results/mlg/{}_output.txt'.format(DATASET)

In [111]:
label_matrix = read_label_matrix(LABEL_PATH)
kernel_matrix = read_kernel_matrix(KERNEL_PATH)

In [112]:
def score_n_fold(train, test, n, c):
    cv = ShuffleSplit(n_splits=n, test_size=0.33)
    clf = svm.SVC(kernel='precomputed', C=c, class_weight='balanced')
    return cross_val_score(clf, kernel_matrix, label_matrix, cv=cv).mean(), c

In [113]:
score_n_fold(kernel_matrix, label_matrix, 10, 1000)

(0.8555555555555555, 1000)

In [114]:
penalties = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
scores = [score_n_fold(kernel_matrix, label_matrix, 10, c) for c in penalties]
scores

[(0.5555555555555556, 0.001),
 (0.5253968253968253, 0.01),
 (0.638095238095238, 0.1),
 (0.8904761904761905, 1),
 (0.8317460317460317, 10),
 (0.8571428571428571, 100),
 (0.8714285714285713, 1000)]

In [139]:
from scipy import sparse as sps

def load_dense_matrix(data_dir, dataset):
    file_start = '{}/{}'.format(data_dir, dataset)
    
        
# for datadir in os.listdir(rootdir):
    offsets = np.loadtxt(file_start + '_graph_indicator.txt', dtype=np.int, delimiter=',') - 1
    offs = np.append([0], np.append(np.where((offsets[1:] - offsets[:-1])>0)[0]+1, len(offsets)))
    A_data = np.loadtxt(file_start +'_A.txt', dtype=np.int, delimiter=',') - 1
    A_mat = sps.csr_matrix((np.ones(A_data.shape[0]), (A_data[:, 0], A_data[:, 1])), dtype=np.int)
    As = []
    for i in range(1, len(offs)):
        As.append(A_mat[offs[i-1]:offs[i],offs[i-1]:offs[i]])
    am = [x.astype(np.float64) for x in As]
    return am

In [140]:
DATASET_PATH = '/Users/Fabian/Documents/HPI/Master/18SS/smart_representations/datasets/MUTAG'
matrices = load_dense_matrix(DATASET_PATH, 'MUTAG')

In [141]:
matrices

[<17x17 sparse matrix of type '<class 'numpy.float64'>'
 	with 38 stored elements in Compressed Sparse Row format>,
 <13x13 sparse matrix of type '<class 'numpy.float64'>'
 	with 28 stored elements in Compressed Sparse Row format>,
 <13x13 sparse matrix of type '<class 'numpy.float64'>'
 	with 28 stored elements in Compressed Sparse Row format>,
 <19x19 sparse matrix of type '<class 'numpy.float64'>'
 	with 44 stored elements in Compressed Sparse Row format>,
 <11x11 sparse matrix of type '<class 'numpy.float64'>'
 	with 22 stored elements in Compressed Sparse Row format>,
 <28x28 sparse matrix of type '<class 'numpy.float64'>'
 	with 62 stored elements in Compressed Sparse Row format>,
 <16x16 sparse matrix of type '<class 'numpy.float64'>'
 	with 34 stored elements in Compressed Sparse Row format>,
 <20x20 sparse matrix of type '<class 'numpy.float64'>'
 	with 44 stored elements in Compressed Sparse Row format>,
 <12x12 sparse matrix of type '<class 'numpy.float64'>'
 	with 26 stored

In [142]:
convert_to_mlg_format(matrices)

['188',
 '17',
 '0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0',
 '1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0',
 '0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0',
 '0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0',
 '0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0',
 '1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0',
 '0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0',
 '0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0',
 '0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0',
 '0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0',
 '0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0',
 '0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0',
 '0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0',
 '0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0',
 '0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1',
 '0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0',
 '0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0',
 '13',
 '0 1 0 0 0 0 0 0 0 1 0 0 0',
 '1 0 1 0 0 0 0 0 0 0 0 0 0',
 '0 1 0 1 0 0 0 1 0 0 0 0 0',
 '0 0 1 0 1 0 0 0 0 0 0 0 0',
 '0 0 0 1 0 1 0 0 0 0 0 0 0',
 '0 0 0 0 1 0 1 0 0 0 0 0 0',
 '0 0 0 0 0 1 0 1 0 0 0 0 0',
 '0 0 1 0 0 0 1 0 1 0 0 0 0',
 '0 0 0 0 0 0 0 1 0 1 1 0 0',
 '1 0 0 0 0 0 0 0 1 0 0 0 0',
 '0 0 0 0 0 0 0 0 1 0 0 1 1',
 '

In [100]:
def convert_to_mlg_format(matrices):
    lines = []
    lines.append(str(len(matrices)))
    for matrix in matrices:
        lines.append(str(matrix.shape[0]))
        for row in matrix.todense():
            lines.append(' '.join([str(int(x)) for x in row.tolist()[0]]))
    return lines        

In [101]:
' '.join([str(int(x)) for x in matrices[3][0].todense()[0].tolist()[0]])

'0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0'

In [102]:
lines = convert_to_mlg_format(matrices[3])

In [103]:
lines

['188',
 '17',
 '0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0',
 '1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0',
 '0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0',
 '0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0',
 '0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0',
 '1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0',
 '0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0',
 '0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0',
 '0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0',
 '0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0',
 '0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0',
 '0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0',
 '0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0',
 '0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0',
 '0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1',
 '0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0',
 '0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0',
 '13',
 '0 1 0 0 0 0 0 0 0 1 0 0 0',
 '1 0 1 0 0 0 0 0 0 0 0 0 0',
 '0 1 0 1 0 0 0 1 0 0 0 0 0',
 '0 0 1 0 1 0 0 0 0 0 0 0 0',
 '0 0 0 1 0 1 0 0 0 0 0 0 0',
 '0 0 0 0 1 0 1 0 0 0 0 0 0',
 '0 0 0 0 0 1 0 1 0 0 0 0 0',
 '0 0 1 0 0 0 1 0 1 0 0 0 0',
 '0 0 0 0 0 0 0 1 0 1 1 0 0',
 '1 0 0 0 0 0 0 0 1 0 0 0 0',
 '0 0 0 0 0 0 0 0 1 0 0 1 1',
 '

In [104]:
with open('/Users/Fabian/Documents/HPI/Master/18SS/smart_representations/datasets/MUTAG/MUTAG_dense.txt', 'w') as f:
    f.write('\n'.join(lines))
    