In [2]:
!pip install spektral

Collecting spektral
[?25l  Downloading https://files.pythonhosted.org/packages/c8/97/1e7355b9de1af57531172675e4cabf83e2242baa5bf3c172f19989525630/spektral-0.6.0-py3-none-any.whl (95kB)
[K     |███▍                            | 10kB 27.9MB/s eta 0:00:01[K     |██████▉                         | 20kB 29.2MB/s eta 0:00:01[K     |██████████▎                     | 30kB 32.5MB/s eta 0:00:01[K     |█████████████▊                  | 40kB 25.1MB/s eta 0:00:01[K     |█████████████████▏              | 51kB 14.3MB/s eta 0:00:01[K     |████████████████████▋           | 61kB 13.2MB/s eta 0:00:01[K     |████████████████████████        | 71kB 12.7MB/s eta 0:00:01[K     |███████████████████████████▌    | 81kB 12.6MB/s eta 0:00:01[K     |███████████████████████████████ | 92kB 12.2MB/s eta 0:00:01[K     |████████████████████████████████| 102kB 7.8MB/s 
Installing collected packages: spektral
Successfully installed spektral-0.6.0


In [3]:
import numpy as np
import pandas as pd
import spektral
import os
import pathlib
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from scipy import sparse
from spektral.layers import GraphConv
from spektral.layers.ops import sp_matrix_to_sp_tensor
from spektral.layers import ChebConv
import gc

# Seed NumPy's global RNG. Only NumPy is seeded here; TensorFlow's own random
# state is not seeded in this cell.
np.random.seed(42)
# Turn off TF2 eager execution for the rest of the session (must run before
# any model is built).
tf.compat.v1.disable_eager_execution()

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.metrics import precision_score, recall_score

# Per-pathway result containers.
metric_scores_per_pathway = list()
pathways_not_used = list()  # for those having <10 gene features

# Input directories (absolute paths under /content/drive).
kegg_pathways_path = '/content/drive/My Drive/IIITH/GCN_KEGG/KEGG_csv'    # one CSV per KEGG pathway
rcc_dataset_path = '/content/drive/My Drive/IIITH/GCN_KEGG/GCN_Dataset/CSV'  # expression CSVs


############## LOAD INPUT DATASET ##########################
def load_dataset(path, filename, transpose=True):
	'''
		Loads a CSV and, by default, returns its transpose keyed by the "pid" column.

		path      : directory containing the file
		filename  : name of the CSV file inside `path`
		transpose : when True, the "pid" column (the CSV's unnamed first column,
		            cast to str) becomes the column labels and every other
		            original column becomes a row; when False the frame is
		            returned as read, with only the rename applied.

		Returns a DataFrame. In the transposed form "pid" is moved into the index
		*before* transposing, so the remaining values keep their numeric dtype
		(transposing with the string column still in the frame would turn every
		value into `object`).

		Raises KeyError when transpose=True and the file has no "pid" /
		"Unnamed: 0" column.
	'''
	df = pd.read_csv(os.path.join(path, filename))
	df = df.rename(columns={"Unnamed: 0": "pid"})
	if transpose:
		# Cast ids to str so columns can be looked up by string gene id.
		df = df.astype({"pid": str}).set_index("pid").T
	return df

# Read the three cohort files (transposed, so each row is one tumour sample)
# and tag every sample with its integer class label: KIRP=0, KIRC=1, KICH=2.
df_kirp = load_dataset(rcc_dataset_path, 'KIRP_290_tumors_log_transformed.csv', transpose=True).assign(y=0)
df_kirc = load_dataset(rcc_dataset_path, 'KIRC_518_tumors_log_transformed.csv', transpose=True).assign(y=1)
df_kich = load_dataset(rcc_dataset_path, 'KICH_81_tumors_log_transformed.csv', transpose=True).assign(y=2)

# Stack the cohorts; the per-cohort frames are not needed afterwards.
data = pd.concat([df_kirp, df_kirc, df_kich])
del df_kirp, df_kirc, df_kich

# Split the label column off the feature table (pop removes it from `data`).
y = data.pop('y')

# Stratified 80/20 hold-out split with a fixed random_state.
from sklearn.model_selection import train_test_split
data_train, data_test, y_train, y_test = train_test_split(
    data, y, test_size=0.2, random_state=0, stratify=y)
##########################################################

# Metric slots, reset to zero here and overwritten for every pathway later on.
f1_weighted_per_fold = f1_macro_per_fold = f1_micro_per_fold = 0
testacc_per_fold = precision_per_fold = recall_per_fold = 0


##########################################################


##########################################################



In [None]:
# Display the training split of the feature table (rows are samples, columns
# are gene ids; pandas truncates the rendering).
data_train

pid,100130426,100133144,100134869,10357,10431,136542,155060,26823,280660,317712,340602,388795,390284,391343,391714,404770,441362,442388,553137,57714,645851,652919,653553,728045,728603,728788,729884,8225,90288,1,29974,54715,87769,2,144568,53947,51146,404744,8086,65985,...,124626,131368,54764,9406,84083,8233,84891,284312,9753,221584,80345,65982,7579,7589,342945,222696,54993,146050,201516,79149,342933,90204,151112,140831,65249,57643,57688,125150,221302,9183,55055,11130,7789,158586,79364,440590,79699,7791,23140,26009
TCGA-BP-4355,0,2.56098,3.209,6.97211,9.22317,0,9.01515,0.539432,0,0,2.96362,1.23891,3.39521,0.539432,0,0,0,0,9.74619,8.80016,5.12944,1.89542,8.70462,0,0,1.49237,1.23891,9.73472,5.41655,5.90861,6.39956,0,6.43242,15.8342,0,11.1104,5.40116,0,9.35187,8.23646,...,0,0.539432,10.3427,10.7356,5.46178,9.03773,0,3.67672,8.02423,5.27473,6.2436,11.0841,6.43779,8.18795,6.72427,5.44686,8.58197,9.05012,3.19549,6.54675,1.49237,7.73289,0,6.49692,9.00373,7.70808,10.1786,8.57526,7.5106,8.02172,6.81333,6.64805,5.90545,7.91194,10.6831,1.89542,10.0004,12.0594,10.3682,9.13284
TCGA-KL-8327,0.958583,1.93742,0,5.58496,10.0984,0,9.24922,2.92672,0,0,0,1.52947,2.25508,0,0.958583,0,0,0,5.40971,6.56066,2.51526,0,10.8975,0,0,0.958583,6.42468,10.7153,2.25508,5.53241,0.958583,0.958583,4.55459,12.4552,0,9.79408,0,0,10.4048,8.68762,...,0,0,9.02723,10.1116,2.92672,10.2932,0,4.56325,6.82355,6.86863,6.40875,11.571,4.7769,9.12741,5.75034,3.2465,8.26493,9.3486,0.958583,6.76218,1.52947,9.66925,0,7.56023,7.7365,5.98332,10.5316,7.71722,6.72406,7.22937,7.27402,6.00467,4.67403,8.36739,9.86439,0,10.3328,10.5289,9.97536,9.26036
TCGA-GL-7966,0,3.79609,4.10852,6.42633,9.20353,0,8.12605,0.82122,0,0,0.468218,0.82122,2.48574,0.468218,0.82122,0,0,0,7.49237,11.4514,4.06358,6.71012,8.87751,0,0,2.98218,0,8.72899,3.96075,4.42488,1.88127,1.10454,3.37451,12.8044,0.82122,9.6308,2.27334,0,9.69094,9.98644,...,0,8.4157,9.60759,10.2314,7.43599,8.18427,0.468218,2.38344,7.14691,3.81547,6.52159,10.5451,6.40892,7.47692,7.11933,6.34231,6.74432,8.95089,0,6.63725,1.72273,8.02986,0,6.09771,8.99384,7.193,8.71194,8.10033,7.10332,9.49097,8.77131,8.46976,6.35588,9.37911,10.5985,6.4024,9.81465,12.0267,10.8971,9.50169
TCGA-A3-3367,0,4.14356,3.04383,6.21134,8.79191,0,7.69196,1.15491,0,0,2.48548,0.941106,3.47514,0,0.941106,0,0,0,9.71805,10.3266,4.05664,3.80989,7.75476,0,0.385928,2.02389,0.385928,8.6416,6.27928,4.18648,8.51428,1.50594,6.21932,14.6609,0,10.1455,3.00963,0,9.53812,8.75449,...,0,2.31811,9.8365,10.4081,6.45093,7.89147,0.385928,0.385928,8.02316,3.72744,5.73085,9.7605,6.53445,7.62325,6.87321,4.25505,8.16248,8.84579,1.65384,5.85138,1.34107,7.83058,0,6.18513,7.75476,9.3489,8.77079,8.31984,6.89939,8.74731,7.61436,8.04343,7.23807,9.14362,10.4844,4.34488,10.4487,10.9128,10.3734,10.0942
TCGA-B2-5636,0,3.3653,3.41854,6.1664,8.70678,0,6.32811,0.803227,0,0,0.803227,0,2.71975,1.31615,0.456806,0,0,0,7.84606,10.5735,1.51728,1.48311,7.8082,0,0,0,0,8.48925,4.5682,6.19778,0.456806,0,5.45215,15.7528,0,9.95833,4.85489,0,8.93803,9.33246,...,0,2.45157,10.1398,9.87986,5.91237,7.23242,0,3.07897,8.21139,4.4688,5.73111,10.6607,7.18519,7.51198,6.95767,5.65945,8.12355,8.91372,3.81123,6.13516,1.69377,7.81775,0.456806,6.949,8.69386,8.1061,9.53003,7.73731,6.83595,8.26303,7.06191,6.57355,7.27468,9.31822,10.3343,0,10.3529,11.8899,10.983,9.82366
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TCGA-BP-5186,0,2.73368,2.20917,6.83242,8.98353,0,6.02827,1.21729,0,0,1.57899,0,2.49513,0.733354,1.21729,0,0,0,10.0093,9.4498,3.2144,0,8.37909,0,0,0.412836,0,9.23899,5.05021,4.83802,7.90198,0,6.41602,15.1814,0,10.4253,4.31342,0,9.09172,9.16598,...,0,1.21729,9.98729,9.79505,6.02827,7.81975,0,2.21537,7.97787,4.10954,5.47205,10.2944,5.87384,7.87678,6.86148,5.55602,8.75151,8.77242,0.995521,6.2071,1.21729,8.03,0,6.59105,8.43815,9.14841,8.78656,7.7517,6.95304,8.29637,7.02454,8.04093,7.08563,8.78981,10.3179,3.57675,10.4535,11.4812,10.4779,9.53086
TCGA-CJ-4878,0.661111,2.80595,1.53973,6.23426,9.91252,0,9.3691,0.645517,0,0,0.645517,0,3.80526,1.08997,0,0,0,0,9.26311,10.0993,5.23579,5.86534,8.5095,0,0,1.42921,0,9.72226,2.46328,4.07051,6.31184,0,6.53008,14.4775,0,11.7494,6.58208,0,9.31771,9.49291,...,0,0,9.82277,10.981,5.72571,7.91869,0,1.08997,6.75033,4.39464,6.92779,10.7401,5.6788,8.52947,7.11579,2.13291,8.63972,9.37769,1.08997,5.72571,0,7.36825,0,5.89943,8.28516,8.50503,8.99284,7.93372,6.70422,8.43618,7.47215,6.02976,6.28076,8.41248,10.3255,2.60383,10.0073,11.8241,9.98581,9.47013
TCGA-B2-4099,0,1.3297,3.34694,6.58274,9.7981,0,7.14627,1.19068,0,0,0.891497,0.513491,2.71323,0,0,0,0,0,8.96312,8.28653,2.97095,0,8.33509,0,0,2.5117,0.513491,9.42097,6.18637,5.49929,6.98522,0,6.89118,14.3859,0,10.4009,2.5117,0,9.69099,8.21152,...,0,0.891497,9.11767,10.8825,5.95739,7.31845,0,1.19068,8.13021,3.86196,6.70625,9.41556,4.71284,7.74952,6.37654,3.95755,8.11249,8.24037,2.14414,6.09006,1.19068,8.25458,0,6.66491,7.64243,7.79464,8.48378,9.38162,7.0189,8.45418,7.37063,8.01093,6.31589,8.15422,9.43797,5.37905,9.86444,11.8503,9.27329,9.55082
TCGA-F9-A7Q0,0,2.13609,3.42523,6.07267,10.0138,0,8.3075,1.00763,0,0,0.590051,0,2.01142,0,1.00763,0,0,0,7.64451,8.99534,3.18347,2.18177,10.6555,0,0,1.00763,0,8.90242,7.20988,4.93049,0,2.33411,6.70661,10.2565,0,10.296,6.95762,0,10.4331,9.97159,...,0,11.718,9.18513,9.99304,5.31862,8.53679,0,0,7.05403,5.00843,6.76301,7.44977,5.30024,8.16492,6.60753,4.56918,7.84805,8.4075,0,5.3899,0,8.72944,0.590051,6.95174,8.66265,8.59073,9.43937,9.0933,6.48266,8.16238,7.10052,7.41174,5.53846,8.4075,10.2896,4.22386,9.23195,11.985,10.7303,8.54852


In [None]:
# Sorted list of pathway CSVs to process.
# "subtypes_labels.csv" is written into this same directory by a later cell of
# this notebook; it is not a pathway file (no 'from'/'to' columns), so it must
# be excluded or a re-run of the pathway loop would fail on it.
files_to_use = sorted(
    name for name in os.listdir(kegg_pathways_path)
    if name != "subtypes_labels.csv"
)

In [None]:



def _build_gcn_classifier(fltr, n_nodes, n_features, K, l2_reg, learning_rate, n_out):
	'''
		Builds and compiles the ChebConv classifier used for one pathway graph.

		fltr          : preprocessed sparse graph filter (output of ChebConv.preprocess)
		n_nodes       : number of graph nodes per sample
		n_features    : number of features per node
		K             : `K` argument forwarded to both ChebConv layers
		l2_reg        : l2 kernel-regularisation rate of the first ChebConv layer
		learning_rate : learning rate of the Adam optimizer
		n_out         : size of the softmax output layer

		Returns a compiled Keras Model. The filter is baked in as a tensor input,
		so fit/predict only need the node-feature array.
	'''
	X_in = Input(shape=(n_nodes, n_features))
	A_in = Input(tensor=sp_matrix_to_sp_tensor(fltr))

	bn_1 = BatchNormalization()(X_in)
	graph_conv_1 = ChebConv(32,
				K=K,
				activation='relu',
				kernel_regularizer=l2(l2_reg),
				use_bias=False)([bn_1, A_in])
	bn_2 = BatchNormalization()(graph_conv_1)
	graph_conv_2 = ChebConv(16,
				K=K,
				activation='relu',
				use_bias=False)([bn_2, A_in])
	flatten = Flatten()(graph_conv_2)
	fc_1 = Dense(64, activation='relu')(flatten)
	dropout_1 = Dropout(0.3, seed=42)(fc_1)
	fc_2 = Dense(32, activation='relu')(dropout_1)
	output = Dense(n_out, activation='softmax')(fc_2)

	model = Model(inputs=[X_in, A_in], outputs=output)
	# `learning_rate` is the current keyword; `lr` is a deprecated alias.
	model.compile(optimizer=Adam(learning_rate=learning_rate),
			loss='sparse_categorical_crossentropy',
			metrics=['acc'])
	return model


# Labels do not depend on the pathway, so convert them to arrays once.
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

# Parameters (batch_size / epochs hold the values the training call really uses)
l2_reg = 5e-4         # Regularization rate for l2
learning_rate = 5e-4  # Learning rate for Adam
batch_size = 16       # Batch size
epochs = 10           # Number of training epochs per fold
K = 2                 # `K` argument of the ChebConv layers
n_out = 3             # Number of output classes (labels 0, 1, 2)
n_folds = 10          # Number of stratified CV folds
min_nodes = 10        # Pathways with fewer usable genes are skipped
target_names = ['0', '1', '2']
gcn_output_dir = "/content/drive/My Drive/IIITH/GCN_KEGG/GCN_pathway_output_csv"

for file in files_to_use[100:]:

	print(file)

	pathway = pd.read_csv(os.path.join(kegg_pathways_path,file))
	pathway = pathway.rename(columns={"Unnamed: 0": "idx"})

	# Edge endpoints carry a 4-character prefix (e.g. "hsa:") in front of the
	# gene id; keep only the genes that exist as columns of the expression data.
	pathway_genes = {node[4:] for node in pathway['from']} | {node[4:] for node in pathway['to']}
	genes_used = sorted(gene for gene in pathway_genes if gene in data.columns)

	# dict to map gene_id to node_number
	node_map = {"hsa:" + gene: idx for idx, gene in enumerate(genes_used)}

	# Drop every edge with an endpoint that has no node in the graph. Testing
	# against node_map (rather than against the removed genes) guarantees the
	# adjacency loop below can never hit a missing key.
	valid_nodes = set(node_map)
	pathway = pathway[pathway['from'].isin(valid_nodes) & pathway['to'].isin(valid_nodes)]

	nodes = len(genes_used)
	edges = len(pathway)
	print(nodes, edges)

	if nodes < min_nodes:
		print("NOT USED: ",file)
		pathways_not_used.append(file)
		continue

	# CREATE ADJACENCY MATRIX
	# NOTE(review): edges are entered in one direction only (from -> to), so the
	# matrix is generally not symmetric and nodes without outgoing edges have a
	# zero row sum -- presumably the source of the `np.power` warning printed
	# during preprocessing. Confirm a directed graph is intended for ChebConv.
	adjacency_matrix = np.zeros((nodes,nodes))
	for src, dst in zip(pathway['from'], pathway['to']):
		adjacency_matrix[node_map[src], node_map[dst]] = 1

	A = sparse.csr_matrix(adjacency_matrix)
	assert adjacency_matrix.shape[0]==nodes #sanity check

	fltr = ChebConv.preprocess(A).astype('f4')
	assert fltr.shape==adjacency_matrix.shape

	# CREATE NODE FEATURES MATRIX
	# Shape (samples, nodes, 1): one scalar expression value per node.
	# The explicit float32 cast avoids handing object-dtype arrays to Keras.
	X_train = data_train[genes_used].to_numpy(dtype='f4')[..., None]
	X_test = data_test[genes_used].to_numpy(dtype='f4')[..., None]
	assert X_train.shape[1]==nodes
	assert X_test.shape[1]==nodes

	# TRAIN
	# NOTE(review): a fresh model is built and trained for every fold, but only
	# the model of the LAST fold survives the loop and is used below; the
	# validation scores of all folds are never read (verbose=0). Confirm whether
	# per-fold metrics or a single final fit was intended.
	kfold1 = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

	for train_ix, val_ix in kfold1.split(X_train[..., 0], y_train):
		train_X, val_X = X_train[train_ix], X_train[val_ix]
		train_y, val_y = y_train[train_ix], y_train[val_ix]

		model = _build_gcn_classifier(fltr,
					n_nodes=train_X.shape[-2],
					n_features=train_X.shape[-1],
					K=K,
					l2_reg=l2_reg,
					learning_rate=learning_rate,
					n_out=n_out)

		model.fit(train_X,
			  train_y,
			  batch_size=batch_size,
			  validation_data=(val_X, val_y),
			  epochs=epochs, verbose=0)

	# EVALUATE ON THE HELD-OUT TEST SPLIT
	y_pred = model.predict(X_test, verbose=0)
	y_p = np.argmax(y_pred, axis=1)
	print(classification_report(y_test, y_p, target_names=target_names))
	f1_weighted_per_fold = f1_score(y_test, y_p, average='weighted')
	f1_macro_per_fold = f1_score(y_test, y_p, average='macro')
	f1_micro_per_fold = f1_score(y_test, y_p, average='micro')
	testacc_per_fold = accuracy_score(y_test, y_p)
	precision_per_fold = precision_score(y_test, y_p,  average='micro')
	recall_per_fold = recall_score(y_test, y_p,  average='micro')

	# APPEND METRICS
	scores = [file, f1_weighted_per_fold, f1_macro_per_fold, f1_micro_per_fold, testacc_per_fold, precision_per_fold, recall_per_fold]
	print(scores)
	# metric_scores_per_pathway.append(scores)

	# GENERATE OUTPUT CSV
	# Class probabilities for all samples, training rows first, then test rows.
	X_full = np.concatenate([X_train, X_test])
	gcn_pathway_output = model.predict(X_full)
	filename_output_csv = os.path.join(gcn_output_dir,file)
	np.savetxt(filename_output_csv,gcn_pathway_output)

	# Release the graph and the per-pathway arrays before the next pathway.
	tf.keras.backend.clear_session()
	del pathway, genes_used, node_map, valid_nodes
	del adjacency_matrix, A, fltr
	del X_train, X_test, X_full
	del train_X, val_X, train_y, val_y
	del model

	gc.collect()

# SAVE METRICS FOR ALL PATHWAYS
# metric_scores_df = pd.DataFrame(metric_scores_per_pathway, index=["Name", "f1-weighted", "f1-macro", "f1-micro", "test-acc", "prec", "recall"])
# metric_scores_df.to_csv(index=False)

hsa04972 .csv
22 22
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.98      0.93      0.96        58
           1       0.97      0.98      0.98       104
           2       0.89      1.00      0.94        16

    accuracy                           0.97       178
   macro avg       0.95      0.97      0.96       178
weighted avg       0.97      0.97      0.97       178

['hsa04972 .csv', 0.9663169301712577, 0.9576684126671798, 0.9662921348314607, 0.9662921348314607, 0.9662921348314607, 0.9662921348314607]
hsa04973 .csv
13 14


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.89      0.83      0.86        58
           1       0.92      0.95      0.93       104
           2       0.88      0.88      0.88        16

    accuracy                           0.90       178
   macro avg       0.89      0.88      0.89       178
weighted avg       0.90      0.90      0.90       178

['hsa04973 .csv', 0.9036312426178866, 0.8887017070979336, 0.9044943820224719, 0.9044943820224719, 0.9044943820224719, 0.9044943820224719]
hsa04976 .csv
30 27


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.94      0.84      0.89        58
           1       0.94      0.97      0.95       104
           2       0.83      0.94      0.88        16

    accuracy                           0.93       178
   macro avg       0.90      0.92      0.91       178
weighted avg       0.93      0.93      0.93       178

['hsa04976 .csv', 0.9263186177201816, 0.9086974069216023, 0.9269662921348315, 0.9269662921348315, 0.9269662921348315, 0.9269662921348315]
hsa04977 .csv
0 0
NOT USED:  hsa04977 .csv
hsa04978 .csv
15 13


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.90      0.78      0.83        58
           1       0.88      0.95      0.92       104
           2       0.94      0.94      0.94        16

    accuracy                           0.89       178
   macro avg       0.91      0.89      0.90       178
weighted avg       0.89      0.89      0.89       178

['hsa04978 .csv', 0.8913857677902621, 0.8958333333333334, 0.893258426966292, 0.8932584269662921, 0.8932584269662921, 0.8932584269662921]
hsa04979 .csv
31 37


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.96      0.95      0.96        58
           1       0.98      0.98      0.98       104
           2       0.88      0.94      0.91        16

    accuracy                           0.97       178
   macro avg       0.94      0.96      0.95       178
weighted avg       0.97      0.97      0.97       178

['hsa04979 .csv', 0.966425367500111, 0.9487939596635249, 0.9662921348314607, 0.9662921348314607, 0.9662921348314607, 0.9662921348314607]
hsa05200 .csv
464 2098


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.98      0.95      0.96        58
           1       0.96      0.98      0.97       104
           2       0.94      0.94      0.94        16

    accuracy                           0.97       178
   macro avg       0.96      0.96      0.96       178
weighted avg       0.97      0.97      0.97       178

['hsa05200 .csv', 0.9662555264565909, 0.9579469507101086, 0.9662921348314607, 0.9662921348314607, 0.9662921348314607, 0.9662921348314607]
hsa05202 .csv
19 12


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.94      0.84      0.89        58
           1       0.91      0.97      0.94       104
           2       0.93      0.88      0.90        16

    accuracy                           0.92       178
   macro avg       0.93      0.90      0.91       178
weighted avg       0.92      0.92      0.92       178

['hsa05202 .csv', 0.9204267869827517, 0.9112232603605447, 0.9213483146067416, 0.9213483146067416, 0.9213483146067416, 0.9213483146067416]
hsa05204 .csv
65 480


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       1.00      0.90      0.95        58
           1       0.93      1.00      0.96       104
           2       1.00      0.88      0.93        16

    accuracy                           0.96       178
   macro avg       0.98      0.92      0.95       178
weighted avg       0.96      0.96      0.95       178

['hsa05204 .csv', 0.954594635493512, 0.9472502805836139, 0.9550561797752809, 0.9550561797752809, 0.9550561797752809, 0.9550561797752809]
hsa05205 .csv
200 803


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.96      0.93      0.95        58
           1       0.98      0.97      0.98       104
           2       0.84      1.00      0.91        16

    accuracy                           0.96       178
   macro avg       0.93      0.97      0.95       178
weighted avg       0.96      0.96      0.96       178

['hsa05205 .csv', 0.9610329357018992, 0.9458331819887884, 0.9606741573033708, 0.9606741573033708, 0.9606741573033708, 0.9606741573033708]
hsa05211 .csv
55 111


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       1.00      0.93      0.96        58
           1       0.96      0.99      0.98       104
           2       0.88      0.94      0.91        16

    accuracy                           0.97       178
   macro avg       0.95      0.95      0.95       178
weighted avg       0.97      0.97      0.97       178

['hsa05211 .csv', 0.9663459044815879, 0.9498933136373894, 0.9662921348314607, 0.9662921348314607, 0.9662921348314607, 0.9662921348314607]
hsa05230 .csv
65 227


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.93      0.97      0.95        58
           1       0.98      0.95      0.97       104
           2       0.94      1.00      0.97        16

    accuracy                           0.96       178
   macro avg       0.95      0.97      0.96       178
weighted avg       0.96      0.96      0.96       178

['hsa05230 .csv', 0.9607571879808061, 0.9615677235354788, 0.9606741573033708, 0.9606741573033708, 0.9606741573033708, 0.9606741573033708]
hsa05231 .csv
78 285


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.98      0.74      0.84        58
           1       0.88      0.98      0.93       104
           2       0.89      1.00      0.94        16

    accuracy                           0.90       178
   macro avg       0.92      0.91      0.90       178
weighted avg       0.91      0.90      0.90       178

['hsa05231 .csv', 0.9011075727533098, 0.9038621509209745, 0.9044943820224719, 0.9044943820224719, 0.9044943820224719, 0.9044943820224719]
hsa05235 .csv
86 344


  degrees = np.power(np.array(A.sum(1)), k).flatten()


              precision    recall  f1-score   support

           0       0.96      0.88      0.92        58
           1       0.94      0.98      0.96       104
           2       0.88      0.94      0.91        16

    accuracy                           0.94       178
   macro avg       0.93      0.93      0.93       178
weighted avg       0.94      0.94      0.94       178

['hsa05235 .csv', 0.943360806409354, 0.9300913263177414, 0.9438202247191011, 0.9438202247191011, 0.9438202247191011, 0.9438202247191011]


In [None]:
# Persist the class labels, training samples first and test samples after
# them, as a plain-text column next to the KEGG pathway CSVs.
filename_output_csv = os.path.join("/content/drive/My Drive/IIITH/GCN_KEGG/KEGG_csv","subtypes_labels.csv")
y_labels = np.concatenate((y_train, y_test))
np.savetxt(filename_output_csv, y_labels)

In [4]:
# Display the held-out labels.
# NOTE(review): the captured output is a pandas Series, while the pathway loop
# rebinds y_test to a NumPy array -- so this cell appears to have been run
# before that loop, not in the order it sits in the notebook.
y_test

TCGA-UZ-A9PN    0
TCGA-CZ-5989    1
TCGA-A3-A6NI    1
TCGA-B0-4703    1
TCGA-B8-4153    1
               ..
TCGA-PJ-A5Z8    0
TCGA-BP-4760    1
TCGA-CJ-4874    1
TCGA-CJ-4901    1
TCGA-KO-8405    2
Name: y, Length: 178, dtype: int64