#### Training Process

##### Preprocessing of training data and label

In [1]:
import pandas as pd
from tensorflow import keras
from scFTAT import Transformer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dense
from tensorflow.python.keras.layers import Activation, SpatialDropout1D, Convolution1D, GlobalMaxPooling1D
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

feature = []  
expression = pd.read_csv('../finaldata/mouse_kidney/train_data.csv')
file = open('../finaldata/mouse_kidney/train_data.csv')
lines = file.readlines() 
line_0 = lines[0].strip('\n').split(',') 

for i in range(1,len(line_0)):
    tem = list(expression[line_0[i]])    
    feature.append(list(tem))
    file.close()

feature_train = list(feature)
label = []
file = open('../finaldata/mouse_kidney/train_labels.csv')
lable_lines = file.readlines()
lable_line_0 = lable_lines[0].strip('\n').split(',')
file.close()

for i in range(1,len(lable_line_0)):
    label.append(int(lable_line_0[i]))

y_train=[]
for i in label:
    tem =[]
    for j in range(0,17):
        tem.append(0)
    tem[i-1]=1
    y_train.append(tem)

##### Processing of prediction data

In [2]:
feature = []  
testexpression = pd.read_csv('../finaldata/mouse_kidney/test_data.csv')
file = open('../finaldata/mouse_kidney/test_data.csv') 
lines = file.readlines() 
line_0 = lines[0].strip('\n').split(',') 
for i in range(1,len(line_0)):
    tem = list(testexpression[line_0[i]])
    feature.append(tem)
file.close()
feature_test = list(feature)

##### Parameters

In [None]:
activation = 'relu'
dropout = 0.2
epoch = 150
params_dict = {'kernel_initializer': 'glorot_uniform','kernel_regularizer': l2(0.01),}
num_layers = 4
model_size = 40
num_heads = 5
dff_size = 128
maxlen = 16
vocab_size = 121

##### Training and prediction, and outputs

In [4]:
enc_inputs = keras.layers.Input(shape=(maxlen,))
transformer = Transformer(num_layers=num_layers, model_size=model_size, num_heads=num_heads, dff_size=dff_size,
                          vocab_size=vocab_size+1, maxlen=maxlen)
final_output = transformer(enc_inputs)
final_output = SpatialDropout1D(0.2)(final_output)
final_output = Convolution1D(filters=64,kernel_size=15, padding='same', kernel_initializer='glorot_normal',
                             kernel_regularizer=l2(0.001))(final_output)
final_output = Activation('relu')(final_output)
final_output = GlobalMaxPooling1D()(final_output)
final_output = Dense(17,'softmax',**params_dict)(final_output)

model = Model(inputs=enc_inputs,outputs=final_output)
model.compile(optimizer=Adam(learning_rate=0.001),loss='categorical_crossentropy',metrics=['accuracy'])

feature_train = [list(i) for i in feature_train]
feature_test = [list(i) for i in feature_test]
for i in range(epoch):
    print(i)
    model.fit(feature_train,y_train,verbose=1,epochs=i+1,initial_epoch=i,batch_size=32,shuffle=True)


a = model.predict(x=feature_test,batch_size=32)
print(a)
with open('../modelsave/mkidney_epoch200.txt','w',newline='') as f:
    for i in range(len(a)):
        f.write(str(i))
        f.write(',')
        for j in range(len(a[i])):
            f.write(str(a[i][j]))
            f.write(',')
        f.write('\n')

The following Variables were used a Lambda layer's call (tf.compat.v1.nn.conv1d), but
are not present in its tracked objects:
  <tf.Variable 'conv1d/kernel:0' shape=(15, 122, 64) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (tf.nn.bias_add), but
are not present in its tracked objects:
  <tf.Variable 'conv1d/bias:0' shape=(64,) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
0
1
Epoch 2/2
2
Epoch 3/3
3
Epoch 4/4
4
Epoch 5/5
5
Epoch 6/6
6
Epoch 7/7
7
Epoch 8/8
8
Epoch 9/9
9
Epoch 10/10
10
Epoch 11/11
11
Epoch 12/12
12
Epoch 13/13
13
Epoch 14/14
14
Epoch 15/15
15
Epoch 16/16
16
Epoch 17/17
17
Epoch 18/18
18
E

#### Load the predictions, calculating its prediction metrics

In [5]:
from sklearn.metrics import f1_score,precision_score,recall_score,matthews_corrcoef

probability = open('../modelsave/mkidney_epoch200.txt')
lines = probability.readlines()
a = []
for i in lines:
    temp = i.strip('\n')
    temp_list = temp.split(',')
    t = []
    for j in range(1,len(temp_list)-1):
        t.append(float(temp_list[j]))
    a.append(t)

labela = []
for i in a:
    labela.append(i.index(max(i))+1)
probability.close()

labelb = open('../finaldata/mouse_kidney/test_labels.csv')
linesb = labelb.readlines()
a = linesb[0].strip('\n').split(',')
b= []
for i in range(1,len(a)):
    b.append(int(a[i]))

count = 0
for i in range(len(b)):
    if (b[i]==labela[i]):
        count +=1
print('accuracy:',count/len(b))
f1 = f1_score(y_true=b,y_pred=labela,average='macro')
precision = precision_score(y_true=b,y_pred=labela,average='macro')
recall = recall_score(y_true=b,y_pred=labela,average='macro')
mcc = matthews_corrcoef(y_true=b,y_pred=labela,sample_weight=None)
print('f1:',f1)
print('precision:',precision)
print('recall:',recall)
print('mcc:',mcc)

accuracy: 0.9040948275862069
f1: 0.919012520564608
precision: 0.9386245533501208
recall: 0.9045070117007556
mcc: 0.8924284844266591
