In [1]:
import sys
sys.path.append("..")
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import  Dataset, DataLoader
import pandas as pd
import Evaluation_par as ep
import model.CycleDNN_v1 as md
import os
import csv
import matplotlib.pyplot as plt

import argparse

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument("--num_cell", type=int, default=2, help="number of cellline")
parser.add_argument("--choose_cell", type=str, default='0 1', help="choose cellline")
parser.add_argument("--num_temperature", type=int, default=10, help="number of temperature")
parser.add_argument("--epoch", type=int, default=0, help="epoch to start training from")
parser.add_argument("--n_epochs", type=int, default=4000, help="number of epochs of training")
parser.add_argument("--dataset_name", type=str, default="c12345", help="name of the dataset")
parser.add_argument("--batch_size", type=int, default=128, help="size of the batches")

parser.add_argument("--lr", type=float, default=0.01, help="adam: learning rate")
parser.add_argument("--momentum", type=float, default=0.9, help="adam: momentum")
parser.add_argument("--weight_decay", type=float, default=0, help="adam: weight_decay")
parser.add_argument("--step_size", type=int, default=300, help="adam: step_size")
parser.add_argument("--gamma", type=float, default=0.95, help="adam: gamma")

parser.add_argument("--clean_threshold", type=float, default=0.02, help="clean threshold to filter ")

opt = parser.parse_args(args=[])
#opt = parser.parse_args()
print(opt)

Namespace(batch_size=128, choose_cell='0 1', clean_threshold=0.02, dataset_name='c12345', epoch=0, gamma=0.95, lr=0.01, momentum=0.9, n_epochs=4000, num_cell=2, num_temperature=10, step_size=300, weight_decay=0)


In [3]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
df = pd.read_csv('.\data\%s.csv'% opt.dataset_name)
df = np.asarray(df)
dfc = np.asarray(df)
cc = opt.choose_cell.split()
choose_cell = []
for i in range(len(cc)):
    choose_cell.append(int(cc[i]))


In [5]:
clean_thers = opt.clean_threshold
j = 0
for i in range(df.shape[0]):
    k = (df[i,:]-clean_thers<0).sum().item()
    if k>0:
        dfc = np.delete(dfc, i-j, 0)
        j = j +1

df = dfc

In [6]:
df_data = np.zeros((df.shape[0] ,opt.num_cell,opt.num_temperature),dtype=float)
k = 0
for i in choose_cell:
    df_data[:,k,:] = np.asarray(df[:,i*10:i*10+10])
    k=k+1

CETSA_cell = torch.Tensor(df_data)
#viewCETSA_cell = df_data

In [7]:
train_index = np.array(list(range(0,df.shape[0])))

In [8]:
train_list, test_list, train_label, test_label = train_test_split(train_index, train_index, test_size=0.3,random_state=202)

In [9]:
CETSA_train = CETSA_cell[train_list,:,:]
CETSA_test = CETSA_cell[test_list,:,:]

In [10]:
path = "./result/"
dirs = os.listdir(path)
exp_num = str(len(dirs)+1)

In [11]:
class Dataset(Dataset):
    def __init__(self, input, label):
        super(Dataset).__init__()
        self.input = input
        self.label = label

    def __getitem__(self, item):
        return self.input[item,:,:], self.label[item,:,:]

    def __len__(self):
        return self.input.shape[0]

In [12]:
train_iter = \
    DataLoader(Dataset(torch.tensor(CETSA_train).float(), torch.tensor(CETSA_train).float()),
                    batch_size=opt.batch_size,
                    shuffle=True)

  DataLoader(Dataset(torch.tensor(CETSA_train).float(), torch.tensor(CETSA_train).float()),


In [13]:
test_iter = \
    DataLoader(Dataset(torch.tensor(CETSA_test).float(), torch.tensor(CETSA_test).float()),
                    batch_size=opt.batch_size,
                    shuffle=True)

  DataLoader(Dataset(torch.tensor(CETSA_test).float(), torch.tensor(CETSA_test).float()),


In [14]:
en_all = []
de_all = []
for i in range(opt.num_cell):
    en = md.Encoder()
    de = md.Decoder()
    if torch.cuda.is_available():
        en.cuda()
        de.cuda()
    en_all.append(en)
    de_all.append(de)

In [15]:
loss_function = nn.MSELoss()

In [16]:
en_op_all = []
de_op_all = []
en_sch_all = []
de_sch_all = []
for i in range(opt.num_cell):
    en_optimizer = torch.optim.SGD(en_all[i].parameters(), lr=opt.lr,momentum=opt.momentum,weight_decay=opt.weight_decay)
    de_optimizer = torch.optim.SGD(de_all[i].parameters(), lr=opt.lr,momentum=opt.momentum,weight_decay=opt.weight_decay)
    en_scheduler=torch.optim.lr_scheduler.StepLR(en_optimizer,step_size=opt.step_size,gamma=opt.gamma)
    de_scheduler=torch.optim.lr_scheduler.StepLR(de_optimizer,step_size=opt.step_size,gamma=opt.gamma)
    en_op_all.append(en_optimizer)
    de_op_all.append(de_optimizer)
    en_sch_all.append(en_scheduler)
    de_sch_all.append(de_scheduler)

In [17]:
def plot_train_loss(exp_num, e_num,c_line,c_tem):
    csv_path = './result/exp' + str(exp_num)+ '/result/train_loss.csv'
    jpg_path = './result/exp' + str(exp_num)+ '/result/train_loss_'+str(e_num)+'.jpg'
    train_loss = pd.read_csv(csv_path)
    train_loss = np.asarray(train_loss)

    plt.figure(figsize=(10,10))
    for i in range(c_line):
        for j in range(c_line):
            plt.plot(train_loss[:,c_line*c_line+1],train_loss[:,i*c_line+j] ,label = '%s_Loss' % (chr(ord('A') + i) + chr(ord('A') + j)))

    plt.plot(train_loss[:,c_line*c_line+1],train_loss[:,i*c_line+j+1] ,label =  'Z_Loss')
    plt.legend()
    plt.savefig(jpg_path)
    plt.close()

In [18]:
def plot_test_eva_loss(exp_num, e_num,c_line,c_tem):
    csv_path = './result/exp' + str(exp_num)+ '/result/test_eva.csv'
    test_eva_plot = pd.read_csv(csv_path)
    test_eva_plot = np.asarray(test_eva_plot)
    loss_str = ['MAPE_Loss','MSE_Loss','RMSE_Loss','MAE_Loss']
    c_all = c_line*c_line
    for i in range(c_all):
        jpg_path = './result/exp' + str(exp_num)+ '/result/test_eva_self_'+str(i)+'_epcho'+str(e_num)+'.jpg'
        plt.figure(figsize=(10,10))
        for j in range(4):
            plt.plot(test_eva_plot[i::c_all,5],test_eva_plot[i::c_all,j] ,label = (str(i) + loss_str[j]))

        plt.legend()
        plt.savefig(jpg_path)
        plt.close()

    for i in range(4):
        jpg_path = './result/exp' + str(exp_num)+ '/result/test_eva_compare_'+str(i)+'_epcho'+str(e_num)+'.jpg'
        plt.figure(figsize=(10,10))
        for j in range(c_all):
            plt.plot(test_eva_plot[j::c_all,5],test_eva_plot[j::c_all,i] ,label = (str(i) + (chr(ord('A') + j//c_line))))
        plt.legend()
        plt.savefig(jpg_path)
        plt.close()

In [19]:
def train(en_all, de_all, en_op_all, de_op_all, en_sch_all, de_sch_all, train_iter, test_iter,
          loss_function, num_epochs, num_cell, c_tem):
    #num_cell = 1
    c_line = num_cell
    c_all = c_line * c_line
    text_cell_trans = []
    for j in range(c_all):
        text = (chr(ord('A') + j // c_line) + chr(ord('A') + j % c_line))
        text_cell_trans.append(text)

    lowest_test_loss = float('inf')

    # best_MAPE_testAB,best_MSE_testAB ,best_RMSE_testAB,best_MAE_testAB
    best_test_acc = 100 * np.ones([c_all, 5])
    best_train_acc = 100 * np.ones([c_all, 5])

    ex_test = np.zeros([c_all, 4])
    ex_train = np.zeros([c_all, 4])

    path = "./result/"
    dirs = os.listdir(path)
    new_path = path + 'exp' + str(len(dirs) + 1)
    os.mkdir(new_path)
    print(new_path)
    new_path_weights = new_path + "/weight"
    new_path_results = new_path + "/result"
    os.mkdir(new_path_weights)
    os.mkdir(new_path_results)
    train_loss = []

    for epoch in range(num_epochs):
        test_eva = np.zeros([c_all, 6])
        train_eva = np.zeros([c_all, 6])
        train_l_sum = np.zeros([c_all + 1, 6])
        train_loss = np.zeros([c_all + 1])
        train_loss_1 = np.zeros(c_all + 2)
        loss_z = 0.00
        n = 0

        for X, Y in train_iter:
            if torch.cuda.is_available():
                X = X.to(device)
                Y = Y.to(device)

            Z_all = torch.tensor(np.zeros([num_cell, X.shape[0], 500]))
            total_loss = 0.00
            #num_cell = 1
            for k1 in range(num_cell):
                for k2 in range(num_cell):
                    x = X[:, k1, :]
                    y = X[:, k2, :]
                    z_out = en_all[k1](x)
                    z_out = z_out.squeeze(1)
                    y_hat = de_all[k2](z_out)
                    y_hat = y_hat.squeeze(1)

                    train_loss[num_cell * k1 + k2] = loss_function(y_hat, y).item()
                    train_loss_1[num_cell * k1 + k2] += train_loss[num_cell * k1 + k2].item()

                    total_loss += loss_function(y_hat, y)
                    #print(loss_function(y_hat, y))
                    #a = (str(train_loss_1[num_cell*k1+k2]))
                    #print(text_cell_trans[num_cell*k1+k2]+a)
                Z_all[k1, :, :] = en_all[k1](x).squeeze(1)
            #num_cell = 1
            loss_z = 0.00
            for k1 in range(num_cell):
                for k2 in range(num_cell):
                    loss_z += loss_function(en_all[k1](x).squeeze(1), en_all[k2](x).squeeze(1))

            train_loss_1[c_all] += loss_z.item()

            total_loss += loss_z / 2

            for j in range(num_cell):
                en_op_all[j].zero_grad()
                de_op_all[j].zero_grad()

            n += y.shape[0]
            total_loss.backward()

            for j in range(num_cell):
                en_op_all[j].step()
                de_op_all[j].step()

            #ex_MAPE_testBA,ex_MSE_testBA ,ex_RMSE_testBA,ex_MAE_testBA
        if epoch == 0:
            for k1 in range(num_cell):
                for k2 in range(num_cell):
                    ex_test[num_cell * k1 + k2, :] = ep.ex_accurary(test_iter, loss_function, k1, k2)
                    ex_train[num_cell * k1 + k2, :] = ep.ex_accurary(train_iter, loss_function, k1, k2)

            with open(str(new_path_results + '/ex_test_train.csv'), 'w', newline='') as myFile:
                myWriter = csv.writer(myFile)
                myWriter.writerows(ex_test)
                myWriter.writerows(ex_train)

        for j in range(num_cell):
            en_sch_all[j].step()
            de_sch_all[j].step()

        # MAPE,MSE,RMSE,MAE,Loss
        h = 0
        for k1 in range(num_cell):
            for k2 in range(num_cell):
                test_eva[h, 0:5] = ep.net_accurary2(test_iter, loss_function, en_all[k1], de_all[k2], k1, k2)
                train_eva[h, 0:5] = ep.net_accurary2(train_iter, loss_function, en_all[k1], de_all[k2], k1, k2)
                test_eva[h, 5] = epoch
                train_eva[h, 5] = epoch
                h = h + 1

        if epoch == 0:
            with open(str(new_path_results + '/test_eva.csv'), 'w') as myFile:
                myWriter = csv.writer(myFile)
            with open(str(new_path_results + '/train_eva.csv'), 'w') as myFile:
                myWriter = csv.writer(myFile)
            with open(str(new_path + '/parameter.txt'), 'w') as f:
                print(en_all[0], file=f)
            with open(str(new_path + '/parameter.txt'), 'a') as f:
                print(de_all[0], file=f)
                print(opt, file=f)

        with open(str(new_path_results + '/test_eva.csv'), 'a', newline='') as myFile:
            myWriter = csv.writer(myFile)
            myWriter.writerows(test_eva)

        with open(str(new_path_results + '/train_eva.csv'), 'a', newline='') as myFile:
            myWriter = csv.writer(myFile)
            myWriter.writerows(train_eva)

        for i in range(c_all):
            if best_test_acc[i, 1] > test_eva[i, 1]:
                best_test_acc[i, 0:4] = test_eva[i, 0:4]
                best_test_acc[i, 4] = epoch + 1
                k = i // num_cell
                j = i % num_cell
                torch.save(en_all[k],
                           str(new_path_weights + '/CycleDNN_v01_withZ_MSE_best_en' + str(k) + '_' + str(i) + '.pkl'))
                torch.save(de_all[j],
                           str(new_path_weights + '/CycleDNN_v01_withZ_MSE_best_de' + str(j) + '_' + str(i) + '.pkl'))

            if best_train_acc[i, 1] > train_eva[i, 1]:
                best_train_acc[i, 0:4] = train_eva[i, 0:4]
                best_train_acc[i, 4] = epoch + 1

        if (epoch + 1) % 500 == 0:
            for i in range(num_cell):
                torch.save(en_all[i], str(new_path_weights + '/CycleDNN_v01_withZ_MSE_en' + str(i) + '_' + str(
                    epoch + 1) + '.pkl'))
                torch.save(de_all[i], str(new_path_weights + '/CycleDNN_v01_withZ_MSE_de' + str(i) + '_' + str(
                    epoch + 1) + '.pkl'))

        with open(str(new_path_results + '/best_test_acc.csv'), 'w', newline='') as myFile:
            myWriter = csv.writer(myFile)
            myWriter.writerows(best_test_acc)

        with open(str(new_path_results + '/best_train_acc.csv'), 'w', newline='') as myFile:
            myWriter = csv.writer(myFile)
            myWriter.writerows(best_train_acc)

        for k1 in range(num_cell):
            for k2 in range(num_cell):
                train_loss_1[num_cell * k1 + k2] = train_loss_1[num_cell * k1 + k2] / n
        train_loss_1[c_all] = train_loss_1[c_all] / n
        train_loss_1[c_all + 1] = epoch + 1

        if epoch % 10 == 0:
            print('epoch: %d' % (epoch + 1))
            for k1 in range(num_cell):
                for k2 in range(num_cell):
                    text = (chr(ord('A') + k1) + chr(ord('A') + k2)) + 'loss:'
                    print(text + '%.8f' % (train_loss_1[num_cell * k1 + k2]))
        print('epoch: %d' % (epoch + 1))
        if epoch == 0:
            with open(str(new_path_results + '/train_loss.csv'), 'w') as myFile:
                myWriter = csv.writer(myFile)

        with open(str(new_path_results + '/train_loss.csv'), 'a', newline='') as myFile:
            myWriter = csv.writer(myFile)
            myWriter.writerow(train_loss_1)

        if (epoch + 1) % 50 == 0:
            plot_train_loss(exp_num, epoch, c_line, c_tem)
            plot_test_eva_loss(exp_num, epoch, c_line, c_tem)

    return train_loss




In [20]:

train_loss = train(en_all, de_all, en_op_all, de_op_all, en_sch_all, de_sch_all, train_iter, test_iter,
                   loss_function, opt.n_epochs, opt.num_cell, opt.num_temperature)

./result/exp10
epoch: 1
AAloss:0.00234763
ABloss:0.00190181
BAloss:0.00238785
BBloss:0.00190425
epoch: 1
epoch: 2
epoch: 3
epoch: 4
epoch: 5
epoch: 6
epoch: 7
epoch: 8
epoch: 9
epoch: 10
epoch: 11
AAloss:0.00012992
ABloss:0.00016917
BAloss:0.00021555
BBloss:0.00009203
epoch: 11
epoch: 12
epoch: 13
epoch: 14
epoch: 15
epoch: 16
epoch: 17
epoch: 18
epoch: 19
epoch: 20
epoch: 21
AAloss:0.00010280
ABloss:0.00014965
BAloss:0.00018888
BBloss:0.00007588
epoch: 21
epoch: 22
epoch: 23
epoch: 24
epoch: 25
epoch: 26
epoch: 27
epoch: 28
epoch: 29
epoch: 30
epoch: 31
AAloss:0.00007523
ABloss:0.00013680
BAloss:0.00017215
BBloss:0.00005878
epoch: 31
epoch: 32
epoch: 33
epoch: 34
epoch: 35
epoch: 36
epoch: 37
epoch: 38
epoch: 39
epoch: 40
epoch: 41
AAloss:0.00005055
ABloss:0.00012592
BAloss:0.00015835
BBloss:0.00003580
epoch: 41
epoch: 42
epoch: 43
epoch: 44
epoch: 45
epoch: 46
epoch: 47
epoch: 48
epoch: 49
epoch: 50
epoch: 51
AAloss:0.00004156
ABloss:0.00012006
BAloss:0.00015204
BBloss:0.00002876
epo

KeyboardInterrupt: 