# 使用AutoEncoder降维

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
# 定义 AutoEncoder 网络
class AutoEncoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 1024),
            nn.ReLU(True),
            nn.Linear(1024, encoding_dim),
            nn.ReLU(True)
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, 1024),
            nn.ReLU(True),
            nn.Linear(1024, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# 定义训练函数
def train(model, dataloader, num_epochs, learning_rate):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        for data in dataloader:
            inputs, _ = data
            inputs = inputs.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()

        # 打印损失
        if epoch % 100 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))





In [2]:
def min_max_normalize(tensor):
    min_val = tensor.min()
    max_val = tensor.max()
    normalized_tensor = (tensor - min_val) / (max_val - min_val)
    return normalized_tensor, min_val, max_val

def min_max_denormalize(normalized_tensor, min_val, max_val):
    denormalized_tensor = normalized_tensor * (max_val - min_val) + min_val
    return denormalized_tensor

In [3]:
def setup_seed(seed):
     torch.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
     torch.cuda.manual_seed(seed)
     np.random.seed(seed)
     torch.backends.cudnn.deterministic = True

In [4]:
setup_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [5]:
# 加载数据
data = pd.read_csv('Liver_merge.csv')
data = data[data['group'] == 0]
data.pop('group')
data_tensor = torch.from_numpy(data.values).to(torch.float32).to(device)
nor_data_tensor, ae_min,ae_max = min_max_normalize(data_tensor)
print(nor_data_tensor)
dataset = TensorDataset(nor_data_tensor,nor_data_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# 创建 AutoEncoder 模型
input_dim = 11885
encoding_dim = 512  # 降维后的维度
model = AutoEncoder(input_dim, encoding_dim).to(device)

# 训练模型
num_epochs = 10
learning_rate = 0.0001


tensor([[0.7400, 0.3986, 0.7373,  ..., 0.5117, 0.4073, 0.4064],
        [0.7041, 0.3985, 0.7412,  ..., 0.5353, 0.4401, 0.4021],
        [0.7128, 0.3693, 0.7803,  ..., 0.4887, 0.3896, 0.4362],
        ...,
        [0.7323, 0.3649, 0.7395,  ..., 0.4475, 0.4623, 0.4343],
        [0.7295, 0.3668, 0.7325,  ..., 0.5019, 0.4394, 0.4019],
        [0.7164, 0.4088, 0.7405,  ..., 0.4914, 0.4269, 0.4114]],
       device='cuda:0')


In [6]:
dataset

<torch.utils.data.dataset.TensorDataset at 0x7fc9ac1dabb0>

In [7]:
from pathlib import Path
model_file_name = 'Liver.pth'
model_file = Path(model_file_name)
if model_file.exists():
    # 指定的文件存在
    print(f'{model_file_name}:read model params!')
    model.load_state_dict(torch.load(model_file_name))
else:
    print(f'{model_file_name}:not exist!')

Liver.pth:read model params!


In [8]:
train(model, dataloader, 1000, learning_rate)

Epoch [1/1000], Loss: 0.0011
Epoch [101/1000], Loss: 0.0002
Epoch [201/1000], Loss: 0.0002
Epoch [301/1000], Loss: 0.0002
Epoch [401/1000], Loss: 0.0002
Epoch [501/1000], Loss: 0.0002
Epoch [601/1000], Loss: 0.0001
Epoch [701/1000], Loss: 0.0001
Epoch [801/1000], Loss: 0.0001
Epoch [901/1000], Loss: 0.0001


In [9]:
# 使用训练好的模型对数据进行降维
encoded_data = model.encoder(nor_data_tensor)

In [10]:
nor_data_tensor

tensor([[0.7400, 0.3986, 0.7373,  ..., 0.5117, 0.4073, 0.4064],
        [0.7041, 0.3985, 0.7412,  ..., 0.5353, 0.4401, 0.4021],
        [0.7128, 0.3693, 0.7803,  ..., 0.4887, 0.3896, 0.4362],
        ...,
        [0.7323, 0.3649, 0.7395,  ..., 0.4475, 0.4623, 0.4343],
        [0.7295, 0.3668, 0.7325,  ..., 0.5019, 0.4394, 0.4019],
        [0.7164, 0.4088, 0.7405,  ..., 0.4914, 0.4269, 0.4114]],
       device='cuda:0')

In [11]:
data_tensor

tensor([[16.9031,  8.7153, 16.8374,  ..., 11.4278,  8.9225,  8.9019],
        [16.0428,  8.7133, 16.9325,  ..., 11.9937,  9.7105,  8.7992],
        [16.2511,  8.0126, 17.8696,  ..., 10.8757,  8.4998,  9.6167],
        ...,
        [16.7177,  7.9058, 16.8900,  ...,  9.8876, 10.2421,  9.5702],
        [16.6516,  7.9529, 16.7222,  ..., 11.1921,  9.6930,  8.7938],
        [16.3369,  8.9588, 16.9159,  ..., 10.9400,  9.3942,  9.0210]],
       device='cuda:0')

In [12]:
encoded_data

tensor([[0.0000, 0.0000, 0.0000,  ..., 0.9157, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.8086, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.8698, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.6837, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.7050, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.7288, 0.0000, 0.0000]],
       device='cuda:0', grad_fn=<ReluBackward0>)

In [13]:
model.decoder(encoded_data)

tensor([[0.7253, 0.3874, 0.7188,  ..., 0.5186, 0.3956, 0.4144],
        [0.7111, 0.3826, 0.7329,  ..., 0.5455, 0.4336, 0.4048],
        [0.7128, 0.3648, 0.7686,  ..., 0.5007, 0.3945, 0.4346],
        ...,
        [0.7190, 0.3663, 0.7502,  ..., 0.4558, 0.4518, 0.4381],
        [0.7285, 0.3579, 0.7306,  ..., 0.5031, 0.4300, 0.4098],
        [0.7135, 0.3949, 0.7455,  ..., 0.4912, 0.4290, 0.4155]],
       device='cuda:0', grad_fn=<SigmoidBackward0>)

In [14]:
decoder_data_tensor = min_max_denormalize(model.decoder(encoded_data),ae_min,ae_max)
decoder_data_tensor

tensor([[16.5501,  8.4458, 16.3942,  ..., 11.5925,  8.6424,  9.0935],
        [16.2109,  8.3311, 16.7323,  ..., 12.2378,  9.5536,  8.8640],
        [16.2513,  7.9039, 17.5883,  ..., 11.1623,  8.6153,  9.5791],
        ...,
        [16.3991,  7.9388, 17.1479,  ..., 10.0867,  9.9905,  9.6609],
        [16.6278,  7.7396, 16.6771,  ..., 11.2200,  9.4666,  8.9829],
        [16.2675,  8.6260, 17.0348,  ..., 10.9368,  9.4434,  9.1191]],
       device='cuda:0', grad_fn=<AddBackward0>)

In [18]:
np.savez('Liver_AutoEncoder.npz', origin_data=data_tensor.cpu().detach().numpy(), decoder_data=decoder_data_tensor.cpu().detach().numpy())

In [21]:
torch.save(model.state_dict(), model_file_name)

# 生成对抗网络

In [28]:
import torch
import torch.nn as nn
import torch.optim as optim

# 定义生成器（Generator）
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim),
            nn.Tanh()
        )

    def forward(self, x):
        return self.net(x)

# 定义判别器（Discriminator）
class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

# 超参数设置
input_dim = 64
data_dim = 512
lr = 0.00001
epochs = 1000
batch_size = 64

# 初始化生成器和判别器
generator = Generator(input_dim, data_dim).to(device)
discriminator = Discriminator(data_dim).to(device)

# 设置优化器
g_optimizer = optim.Adam(generator.parameters(), lr=lr)
d_optimizer = optim.Adam(discriminator.parameters(), lr=lr)

# 设置损失函数
loss_func = nn.BCELoss()



In [29]:
encoded_data.shape

torch.Size([50, 512])

In [30]:
gan_dataset = TensorDataset(encoded_data,encoded_data)
gan_dataloader = DataLoader(gan_dataset, batch_size=batch_size, shuffle=True)

In [31]:
discriminator.train()
generator.train()
min_loss = 1000
gan_model_name = 'generator_N.pth'
for epoch in range(epochs):
    for data in gan_dataloader:
        # print("********")
        real_data, _ = data
        real_data = real_data.to(device)
        # 训练判别器
        d_optimizer.zero_grad()
        real_label = torch.ones(real_data.shape[0], 1).to(device)

        fake_data = generator(torch.randn(real_data.shape[0], input_dim).to(device)).detach()
        fake_label = torch.zeros(real_data.shape[0], 1).to(device)
        real_out = discriminator(real_data)
        fake_out = discriminator(fake_data)
        real_loss = loss_func(real_out, real_label)
        fake_loss = loss_func(fake_out, fake_label)
        d_loss = real_loss + fake_loss
        d_loss.backward(retain_graph=True)
        d_optimizer.step()

        # 训练生成器
        g_optimizer.zero_grad()
        gen_input = torch.randn(real_data.shape[0], input_dim).to(device)
        gen_output = generator(gen_input)
        dis_output = discriminator(gen_output)

        g_loss = loss_func(dis_output, real_label)

        g_loss.backward()
        g_optimizer.step()
        if g_loss.item() < min_loss:
            torch.save(generator.state_dict(), gan_model_name)
    if epoch % 10 == 0:
        print("Epoch: {}, G_Loss: {:.4f}, D_Loss: {:.4f}".format(epoch, g_loss.item(), d_loss.item()))


Epoch: 0, G_Loss: 0.7281, D_Loss: 1.4299
Epoch: 10, G_Loss: 0.7254, D_Loss: 1.4066
Epoch: 20, G_Loss: 0.7244, D_Loss: 1.3813
Epoch: 30, G_Loss: 0.7221, D_Loss: 1.3577
Epoch: 40, G_Loss: 0.7202, D_Loss: 1.3344
Epoch: 50, G_Loss: 0.7191, D_Loss: 1.3129
Epoch: 60, G_Loss: 0.7185, D_Loss: 1.2929
Epoch: 70, G_Loss: 0.7172, D_Loss: 1.2733
Epoch: 80, G_Loss: 0.7159, D_Loss: 1.2533
Epoch: 90, G_Loss: 0.7142, D_Loss: 1.2347
Epoch: 100, G_Loss: 0.7116, D_Loss: 1.2152
Epoch: 110, G_Loss: 0.7108, D_Loss: 1.1959
Epoch: 120, G_Loss: 0.7082, D_Loss: 1.1780
Epoch: 130, G_Loss: 0.7063, D_Loss: 1.1609
Epoch: 140, G_Loss: 0.7047, D_Loss: 1.1455
Epoch: 150, G_Loss: 0.7031, D_Loss: 1.1295
Epoch: 160, G_Loss: 0.6997, D_Loss: 1.1144
Epoch: 170, G_Loss: 0.6960, D_Loss: 1.1016
Epoch: 180, G_Loss: 0.6950, D_Loss: 1.0867
Epoch: 190, G_Loss: 0.6926, D_Loss: 1.0712
Epoch: 200, G_Loss: 0.6907, D_Loss: 1.0569
Epoch: 210, G_Loss: 0.6898, D_Loss: 1.0442
Epoch: 220, G_Loss: 0.6901, D_Loss: 1.0302
Epoch: 230, G_Loss: 0.

# 生成样本数据

## 查看需要生成多少样本数据

In [32]:
Liver_data = pd.read_csv('Liver_merge.csv')
Liver_data

Unnamed: 0,A1BG,A2LD1,A2M,A4GALT,AAAS,AACS,AAGAB,AAK1,AAMP,AARS2,...,ZW10,ZWILCH,ZWINT,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3,group
0,16.903097,8.715260,16.837369,6.064562,9.365117,7.352139,9.069857,8.784455,11.415891,8.267119,...,8.452833,6.605290,6.043189,9.375843,9.384367,10.900427,11.427795,8.922543,8.901912,0
1,16.042797,8.713328,16.932478,6.033643,8.947604,6.721176,8.890053,9.139745,11.211142,7.529446,...,8.423790,6.757122,4.821338,9.254951,10.339537,11.014054,11.993706,9.710529,8.799178,0
2,16.251136,8.012606,17.869616,7.463284,9.140778,7.108619,9.200250,8.637313,11.241951,7.660811,...,8.608334,6.459951,4.762843,9.205788,9.931098,10.065102,10.875718,8.499810,9.616674,0
3,16.501291,8.284755,16.812427,7.136263,8.996256,7.515416,9.152846,9.349093,11.335538,8.014518,...,8.320020,6.429556,5.402074,10.358953,9.778451,10.949562,12.253583,8.632372,9.000843,0
4,16.399463,8.103829,16.814177,6.535796,9.308386,7.769594,9.242388,8.917900,11.452892,8.181746,...,8.066311,6.134584,6.066311,9.753889,9.655890,10.635777,12.043885,9.098322,8.693798,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
416,15.502898,6.918879,12.875774,5.535636,9.743188,7.587477,9.080461,9.212885,11.338272,9.169593,...,8.827342,7.754948,7.982217,9.638542,9.975747,10.275460,11.220429,8.661901,9.628266,1
417,16.019612,8.014802,16.142849,9.323891,9.192969,8.318809,9.573331,8.661631,11.226378,9.061195,...,8.194820,6.809849,6.631884,10.291393,9.595918,10.273990,11.976045,9.312003,8.762415,1
418,17.284697,8.957807,14.249758,5.259016,10.206550,7.415136,9.796845,8.857654,11.198848,9.860128,...,8.773590,7.058756,8.069313,7.959457,9.930634,9.786159,11.980702,8.941911,9.172761,1
419,13.810608,8.329675,14.860054,8.669967,9.682121,7.682121,8.720340,8.878694,11.159920,8.584486,...,9.023285,8.584175,10.203577,10.379578,10.510076,10.295842,10.195120,8.752971,9.708505,1


In [33]:
gen_num = len(Liver_data[Liver_data['group'] == 1]) - len(Liver_data[Liver_data['group'] == 0])
gen_num

321

In [34]:
gen_z = torch.randn(gen_num, input_dim).to(device)
gen_z.shape

torch.Size([321, 64])

In [35]:
gen_data = generator(gen_z)
gen_data

tensor([[-0.2478, -0.0775,  0.1280,  ...,  0.0760,  0.0388,  0.2355],
        [-0.1820, -0.1382,  0.0763,  ..., -0.0293,  0.0978,  0.1594],
        [-0.1800, -0.0560, -0.0186,  ...,  0.0317, -0.0262, -0.0019],
        ...,
        [-0.1817,  0.0568,  0.0260,  ...,  0.0518,  0.1443,  0.3190],
        [-0.2401, -0.0359, -0.0304,  ...,  0.0162,  0.1669,  0.2169],
        [-0.1703, -0.1172, -0.0551,  ...,  0.0933,  0.1184,  0.2268]],
       device='cuda:0', grad_fn=<TanhBackward0>)

## 解码

In [36]:
gen_normal_data = model.decoder(gen_data)
gen_normal_data

tensor([[0.6091, 0.4369, 0.5949,  ..., 0.4815, 0.4581, 0.4627],
        [0.5947, 0.4198, 0.5877,  ..., 0.4992, 0.4643, 0.4744],
        [0.6057, 0.4286, 0.5917,  ..., 0.4899, 0.4595, 0.4694],
        ...,
        [0.6114, 0.4224, 0.5995,  ..., 0.4790, 0.4607, 0.4585],
        [0.6072, 0.4186, 0.6025,  ..., 0.4838, 0.4484, 0.4602],
        [0.6244, 0.4147, 0.6062,  ..., 0.4756, 0.4561, 0.4555]],
       device='cuda:0', grad_fn=<SigmoidBackward0>)

## minmax反归一化

In [37]:
gen_denormal_data = min_max_denormalize(gen_normal_data, ae_min, ae_max)
gen_denormal_data

tensor([[13.7623,  9.6330, 13.4233,  ..., 10.7038, 10.1408, 10.2522],
        [13.4182,  9.2243, 13.2505,  ..., 11.1274, 10.2914, 10.5332],
        [13.6810,  9.4352, 13.3455,  ..., 10.9038, 10.1764, 10.4137],
        ...,
        [13.8175,  9.2864, 13.5320,  ..., 10.6437, 10.2049, 10.1503],
        [13.7170,  9.1947, 13.6050,  ..., 10.7590,  9.9088, 10.1928],
        [14.1300,  9.1001, 13.6932,  ..., 10.5621, 10.0937, 10.0789]],
       device='cuda:0', grad_fn=<AddBackward0>)

In [38]:
gen_data_np = gen_denormal_data.cpu().detach().numpy()
gen_data_np

array([[13.762253,  9.633013, 13.423275, ..., 10.703782, 10.140843,
        10.252183],
       [13.418206,  9.224305, 13.250537, ..., 11.12739 , 10.291354,
        10.533186],
       [13.681049,  9.435204, 13.345477, ..., 10.903757, 10.176419,
        10.413694],
       ...,
       [13.817484,  9.286361, 13.532031, ..., 10.643708, 10.204868,
        10.150307],
       [13.716986,  9.194677, 13.604954, ..., 10.758987,  9.908797,
        10.192752],
       [14.13001 ,  9.100125, 13.693156, ..., 10.56206 , 10.093728,
        10.07886 ]], dtype=float32)

In [39]:
zeros_column = np.zeros((gen_data_np.shape[0], 1))

# 将列向量与原始数组水平堆叠
gen_data_np = np.hstack((gen_data_np, zeros_column))
gen_data_np, gen_data_np.shape

(array([[13.76225281,  9.63301277, 13.42327499, ..., 10.14084339,
         10.25218296,  0.        ],
        [13.41820621,  9.22430515, 13.25053692, ..., 10.29135418,
         10.53318596,  0.        ],
        [13.68104935,  9.43520355, 13.3454771 , ..., 10.17641926,
         10.41369438,  0.        ],
        ...,
        [13.8174839 ,  9.28636074, 13.53203106, ..., 10.20486832,
         10.1503067 ,  0.        ],
        [13.7169857 ,  9.19467735, 13.60495377, ...,  9.90879726,
         10.19275188,  0.        ],
        [14.13000965,  9.10012531, 13.69315624, ..., 10.09372807,
         10.07886028,  0.        ]]),
 (321, 11886))

In [45]:
# 将NumPy数组转换为DataFrame
arr_df = pd.DataFrame(gen_data_np, columns=Liver_data.columns.values)

# 将两个DataFrame对象沿着行的方向连接
new_Liver_df = pd.concat([Liver_data, arr_df], axis=0)

In [46]:
new_Liver_df

Unnamed: 0,A1BG,A2LD1,A2M,A4GALT,AAAS,AACS,AAGAB,AAK1,AAMP,AARS2,...,ZW10,ZWILCH,ZWINT,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3,group
0,16.903097,8.715260,16.837369,6.064562,9.365117,7.352139,9.069857,8.784455,11.415891,8.267119,...,8.452833,6.605290,6.043189,9.375843,9.384367,10.900427,11.427795,8.922543,8.901912,0.0
1,16.042797,8.713328,16.932478,6.033643,8.947604,6.721176,8.890053,9.139745,11.211142,7.529446,...,8.423790,6.757122,4.821338,9.254951,10.339537,11.014054,11.993706,9.710529,8.799178,0.0
2,16.251136,8.012606,17.869616,7.463284,9.140778,7.108619,9.200250,8.637313,11.241951,7.660811,...,8.608334,6.459951,4.762843,9.205788,9.931098,10.065102,10.875718,8.499810,9.616674,0.0
3,16.501291,8.284755,16.812427,7.136263,8.996256,7.515416,9.152846,9.349093,11.335538,8.014518,...,8.320020,6.429556,5.402074,10.358953,9.778451,10.949562,12.253583,8.632372,9.000843,0.0
4,16.399463,8.103829,16.814177,6.535796,9.308386,7.769594,9.242388,8.917900,11.452892,8.181746,...,8.066311,6.134584,6.066311,9.753889,9.655890,10.635777,12.043885,9.098322,8.693798,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316,13.589894,9.337051,13.183481,9.305174,10.659041,9.728997,10.384562,9.720572,11.029023,10.304235,...,10.181251,9.838455,9.166183,10.178794,10.458627,10.703332,10.828924,10.247210,10.225082,0.0
317,13.536341,9.479577,13.422260,9.424850,10.784612,9.865892,10.314266,9.595171,11.174696,10.166624,...,9.809552,9.701748,9.010886,10.438852,10.428571,10.950298,10.860692,10.031444,10.284664,0.0
318,13.817484,9.286361,13.532031,9.504230,10.724981,9.793881,10.305278,9.672647,11.165280,9.935654,...,9.956368,9.557275,9.108200,10.264177,10.510122,10.744174,10.643708,10.204868,10.150307,0.0
319,13.716986,9.194677,13.604954,9.556050,10.762415,9.846568,10.137026,9.632738,11.145919,10.388307,...,9.792408,9.586435,9.159556,10.084065,10.493257,11.020835,10.758987,9.908797,10.192752,0.0


In [47]:
new_Liver_df.to_csv('gan_Liver.csv', index=False)

In [48]:
gan_Liver_N = new_Liver_df[new_Liver_df['group'] == 0].drop('group',axis=1)

In [49]:
Liver_N_df = pd.read_csv('Liver_N.csv')
Liver_N_df.columns.values[0] = 'gene_name'
Liver_N_df

Unnamed: 0,gene_name,TCGA.BC.A10Q.11A.11R.A131.07,TCGA.BC.A10R.11A.11R.A131.07,TCGA.BC.A10T.11A.11R.A131.07,TCGA.BC.A10U.11A.11R.A131.07,TCGA.BC.A10W.11A.11R.A131.07,TCGA.BC.A10X.11A.11R.A131.07,TCGA.BC.A10Y.11A.11R.A131.07,TCGA.BC.A10Z.11A.11R.A131.07,TCGA.BC.A110.11A.11R.A131.07,...,TCGA.EP.A12J.11A.11R.A131.07,TCGA.EP.A26S.11A.12R.A16W.07,TCGA.EP.A3RK.11A.11R.A22L.07,TCGA.ES.A2HT.11A.11R.A180.07,TCGA.FV.A23B.11A.11R.A16W.07,TCGA.FV.A2QR.11A.11R.A213.07,TCGA.FV.A3I0.11A.11R.A22L.07,TCGA.FV.A3I1.11A.11R.A22L.07,TCGA.FV.A3R2.11A.11R.A22L.07,TCGA.G3.A3CH.11A.11R.A22L.07
0,A1BG,16.903097,16.042797,16.251136,16.501291,16.399463,16.462378,16.007805,16.035786,16.287674,...,16.364204,16.825251,16.786364,16.187135,16.132554,16.599763,16.836753,16.717657,16.651632,16.336895
1,A2LD1,8.715260,8.713328,8.012606,8.284755,8.103829,9.574804,7.323853,7.564576,7.389049,...,7.899467,8.626652,9.050738,7.305453,7.470782,7.307883,8.858681,7.905777,7.952933,8.958810
2,A2M,16.837369,16.932478,17.869616,16.812427,16.814177,15.919171,17.128519,16.486686,16.016451,...,17.415063,18.402469,18.135194,18.354558,17.332568,17.172855,16.767008,16.889990,16.722245,16.915888
3,A4GALT,6.064562,6.033643,7.463284,7.136263,6.535796,5.846731,6.586612,7.235979,7.443738,...,6.094006,6.173695,6.352252,6.189864,6.753298,7.163369,6.682330,6.058894,6.450669,6.201273
4,AAAS,9.365117,8.947604,9.140778,8.996256,9.308386,9.283421,9.522222,9.303985,9.146856,...,9.509782,9.685990,9.666466,9.250200,9.501451,9.458824,9.169113,9.437791,9.635360,9.475489
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11897,ZXDC,9.384367,10.339537,9.931098,9.778451,9.655890,9.827209,9.894735,10.061583,10.476469,...,9.540479,9.581120,9.666466,9.826972,9.832097,9.845103,10.075853,10.185885,9.832772,9.584456
11898,ZYG11B,10.900427,11.014054,10.065102,10.949562,10.635777,11.176010,10.264863,9.569289,10.370471,...,10.899670,10.407972,10.619289,9.509386,10.213516,9.312435,10.662420,10.567569,9.552207,11.394189
11899,ZYX,11.427795,11.993706,10.875718,12.253583,12.043885,10.304752,11.178238,11.212578,12.548723,...,11.043617,10.582551,10.318191,11.915060,11.322336,12.226467,11.754827,9.887559,11.192136,10.939975
11900,ZZEF1,8.922543,9.710529,8.499810,8.632372,9.098322,8.588344,9.741189,8.876975,8.467928,...,9.647084,9.793665,9.083436,9.358453,9.791659,9.410206,8.166853,10.242115,9.692960,9.394189


In [50]:
# 使用reset_index方法重新排序索引
gan_Liver_N = gan_Liver_N.reset_index(drop=True)

In [51]:
# 使用rename方法修改列名
gan_Liver_N = gan_Liver_N.T.rename(columns=lambda x: 'GSM' + str(x))
gan_Liver_N

Unnamed: 0,GSM0,GSM1,GSM2,GSM3,GSM4,GSM5,GSM6,GSM7,GSM8,GSM9,...,GSM361,GSM362,GSM363,GSM364,GSM365,GSM366,GSM367,GSM368,GSM369,GSM370
A1BG,16.903097,16.042797,16.251136,16.501291,16.399463,16.462378,16.007805,16.035786,16.287674,16.288071,...,13.763674,13.431907,13.608498,13.699017,13.156299,13.589894,13.536341,13.817484,13.716986,14.130010
A2LD1,8.715260,8.713328,8.012606,8.284755,8.103829,9.574804,7.323853,7.564576,7.389049,7.628115,...,9.148424,9.640868,9.271367,9.427846,9.651344,9.337051,9.479577,9.286361,9.194677,9.100125
A2M,16.837369,16.932478,17.869616,16.812427,16.814177,15.919171,17.128519,16.486686,16.016451,16.240680,...,13.648194,13.371598,13.439961,13.351699,12.963845,13.183481,13.422260,13.532031,13.604954,13.693156
A4GALT,6.064562,6.033643,7.463284,7.136263,6.535796,5.846731,6.586612,7.235979,7.443738,6.053485,...,9.572368,9.618690,9.546144,9.478063,9.580538,9.305174,9.424850,9.504230,9.556050,9.361629
AAAS,9.365117,8.947604,9.140778,8.996256,9.308386,9.283421,9.522222,9.303985,9.146856,9.352887,...,10.632652,10.791465,10.879822,10.663843,10.668114,10.659041,10.784612,10.724981,10.762415,10.680064
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,9.384367,10.339537,9.931098,9.778451,9.655890,9.827209,9.894735,10.061583,10.476469,9.993851,...,10.341238,10.592004,10.432567,10.379808,10.667485,10.458627,10.428571,10.510122,10.493257,10.447108
ZYG11B,10.900427,11.014054,10.065102,10.949562,10.635777,11.176010,10.264863,9.569289,10.370471,10.825579,...,10.725433,10.747353,10.893348,10.830852,11.036944,10.703332,10.950298,10.744174,11.020835,10.491642
ZYX,11.427795,11.993706,10.875718,12.253583,12.043885,10.304752,11.178238,11.212578,12.548723,10.714310,...,10.830477,11.051327,10.947355,10.730951,10.891881,10.828924,10.860692,10.643708,10.758987,10.562060
ZZEF1,8.922543,9.710529,8.499810,8.632372,9.098322,8.588344,9.741189,8.876975,8.467928,9.259089,...,10.100415,9.942131,10.128495,10.149437,10.213749,10.247210,10.031444,10.204868,9.908797,10.093728


In [53]:
gan_Liver_N.to_csv('gan_Liver_N.csv')

In [54]:
gan_Liver_T = new_Liver_df[new_Liver_df['group'] == 1].drop('group',axis=1)
gan_Liver_T

Unnamed: 0,A1BG,A2LD1,A2M,A4GALT,AAAS,AACS,AAGAB,AAK1,AAMP,AARS2,...,ZUFSP,ZW10,ZWILCH,ZWINT,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
50,14.443608,8.550167,18.127265,6.324510,10.010442,9.121777,10.060597,9.429120,11.646732,8.409902,...,7.708426,8.252361,7.730724,8.855702,8.096242,9.633479,8.963197,11.631406,9.797738,8.994865
51,14.466709,6.623880,14.927799,7.345553,9.817647,8.749069,10.046666,9.079894,11.619928,9.411658,...,6.979231,9.402347,9.080498,9.296644,9.305661,10.172140,9.966495,12.372085,9.561373,9.749069
52,16.245069,7.742594,14.252281,6.887027,9.594177,7.022542,9.029985,8.992380,11.110005,8.913881,...,6.500845,8.637049,7.466515,8.160045,10.252024,10.146414,10.332481,11.652401,9.749523,9.032457
53,13.186500,11.508430,17.896170,6.164625,9.494917,9.462875,8.621004,10.614110,10.763500,8.944887,...,6.836393,8.755169,8.365749,8.815176,8.156214,10.167768,9.879191,11.594786,10.387617,8.424861
54,16.362261,8.657462,13.095968,6.358683,9.692461,6.540130,9.260607,9.064832,10.918641,9.676127,...,6.677634,9.324824,7.088969,6.641413,9.964563,10.672407,10.431195,11.503183,9.168161,9.378990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
416,15.502898,6.918879,12.875774,5.535636,9.743188,7.587477,9.080461,9.212885,11.338272,9.169593,...,6.932973,8.827342,7.754948,7.982217,9.638542,9.975747,10.275460,11.220429,8.661901,9.628266
417,16.019612,8.014802,16.142849,9.323891,9.192969,8.318809,9.573331,8.661631,11.226378,9.061195,...,6.553274,8.194820,6.809849,6.631884,10.291393,9.595918,10.273990,11.976045,9.312003,8.762415
418,17.284697,8.957807,14.249758,5.259016,10.206550,7.415136,9.796845,8.857654,11.198848,9.860128,...,7.146542,8.773590,7.058756,8.069313,7.959457,9.930634,9.786159,11.980702,8.941911,9.172761
419,13.810608,8.329675,14.860054,8.669967,9.682121,7.682121,8.720340,8.878694,11.159920,8.584486,...,6.270307,9.023285,8.584175,10.203577,10.379578,10.510076,10.295842,10.195120,8.752971,9.708505


In [57]:
gan_Liver_T.T.to_csv('gan_Liver_T.csv')

In [58]:
gan_Liver_N.shape

(11885, 371)

In [59]:
gan_Liver_T.shape

(371, 11885)