# 使用AutoEncoder降维

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
# 定义 AutoEncoder 网络
class AutoEncoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 1024),
            nn.ReLU(True),
            nn.Linear(1024, encoding_dim),
            nn.ReLU(True)
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, 1024),
            nn.ReLU(True),
            nn.Linear(1024, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# 定义训练函数
def train(model, dataloader, num_epochs, learning_rate):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        for data in dataloader:
            inputs, _ = data
            inputs = inputs.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()

        # 打印损失
        if epoch % 100 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))





In [2]:
def min_max_normalize(tensor):
    min_val = tensor.min()
    max_val = tensor.max()
    normalized_tensor = (tensor - min_val) / (max_val - min_val)
    return normalized_tensor, min_val, max_val

def min_max_denormalize(normalized_tensor, min_val, max_val):
    denormalized_tensor = normalized_tensor * (max_val - min_val) + min_val
    return denormalized_tensor

In [3]:
def setup_seed(seed):
     torch.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
     torch.cuda.manual_seed(seed)
     np.random.seed(seed)
     torch.backends.cudnn.deterministic = True

In [4]:
setup_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [5]:
# 加载数据
data = pd.read_csv('GSE25066_merge.csv')
data = data[data['group'] == 0]
data.pop('group')
data_tensor = torch.from_numpy(data.values).to(torch.float32).to(device)
nor_data_tensor, ae_min,ae_max = min_max_normalize(data_tensor)
print(nor_data_tensor)
dataset = TensorDataset(nor_data_tensor,nor_data_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# 创建 AutoEncoder 模型
input_dim = 13236
encoding_dim = 512  # 降维后的维度
model = AutoEncoder(input_dim, encoding_dim).to(device)

# 训练模型
num_epochs = 10
learning_rate = 0.0001


tensor([[0.6131, 0.5153, 0.4467,  ..., 0.4749, 0.3460, 0.4236],
        [0.6731, 0.4737, 0.4691,  ..., 0.4936, 0.3113, 0.5068],
        [0.6626, 0.4508, 0.4821,  ..., 0.4458, 0.4158, 0.4828],
        ...,
        [0.6257, 0.5087, 0.4559,  ..., 0.4788, 0.3211, 0.4210],
        [0.6404, 0.4866, 0.4323,  ..., 0.4856, 0.2900, 0.4329],
        [0.6162, 0.4467, 0.4790,  ..., 0.4762, 0.3068, 0.4674]],
       device='cuda:0')


In [6]:
from pathlib import Path
model_file_name = 'GSE20194_AE.pth'
model_file = Path(model_file_name)
if model_file.exists():
    # 指定的文件存在
    print(f'{model_file_name}:read model params!')
    model.load_state_dict(torch.load(model_file_name))
else:
    print(f'{model_file_name}:not exist!')
    train(model, dataloader, 1000, learning_rate)

GSE20194_AE.pth:not exist!
Epoch [1/1000], Loss: 0.0110
Epoch [101/1000], Loss: 0.0015
Epoch [201/1000], Loss: 0.0019
Epoch [301/1000], Loss: 0.0011
Epoch [401/1000], Loss: 0.0014
Epoch [501/1000], Loss: 0.0008
Epoch [601/1000], Loss: 0.0011
Epoch [701/1000], Loss: 0.0010
Epoch [801/1000], Loss: 0.0007
Epoch [901/1000], Loss: 0.0006


In [7]:
# 使用训练好的模型对数据进行降维
encoded_data = model.encoder(nor_data_tensor)

In [8]:
nor_data_tensor

tensor([[0.6131, 0.5153, 0.4467,  ..., 0.4749, 0.3460, 0.4236],
        [0.6731, 0.4737, 0.4691,  ..., 0.4936, 0.3113, 0.5068],
        [0.6626, 0.4508, 0.4821,  ..., 0.4458, 0.4158, 0.4828],
        ...,
        [0.6257, 0.5087, 0.4559,  ..., 0.4788, 0.3211, 0.4210],
        [0.6404, 0.4866, 0.4323,  ..., 0.4856, 0.2900, 0.4329],
        [0.6162, 0.4467, 0.4790,  ..., 0.4762, 0.3068, 0.4674]],
       device='cuda:0')

In [9]:
data_tensor

tensor([[11.8287,  9.5239,  7.9073,  ...,  8.5732,  5.5354,  7.3647],
        [13.2413,  8.5433,  8.4357,  ...,  9.0120,  4.7177,  9.3233],
        [12.9936,  8.0050,  8.7417,  ...,  7.8872,  7.1810,  8.7589],
        ...,
        [12.1239,  9.3687,  8.1248,  ...,  8.6634,  4.9483,  7.3026],
        [12.4712,  8.8489,  7.5696,  ...,  8.8234,  4.2163,  7.5835],
        [11.9005,  7.9081,  8.6694,  ...,  8.6022,  4.6114,  8.3969]],
       device='cuda:0')

In [10]:
encoded_data

tensor([[1.5095, 0.9459, 0.0000,  ..., 0.0000, 0.0000, 0.8093],
        [1.7483, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 2.0990],
        [0.2235, 0.6540, 0.0000,  ..., 0.0000, 0.0000, 1.1112],
        ...,
        [0.7289, 1.9455, 0.0000,  ..., 0.0000, 0.0000, 0.7178],
        [0.4070, 1.1969, 0.0000,  ..., 0.0000, 0.0000, 0.1429],
        [0.3198, 0.2224, 0.0000,  ..., 0.0000, 0.0000, 0.1588]],
       device='cuda:0', grad_fn=<ReluBackward0>)

In [11]:
model.decoder(encoded_data)

tensor([[0.6173, 0.5034, 0.4290,  ..., 0.4702, 0.3797, 0.4209],
        [0.6746, 0.4700, 0.4597,  ..., 0.5012, 0.3022, 0.5044],
        [0.6533, 0.4342, 0.4575,  ..., 0.4666, 0.3960, 0.4833],
        ...,
        [0.6149, 0.4996, 0.4395,  ..., 0.4764, 0.3979, 0.4400],
        [0.6242, 0.4772, 0.4343,  ..., 0.4845, 0.3158, 0.4413],
        [0.6120, 0.4552, 0.4642,  ..., 0.4797, 0.3144, 0.4534]],
       device='cuda:0', grad_fn=<SigmoidBackward0>)

In [12]:
decoder_data_tensor = min_max_denormalize(model.decoder(encoded_data),ae_min,ae_max)
decoder_data_tensor

tensor([[11.9266,  9.2438,  7.4902,  ...,  8.4627,  6.3296,  7.3013],
        [13.2775,  8.4568,  8.2137,  ...,  9.1930,  4.5050,  9.2669],
        [12.7740,  7.6133,  8.1622,  ...,  8.3760,  6.7132,  8.7697],
        ...,
        [11.8704,  9.1547,  7.7390,  ...,  8.6067,  6.7595,  7.7507],
        [12.0904,  8.6265,  7.6169,  ...,  8.7994,  4.8251,  7.7815],
        [11.8020,  8.1084,  8.3207,  ...,  8.6859,  4.7912,  8.0670]],
       device='cuda:0', grad_fn=<AddBackward0>)

In [13]:
np.savez('GSE25066_AutoEncoder.npz', origin_data=data_tensor.cpu().detach().numpy(), decoder_data=decoder_data_tensor.cpu().detach().numpy())

In [14]:
torch.save(model.state_dict(), 'GSE25066_AE.pth')

# 生成对抗网络

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim

# 定义生成器（Generator）
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim),
            nn.Tanh()
        )

    def forward(self, x):
        return self.net(x)

# 定义判别器（Discriminator）
class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

# 超参数设置
input_dim = 64
data_dim = 512
lr = 0.00001
epochs = 1000
batch_size = 64

# 初始化生成器和判别器
generator = Generator(input_dim, data_dim).to(device)
discriminator = Discriminator(data_dim).to(device)

# 设置优化器
g_optimizer = optim.Adam(generator.parameters(), lr=lr)
d_optimizer = optim.Adam(discriminator.parameters(), lr=lr)

# 设置损失函数
loss_func = nn.BCELoss()



In [16]:
encoded_data.shape

torch.Size([99, 512])

In [17]:
gan_dataset = TensorDataset(encoded_data,encoded_data)
gan_dataloader = DataLoader(gan_dataset, batch_size=batch_size, shuffle=True)

In [20]:
discriminator.train()
generator.train()
min_loss = 1000
for epoch in range(10000):
    for data in gan_dataloader:
        # print("********")
        real_data, _ = data
        real_data = real_data.to(device)
        # 训练判别器
        d_optimizer.zero_grad()
        real_label = torch.ones(real_data.shape[0], 1).to(device)

        fake_data = generator(torch.randn(real_data.shape[0], input_dim).to(device)).detach()
        fake_label = torch.zeros(real_data.shape[0], 1).to(device)
        real_out = discriminator(real_data)
        fake_out = discriminator(fake_data)
        real_loss = loss_func(real_out, real_label)
        fake_loss = loss_func(fake_out, fake_label)
        d_loss = real_loss + fake_loss
        d_loss.backward(retain_graph=True)
        d_optimizer.step()

        # 训练生成器
        g_optimizer.zero_grad()
        gen_input = torch.randn(real_data.shape[0], input_dim).to(device)
        gen_output = generator(gen_input)
        dis_output = discriminator(gen_output)

        g_loss = loss_func(dis_output, real_label)

        g_loss.backward()
        g_optimizer.step()
        if g_loss.item() < min_loss:
            torch.save(generator.state_dict(), "generator_0.pth")
    if epoch % 10 == 0:
        print("Epoch: {}, G_Loss: {:.4f}, D_Loss: {:.4f}".format(epoch, g_loss.item(), d_loss.item()))


Epoch: 0, G_Loss: 0.9707, D_Loss: 0.6754
Epoch: 10, G_Loss: 0.9754, D_Loss: 0.6703
Epoch: 20, G_Loss: 0.9696, D_Loss: 0.6750
Epoch: 30, G_Loss: 0.9611, D_Loss: 0.6673
Epoch: 40, G_Loss: 0.9633, D_Loss: 0.6657
Epoch: 50, G_Loss: 0.9875, D_Loss: 0.6387
Epoch: 60, G_Loss: 1.0109, D_Loss: 0.6408
Epoch: 70, G_Loss: 1.0257, D_Loss: 0.6160
Epoch: 80, G_Loss: 1.0285, D_Loss: 0.6199
Epoch: 90, G_Loss: 1.0372, D_Loss: 0.6135
Epoch: 100, G_Loss: 1.0311, D_Loss: 0.6098
Epoch: 110, G_Loss: 1.0342, D_Loss: 0.6110
Epoch: 120, G_Loss: 1.0213, D_Loss: 0.6399
Epoch: 130, G_Loss: 0.9965, D_Loss: 0.6532
Epoch: 140, G_Loss: 0.9669, D_Loss: 0.6583
Epoch: 150, G_Loss: 0.9645, D_Loss: 0.6690
Epoch: 160, G_Loss: 0.9429, D_Loss: 0.6961
Epoch: 170, G_Loss: 0.9473, D_Loss: 0.6651
Epoch: 180, G_Loss: 0.9855, D_Loss: 0.6481
Epoch: 190, G_Loss: 1.0328, D_Loss: 0.6213
Epoch: 200, G_Loss: 1.0459, D_Loss: 0.6050
Epoch: 210, G_Loss: 1.0924, D_Loss: 0.5786
Epoch: 220, G_Loss: 1.0945, D_Loss: 0.5873
Epoch: 230, G_Loss: 1.

# 生成样本数据

## 查看需要生成多少样本数据

In [35]:
GSE25066_data = pd.read_csv('GSE25066_merge.csv')
GSE25066_data

Unnamed: 0,DDR1,RFC2,HSPA6,PAX8,GUCA1A,MIR5193,THRA,PTPN21,CCL5,CYP2E1,...,MINOS1-NBL1,NUS1P3,MROH7-TTC4,RBM12B,LOC102725263,FAM86B1,SNHG17,LOC100505915,NPEPL1,group
0,11.828700,9.523877,7.907263,10.750475,6.478643,8.698516,7.526617,7.276040,10.399704,6.642930,...,11.356547,6.894852,9.188312,8.628715,9.029101,8.669479,8.573230,5.535360,7.364660,0
1,13.241262,8.543289,8.435700,11.931321,7.151195,10.130569,9.457966,7.663284,6.242512,6.902703,...,11.920599,1.356235,8.111887,8.679902,9.154389,8.743268,9.011958,4.717732,9.323271,0
2,12.993611,8.004977,8.741661,11.579397,7.433843,9.621802,8.319854,7.718079,5.457564,6.741639,...,10.215395,5.618485,9.432083,7.999077,8.554622,9.404803,7.887245,7.180957,8.758899,0
3,12.028597,8.889190,7.975727,10.501003,7.320102,9.473202,8.133202,5.137284,8.062176,6.796330,...,10.992724,6.310822,9.204276,9.277398,10.488880,8.439994,9.210394,5.208338,7.218112,0
4,12.275150,7.982162,7.971315,11.017791,6.932531,9.590634,8.446712,5.407080,10.304378,7.145151,...,10.871971,5.364275,9.003275,8.045155,9.305313,8.852204,9.044612,6.692939,7.591082,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
483,11.936811,8.196444,8.358999,10.438085,6.986401,8.947285,7.340735,7.729171,6.857256,6.910038,...,11.448681,5.899329,8.921185,7.470472,9.912844,8.272279,8.474885,7.160992,7.249352,1
484,11.743730,9.206188,8.704715,10.817337,7.556436,9.359459,7.818077,7.218218,8.007647,6.902983,...,9.540868,5.655864,8.995773,8.154563,8.870963,8.862683,8.833386,6.583812,8.607642,1
485,11.763716,8.675637,8.216023,10.596750,6.840312,9.642787,7.412919,7.579046,7.749030,6.708932,...,10.964748,7.051643,8.625504,8.041096,9.241797,8.762348,8.793579,5.173663,7.033699,1
486,10.838269,7.985254,8.092749,11.253446,7.598196,9.082484,8.112567,7.139708,9.324205,7.000705,...,12.008883,5.553036,9.147902,6.768057,8.748552,8.072324,8.026667,6.329411,7.726061,1


In [41]:
gen_num = len(GSE25066_data[GSE25066_data['group'] == 1]) - len(GSE25066_data[GSE25066_data['group'] == 0])
gen_num

290

In [42]:
gen_z = torch.randn(gen_num, input_dim).to(device)
gen_z.shape

torch.Size([290, 64])

In [43]:
gen_data = generator(gen_z)
gen_data

tensor([[-0.0712, -0.4303, -0.4074,  ..., -0.4893,  0.9662, -0.1570],
        [ 0.0916, -0.4446, -0.2520,  ..., -0.4837,  0.9743,  0.0898],
        [ 0.0192, -0.5061, -0.3333,  ..., -0.4325,  0.9630, -0.0148],
        ...,
        [ 0.0377, -0.5254, -0.2750,  ..., -0.4627,  0.9627, -0.1331],
        [-0.0554, -0.5146, -0.2699,  ..., -0.3765,  0.9422,  0.0330],
        [-0.1753, -0.3198, -0.3283,  ..., -0.4824,  0.9643, -0.1702]],
       device='cuda:0', grad_fn=<TanhBackward0>)

## 解码

In [44]:
gen_normal_data = model.decoder(gen_data)
gen_normal_data

tensor([[0.5488, 0.4889, 0.4783,  ..., 0.4885, 0.4670, 0.4912],
        [0.5461, 0.4887, 0.4768,  ..., 0.4886, 0.4673, 0.4842],
        [0.5514, 0.4869, 0.4808,  ..., 0.4955, 0.4703, 0.4812],
        ...,
        [0.5543, 0.4882, 0.4829,  ..., 0.4995, 0.4617, 0.4919],
        [0.5494, 0.4882, 0.4816,  ..., 0.4910, 0.4695, 0.4931],
        [0.5478, 0.4854, 0.4799,  ..., 0.4932, 0.4606, 0.4827]],
       device='cuda:0', grad_fn=<SigmoidBackward0>)

## minmax反归一化

In [45]:
gen_denormal_data = min_max_denormalize(gen_normal_data, ae_min, ae_max)
gen_denormal_data

tensor([[10.3133,  8.9024,  8.6536,  ...,  8.8925,  8.3868,  8.9561],
        [10.2505,  8.8966,  8.6180,  ...,  8.8948,  8.3936,  8.7923],
        [10.3737,  8.8551,  8.7103,  ...,  9.0584,  8.4636,  8.7215],
        ...,
        [10.4431,  8.8861,  8.7620,  ...,  9.1514,  8.2624,  8.9730],
        [10.3276,  8.8863,  8.7310,  ...,  8.9520,  8.4444,  9.0019],
        [10.2894,  8.8209,  8.6894,  ...,  9.0030,  8.2355,  8.7554]],
       device='cuda:0', grad_fn=<AddBackward0>)

In [62]:
gen_data_np = gen_denormal_data.cpu().detach().numpy()
gen_data_np

array([[10.31335  ,  8.902443 ,  8.653562 , ...,  8.892497 ,  8.386763 ,
         8.956055 ],
       [10.250475 ,  8.896602 ,  8.618033 , ...,  8.894805 ,  8.393645 ,
         8.79229  ],
       [10.3737335,  8.855096 ,  8.710294 , ...,  9.058387 ,  8.463587 ,
         8.721526 ],
       ...,
       [10.443093 ,  8.886114 ,  8.761951 , ...,  9.151364 ,  8.26241  ,
         8.972986 ],
       [10.327574 ,  8.88632  ,  8.731006 , ...,  8.952026 ,  8.444355 ,
         9.001902 ],
       [10.289376 ,  8.820926 ,  8.689403 , ...,  9.003032 ,  8.235528 ,
         8.755377 ]], dtype=float32)

In [63]:
zeros_column = np.zeros((gen_data_np.shape[0], 1))

# 将列向量与原始数组水平堆叠
gen_data_np = np.hstack((gen_data_np, zeros_column))
gen_data_np, gen_data_np.shape

(array([[10.31334972,  8.90244293,  8.65356159, ...,  8.38676262,
          8.95605469,  0.        ],
        [10.25047493,  8.89660168,  8.61803341, ...,  8.39364529,
          8.79228973,  0.        ],
        [10.37373352,  8.85509586,  8.71029377, ...,  8.46358681,
          8.72152615,  0.        ],
        ...,
        [10.4430933 ,  8.88611412,  8.76195145, ...,  8.26241016,
          8.97298622,  0.        ],
        [10.32757378,  8.88632011,  8.73100567, ...,  8.44435501,
          9.00190163,  0.        ],
        [10.28937626,  8.82092571,  8.68940258, ...,  8.23552799,
          8.75537682,  0.        ]]),
 (290, 13237))

In [64]:
# 将NumPy数组转换为DataFrame
arr_df = pd.DataFrame(gen_data_np, columns=GSE25066_data.columns.values)

# 将两个DataFrame对象沿着行的方向连接
new_GSE25066_df = pd.concat([GSE25066_data, arr_df], axis=0)

In [65]:
new_GSE25066_df

Unnamed: 0,DDR1,RFC2,HSPA6,PAX8,GUCA1A,MIR5193,THRA,PTPN21,CCL5,CYP2E1,...,MINOS1-NBL1,NUS1P3,MROH7-TTC4,RBM12B,LOC102725263,FAM86B1,SNHG17,LOC100505915,NPEPL1,group
0,11.828700,9.523877,7.907263,10.750475,6.478643,8.698516,7.526617,7.276040,10.399704,6.642930,...,11.356547,6.894852,9.188312,8.628715,9.029101,8.669479,8.573230,5.535360,7.364660,0.0
1,13.241262,8.543289,8.435700,11.931321,7.151195,10.130569,9.457966,7.663284,6.242512,6.902703,...,11.920599,1.356235,8.111887,8.679902,9.154389,8.743268,9.011958,4.717732,9.323271,0.0
2,12.993611,8.004977,8.741661,11.579397,7.433843,9.621802,8.319854,7.718079,5.457564,6.741639,...,10.215395,5.618485,9.432083,7.999077,8.554622,9.404803,7.887245,7.180957,8.758899,0.0
3,12.028597,8.889190,7.975727,10.501003,7.320102,9.473202,8.133202,5.137284,8.062176,6.796330,...,10.992724,6.310822,9.204276,9.277398,10.488880,8.439994,9.210394,5.208338,7.218112,0.0
4,12.275150,7.982162,7.971315,11.017791,6.932531,9.590634,8.446712,5.407080,10.304378,7.145151,...,10.871971,5.364275,9.003275,8.045155,9.305313,8.852204,9.044612,6.692939,7.591082,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,10.501408,9.016302,8.783835,9.680231,8.268755,9.827520,8.895333,8.399132,9.558890,7.797887,...,10.109839,8.129112,8.990020,8.409897,9.251225,8.685712,9.164466,8.595016,8.784922,0.0
286,10.289022,8.815002,8.574663,9.619898,8.465206,9.673041,8.824081,8.257421,9.393159,7.811494,...,10.509812,8.167474,9.085081,8.492114,8.766034,8.326267,8.972200,8.277074,8.914538,0.0
287,10.443093,8.886114,8.761951,9.798333,8.262609,9.813212,8.984246,8.270182,9.403510,7.790942,...,10.265976,8.106213,9.036253,8.514579,8.819316,8.392213,9.151364,8.262410,8.972986,0.0
288,10.327574,8.886320,8.731006,9.709379,8.298635,9.813801,8.952147,8.470894,9.507957,7.834961,...,10.216570,8.106966,9.032146,8.510935,8.863640,8.697643,8.952026,8.444355,9.001902,0.0


In [69]:
new_GSE25066_df.to_csv('gan_GSE25066.csv', index=False)

In [93]:
gan_GSE25066_pCR = new_GSE25066_df[new_GSE25066_df['group'] == 0].drop('group',axis=1)

In [94]:
GSE25066_pCR_df = pd.read_csv('GSE25066_pCR.csv')
GSE25066_pCR_df.columns.values[0] = 'gene_name'
GSE25066_pCR_df

Unnamed: 0,gene_name,GSM615099,GSM615110,GSM615122,GSM615123,GSM615137,GSM615139,GSM615140,GSM615143,GSM615146,...,GSM615775,GSM615779,GSM615780,GSM615782,GSM615793,GSM615798,GSM615802,GSM615803,GSM615822,GSM615824
0,DDR1,11.828700,13.241262,12.993611,12.028597,12.275150,12.313212,12.576470,12.205089,11.714574,...,11.400452,11.965518,11.383388,11.202747,12.591981,12.266062,11.559467,12.123890,12.471186,11.900511
1,RFC2,9.523877,8.543289,8.004977,8.889190,7.982162,8.163444,7.916820,7.414843,8.380203,...,8.550332,8.442084,8.425894,8.053900,8.951475,9.241684,8.769740,9.368690,8.848875,7.908133
2,HSPA6,7.907263,8.435700,8.741661,7.975727,7.971315,8.757782,9.215672,8.072444,7.661083,...,8.216528,7.794136,8.272750,8.701260,7.726220,8.109099,7.720345,8.124811,7.569580,8.669415
3,PAX8,10.750475,11.931321,11.579397,10.501003,11.017791,10.735321,11.066361,11.345411,10.635503,...,10.267392,10.536505,10.527342,11.137687,10.505399,11.196548,10.309222,10.881557,10.452863,11.107900
4,GUCA1A,6.478643,7.151195,7.433843,7.320102,6.932531,7.479115,7.995924,6.886549,6.891103,...,7.100183,6.186551,6.288648,6.973356,6.502955,7.338256,6.762156,7.114815,7.006652,7.381252
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13231,LOC102725263,9.029101,9.154389,8.554622,10.488880,9.305313,8.863490,9.031884,10.985185,8.159242,...,9.025092,9.003402,8.057661,8.880101,8.521056,10.591930,9.048223,9.584406,9.236633,9.360416
13232,FAM86B1,8.669479,8.743268,9.404803,8.439994,8.852204,7.628408,7.984313,8.763259,8.467488,...,8.650094,8.681698,8.124114,8.650458,9.090492,8.196299,8.145812,8.930901,8.702834,8.380616
13233,SNHG17,8.573230,9.011958,7.887245,9.210394,9.044612,8.507519,9.539290,9.337777,8.359296,...,8.825160,8.147968,9.106360,8.949937,8.426192,9.314493,8.131871,8.663448,8.823441,8.602202
13234,LOC100505915,5.535360,4.717732,7.180957,5.208338,6.692939,7.099980,4.862343,4.648788,6.277681,...,5.794316,4.617974,6.251629,4.892419,6.506186,8.032424,7.643098,4.948300,4.216281,4.611442


In [95]:
# 使用reset_index方法重新排序索引
gan_GSE25066_pCR = gan_GSE25066_pCR.reset_index(drop=True)

In [97]:
# 使用rename方法修改列名
gan_GSE25066_pCR = gan_GSE25066_pCR.T.rename(columns=lambda x: 'GSM' + str(x))
gan_GSE25066_pCR

Unnamed: 0,GSM0,GSM1,GSM2,GSM3,GSM4,GSM5,GSM6,GSM7,GSM8,GSM9,...,GSM379,GSM380,GSM381,GSM382,GSM383,GSM384,GSM385,GSM386,GSM387,GSM388
DDR1,11.828700,13.241262,12.993611,12.028597,12.275150,12.313212,12.576470,12.205089,11.714574,11.861684,...,10.253498,10.283988,10.608132,10.496068,10.450909,10.501408,10.289022,10.443093,10.327574,10.289376
RFC2,9.523877,8.543289,8.004977,8.889190,7.982162,8.163444,7.916820,7.414843,8.380203,9.088528,...,9.082888,8.876357,8.815203,8.749627,8.853852,9.016302,8.815002,8.886114,8.886320,8.820926
HSPA6,7.907263,8.435700,8.741661,7.975727,7.971315,8.757782,9.215672,8.072444,7.661083,7.861307,...,8.726858,8.599212,8.489453,8.678428,8.578940,8.783835,8.574663,8.761951,8.731006,8.689403
PAX8,10.750475,11.931321,11.579397,10.501003,11.017791,10.735321,11.066361,11.345411,10.635503,10.659652,...,9.610138,9.764252,9.736207,9.511065,9.531338,9.680231,9.619898,9.798333,9.709379,9.670519
GUCA1A,6.478643,7.151195,7.433843,7.320102,6.932531,7.479115,7.995924,6.886549,6.891103,6.887889,...,8.282845,8.467445,8.504788,8.248045,8.300835,8.268755,8.465206,8.262609,8.298635,8.018137
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LOC102725263,9.029101,9.154389,8.554622,10.488880,9.305313,8.863490,9.031884,10.985185,8.159242,9.533490,...,9.013741,8.846117,8.986135,8.953793,8.901990,9.251225,8.766034,8.819316,8.863640,9.012495
FAM86B1,8.669479,8.743268,9.404803,8.439994,8.852204,7.628408,7.984313,8.763259,8.467488,8.693230,...,8.496355,8.602278,8.312710,8.225665,8.451160,8.685712,8.326267,8.392213,8.697643,8.843158
SNHG17,8.573230,9.011958,7.887245,9.210394,9.044612,8.507519,9.539290,9.337777,8.359296,8.182982,...,9.027749,8.833577,9.129616,9.035217,8.990586,9.164466,8.972200,9.151364,8.952026,9.003032
LOC100505915,5.535360,4.717732,7.180957,5.208338,6.692939,7.099980,4.862343,4.648788,6.277681,5.648270,...,8.488754,8.453042,8.331416,8.602941,8.268628,8.595016,8.277074,8.262410,8.444355,8.235528


In [98]:
gan_GSE25066_pCR.to_csv('gan_GSE25066_pCR.csv')