In [None]:
# Code to download file into Colaboratory:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Breast data -----------------------
#miRNAseq
id = "1vm20ztM8XgefR1kfZiwld1fUHuVDopB_"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('breast_miRnaSeq.csv')

#train clinical 8:2
id = "1wGYJhL2ltGg0_jGnklyTDOkuVkk9E69p"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('train_clinical82_B.csv')

#test clinical 8:2
id = "1oU9ZSvWz_Xamc_EKDfHCNEbsI1wrh2wY"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('test_clinical82_B.csv')

#train clinical 7:3
id = "10-4o-p0uSQl8qc8a23wU0Ibw2ul0MVJh"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('train_clinical73_B.csv')

#test clinical 7:3
id = "1QnUOPgm3pKgr26ZntdNb_uuoc_6P2Uxy"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('test_clinical73_B.csv')

# Glioma data ---------------------------------
#miRNAseq
id = "1zfyAT1EIo6VXI5ZGEmt2-DZHHZGN2J9M"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('glioma_miRnaSeq.csv')

#train clinical 8:2
id = "1pXxRr81rk9lGVHY41Eovw05g11Y-00fD"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('train_clinical82_G.csv')

#test clinical 8:2
id = "1oONCRsD38fmUM-Z9sxsFeLFAoKIprKkT"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('test_clinical82_G.csv')

#train clinical 7:3
id = "1vOx2bB3d0Psj2XmAETJzNTSAxu9HCyNi"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('train_clinical73_G.csv')

#test clinical 7:3
id = "1JMbn7PP4YHC-PzrrqDhzPJRmtvrtzPUO"
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('test_clinical73_G.csv')

In [None]:
import pandas as pd
import numpy as np
import tensorflow as ts
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import torch.optim as optim
import nltk
import sklearn
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold


## MiRNA Cross Validation Models


In [None]:
# build up ae model for mirna
class autoencoder(nn.Module):
  """Fully connected autoencoder with a 128-dimensional bottleneck.

  The encoder maps ``input_dim -> 256 -> 128`` and the decoder mirrors it
  (``128 -> 256 -> input_dim``), ending in ``Tanh``, so reconstructions lie
  in (-1, 1).

  Args:
    input_dim: Number of input features. When omitted, the notebook-level
      global ``embed_dim`` is used, so existing ``autoencoder()`` calls keep
      working; passing it explicitly removes the dependency on that global
      having been set by an earlier cell.
  """
  def __init__(self, input_dim=None):
    super(autoencoder, self).__init__()
    if input_dim is None:
      # Backward-compatible path: read the width from the notebook global.
      input_dim = embed_dim
    self.encoder = nn.Sequential(
      nn.Linear(input_dim, 256),
      nn.ReLU(True),
      nn.Linear(256, 128))
    self.decoder = nn.Sequential(
      nn.Linear(128, 256),
      nn.ReLU(True),
      nn.Linear(256, input_dim),
      nn.Tanh())

  def forward(self, x):
    """Encode ``x`` to the 128-d code and decode it back to ``input_dim`` features."""
    x = self.encoder(x)
    x = self.decoder(x)
    return x

In [None]:
# Run configuration shared by every training cell below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(123)

# Reconstruction objective for the autoencoder.
criterion = nn.MSELoss()

k = 10            # number of cross-validation folds
batch_size = 128
epochs = 10
lr = 0.001

# Use the configured fold count instead of a second hard-coded 10 so the two
# cannot drift apart.
kfold = KFold(n_splits=k, random_state=123, shuffle=True)


In [None]:
#define train function and validation function
def train_epoch(model, device, optimizer, loss_func, dataloader):
  """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

  Every batch serves as both input and target (reconstruction objective).
  The model is switched to training mode before the pass.

  Args:
    model: Module being trained.
    device: Device each batch is moved to before the forward pass.
    optimizer: Optimizer stepped once per batch.
    loss_func: Callable ``loss_func(output, target)`` returning a scalar tensor.
    dataloader: Iterable of feature batches; must support ``len()``.

  Returns:
    Sum of the per-batch losses divided by the number of batches.
  """
  model.train()
  running_total = 0.0

  for batch in dataloader:
    batch = batch.to(device)

    # Standard step: clear old gradients, forward, backward, update.
    optimizer.zero_grad()
    reconstruction = model(batch)
    batch_loss = loss_func(reconstruction, batch)
    batch_loss.backward()
    optimizer.step()

    running_total += batch_loss.item()

  return running_total / len(dataloader)
  
def validation_epoch(model, device, loss_func, dataloader):
  """Evaluate ``model`` on ``dataloader`` and return the mean batch loss.

  Every batch serves as both input and target (reconstruction objective).
  The model is switched to eval mode and no parameters are updated.

  Args:
    model: Module being evaluated.
    device: Device each batch is moved to before the forward pass.
    loss_func: Callable ``loss_func(output, target)`` returning a scalar tensor.
    dataloader: Iterable of feature batches; must support ``len()``.

  Returns:
    Sum of the per-batch losses divided by the number of batches.
  """
  model.eval()
  valid_loss = 0.0

  # Evaluation never calls backward(), so skip building the autograd graph;
  # this saves memory and time without changing the loss values.
  with torch.no_grad():
    for features in dataloader:
      features = features.to(device)
      output = model(features)
      loss = loss_func(output, features)

      valid_loss += loss.item()

  valid_loss = valid_loss/len(dataloader)

  return valid_loss

## Load Breast data


In [None]:
# Load the breast-cohort miRNA expression table fetched by the download cell.
breast_miRnaSeq = pd.read_csv('breast_miRnaSeq.csv')

In [None]:
# The CSV's unnamed leading column is renamed to attrib_name, the key the
# clinical split tables are merged on further down.
breast_miRnaSeq = breast_miRnaSeq.rename(columns={"Unnamed: 0": "attrib_name"})
breast_miRnaSeq

Unnamed: 0,attrib_name,hsa-let-7a-1,hsa-let-7a-2,hsa-let-7a-3,hsa-let-7b,hsa-let-7c,hsa-let-7d,hsa-let-7e,hsa-let-7f-1,hsa-let-7f-2,hsa-let-7g,hsa-let-7i,hsa-mir-1-1,hsa-mir-1-2,hsa-mir-100,hsa-mir-101-1,hsa-mir-101-2,hsa-mir-103-1,hsa-mir-103-2,hsa-mir-105-1,hsa-mir-105-2,hsa-mir-106a,hsa-mir-106b,hsa-mir-107,hsa-mir-10a,hsa-mir-10b,hsa-mir-1178,hsa-mir-1179,hsa-mir-1180,hsa-mir-1181,hsa-mir-1182,hsa-mir-1184-1,hsa-mir-1185-1,hsa-mir-1185-2,hsa-mir-1193,hsa-mir-1197,hsa-mir-122,hsa-mir-1224,hsa-mir-1225,hsa-mir-1226,...,hsa-mir-769,hsa-mir-770,hsa-mir-802,hsa-mir-873,hsa-mir-874,hsa-mir-875,hsa-mir-876,hsa-mir-877,hsa-mir-885,hsa-mir-887,hsa-mir-888,hsa-mir-889,hsa-mir-891a,hsa-mir-891b,hsa-mir-892a,hsa-mir-892b,hsa-mir-9-1,hsa-mir-9-2,hsa-mir-9-3,hsa-mir-922,hsa-mir-92a-1,hsa-mir-92a-2,hsa-mir-92b,hsa-mir-93,hsa-mir-933,hsa-mir-934,hsa-mir-935,hsa-mir-937,hsa-mir-938,hsa-mir-939,hsa-mir-940,hsa-mir-941-1,hsa-mir-942,hsa-mir-943,hsa-mir-944,hsa-mir-95,hsa-mir-96,hsa-mir-98,hsa-mir-99a,hsa-mir-99b
0,TCGA.3C.AAAU,13.1299,14.1180,13.1479,14.5952,8.4191,8.6695,10.5228,3.9742,11.8252,8.6015,7.5532,0.0,2.6652,11.3997,14.5587,6.2707,14.5848,3.9037,3.5582,3.7514,5.4386,9.4079,6.6797,16.8186,15.6674,0.0,0.6438,5.0713,0.1293,0.0,0.0,0.1293,0.0000,0.0000,0.1293,0.1293,3.2944,0.0,1.7003,...,5.6819,0.0000,0.0,0.0000,4.9670,0.0,0.0,2.3574,2.2478,1.7003,0.0000,2.1597,0.6438,0.0,0.0000,0.0,12.6179,12.6201,3.7209,0.0000,9.4599,13.2644,7.8001,12.6861,0.0,0.0000,2.9187,1.5234,0.0,0.9541,2.8825,0.0,2.7072,0.2479,1.6146,2.3574,6.9187,5.7812,7.0356,15.5065
1,TCGA.3C.AALI,12.9183,13.9224,12.9134,14.5127,9.6483,9.0065,9.1343,4.4543,12.6791,8.4592,8.3933,0.0,1.2725,10.4019,11.7145,4.7270,14.2665,3.1224,4.8890,5.0207,4.9924,9.0846,5.8527,16.5135,14.8510,0.0,0.0000,3.9369,0.2108,0.0,0.0,0.2108,0.0000,0.0000,0.0000,0.0000,1.0712,0.0,0.8372,...,4.7940,0.0000,0.0,0.2108,4.1046,0.0,0.0,1.6791,0.0000,1.8774,0.0000,3.2941,0.8372,0.0,0.0000,0.0,11.0285,10.9998,1.5299,0.0000,8.2275,12.0930,6.4148,12.7541,0.0,0.3946,0.7042,1.9960,0.0,0.9589,1.0712,0.0,3.1985,0.0000,0.7042,1.1753,5.9366,6.4437,7.8914,13.6263
2,TCGA.3C.AALJ,13.0122,14.0101,13.0287,13.4197,9.3147,9.2793,11.3962,5.3505,13.5304,9.2330,8.6462,0.0,1.5722,10.1615,12.7569,6.0424,15.5417,4.7364,0.0000,0.0000,3.3047,9.3150,7.1969,13.2810,13.9550,0.0,0.1501,5.4112,0.2860,0.0,0.0,0.4103,0.0000,0.0000,0.1501,0.1501,0.0000,0.0,1.9451,...,5.1951,0.0000,0.0,0.0000,4.2548,0.0,0.0,1.5722,0.0000,3.4420,0.0000,4.4556,2.1371,0.0,0.1501,0.0,11.5465,11.5501,2.5146,0.0000,8.7468,12.5859,6.4639,13.5271,0.0,0.0000,1.8605,3.4126,0.0,0.2860,1.2114,0.0,2.2073,0.0000,0.8217,1.4031,6.6184,6.8905,7.5882,15.0139
3,TCGA.3C.AALK,13.1449,14.1418,13.1514,14.6673,11.5119,8.3891,10.3701,4.2378,12.6528,8.4756,8.8445,0.0,1.9434,12.0477,13.3707,6.0950,14.7991,2.8090,0.0000,0.0000,3.1025,8.5782,6.2378,14.6769,15.6076,0.0,0.0000,3.5931,0.0000,0.0,0.0,0.2118,0.3964,0.0000,0.2118,0.0000,0.2118,0.0,0.5601,...,4.8174,0.2118,0.0,0.0000,4.1760,0.0,0.0,0.5601,1.1794,2.2130,0.0000,4.1760,0.0000,0.0,0.0000,0.0,8.0441,8.0152,0.2118,0.0000,8.9794,13.1043,7.5156,12.6507,0.0,0.8404,0.3964,1.5347,0.0,0.7070,0.9625,0.0,2.1630,0.0000,2.1630,1.4538,6.2011,5.4122,10.0330,14.5548
4,TCGA.4H.AAAK,13.4118,14.4136,13.4206,14.4386,11.6944,8.4579,10.7422,4.5572,13.0097,8.3855,8.5531,0.0,1.5588,11.6705,13.3152,6.2279,14.5494,2.9655,0.2168,0.2168,3.3567,8.0015,5.5363,14.6162,16.0421,0.0,0.0000,3.8540,0.0000,0.0,0.0,0.2168,0.0000,0.0000,0.2168,0.0000,0.0000,0.0,0.5719,...,4.4108,0.0000,0.0,0.0000,3.9018,0.0,0.0,0.5719,0.8567,1.9096,0.0000,4.2724,0.8567,0.0,0.0000,0.0,6.1770,6.2860,0.4053,0.0000,9.3134,13.1844,5.4692,12.3728,0.0,0.4053,1.8459,1.0944,0.0,0.7213,0.4053,0.0,1.6361,0.0000,1.0944,0.8567,4.3887,5.1449,10.0795,14.6504
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
611,TCGA.WT.AB44,13.3759,14.3667,13.3700,14.5141,11.9267,8.5241,10.5369,4.6090,13.3679,9.1616,8.8370,0.0,2.5197,11.5382,12.9396,6.2443,14.4496,2.5534,3.2918,3.3503,4.3844,8.4750,6.1430,17.0994,15.2689,0.0,0.3455,3.4608,0.0000,0.0,0.0,0.1830,0.0000,0.0000,0.1830,0.0000,0.3455,0.0,0.8573,...,4.3751,0.0000,0.0,0.0000,4.2878,0.0,0.0,0.4914,0.4914,3.1228,0.0000,4.0136,0.6240,0.0,0.0000,0.0,6.3793,6.2131,0.0000,0.0000,8.6294,12.5127,7.5779,12.4628,0.0,1.5989,0.1830,1.0581,0.0,0.0000,0.8573,0.0,1.1489,0.0000,0.4914,1.1489,6.3365,5.8735,10.1058,13.9961
612,TCGA.XX.A899,14.0362,15.0364,14.0434,14.3396,12.3620,7.6630,11.0863,5.2437,14.1539,9.7670,9.0186,0.0,2.1463,12.5628,13.4669,7.2592,14.7800,2.7538,0.2649,0.1385,3.1800,9.0217,6.1609,15.0037,16.0544,0.0,0.1385,3.8981,0.0000,0.0,0.0,0.5887,0.0000,0.0000,0.1385,1.0763,0.1385,0.0,0.2649,...,4.0106,0.1385,0.0,0.1385,4.3753,0.0,0.0,0.7702,1.0763,1.3855,0.0000,4.5469,0.8530,0.0,0.0000,0.0,7.5925,7.6479,0.5887,0.1385,9.1722,13.0447,6.9805,12.2265,0.0,2.5203,1.1437,1.2080,0.0,0.5887,0.5887,0.0,2.6649,0.0000,2.0442,1.7304,4.1400,5.8666,10.9073,13.7815
613,TCGA.XX.A89A,13.6797,14.6849,13.6916,14.1983,12.6844,8.5569,11.3152,4.8829,14.0395,9.6062,9.0163,0.0,0.9498,12.3469,12.0229,5.3726,14.3635,3.2864,0.0000,0.0000,7.5085,8.8239,5.8111,15.7941,17.0116,0.0,0.0000,2.7603,0.4569,0.0,0.0,0.9498,0.6406,0.0000,0.0000,0.0000,0.2465,0.0,0.8034,...,4.5108,0.4569,0.0,0.2465,4.0735,0.0,0.0,0.8034,0.0000,2.1827,0.2465,4.6565,2.6361,0.0,0.0000,0.0,6.7034,6.7491,0.8034,0.0000,8.6401,12.0553,6.3452,12.0652,0.0,0.4569,0.4569,0.4569,0.0,0.6406,1.4205,0.0,1.5176,0.0000,0.6406,0.9498,5.4236,5.7870,10.3566,14.0595
614,TCGA.Z7.A8R5,12.9623,13.9664,12.9851,14.3207,11.9806,8.2123,9.9116,4.6372,12.8967,8.7462,9.6088,0.0,1.8573,12.3896,13.8936,6.9814,14.6049,3.1262,0.0000,0.1866,3.2570,8.2442,5.7255,14.1535,16.0794,0.0,0.3518,3.6391,0.0000,0.0,0.0,0.5000,0.0000,0.1866,0.0000,0.1866,0.0000,0.0,0.3518,...,4.0044,0.1866,0.0,0.1866,4.2956,0.0,0.0,0.5000,0.5000,2.8227,0.0000,3.8746,0.6344,0.0,0.0000,0.0,6.6437,6.6616,0.0000,0.0000,8.9236,13.0108,5.5623,12.2234,0.0,1.1652,0.7573,0.3518,0.0,0.8706,0.9756,0.0,1.6187,0.0000,1.6821,1.4097,5.8132,5.7553,10.1318,14.1906


In [None]:

# Clinical tables for the breast cohort: one train/test pair per split ratio.
# train/test split 8:2
train_clinical82_B, test_clinical82_B = [
  pd.read_csv(csv_name)
  for csv_name in ('train_clinical82_B.csv', 'test_clinical82_B.csv')
]
# train/test split 7:3
train_clinical73_B, test_clinical73_B = [
  pd.read_csv(csv_name)
  for csv_name in ('train_clinical73_B.csv', 'test_clinical73_B.csv')
]


In [None]:
# Single-column frames (double brackets keep a DataFrame) holding the
# attrib_name values of each split; they are the right-hand side of the
# inner merges with breast_miRnaSeq below.
train_idx_82 = train_clinical82_B[['attrib_name']]
test_idx_82 = test_clinical82_B[['attrib_name']]

train_idx_73 = train_clinical73_B[['attrib_name']]
test_idx_73 = test_clinical73_B[['attrib_name']]

In [None]:
# Shapes of the four breast clinical tables (8:2 pair first, then 7:3),
# followed by the 8:2 training ids.
for clinical_table in (train_clinical82_B, test_clinical82_B,
                       train_clinical73_B, test_clinical73_B):
  print(clinical_table.shape)

print(train_idx_82)

(492, 22)
(124, 22)
(431, 22)
(185, 22)
      attrib_name
0    TCGA.OK.A5Q2
1    TCGA.OL.A5D7
2    TCGA.BH.A8G0
3    TCGA.D8.A1JP
4    TCGA.E9.A24A
..            ...
487  TCGA.AC.A2QI
488  TCGA.D8.A1XC
489  TCGA.E2.A1IJ
490  TCGA.E2.A14U
491  TCGA.EW.A6SC

[492 rows x 1 columns]


In [None]:
# Display the 8:2 breast training clinical table for inspection.
train_clinical82_B

### Clinical 8:2


In [None]:
print(breast_miRnaSeq.shape)

# Inner joins on attrib_name keep only the expression rows whose id appears
# in the corresponding split.
train_breast_82B = pd.merge(breast_miRnaSeq, train_idx_82, how="inner", on='attrib_name')
test_breast_82B = pd.merge(breast_miRnaSeq, test_idx_82, how="inner", on='attrib_name')

for merged_split in (train_breast_82B, test_breast_82B):
  print(merged_split.shape)

(616, 824)
(492, 824)
(124, 824)


In [None]:
# Drop the leading attrib_name column so only the expression columns remain.
train_breast_82 = train_breast_82B.iloc[:, 1:]
test_breast_82 = test_breast_82B.iloc[:, 1:]
for expression_frame in (train_breast_82, test_breast_82):
  print(expression_frame.shape)

train_breast82_numpy = train_breast_82.to_numpy()
test_breast82_numpy = test_breast_82.to_numpy()

# The scaler is fitted on the training split only and then applied to both
# splits, so no test statistics leak into the scaling.
scaler_breast82 = MinMaxScaler()
scaler_breast82.fit(train_breast82_numpy)
train_breast82_scaled = scaler_breast82.transform(train_breast82_numpy)
test_breast82_scaled = scaler_breast82.transform(test_breast82_numpy)



(492, 823)
(124, 823)


In [None]:
# Copy the scaled arrays into float32 tensors for the autoencoder.
train_breast82_torch = torch.tensor(train_breast82_scaled, dtype=torch.float32)
test_breast82_torch = torch.tensor(test_breast82_scaled, dtype=torch.float32)

In [None]:
# Keep the first column (attrib_name) of each merged split so rows of the
# encoded output can be matched back to it, then write both to CSV.
train_breast82_name, test_breast82_name = (
  merged_split.iloc[:, 0] for merged_split in (train_breast_82B, test_breast_82B)
)
print(train_breast82_name)
print(test_breast82_name)

train_breast82_name.to_csv("train_82B_idx.csv")
test_breast82_name.to_csv("test_82B_idx.csv")

0      TCGA.3C.AAAU
1      TCGA.3C.AALI
2      TCGA.3C.AALJ
3      TCGA.3C.AALK
4      TCGA.4H.AAAK
           ...     
487    TCGA.WT.AB41
488    TCGA.XX.A899
489    TCGA.XX.A89A
490    TCGA.Z7.A8R5
491    TCGA.Z7.A8R6
Name: attrib_name, Length: 492, dtype: object
0      TCGA.5L.AAT0
1      TCGA.A1.A0SF
2      TCGA.A1.A0SI
3      TCGA.A1.A0SQ
4      TCGA.A2.A25E
           ...     
119    TCGA.PL.A8LX
120    TCGA.S3.AA12
121    TCGA.S3.AA17
122    TCGA.W8.A86G
123    TCGA.WT.AB44
Name: attrib_name, Length: 124, dtype: object


#### breast 8:2 training and output


In [None]:
# 10 fold cross validation on the breast 8:2 training split.
# A fresh autoencoder is trained per fold; the last epoch's train/validation
# losses are recorded for each fold.
train_fold_loss = []
valid_fold_loss = []
# Input width read by autoencoder() when it builds its layers.
embed_dim = train_breast82_torch.shape[1]

# assign train_torch and test_torch
train_torch = train_breast82_torch
test_torch = test_breast82_torch

# The loop variable is `fold`, not `k`, so the configured fold count `k` is
# not overwritten as a side effect of running this cell.
for fold, (train_idx, valid_idx) in enumerate(kfold.split(np.arange(len(train_torch)))):

  print('Fold: ', fold+1 )
  # Both loaders read from the same tensor; the samplers restrict each one to
  # its own subset of row indices.
  train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
  valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)

  train_loader = torch.utils.data.DataLoader(
      train_torch, batch_size=batch_size, sampler=train_sampler
  )
  valid_loader = torch.utils.data.DataLoader(
      train_torch, batch_size=batch_size, sampler=valid_sampler
  )

  model = autoencoder().to(device)
  optimizer = optim.Adam(model.parameters(), lr=lr)

  for epoch in range(epochs):
    train_loss = train_epoch(model, device, optimizer, criterion, train_loader)
    valid_loss = validation_epoch(model, device, criterion, valid_loader)

    print('epoch [{}/{}], train_loss:{:.4f}, valid_loss:{:.4f}'
          .format(epoch + 1, epochs, train_loss, valid_loss))

  # Losses from the final epoch of this fold.
  train_fold_loss.append(train_loss)
  valid_fold_loss.append(valid_loss)

Fold:  1
epoch [1/10], train_loss:0.0880, valid_loss:0.0519
epoch [2/10], train_loss:0.0401, valid_loss:0.0254
epoch [3/10], train_loss:0.0238, valid_loss:0.0223
epoch [4/10], train_loss:0.0214, valid_loss:0.0205
epoch [5/10], train_loss:0.0199, valid_loss:0.0196
epoch [6/10], train_loss:0.0192, valid_loss:0.0189
epoch [7/10], train_loss:0.0190, valid_loss:0.0187
epoch [8/10], train_loss:0.0188, valid_loss:0.0185
epoch [9/10], train_loss:0.0183, valid_loss:0.0182
epoch [10/10], train_loss:0.0181, valid_loss:0.0181
Fold:  2
epoch [1/10], train_loss:0.0903, valid_loss:0.0576
epoch [2/10], train_loss:0.0445, valid_loss:0.0264
epoch [3/10], train_loss:0.0247, valid_loss:0.0225
epoch [4/10], train_loss:0.0218, valid_loss:0.0205
epoch [5/10], train_loss:0.0204, valid_loss:0.0193
epoch [6/10], train_loss:0.0197, valid_loss:0.0186
epoch [7/10], train_loss:0.0188, valid_loss:0.0183
epoch [8/10], train_loss:0.0190, valid_loss:0.0181
epoch [9/10], train_loss:0.0184, valid_loss:0.0180
epoch [10/10

In [None]:
# TODO: 10 fold cv diagram (placeholder cell; no plotting code has been written yet)

In [None]:
# Formal training: fit one autoencoder on the entire breast 8:2 training split.
train_breast82_loader = torch.utils.data.DataLoader(
  train_breast82_torch, batch_size=batch_size, shuffle=True)
test_breast82_loader = torch.utils.data.DataLoader(
  test_breast82_torch, batch_size=batch_size, shuffle=False)

model_mirna_82B = autoencoder()
model_mirna_82B = model_mirna_82B.to(device)
optimizer = optim.Adam(model_mirna_82B.parameters(), lr=lr)

for epoch in range(epochs):
  train_loss = train_epoch(
    model_mirna_82B, device, optimizer, criterion, train_breast82_loader)
  progress_line = 'epoch [{}/{}], train_loss:{:.4f}'.format(epoch + 1, epochs, train_loss)
  print(progress_line)

epoch [1/10], train_loss:0.0921
epoch [2/10], train_loss:0.0467
epoch [3/10], train_loss:0.0250
epoch [4/10], train_loss:0.0217
epoch [5/10], train_loss:0.0203
epoch [6/10], train_loss:0.0193
epoch [7/10], train_loss:0.0187
epoch [8/10], train_loss:0.0184
epoch [9/10], train_loss:0.0183
epoch [10/10], train_loss:0.0181


In [None]:
# save and load the model
# torch.save is given the module object itself (not a state_dict), so the whole
# model is pickled; loading it back requires the `autoencoder` class to be
# defined in the session.
torch.save(model_mirna_82B, "mirna_82B.pt")

# Round-trip through disk and display the restored architecture.
# NOTE(review): no map_location is passed, so the model is restored onto the
# device it was saved from — confirm this matches the tensors fed to it below.
mirna_82B = torch.load("mirna_82B.pt")
mirna_82B

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=823, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=823, bias=True)
    (3): Tanh()
  )
)

In [None]:
# for train data only
# Encode the 8:2 training split into the 128-d bottleneck and save it as CSV.
# The input is moved to whichever device the loaded model lives on, so the cell
# also works on a GPU runtime where the model was saved from CUDA.
model_device = next(mirna_82B.parameters()).device
# Inference only: no autograd graph is needed (the printed tensor therefore
# carries no grad_fn).
with torch.no_grad():
  mirna_Tr82B_output = mirna_82B.encoder(train_breast82_torch.to(model_device))
print(mirna_Tr82B_output.shape)
print(mirna_Tr82B_output)
mirna_Tr82B_output = mirna_Tr82B_output.cpu().detach().numpy()
np.savetxt('mirna_Tr82B.csv', mirna_Tr82B_output, delimiter=',')

torch.Size([492, 128])
tensor([[ 0.3472,  0.4769, -0.2888,  ...,  0.2508,  0.1444, -0.2590],
        [ 0.3043,  0.4275, -0.2337,  ...,  0.1890,  0.0641, -0.1972],
        [ 0.3762,  0.4149, -0.2359,  ...,  0.2076,  0.0911, -0.2133],
        ...,
        [ 0.3559,  0.5109, -0.1798,  ...,  0.2235,  0.0059, -0.2773],
        [ 0.3541,  0.4998, -0.1805,  ...,  0.2362,  0.0288, -0.2305],
        [ 0.3908,  0.5351, -0.3024,  ...,  0.2864,  0.0809, -0.2441]],
       grad_fn=<AddmmBackward>)


In [None]:
# Import the saved 8:2 breast model from Google Drive (used for the test data).
# NOTE: this overwrites any local mirna_82B.pt written by the save cell.
model_file_id = "1BCbuKxU6PZnJIYAvaTLZkQ6ft4nASqzM"  # avoid rebinding builtin `id`
downloaded = drive.CreateFile({'id': model_file_id})
downloaded.GetContentFile('mirna_82B.pt')

# The checkpoint is a pickled module: only load files from a trusted source,
# and the `autoencoder` class must already be defined in this session.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
# runtime and keeps the model on the same device as the CPU input tensors.
mirna82B = torch.load("mirna_82B.pt", map_location="cpu")
mirna82B

In [None]:
# for test data only
# Encode the 8:2 test split into the 128-d bottleneck and save it as CSV.
# The input is moved to whichever device the loaded model lives on, so a
# CUDA-resident model does not hit a device mismatch with the CPU tensor.
model_device = next(mirna82B.parameters()).device
# Inference only: no autograd graph is needed (the printed tensor therefore
# carries no grad_fn).
with torch.no_grad():
  mirna_Te82B_output = mirna82B.encoder(test_breast82_torch.to(model_device))
print(mirna_Te82B_output.shape)
print(mirna_Te82B_output)
mirna_Te82B_output = mirna_Te82B_output.cpu().detach().numpy()
np.savetxt('mirna_Test82B.csv', mirna_Te82B_output, delimiter=',')

torch.Size([124, 128])
tensor([[ 0.3236,  0.4208, -0.1996,  ...,  0.1882,  0.0578, -0.1825],
        [ 0.3562,  0.4930, -0.1992,  ...,  0.2296,  0.0623, -0.2392],
        [ 0.3755,  0.4748, -0.1927,  ...,  0.2099,  0.0559, -0.2157],
        ...,
        [ 0.3190,  0.4189, -0.2511,  ...,  0.2483,  0.0667, -0.1870],
        [ 0.3443,  0.5037, -0.2049,  ...,  0.2250, -0.0167, -0.2245],
        [ 0.3424,  0.4770, -0.2056,  ...,  0.2137,  0.0218, -0.2275]],
       grad_fn=<AddmmBackward>)


In [None]:
# TODO: for whole data only (placeholder cell; nothing implemented yet)

### Clinical 7:3


In [None]:
# Inner joins keep only the expression rows whose id appears in each 7:3 split.
# The join key is spelled out (as in the 8:2 merge) rather than relying on
# pandas' implicit "all common columns" default, so the result cannot change
# silently if either frame later gains another shared column.
train_breast_73B = breast_miRnaSeq.merge(train_idx_73, on='attrib_name', how="inner")
test_breast_73B = breast_miRnaSeq.merge(test_idx_73, on='attrib_name', how="inner")

In [None]:
# Shapes first, then the (truncated) contents of both merged 7:3 splits.
merged_73_splits = (train_breast_73B, test_breast_73B)
for merged_split in merged_73_splits:
  print(merged_split.shape)
for merged_split in merged_73_splits:
  print(merged_split)

(431, 824)
(185, 824)
      attrib_name  hsa-let-7a-1  ...  hsa-mir-99a  hsa-mir-99b
0    TCGA.3C.AAAU       13.1299  ...       7.0356      15.5065
1    TCGA.3C.AALI       12.9183  ...       7.8914      13.6263
2    TCGA.3C.AALJ       13.0122  ...       7.5882      15.0139
3    TCGA.3C.AALK       13.1449  ...      10.0330      14.5548
4    TCGA.4H.AAAK       13.4118  ...      10.0795      14.6504
..            ...           ...  ...          ...          ...
426  TCGA.WT.AB41       13.0353  ...       9.7652      14.4590
427  TCGA.XX.A899       14.0362  ...      10.9073      13.7815
428  TCGA.XX.A89A       13.6797  ...      10.3566      14.0595
429  TCGA.Z7.A8R5       12.9623  ...      10.1318      14.1906
430  TCGA.Z7.A8R6       13.3498  ...       8.8803      15.0567

[431 rows x 824 columns]
      attrib_name  hsa-let-7a-1  ...  hsa-mir-99a  hsa-mir-99b
0    TCGA.5L.AAT0       13.3164  ...       9.3659      14.1468
1    TCGA.A1.A0SF       12.3430  ...      10.9691      15.2262
2    TC

In [None]:
# Drop the leading attrib_name column so only the expression columns remain.
train_breast_73 = train_breast_73B.iloc[:, 1:]
test_breast_73 = test_breast_73B.iloc[:, 1:]
for expression_frame in (test_breast_73, train_breast_73):
  print(expression_frame.shape)

train_breast73_numpy = train_breast_73.to_numpy()
test_breast73_numpy = test_breast_73.to_numpy()

# The scaler is fitted on the training split only and then applied to both
# splits, so no test statistics leak into the scaling.
scaler_breast73 = MinMaxScaler()
scaler_breast73.fit(train_breast73_numpy)
train_breast73_scaled = scaler_breast73.transform(train_breast73_numpy)
test_breast73_scaled = scaler_breast73.transform(test_breast73_numpy)

(185, 823)
(431, 823)


In [None]:
# Copy the scaled arrays into float32 tensors for the autoencoder.
train_breast73_torch = torch.tensor(train_breast73_scaled, dtype=torch.float32)
test_breast73_torch = torch.tensor(test_breast73_scaled, dtype=torch.float32)

In [None]:
# Keep the first column (attrib_name) of each merged split so rows of the
# encoded output can be matched back to it, then write both to CSV.
train_breast73_name, test_breast73_name = (
  merged_split.iloc[:, 0] for merged_split in (train_breast_73B, test_breast_73B)
)
print(train_breast73_name)
print(test_breast73_name)

train_breast73_name.to_csv("train_73B_idx.csv")
test_breast73_name.to_csv("test_73B_idx.csv")

0      TCGA.3C.AAAU
1      TCGA.3C.AALI
2      TCGA.3C.AALJ
3      TCGA.3C.AALK
4      TCGA.4H.AAAK
           ...     
426    TCGA.WT.AB41
427    TCGA.XX.A899
428    TCGA.XX.A89A
429    TCGA.Z7.A8R5
430    TCGA.Z7.A8R6
Name: attrib_name, Length: 431, dtype: object
0      TCGA.5L.AAT0
1      TCGA.A1.A0SF
2      TCGA.A1.A0SI
3      TCGA.A1.A0SQ
4      TCGA.A2.A0CO
           ...     
180    TCGA.S3.AA12
181    TCGA.S3.AA17
182    TCGA.UL.AAZ6
183    TCGA.W8.A86G
184    TCGA.WT.AB44
Name: attrib_name, Length: 185, dtype: object


#### breast 7:3 training and output


In [None]:
# 10 fold cross validation on the breast 7:3 training split.
# A fresh autoencoder is trained per fold; the last epoch's train/validation
# losses are recorded for each fold.
train_fold_loss = []
valid_fold_loss = []
# Input width read by autoencoder() when it builds its layers.
embed_dim = train_breast73_torch.shape[1]

# assign train_torch and test_torch
train_torch = train_breast73_torch
# Fixed typo: was `tesr_breast73_torch`, which raises NameError on a fresh run.
test_torch = test_breast73_torch

# The loop variable is `fold`, not `k`, so the configured fold count `k` is
# not overwritten as a side effect of running this cell.
for fold, (train_idx, valid_idx) in enumerate(kfold.split(np.arange(len(train_torch)))):

  print('Fold: ', fold+1 )
  # Both loaders read from the same tensor; the samplers restrict each one to
  # its own subset of row indices.
  train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
  valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)

  train_loader = torch.utils.data.DataLoader(
      train_torch, batch_size=batch_size, sampler=train_sampler
  )
  valid_loader = torch.utils.data.DataLoader(
      train_torch, batch_size=batch_size, sampler=valid_sampler
  )

  model = autoencoder().to(device)
  optimizer = optim.Adam(model.parameters(), lr=lr)

  for epoch in range(epochs):
    train_loss = train_epoch(model, device, optimizer, criterion, train_loader)
    valid_loss = validation_epoch(model, device, criterion, valid_loader)

    print('epoch [{}/{}], train_loss:{:.4f}, valid_loss:{:.4f}'
          .format(epoch + 1, epochs, train_loss, valid_loss))

  # Losses from the final epoch of this fold.
  train_fold_loss.append(train_loss)
  valid_fold_loss.append(valid_loss)

Fold:  1
epoch [1/10], train_loss:0.0920, valid_loss:0.0624
epoch [2/10], train_loss:0.0472, valid_loss:0.0279
epoch [3/10], train_loss:0.0254, valid_loss:0.0227
epoch [4/10], train_loss:0.0218, valid_loss:0.0215
epoch [5/10], train_loss:0.0211, valid_loss:0.0202
epoch [6/10], train_loss:0.0193, valid_loss:0.0193
epoch [7/10], train_loss:0.0198, valid_loss:0.0189
epoch [8/10], train_loss:0.0205, valid_loss:0.0189
epoch [9/10], train_loss:0.0192, valid_loss:0.0188
epoch [10/10], train_loss:0.0187, valid_loss:0.0186
Fold:  2
epoch [1/10], train_loss:0.0931, valid_loss:0.0698
epoch [2/10], train_loss:0.0515, valid_loss:0.0345
epoch [3/10], train_loss:0.0278, valid_loss:0.0257
epoch [4/10], train_loss:0.0237, valid_loss:0.0242
epoch [5/10], train_loss:0.0208, valid_loss:0.0232
epoch [6/10], train_loss:0.0210, valid_loss:0.0224
epoch [7/10], train_loss:0.0199, valid_loss:0.0219
epoch [8/10], train_loss:0.0189, valid_loss:0.0217
epoch [9/10], train_loss:0.0208, valid_loss:0.0215
epoch [10/10

In [None]:
# TODO: 10 fold cv diagram (placeholder cell; no plotting code has been written yet)

In [None]:
# Formal training: fit one autoencoder on the entire breast 7:3 training split.
train_breast73_loader = torch.utils.data.DataLoader(
  train_breast73_torch, batch_size=batch_size, shuffle=True)
test_breast73_loader = torch.utils.data.DataLoader(
  test_breast73_torch, batch_size=batch_size, shuffle=False)

model_mirna_73B = autoencoder()
model_mirna_73B = model_mirna_73B.to(device)
optimizer = optim.Adam(model_mirna_73B.parameters(), lr=lr)

for epoch in range(epochs):
  train_loss = train_epoch(
    model_mirna_73B, device, optimizer, criterion, train_breast73_loader)
  progress_line = 'epoch [{}/{}], train_loss:{:.4f}'.format(epoch + 1, epochs, train_loss)
  print(progress_line)

epoch [1/10], train_loss:0.0943
epoch [2/10], train_loss:0.0478
epoch [3/10], train_loss:0.0248
epoch [4/10], train_loss:0.0231
epoch [5/10], train_loss:0.0217
epoch [6/10], train_loss:0.0203
epoch [7/10], train_loss:0.0200
epoch [8/10], train_loss:0.0196
epoch [9/10], train_loss:0.0193
epoch [10/10], train_loss:0.0189


In [None]:
# save and load the model
# torch.save is given the module object itself (not a state_dict), so the whole
# model is pickled; loading it back requires the `autoencoder` class to be
# defined in the session.
torch.save(model_mirna_73B, "mirna_73B.pt")

# Round-trip through disk and display the restored architecture.
# NOTE(review): no map_location is passed, so the model is restored onto the
# device it was saved from — confirm this matches the tensors fed to it below.
mirna_73B = torch.load("mirna_73B.pt")
mirna_73B

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=823, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=823, bias=True)
    (3): Tanh()
  )
)

In [None]:
# for train data only
# Encode the 7:3 training split into the 128-d bottleneck and save it as CSV.
# The input is moved to whichever device the loaded model lives on, so the cell
# also works on a GPU runtime where the model was saved from CUDA.
model_device = next(mirna_73B.parameters()).device
# Inference only: no autograd graph is needed (the printed tensor therefore
# carries no grad_fn).
with torch.no_grad():
  mirna_Tr73B_output = mirna_73B.encoder(train_breast73_torch.to(model_device))
print(mirna_Tr73B_output.shape)
print(mirna_Tr73B_output)
mirna_Tr73B_output = mirna_Tr73B_output.cpu().detach().numpy()
np.savetxt('mirna_Tr73B.csv', mirna_Tr73B_output, delimiter=',')

torch.Size([431, 128])
tensor([[ 0.1343, -0.0950,  1.1333,  ...,  0.3808, -0.4243, -0.0919],
        [ 0.0964, -0.1436,  0.9825,  ...,  0.3630, -0.3531, -0.0932],
        [ 0.0906, -0.1018,  0.9736,  ...,  0.3337, -0.3592, -0.0661],
        ...,
        [ 0.0415, -0.1380,  1.0221,  ...,  0.2895, -0.3268, -0.1182],
        [ 0.0351, -0.1305,  1.0161,  ...,  0.3120, -0.3237, -0.0681],
        [ 0.1511, -0.1229,  1.1963,  ...,  0.3657, -0.4316, -0.1181]],
       grad_fn=<AddmmBackward>)


In [None]:
# Import the saved 7:3 breast model from Google Drive (used for the test data).
# NOTE: this overwrites any local mirna_73B.pt written by the save cell.
model_file_id = "115q9iJnkCdyRslzblv-Er7F5fe5XfoRA"  # avoid rebinding builtin `id`
downloaded = drive.CreateFile({'id': model_file_id})
downloaded.GetContentFile('mirna_73B.pt')

# The checkpoint is a pickled module: only load files from a trusted source,
# and the `autoencoder` class must already be defined in this session.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
# runtime and keeps the model on the same device as the CPU input tensors.
mirna73B = torch.load("mirna_73B.pt", map_location="cpu")
mirna73B

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=823, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=823, bias=True)
    (3): Tanh()
  )
)

In [None]:
# for test data only
# Encode the 7:3 test split into the 128-d bottleneck and save it as CSV.
# The input is moved to whichever device the loaded model lives on, so a
# CUDA-resident model does not hit a device mismatch with the CPU tensor.
model_device = next(mirna73B.parameters()).device
# Inference only: no autograd graph is needed (the printed tensor therefore
# carries no grad_fn).
with torch.no_grad():
  mirna_Te73B_output = mirna73B.encoder(test_breast73_torch.to(model_device))
print(mirna_Te73B_output.shape)
print(mirna_Te73B_output)
mirna_Te73B_output = mirna_Te73B_output.cpu().detach().numpy()
np.savetxt('mirna_Test73B.csv', mirna_Te73B_output, delimiter=',')

torch.Size([185, 128])
tensor([[ 0.0499, -0.1045,  0.8669,  ...,  0.2767, -0.2627, -0.1152],
        [ 0.0701, -0.1339,  1.0166,  ...,  0.3394, -0.3153, -0.0722],
        [ 0.0471, -0.1317,  0.9600,  ...,  0.3192, -0.3143, -0.0767],
        ...,
        [ 0.1442, -0.1049,  0.9218,  ...,  0.3153, -0.3226, -0.1197],
        [ 0.0334, -0.1317,  1.0058,  ...,  0.3234, -0.3164, -0.1049],
        [ 0.0356, -0.1398,  0.9379,  ...,  0.2792, -0.3173, -0.0782]],
       grad_fn=<AddmmBackward>)


In [None]:
# TODO: for whole data only (placeholder cell; nothing implemented yet)

## Load Glioma data

In [None]:
# Load the glioma miRNA expression table and rename its unnamed leading
# column to attrib_name in one chained expression.
glioma_miRnaSeq = (
  pd.read_csv('glioma_miRnaSeq.csv')
  .rename(columns={"Unnamed: 0": "attrib_name"})
)

glioma_miRnaSeq

Unnamed: 0,attrib_name,hsa-let-7a-1,hsa-let-7a-2,hsa-let-7a-3,hsa-let-7b,hsa-let-7c,hsa-let-7d,hsa-let-7e,hsa-let-7f-1,hsa-let-7f-2,hsa-let-7g,hsa-let-7i,hsa-mir-1-1,hsa-mir-1-2,hsa-mir-100,hsa-mir-101-1,hsa-mir-101-2,hsa-mir-103-1,hsa-mir-103-2,hsa-mir-105-1,hsa-mir-105-2,hsa-mir-106a,hsa-mir-106b,hsa-mir-107,hsa-mir-10a,hsa-mir-10b,hsa-mir-1178,hsa-mir-1179,hsa-mir-1180,hsa-mir-1181,hsa-mir-1182,hsa-mir-1185-1,hsa-mir-1185-2,hsa-mir-1193,hsa-mir-1197,hsa-mir-122,hsa-mir-1224,hsa-mir-1225,hsa-mir-1226,hsa-mir-1227,...,hsa-mir-769,hsa-mir-770,hsa-mir-802,hsa-mir-873,hsa-mir-874,hsa-mir-875,hsa-mir-876,hsa-mir-877,hsa-mir-885,hsa-mir-887,hsa-mir-888,hsa-mir-889,hsa-mir-890,hsa-mir-891a,hsa-mir-891b,hsa-mir-892a,hsa-mir-892b,hsa-mir-9-1,hsa-mir-9-2,hsa-mir-9-3,hsa-mir-922,hsa-mir-92a-1,hsa-mir-92a-2,hsa-mir-92b,hsa-mir-93,hsa-mir-933,hsa-mir-934,hsa-mir-935,hsa-mir-937,hsa-mir-938,hsa-mir-939,hsa-mir-940,hsa-mir-942,hsa-mir-943,hsa-mir-944,hsa-mir-95,hsa-mir-96,hsa-mir-98,hsa-mir-99a,hsa-mir-99b
0,TCGA.CS.4938,12.6226,13.6328,12.6518,14.2090,14.3770,8.5694,10.5123,2.5120,11.7146,7.8873,9.0401,0.0000,1.1805,14.2881,13.4646,5.2026,13.8492,1.6650,3.9177,4.2686,3.3286,9.6530,6.2078,8.0838,8.4490,0.000,0.9295,7.3417,0.9295,0.0,0.0000,0.0000,0.0,0.0000,0.0000,1.7451,0.0000,2.4175,0.0000,...,5.1667,0.0000,0.0,0.2399,4.3477,0.2399,0.0000,1.6650,5.6604,3.1637,0.0000,1.9616,0.0000,5.2167,0.4456,1.7451,0.2399,18.1610,18.1573,8.8619,0.0000,7.2174,12.6240,11.9491,11.3238,0.0,0.0000,4.8052,2.0898,0.0,0.7855,2.2629,1.7451,0.000,0.0000,2.9774,2.6432,5.9745,13.0651,15.7755
1,TCGA.CS.4941,11.8102,12.8160,11.8205,13.0480,11.9554,8.3917,9.4888,3.6266,11.3962,8.6486,7.3471,0.0000,0.4813,12.5678,12.8258,5.3004,13.5203,2.4905,2.9006,2.4562,4.4056,7.2046,4.4146,8.0037,13.2977,0.000,1.0398,4.7265,0.1789,0.0,0.1789,0.0000,0.0,0.1789,0.0000,2.3481,0.6116,1.3695,0.1789,...,4.8035,0.8415,0.0,0.1789,5.8486,0.0000,0.0000,0.7311,5.2563,4.3692,0.0000,3.3560,0.1789,4.7193,0.3380,0.0000,0.0000,18.4055,18.4086,7.6621,0.0000,8.0985,11.4803,10.3332,11.4543,0.0,0.0000,4.9150,1.1295,0.0,0.6116,1.7552,1.6378,0.000,0.0000,2.6507,4.3692,4.9462,10.8423,14.1382
2,TCGA.CS.4942,11.1146,12.1289,11.1662,12.4820,11.8589,7.1456,7.2813,2.7536,10.2240,7.1501,8.0787,0.0000,0.3393,11.4514,11.1315,4.6561,13.0552,2.3655,3.9370,4.0335,3.2721,6.9252,3.6010,5.5146,15.3016,0.000,0.1222,4.1602,0.0000,0.0,0.0000,0.0000,0.0,0.0000,0.0000,1.3733,0.1222,0.9136,0.1222,...,3.5692,0.1222,0.0,0.1222,4.1814,0.0000,0.0000,0.9797,3.8512,3.2588,1.9365,3.0894,0.2348,7.4476,2.3152,3.3366,1.7596,18.6872,18.6883,8.7010,0.1222,7.0746,10.7188,9.4236,11.4086,0.0,0.0000,4.4382,1.1617,0.0,0.5279,1.3232,0.2348,0.000,0.3393,1.6419,1.7967,4.5675,10.1744,12.8643
3,TCGA.CS.4943,10.8885,11.8951,10.9289,12.1122,11.7382,8.0178,7.7645,3.6759,10.6693,7.9962,8.5818,0.0000,0.8243,11.0748,11.5412,5.3984,13.5784,2.9884,5.1164,5.0461,4.1305,8.1184,4.3013,4.9822,15.2160,0.000,1.1447,3.6634,0.0000,0.0,0.0000,0.0000,0.0,0.0000,0.0000,1.4068,0.2871,1.5762,0.4117,...,3.7127,0.5265,0.0,0.1507,3.8838,0.0000,0.1507,0.8243,6.0019,3.2776,0.2871,3.0663,0.0000,4.8397,1.2148,0.9113,0.4117,18.6472,18.6506,9.3853,0.0000,8.4875,11.6230,9.6793,12.0127,0.0,0.0000,3.7836,0.1507,0.0,0.6327,2.0295,0.9113,0.000,0.0000,1.9078,4.8003,5.6997,9.8865,13.0006
4,TCGA.CS.4944,11.7949,12.7857,11.8171,13.3091,13.3270,7.4258,8.6701,3.6004,11.0302,7.9982,7.3176,0.0000,0.6018,13.1483,12.6151,5.9799,12.9867,1.9437,3.7327,3.5693,3.7186,6.9595,3.7327,6.7482,14.5760,0.000,0.6018,4.7346,0.1756,0.0,0.1756,0.0000,0.0,0.0000,0.0000,1.7903,1.4915,1.4235,0.0000,...,3.2344,0.0000,0.0,0.4732,5.5327,0.0000,0.0000,1.0252,4.7692,4.1346,0.0000,2.8242,0.0000,4.0469,0.0000,0.3321,0.3321,18.5745,18.5746,8.4772,0.0000,7.1022,10.9829,9.8884,11.2352,0.0,0.0000,4.4227,1.0252,0.0,0.4732,0.6018,0.4732,0.000,0.0000,1.3521,0.8290,4.8031,11.7756,13.6348
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
503,TCGA.WY.A85A,12.7427,13.7359,12.7601,13.2856,13.1779,8.6163,10.8443,4.1598,12.8674,9.5278,8.3296,0.0000,3.1395,12.2668,12.9745,6.0121,14.6975,3.2853,6.3453,6.3509,4.0642,9.1409,6.4844,4.8116,12.1236,0.143,1.1699,6.1002,0.3923,0.0,1.1699,1.0297,0.0,1.6245,0.0000,3.4594,0.3923,1.4150,0.1430,...,6.2037,2.4539,0.0,2.3692,6.3060,0.0000,1.5749,2.2792,6.4624,4.2094,0.7004,6.7179,0.2730,7.1266,0.6048,1.6724,0.5025,18.3288,18.3295,9.1227,0.1430,8.3998,12.6923,11.1499,11.7984,0.0,0.0000,6.6671,1.3575,0.0,0.5025,3.3007,0.9542,0.143,0.3923,4.3560,6.2869,6.9483,11.3999,14.5000
504,TCGA.WY.A85B,12.8910,13.8958,12.8999,13.6016,13.5962,8.6006,10.4551,3.5996,12.8073,9.4061,9.0717,0.0000,1.2232,12.0039,13.2026,5.6517,13.8898,2.8313,4.8813,4.9190,3.7740,9.5415,4.8030,6.6613,12.1341,0.000,0.4154,5.2409,0.1522,0.0,0.1522,0.1522,0.0,0.0000,0.1522,0.2898,0.4154,2.1531,0.0000,...,4.6644,0.0000,0.0,0.0000,3.4461,0.0000,0.2898,2.5317,4.0114,3.3706,0.5310,3.2237,0.0000,5.5560,1.0007,1.0788,0.5310,18.4925,18.4948,9.2112,0.0000,7.0483,11.5725,12.0227,11.2233,0.0,0.0000,6.1100,1.5315,0.0,0.0000,3.6388,1.0007,0.000,0.0000,3.4164,3.2908,6.6047,11.6821,13.8018
505,TCGA.WY.A85C,12.9008,13.9031,12.9091,13.6080,12.9546,8.4263,9.7229,3.4755,12.2888,8.9683,8.3253,0.0000,1.5062,14.4187,14.2423,7.4387,14.1549,2.3805,5.4418,5.6563,4.2785,9.0561,5.5429,6.3757,15.8859,0.000,0.1782,4.9032,0.0000,0.0,0.3368,0.3368,0.0,0.7289,0.3368,1.6339,0.1782,1.0369,0.0000,...,4.6473,1.0369,0.0,0.3368,4.9774,0.1782,0.1782,0.1782,5.0819,3.0864,0.7289,4.0571,0.3368,5.9013,0.3368,1.4378,0.3368,18.2356,18.2365,9.4350,0.0000,8.5804,12.6769,10.2939,12.0796,0.0,0.0000,5.2257,1.0369,0.0,0.1782,1.0369,1.5715,0.000,0.3368,3.5902,2.8438,5.7332,11.7779,13.5597
506,TCGA.WY.A85D,12.6632,13.6669,12.6854,13.6434,13.5209,7.7778,9.6986,3.6656,12.4703,8.5105,8.8598,0.0000,0.8999,12.8360,13.7102,6.3690,13.8278,1.8814,3.4877,3.5950,3.4988,8.2329,5.0270,3.5207,11.0725,0.000,0.8999,5.4492,0.0000,0.0,0.0000,0.0000,0.0,0.0000,0.0000,0.6836,0.0000,2.4826,0.0000,...,4.9238,0.1198,0.0,0.5190,4.0443,0.0000,0.0000,1.0879,4.2812,4.4846,0.1198,2.3170,0.0000,4.4335,0.1198,0.5190,0.1198,18.5264,18.5278,9.5146,0.0000,7.8402,12.0969,10.4848,11.9734,0.0,0.1198,3.4075,1.9149,0.0,0.3332,1.6221,0.8313,0.000,0.1198,2.5267,3.0333,6.1402,11.6534,13.9789


In [None]:

#train test split 8:2
# Each clinical split is read and its "Unnamed: 0.1" column is renamed to
# "attrib_name" in one chained expression.
train_clinical82_G = pd.read_csv('train_clinical82_G.csv').rename(
    columns={"Unnamed: 0.1": "attrib_name"}
)
test_clinical82_G = pd.read_csv('test_clinical82_G.csv').rename(
    columns={"Unnamed: 0.1": "attrib_name"}
)

print(train_clinical82_G.shape)
print(test_clinical82_G.shape)

#train test split 7:3
train_clinical73_G = pd.read_csv('train_clinical73_G.csv').rename(
    columns={"Unnamed: 0.1": "attrib_name"}
)
test_clinical73_G = pd.read_csv('test_clinical73_G.csv').rename(
    columns={"Unnamed: 0.1": "attrib_name"}
)

print(train_clinical73_G.shape)
print(test_clinical73_G.shape)

(406, 11)
(102, 11)
(355, 11)
(153, 11)


In [None]:
# Display the 8:2 training clinical table as the cell output.
train_clinical82_G

Unnamed: 0.1,Unnamed: 0,attrib_name,years_to_birth,histological_type,gender,radiation_therapy,race,ethnicity,overall_survival,status,overallsurvival
0,151,TCGA.DU.A7TI,32.0,astrocytoma,male,yes,white,nothispanicorlatino,1183.0,1.0,11831
1,347,TCGA.P5.A5EU,35.0,astrocytoma,male,no,white,nothispanicorlatino,,,"NA,NA"
2,469,TCGA.TM.A84T,19.0,oligoastrocytoma,male,no,white,nothispanicorlatino,724.0,0.0,7240
3,442,TCGA.S9.A7QZ,41.0,oligodendroglioma,male,no,white,nothispanicorlatino,826.0,0.0,8260
4,292,TCGA.HT.8015,21.0,astrocytoma,male,,white,nothispanicorlatino,,,"NA,NA"
...,...,...,...,...,...,...,...,...,...,...,...
401,98,TCGA.DU.7007,33.0,astrocytoma,male,,white,nothispanicorlatino,1915.0,1.0,19151
402,476,TCGA.TQ.A7RM,41.0,oligoastrocytoma,female,yes,white,hispanicorlatino,1116.0,0.0,11160
403,322,TCGA.HT.A61C,66.0,oligodendroglioma,male,yes,white,nothispanicorlatino,537.0,1.0,5371
404,382,TCGA.QH.A6CZ,38.0,oligoastrocytoma,male,no,white,nothispanicorlatino,279.0,0.0,2790


In [None]:
# Single-column frames holding only 'attrib_name' for each clinical split.
# The double brackets in the column selector keep the result a DataFrame.
train_idx_82G = train_clinical82_G.loc[:, ['attrib_name']]
test_idx_82G = test_clinical82_G.loc[:, ['attrib_name']]

train_idx_73G = train_clinical73_G.loc[:, ['attrib_name']]
test_idx_73G = test_clinical73_G.loc[:, ['attrib_name']]

In [None]:
# Display the 'attrib_name' column selected from the 8:2 training split.
train_idx_82G

Unnamed: 0,attrib_name
0,TCGA.DU.A7TI
1,TCGA.P5.A5EU
2,TCGA.TM.A84T
3,TCGA.S9.A7QZ
4,TCGA.HT.8015
...,...
401,TCGA.DU.7007
402,TCGA.TQ.A7RM
403,TCGA.HT.A61C
404,TCGA.QH.A6CZ


### Clinical 8:2

In [None]:
print(glioma_miRnaSeq.shape)

# Inner joins on 'attrib_name': keep the expression rows whose ID appears in
# the corresponding 8:2 split.
train_glioma_82G = pd.merge(glioma_miRnaSeq, train_idx_82G, how="inner", on='attrib_name')
test_glioma_82G = pd.merge(glioma_miRnaSeq, test_idx_82G, how="inner", on='attrib_name')

print(train_glioma_82G.shape)
print(test_glioma_82G.shape)

(508, 792)
(406, 792)
(102, 792)


In [None]:
# Display the merged 8:2 training expression table as the cell output.
train_glioma_82G

Unnamed: 0,attrib_name,hsa-let-7a-1,hsa-let-7a-2,hsa-let-7a-3,hsa-let-7b,hsa-let-7c,hsa-let-7d,hsa-let-7e,hsa-let-7f-1,hsa-let-7f-2,hsa-let-7g,hsa-let-7i,hsa-mir-1-1,hsa-mir-1-2,hsa-mir-100,hsa-mir-101-1,hsa-mir-101-2,hsa-mir-103-1,hsa-mir-103-2,hsa-mir-105-1,hsa-mir-105-2,hsa-mir-106a,hsa-mir-106b,hsa-mir-107,hsa-mir-10a,hsa-mir-10b,hsa-mir-1178,hsa-mir-1179,hsa-mir-1180,hsa-mir-1181,hsa-mir-1182,hsa-mir-1185-1,hsa-mir-1185-2,hsa-mir-1193,hsa-mir-1197,hsa-mir-122,hsa-mir-1224,hsa-mir-1225,hsa-mir-1226,hsa-mir-1227,...,hsa-mir-769,hsa-mir-770,hsa-mir-802,hsa-mir-873,hsa-mir-874,hsa-mir-875,hsa-mir-876,hsa-mir-877,hsa-mir-885,hsa-mir-887,hsa-mir-888,hsa-mir-889,hsa-mir-890,hsa-mir-891a,hsa-mir-891b,hsa-mir-892a,hsa-mir-892b,hsa-mir-9-1,hsa-mir-9-2,hsa-mir-9-3,hsa-mir-922,hsa-mir-92a-1,hsa-mir-92a-2,hsa-mir-92b,hsa-mir-93,hsa-mir-933,hsa-mir-934,hsa-mir-935,hsa-mir-937,hsa-mir-938,hsa-mir-939,hsa-mir-940,hsa-mir-942,hsa-mir-943,hsa-mir-944,hsa-mir-95,hsa-mir-96,hsa-mir-98,hsa-mir-99a,hsa-mir-99b
0,TCGA.CS.4938,12.6226,13.6328,12.6518,14.2090,14.3770,8.5694,10.5123,2.5120,11.7146,7.8873,9.0401,0.0000,1.1805,14.2881,13.4646,5.2026,13.8492,1.6650,3.9177,4.2686,3.3286,9.6530,6.2078,8.0838,8.4490,0.0000,0.9295,7.3417,0.9295,0.0,0.0000,0.0000,0.0,0.0000,0.0000,1.7451,0.0000,2.4175,0.0000,...,5.1667,0.0000,0.0,0.2399,4.3477,0.2399,0.0000,1.6650,5.6604,3.1637,0.0000,1.9616,0.0000,5.2167,0.4456,1.7451,0.2399,18.1610,18.1573,8.8619,0.0000,7.2174,12.6240,11.9491,11.3238,0.0,0.0,4.8052,2.0898,0.0,0.7855,2.2629,1.7451,0.000,0.0000,2.9774,2.6432,5.9745,13.0651,15.7755
1,TCGA.CS.4941,11.8102,12.8160,11.8205,13.0480,11.9554,8.3917,9.4888,3.6266,11.3962,8.6486,7.3471,0.0000,0.4813,12.5678,12.8258,5.3004,13.5203,2.4905,2.9006,2.4562,4.4056,7.2046,4.4146,8.0037,13.2977,0.0000,1.0398,4.7265,0.1789,0.0,0.1789,0.0000,0.0,0.1789,0.0000,2.3481,0.6116,1.3695,0.1789,...,4.8035,0.8415,0.0,0.1789,5.8486,0.0000,0.0000,0.7311,5.2563,4.3692,0.0000,3.3560,0.1789,4.7193,0.3380,0.0000,0.0000,18.4055,18.4086,7.6621,0.0000,8.0985,11.4803,10.3332,11.4543,0.0,0.0,4.9150,1.1295,0.0,0.6116,1.7552,1.6378,0.000,0.0000,2.6507,4.3692,4.9462,10.8423,14.1382
2,TCGA.CS.4942,11.1146,12.1289,11.1662,12.4820,11.8589,7.1456,7.2813,2.7536,10.2240,7.1501,8.0787,0.0000,0.3393,11.4514,11.1315,4.6561,13.0552,2.3655,3.9370,4.0335,3.2721,6.9252,3.6010,5.5146,15.3016,0.0000,0.1222,4.1602,0.0000,0.0,0.0000,0.0000,0.0,0.0000,0.0000,1.3733,0.1222,0.9136,0.1222,...,3.5692,0.1222,0.0,0.1222,4.1814,0.0000,0.0000,0.9797,3.8512,3.2588,1.9365,3.0894,0.2348,7.4476,2.3152,3.3366,1.7596,18.6872,18.6883,8.7010,0.1222,7.0746,10.7188,9.4236,11.4086,0.0,0.0,4.4382,1.1617,0.0,0.5279,1.3232,0.2348,0.000,0.3393,1.6419,1.7967,4.5675,10.1744,12.8643
3,TCGA.CS.4943,10.8885,11.8951,10.9289,12.1122,11.7382,8.0178,7.7645,3.6759,10.6693,7.9962,8.5818,0.0000,0.8243,11.0748,11.5412,5.3984,13.5784,2.9884,5.1164,5.0461,4.1305,8.1184,4.3013,4.9822,15.2160,0.0000,1.1447,3.6634,0.0000,0.0,0.0000,0.0000,0.0,0.0000,0.0000,1.4068,0.2871,1.5762,0.4117,...,3.7127,0.5265,0.0,0.1507,3.8838,0.0000,0.1507,0.8243,6.0019,3.2776,0.2871,3.0663,0.0000,4.8397,1.2148,0.9113,0.4117,18.6472,18.6506,9.3853,0.0000,8.4875,11.6230,9.6793,12.0127,0.0,0.0,3.7836,0.1507,0.0,0.6327,2.0295,0.9113,0.000,0.0000,1.9078,4.8003,5.6997,9.8865,13.0006
4,TCGA.CS.4944,11.7949,12.7857,11.8171,13.3091,13.3270,7.4258,8.6701,3.6004,11.0302,7.9982,7.3176,0.0000,0.6018,13.1483,12.6151,5.9799,12.9867,1.9437,3.7327,3.5693,3.7186,6.9595,3.7327,6.7482,14.5760,0.0000,0.6018,4.7346,0.1756,0.0,0.1756,0.0000,0.0,0.0000,0.0000,1.7903,1.4915,1.4235,0.0000,...,3.2344,0.0000,0.0,0.4732,5.5327,0.0000,0.0000,1.0252,4.7692,4.1346,0.0000,2.8242,0.0000,4.0469,0.0000,0.3321,0.3321,18.5745,18.5746,8.4772,0.0000,7.1022,10.9829,9.8884,11.2352,0.0,0.0,4.4227,1.0252,0.0,0.4732,0.6018,0.4732,0.000,0.0000,1.3521,0.8290,4.8031,11.7756,13.6348
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401,TCGA.WY.A858,12.4502,13.4547,12.4613,13.0244,12.6616,8.9675,10.5740,3.8599,12.5348,9.6396,9.2749,0.0000,0.9814,13.8423,13.2967,6.2939,14.6151,2.9216,3.9307,3.9983,4.8464,7.7741,5.3770,7.0608,7.9598,0.1658,0.5726,4.9805,0.1658,0.0,0.1658,0.0000,0.0,0.1658,0.0000,0.4493,0.1658,1.2263,0.0000,...,4.0092,0.3145,0.0,0.0000,4.7453,0.0000,0.0000,1.4357,4.1245,2.3621,0.4493,2.3958,0.0000,4.6644,0.6861,0.9814,0.1658,18.3748,18.3727,8.9559,0.0000,8.0461,12.2306,11.0620,11.3615,0.0,0.0,5.2114,0.7914,0.0,0.1658,1.2263,1.4357,0.000,0.0000,1.8309,0.4493,5.5963,11.8029,14.1280
402,TCGA.WY.A85A,12.7427,13.7359,12.7601,13.2856,13.1779,8.6163,10.8443,4.1598,12.8674,9.5278,8.3296,0.0000,3.1395,12.2668,12.9745,6.0121,14.6975,3.2853,6.3453,6.3509,4.0642,9.1409,6.4844,4.8116,12.1236,0.1430,1.1699,6.1002,0.3923,0.0,1.1699,1.0297,0.0,1.6245,0.0000,3.4594,0.3923,1.4150,0.1430,...,6.2037,2.4539,0.0,2.3692,6.3060,0.0000,1.5749,2.2792,6.4624,4.2094,0.7004,6.7179,0.2730,7.1266,0.6048,1.6724,0.5025,18.3288,18.3295,9.1227,0.1430,8.3998,12.6923,11.1499,11.7984,0.0,0.0,6.6671,1.3575,0.0,0.5025,3.3007,0.9542,0.143,0.3923,4.3560,6.2869,6.9483,11.3999,14.5000
403,TCGA.WY.A85B,12.8910,13.8958,12.8999,13.6016,13.5962,8.6006,10.4551,3.5996,12.8073,9.4061,9.0717,0.0000,1.2232,12.0039,13.2026,5.6517,13.8898,2.8313,4.8813,4.9190,3.7740,9.5415,4.8030,6.6613,12.1341,0.0000,0.4154,5.2409,0.1522,0.0,0.1522,0.1522,0.0,0.0000,0.1522,0.2898,0.4154,2.1531,0.0000,...,4.6644,0.0000,0.0,0.0000,3.4461,0.0000,0.2898,2.5317,4.0114,3.3706,0.5310,3.2237,0.0000,5.5560,1.0007,1.0788,0.5310,18.4925,18.4948,9.2112,0.0000,7.0483,11.5725,12.0227,11.2233,0.0,0.0,6.1100,1.5315,0.0,0.0000,3.6388,1.0007,0.000,0.0000,3.4164,3.2908,6.6047,11.6821,13.8018
404,TCGA.WY.A85C,12.9008,13.9031,12.9091,13.6080,12.9546,8.4263,9.7229,3.4755,12.2888,8.9683,8.3253,0.0000,1.5062,14.4187,14.2423,7.4387,14.1549,2.3805,5.4418,5.6563,4.2785,9.0561,5.5429,6.3757,15.8859,0.0000,0.1782,4.9032,0.0000,0.0,0.3368,0.3368,0.0,0.7289,0.3368,1.6339,0.1782,1.0369,0.0000,...,4.6473,1.0369,0.0,0.3368,4.9774,0.1782,0.1782,0.1782,5.0819,3.0864,0.7289,4.0571,0.3368,5.9013,0.3368,1.4378,0.3368,18.2356,18.2365,9.4350,0.0000,8.5804,12.6769,10.2939,12.0796,0.0,0.0,5.2257,1.0369,0.0,0.1782,1.0369,1.5715,0.000,0.3368,3.5902,2.8438,5.7332,11.7779,13.5597


In [None]:
# Everything after the first column is kept as the feature matrix.
train_glioma_82 = train_glioma_82G.iloc[:, 1:]
test_glioma_82 = test_glioma_82G.iloc[:, 1:]
print(train_glioma_82.shape)
print(test_glioma_82.shape)

train_glioma82_numpy = train_glioma_82.to_numpy()
test_glioma82_numpy = test_glioma_82.to_numpy()

# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both the training and the test rows.
scaler_glioma82 = MinMaxScaler().fit(train_glioma82_numpy)
train_glioma82_scaled = scaler_glioma82.transform(train_glioma82_numpy)
test_glioma82_scaled = scaler_glioma82.transform(test_glioma82_numpy)

(406, 791)
(102, 791)


In [None]:
# Copy the scaled arrays into float32 tensors.
train_glioma82_torch = torch.tensor(train_glioma82_scaled, dtype=torch.float32)
test_glioma82_torch = torch.tensor(test_glioma82_scaled, dtype=torch.float32)

In [None]:
#save the attrib_name
# The first column of each merged frame is taken as a Series, printed, and
# written to CSV alongside its positional index.
train_glioma82_name, test_glioma82_name = (
    merged_split.iloc[:, 0] for merged_split in (train_glioma_82G, test_glioma_82G)
)
print(train_glioma82_name)
print(test_glioma82_name)

train_glioma82_name.to_csv("train_82G_idx.csv")
test_glioma82_name.to_csv("test_82G_idx.csv")

0      TCGA.CS.4938
1      TCGA.CS.4941
2      TCGA.CS.4942
3      TCGA.CS.4943
4      TCGA.CS.4944
           ...     
401    TCGA.WY.A858
402    TCGA.WY.A85A
403    TCGA.WY.A85B
404    TCGA.WY.A85C
405    TCGA.WY.A85E
Name: attrib_name, Length: 406, dtype: object
0      TCGA.CS.5390
1      TCGA.CS.5396
2      TCGA.CS.6186
3      TCGA.CS.6290
4      TCGA.CS.6666
           ...     
97     TCGA.VM.A8C8
98     TCGA.VV.A86M
99     TCGA.VW.A7QS
100    TCGA.WY.A859
101    TCGA.WY.A85D
Name: attrib_name, Length: 102, dtype: object


#### Glioma 8:2 training and output


In [None]:
# 10 fold cross validation
# assign train_torch and test_torch
train_torch = train_glioma82_torch
test_torch = test_glioma82_torch

# Final-epoch losses are collected once per fold.
train_fold_loss, valid_fold_loss = [], []
# presumably read by autoencoder() as the input width -- confirm against the class definition
embed_dim = train_glioma82_torch.shape[1]

row_positions = np.arange(len(train_torch))
for k, (train_idx, valid_idx) in enumerate(kfold.split(row_positions)):

  print('Fold: ', k+1 )

  # Both loaders wrap the same tensor; each sampler limits it to one side of the fold.
  train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
  valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)
  train_loader = torch.utils.data.DataLoader(
      dataset=train_torch, sampler=train_sampler, batch_size=batch_size)
  valid_loader = torch.utils.data.DataLoader(
      dataset=train_torch, sampler=valid_sampler, batch_size=batch_size)

  # A new model and optimizer are created for every fold.
  model = autoencoder().to(device)
  optimizer = optim.Adam(params=model.parameters(), lr=lr)

  for epoch in range(epochs):
    train_loss = train_epoch(model, device, optimizer, criterion, train_loader)
    valid_loss = validation_epoch(model, device, criterion, valid_loader)

    print('epoch [{}/{}], train_loss:{:.4f}, valid_loss:{:.4f}'
          .format(epoch + 1, epochs, train_loss, valid_loss))

  # Only the values from the last epoch of the fold are recorded.
  train_fold_loss.append(train_loss)
  valid_fold_loss.append(valid_loss)

Fold:  1
epoch [1/10], train_loss:0.1059, valid_loss:0.0795
epoch [2/10], train_loss:0.0641, valid_loss:0.0437
epoch [3/10], train_loss:0.0349, valid_loss:0.0260
epoch [4/10], train_loss:0.0244, valid_loss:0.0238
epoch [5/10], train_loss:0.0229, valid_loss:0.0229
epoch [6/10], train_loss:0.0215, valid_loss:0.0220
epoch [7/10], train_loss:0.0205, valid_loss:0.0211
epoch [8/10], train_loss:0.0199, valid_loss:0.0203
epoch [9/10], train_loss:0.0194, valid_loss:0.0200
epoch [10/10], train_loss:0.0191, valid_loss:0.0198
Fold:  2
epoch [1/10], train_loss:0.1077, valid_loss:0.0891
epoch [2/10], train_loss:0.0707, valid_loss:0.0487
epoch [3/10], train_loss:0.0391, valid_loss:0.0275
epoch [4/10], train_loss:0.0254, valid_loss:0.0229
epoch [5/10], train_loss:0.0228, valid_loss:0.0221
epoch [6/10], train_loss:0.0219, valid_loss:0.0213
epoch [7/10], train_loss:0.0210, valid_loss:0.0202
epoch [8/10], train_loss:0.0201, valid_loss:0.0194
epoch [9/10], train_loss:0.0195, valid_loss:0.0190
epoch [10/10

In [None]:
# 10 fold cv diagram

In [None]:
# formal training
# The training loader reshuffles every epoch; the test loader keeps row order.
train_glioma82_loader = torch.utils.data.DataLoader(
    dataset=train_glioma82_torch, shuffle=True, batch_size=batch_size)
test_glioma82_loader = torch.utils.data.DataLoader(
    dataset=test_glioma82_torch, shuffle=False, batch_size=batch_size)

# New model and optimizer, fitted on the complete 8:2 training tensor.
model_mirna_82G = autoencoder().to(device)
optimizer = optim.Adam(params=model_mirna_82G.parameters(), lr=lr)

for epoch in range(epochs):
  train_loss = train_epoch(
      model_mirna_82G, device, optimizer, criterion, train_glioma82_loader)

  print('epoch [{}/{}], train_loss:{:.4f}'
        .format(epoch + 1, epochs, train_loss))

epoch [1/10], train_loss:0.1032
epoch [2/10], train_loss:0.0548
epoch [3/10], train_loss:0.0279
epoch [4/10], train_loss:0.0233
epoch [5/10], train_loss:0.0217
epoch [6/10], train_loss:0.0205
epoch [7/10], train_loss:0.0199
epoch [8/10], train_loss:0.0198
epoch [9/10], train_loss:0.0190
epoch [10/10], train_loss:0.0194


In [None]:
# save and load the model
# The whole module object (not only a state_dict) is written to disk and read
# straight back; the trailing expression displays the reloaded model.
torch.save(obj=model_mirna_82G, f="mirna_82G.pt")

mirna_82G = torch.load(f="mirna_82G.pt")
mirna_82G

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=791, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=791, bias=True)
    (3): Tanh()
  )
)

In [None]:
# for train data only
# Encode the 8:2 training tensor with the reloaded model and save the result.
# Inference runs in eval mode under no_grad so no autograd graph is built, and
# the input is moved to whichever device the model's weights live on.
mirna_82G.eval()
encoder_device = next(mirna_82G.parameters()).device
with torch.no_grad():
  mirna_Tr82G_output = mirna_82G.encoder(train_glioma82_torch.to(encoder_device))
print(mirna_Tr82G_output.shape)
print(mirna_Tr82G_output)
# Bring the result back to host memory before handing it to NumPy.
mirna_Tr82G_output = mirna_Tr82G_output.cpu().numpy()
np.savetxt('mirna_Tr82G.csv', mirna_Tr82G_output, delimiter=',')

torch.Size([406, 128])
tensor([[ 0.0989,  0.3143, -0.0319,  ..., -0.0825,  0.0637, -0.0956],
        [ 0.0636,  0.3023,  0.0504,  ..., -0.0685,  0.0562, -0.1120],
        [ 0.0967,  0.1918,  0.0559,  ..., -0.0577,  0.0548, -0.0569],
        ...,
        [ 0.0949,  0.3331,  0.0159,  ..., -0.0750,  0.1060, -0.0914],
        [ 0.0880,  0.3310,  0.0483,  ..., -0.0937,  0.0833, -0.0489],
        [ 0.0998,  0.3365,  0.0280,  ..., -0.1167,  0.0829, -0.1249]],
       grad_fn=<AddmmBackward>)


In [None]:
# for test data only

In [None]:
# for test data only

# import the saved model
# Note: the download is written to 'mirna_82G.pt', the same file name the
# save cell uses, so an existing local copy is overwritten.
id = "1nKcBgQOCp4XIGL81NbAlpedMf28HUdWo"
downloaded = drive.CreateFile({'id':id})
downloaded.GetContentFile('mirna_82G.pt')

# NOTE(security): torch.load unpickles the downloaded file, which can execute
# arbitrary code -- only load checkpoints from a trusted source.
# map_location="cpu" lets a checkpoint that was saved on a GPU load on a
# CPU-only runtime and keeps the model on the same device as the CPU tensors
# passed to its encoder.
mirna82G = torch.load("mirna_82G.pt", map_location="cpu")
mirna82G

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=791, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=791, bias=True)
    (3): Tanh()
  )
)

In [None]:
# for test data only
# Encode the 8:2 test tensor with the downloaded model and save the result.
# Inference runs in eval mode under no_grad so no autograd graph is built, and
# the input is moved to whichever device the model's weights live on.
mirna82G.eval()
encoder_device = next(mirna82G.parameters()).device
with torch.no_grad():
  mirna_Te82G_output = mirna82G.encoder(test_glioma82_torch.to(encoder_device))
print(mirna_Te82G_output.shape)
print(mirna_Te82G_output)
# Bring the result back to host memory before handing it to NumPy.
mirna_Te82G_output = mirna_Te82G_output.cpu().numpy()
np.savetxt('mirna_Test82G.csv', mirna_Te82G_output, delimiter=',')

torch.Size([102, 128])
tensor([[ 0.1087,  0.2322,  0.0404,  ..., -0.0566,  0.0823, -0.0778],
        [ 0.0739,  0.2292,  0.0214,  ..., -0.0410,  0.0456, -0.1059],
        [ 0.0628,  0.2563, -0.0135,  ..., -0.0645,  0.0346, -0.0548],
        ...,
        [ 0.0790,  0.1920, -0.0027,  ..., -0.1089,  0.0170, -0.0786],
        [ 0.1077,  0.4279,  0.1139,  ..., -0.0782,  0.1387, -0.1380],
        [ 0.0987,  0.2703, -0.0167,  ..., -0.1002,  0.0421, -0.1152]],
       grad_fn=<AddmmBackward>)


In [None]:
# for whole data only

### Clinical 7:3

In [None]:
print(glioma_miRnaSeq.shape)

# Inner joins on 'attrib_name': keep the expression rows whose ID appears in
# the corresponding 7:3 split.
train_glioma_73G = pd.merge(glioma_miRnaSeq, train_idx_73G, how="inner", on='attrib_name')
test_glioma_73G = pd.merge(glioma_miRnaSeq, test_idx_73G, how="inner", on='attrib_name')

print(train_glioma_73G.shape)
print(test_glioma_73G.shape)

(508, 792)
(355, 792)
(153, 792)


In [None]:
# Display the merged 7:3 test expression table as the cell output.
test_glioma_73G

Unnamed: 0,attrib_name,hsa-let-7a-1,hsa-let-7a-2,hsa-let-7a-3,hsa-let-7b,hsa-let-7c,hsa-let-7d,hsa-let-7e,hsa-let-7f-1,hsa-let-7f-2,hsa-let-7g,hsa-let-7i,hsa-mir-1-1,hsa-mir-1-2,hsa-mir-100,hsa-mir-101-1,hsa-mir-101-2,hsa-mir-103-1,hsa-mir-103-2,hsa-mir-105-1,hsa-mir-105-2,hsa-mir-106a,hsa-mir-106b,hsa-mir-107,hsa-mir-10a,hsa-mir-10b,hsa-mir-1178,hsa-mir-1179,hsa-mir-1180,hsa-mir-1181,hsa-mir-1182,hsa-mir-1185-1,hsa-mir-1185-2,hsa-mir-1193,hsa-mir-1197,hsa-mir-122,hsa-mir-1224,hsa-mir-1225,hsa-mir-1226,hsa-mir-1227,...,hsa-mir-769,hsa-mir-770,hsa-mir-802,hsa-mir-873,hsa-mir-874,hsa-mir-875,hsa-mir-876,hsa-mir-877,hsa-mir-885,hsa-mir-887,hsa-mir-888,hsa-mir-889,hsa-mir-890,hsa-mir-891a,hsa-mir-891b,hsa-mir-892a,hsa-mir-892b,hsa-mir-9-1,hsa-mir-9-2,hsa-mir-9-3,hsa-mir-922,hsa-mir-92a-1,hsa-mir-92a-2,hsa-mir-92b,hsa-mir-93,hsa-mir-933,hsa-mir-934,hsa-mir-935,hsa-mir-937,hsa-mir-938,hsa-mir-939,hsa-mir-940,hsa-mir-942,hsa-mir-943,hsa-mir-944,hsa-mir-95,hsa-mir-96,hsa-mir-98,hsa-mir-99a,hsa-mir-99b
0,TCGA.CS.4938,12.6226,13.6328,12.6518,14.2090,14.3770,8.5694,10.5123,2.5120,11.7146,7.8873,9.0401,0.0,1.1805,14.2881,13.4646,5.2026,13.8492,1.6650,3.9177,4.2686,3.3286,9.6530,6.2078,8.0838,8.4490,0.0,0.9295,7.3417,0.9295,0.0,0.0000,0.0000,0.0,0.0000,0.0000,1.7451,0.0000,2.4175,0.0000,...,5.1667,0.0000,0.0,0.2399,4.3477,0.2399,0.0000,1.6650,5.6604,3.1637,0.0000,1.9616,0.0000,5.2167,0.4456,1.7451,0.2399,18.1610,18.1573,8.8619,0.0,7.2174,12.6240,11.9491,11.3238,0.0000,0.0000,4.8052,2.0898,0.0,0.7855,2.2629,1.7451,0.0000,0.0000,2.9774,2.6432,5.9745,13.0651,15.7755
1,TCGA.CS.5390,11.9711,12.9824,11.9876,13.4807,11.2006,7.9615,8.2830,3.2412,10.6768,8.2874,7.9079,0.0,2.2718,11.4167,11.5307,5.3924,13.0907,2.5719,5.0543,5.0953,4.0700,7.1289,4.3094,6.4665,8.6497,0.0,1.5433,3.6051,0.2136,0.0,0.3995,0.0000,0.0,0.2136,0.0000,2.9450,0.0000,0.7121,0.2136,...,3.7490,0.0000,0.0,0.3995,5.2363,0.0000,0.0000,1.0819,4.8461,2.7190,1.3760,3.4452,0.0000,7.2228,1.0819,2.7190,0.8462,18.7191,18.7194,8.9929,0.0,7.6630,11.5191,8.3395,11.4269,0.0000,0.0000,4.6583,0.0000,0.0,0.3995,1.7626,1.3760,0.0000,0.0000,2.6473,2.0673,4.7889,9.5536,12.8821
2,TCGA.CS.5396,11.9079,12.9042,11.9024,13.6257,11.0146,7.4381,7.8353,2.9684,10.8243,7.6516,6.6823,0.0,1.7117,10.5342,11.2725,4.3892,13.1710,2.5176,5.9563,5.8596,4.0814,6.5266,4.2765,8.0324,12.8319,0.0,1.0358,2.7228,0.2327,0.0,0.0000,0.0000,0.0,0.0000,0.0000,0.6089,0.0000,0.4330,0.0000,...,3.2330,0.0000,0.0,0.0000,5.5231,0.0000,0.0000,0.7657,3.5671,1.5487,0.9071,1.5487,0.0000,6.8317,1.2632,1.2632,0.9071,18.6702,18.6717,8.0826,0.0,7.8767,11.6950,7.0146,11.4924,0.0000,0.0000,4.6326,0.0000,0.0,0.4330,0.7657,1.2632,0.0000,0.0000,1.9263,1.8582,4.6728,8.9760,12.9193
3,TCGA.CS.6186,13.4597,14.4615,13.4855,14.1266,13.0539,8.1080,13.0907,4.0380,13.5036,9.3040,8.8824,0.0,0.1206,13.9412,14.8114,6.0237,13.6246,2.5772,1.1507,1.0929,3.7322,8.4654,6.4404,3.1160,11.9342,0.0,0.6069,5.0665,0.2318,0.0,0.1206,0.0000,0.0,0.0000,0.0000,1.0327,0.1206,2.5560,0.2318,...,5.5738,0.1206,0.0,1.3113,3.4738,0.0000,0.8354,1.9218,4.4936,5.3423,1.0929,1.7076,0.2318,7.9501,2.1372,3.1729,0.9042,18.0401,18.0421,8.0413,0.0,6.9880,11.7376,10.3726,11.6821,0.1206,0.0000,3.2409,1.5447,0.0,0.4315,1.9867,0.5218,0.1206,0.2318,2.2993,0.3351,6.5863,11.7450,16.8502
4,TCGA.CS.6290,13.4212,14.4253,13.4393,14.2401,14.2439,8.5067,10.9691,3.9859,14.0477,8.8979,8.8774,0.0,2.6603,13.8198,13.6917,6.8008,14.3698,2.9490,3.4520,3.4146,3.2331,8.7100,6.0922,6.2169,10.7620,0.0,0.0000,6.5609,0.0000,0.0,0.1891,0.1891,0.0,0.1891,0.1891,3.1895,0.9857,1.7574,0.1891,...,5.0596,1.0843,0.0,0.8799,5.1306,0.0000,0.0000,1.3451,4.3050,5.1982,0.8799,4.3552,0.0000,5.1760,0.3562,1.2633,0.3562,18.1240,18.1234,9.1091,0.0,6.4862,11.5712,10.4497,11.5341,0.0000,0.0000,4.8374,1.9263,0.0,0.6417,2.2990,1.6326,0.0000,0.5060,2.1703,2.1247,7.0711,11.9511,14.5708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148,TCGA.VM.A8C9,13.9784,14.9795,13.9976,14.9215,14.3233,8.9959,11.8618,4.9812,14.0363,9.5253,9.0903,0.0,2.5524,15.0347,14.7176,6.9893,15.0487,3.0729,5.2331,5.1971,5.5791,8.5560,7.1719,5.5074,6.5569,0.0,0.7614,6.4230,0.0000,0.0,0.7614,0.9462,0.0,0.3007,0.0000,2.5524,0.0000,1.9186,0.3007,...,6.1351,1.1101,0.0,0.5493,6.0669,0.0000,0.3007,1.5128,3.9633,4.6361,0.9462,5.8052,0.0000,5.6474,0.7614,1.1101,0.0000,17.4570,17.4561,8.4539,0.0,7.5002,11.9858,10.8210,12.0425,0.0000,0.0000,5.4852,2.8592,0.0,0.0000,2.7139,2.3042,0.0000,0.5493,2.2349,0.7614,7.2442,11.8441,15.0379
149,TCGA.VV.A86M,13.3444,14.3340,13.3509,14.2922,14.5039,8.3327,10.3582,4.8835,13.1183,9.2402,8.2522,0.0,2.3288,12.5365,13.7490,6.6213,13.4652,2.2777,3.5232,3.7652,2.9678,9.3261,4.7770,6.6766,9.2979,0.0,0.7654,4.5695,0.4329,0.0,0.2326,0.0000,0.0,0.0000,0.0000,0.2326,0.0000,0.2326,0.0000,...,4.7397,0.0000,0.0,0.0000,4.2227,0.0000,0.0000,0.9067,2.7967,2.3288,0.6087,0.6087,0.0000,5.0297,0.4329,0.4329,0.2326,18.3368,18.3373,10.0858,0.0,8.6710,12.8461,10.8498,11.9722,0.0000,0.0000,4.9089,1.0354,0.0,0.4329,1.6320,1.5482,0.2326,0.0000,3.2323,0.7654,5.9392,12.7306,14.0264
150,TCGA.VW.A7QS,12.7043,13.7075,12.7011,13.8843,12.4618,7.6211,8.8171,3.1475,11.3708,7.7419,7.9016,0.0,1.3993,13.1023,13.1729,4.7570,12.5150,1.1019,5.5299,5.4456,3.0076,7.6880,4.4889,8.4136,15.6489,0.0,1.4862,3.9763,0.0000,0.0,0.0000,0.0000,0.0,0.0000,0.0000,1.1019,0.2188,0.4088,0.0000,...,3.5005,0.0000,0.0,0.2188,4.0917,0.0000,0.0000,0.2188,4.2979,1.3993,3.0931,2.7510,1.4862,11.2895,3.8178,5.8900,3.3695,18.4974,18.4966,9.3012,0.0,6.9291,11.6195,8.2509,11.5611,0.0000,0.0000,4.5098,0.5766,0.0,0.4088,1.3068,0.5766,0.0000,0.2188,1.7894,1.3068,4.6292,11.3946,12.7069
151,TCGA.WY.A859,12.2999,13.2938,12.3038,12.6302,12.7542,9.0511,10.9400,4.4782,12.8556,9.4852,8.3393,0.0,4.3815,12.4057,13.2361,6.2511,14.9158,3.2934,5.9809,6.0165,3.9127,8.7109,6.9311,6.5410,15.7008,0.0,1.5058,6.4485,0.3367,0.0,1.3657,1.1261,0.0,0.9410,0.1781,5.0756,0.3367,1.5058,0.3367,...,7.3732,2.8694,0.0,3.6207,7.2391,0.0000,1.7508,2.5187,7.0660,2.9452,1.0365,6.9419,0.0000,7.3617,1.0365,1.5710,0.4795,18.2874,18.2865,9.3155,0.0,8.4972,12.3321,10.1424,11.6391,0.0000,0.0000,7.0082,0.6094,0.0,0.3367,3.3316,0.9410,0.0000,0.0000,4.3815,3.8745,7.0603,11.2630,14.4195


In [None]:
# Everything after the first column is kept as the feature matrix.
train_glioma_73 = train_glioma_73G.iloc[:, 1:]
test_glioma_73 = test_glioma_73G.iloc[:, 1:]
print(train_glioma_73.shape)
print(test_glioma_73.shape)

train_glioma73_numpy = train_glioma_73.to_numpy()
test_glioma73_numpy = test_glioma_73.to_numpy()

# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both the training and the test rows.
scaler_glioma73 = MinMaxScaler().fit(train_glioma73_numpy)
train_glioma73_scaled = scaler_glioma73.transform(train_glioma73_numpy)
test_glioma73_scaled = scaler_glioma73.transform(test_glioma73_numpy)

(355, 791)
(153, 791)


In [None]:
# Copy the scaled arrays into float32 tensors.
train_glioma73_torch = torch.tensor(train_glioma73_scaled, dtype=torch.float32)
test_glioma73_torch = torch.tensor(test_glioma73_scaled, dtype=torch.float32)

In [None]:
#save the attrib_name
# The first column of each merged frame is taken as a Series, printed, and
# written to CSV alongside its positional index.
train_glioma73_name, test_glioma73_name = (
    merged_split.iloc[:, 0] for merged_split in (train_glioma_73G, test_glioma_73G)
)
print(train_glioma73_name)
print(test_glioma73_name)

train_glioma73_name.to_csv("train_73G_idx.csv")
test_glioma73_name.to_csv("test_73G_idx.csv")

0      TCGA.CS.4941
1      TCGA.CS.4942
2      TCGA.CS.4943
3      TCGA.CS.4944
4      TCGA.CS.5393
           ...     
350    TCGA.WY.A858
351    TCGA.WY.A85A
352    TCGA.WY.A85B
353    TCGA.WY.A85C
354    TCGA.WY.A85E
Name: attrib_name, Length: 355, dtype: object
0      TCGA.CS.4938
1      TCGA.CS.5390
2      TCGA.CS.5396
3      TCGA.CS.6186
4      TCGA.CS.6290
           ...     
148    TCGA.VM.A8C9
149    TCGA.VV.A86M
150    TCGA.VW.A7QS
151    TCGA.WY.A859
152    TCGA.WY.A85D
Name: attrib_name, Length: 153, dtype: object


#### Glioma 7:3 training and output


In [None]:
# 10 fold cross validation
# assign train_torch and test_torch
train_torch = train_glioma73_torch
test_torch = test_glioma73_torch

# Final-epoch losses are collected once per fold.
train_fold_loss, valid_fold_loss = [], []
# presumably read by autoencoder() as the input width -- confirm against the class definition
embed_dim = train_glioma73_torch.shape[1]

row_positions = np.arange(len(train_torch))
for k, (train_idx, valid_idx) in enumerate(kfold.split(row_positions)):

  print('Fold: ', k+1 )

  # Both loaders wrap the same tensor; each sampler limits it to one side of the fold.
  train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
  valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)
  train_loader = torch.utils.data.DataLoader(
      dataset=train_torch, sampler=train_sampler, batch_size=batch_size)
  valid_loader = torch.utils.data.DataLoader(
      dataset=train_torch, sampler=valid_sampler, batch_size=batch_size)

  # A new model and optimizer are created for every fold.
  model = autoencoder().to(device)
  optimizer = optim.Adam(params=model.parameters(), lr=lr)

  for epoch in range(epochs):
    train_loss = train_epoch(model, device, optimizer, criterion, train_loader)
    valid_loss = validation_epoch(model, device, criterion, valid_loader)

    print('epoch [{}/{}], train_loss:{:.4f}, valid_loss:{:.4f}'
          .format(epoch + 1, epochs, train_loss, valid_loss))

  # Only the values from the last epoch of the fold are recorded.
  train_fold_loss.append(train_loss)
  valid_fold_loss.append(valid_loss)

Fold:  1
epoch [1/10], train_loss:0.1085, valid_loss:0.0973
epoch [2/10], train_loss:0.0676, valid_loss:0.0536
epoch [3/10], train_loss:0.0379, valid_loss:0.0321
epoch [4/10], train_loss:0.0255, valid_loss:0.0293
epoch [5/10], train_loss:0.0236, valid_loss:0.0283
epoch [6/10], train_loss:0.0221, valid_loss:0.0273
epoch [7/10], train_loss:0.0209, valid_loss:0.0257
epoch [8/10], train_loss:0.0205, valid_loss:0.0251
epoch [9/10], train_loss:0.0202, valid_loss:0.0247
epoch [10/10], train_loss:0.0195, valid_loss:0.0244
Fold:  2
epoch [1/10], train_loss:0.1091, valid_loss:0.0813
epoch [2/10], train_loss:0.0654, valid_loss:0.0461
epoch [3/10], train_loss:0.0361, valid_loss:0.0276
epoch [4/10], train_loss:0.0255, valid_loss:0.0258
epoch [5/10], train_loss:0.0239, valid_loss:0.0252
epoch [6/10], train_loss:0.0224, valid_loss:0.0238
epoch [7/10], train_loss:0.0219, valid_loss:0.0228
epoch [8/10], train_loss:0.0208, valid_loss:0.0221
epoch [9/10], train_loss:0.0207, valid_loss:0.0218
epoch [10/10

In [None]:
# 10 fold cv diagram

In [None]:
# formal training
# The training loader reshuffles every epoch; the test loader keeps row order.
train_glioma73_loader = torch.utils.data.DataLoader(
    dataset=train_glioma73_torch, shuffle=True, batch_size=batch_size)
test_glioma73_loader = torch.utils.data.DataLoader(
    dataset=test_glioma73_torch, shuffle=False, batch_size=batch_size)

# New model and optimizer, fitted on the complete 7:3 training tensor.
model_mirna_73G = autoencoder().to(device)
optimizer = optim.Adam(params=model_mirna_73G.parameters(), lr=lr)

for epoch in range(epochs):
  train_loss = train_epoch(
      model_mirna_73G, device, optimizer, criterion, train_glioma73_loader)

  print('epoch [{}/{}], train_loss:{:.4f}'
        .format(epoch + 1, epochs, train_loss))

epoch [1/10], train_loss:0.1094
epoch [2/10], train_loss:0.0707
epoch [3/10], train_loss:0.0382
epoch [4/10], train_loss:0.0259
epoch [5/10], train_loss:0.0240
epoch [6/10], train_loss:0.0225
epoch [7/10], train_loss:0.0218
epoch [8/10], train_loss:0.0209
epoch [9/10], train_loss:0.0202
epoch [10/10], train_loss:0.0200


In [None]:
# save and load the model
# The whole module object (not only a state_dict) is written to disk and read
# straight back; the trailing expression displays the reloaded model.
torch.save(obj=model_mirna_73G, f="mirna_73G.pt")

mirna_73G = torch.load(f="mirna_73G.pt")
mirna_73G

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=791, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=791, bias=True)
    (3): Tanh()
  )
)

In [None]:
# for train data only
# Encode the 7:3 training tensor with the reloaded model and save the result.
# Inference runs in eval mode under no_grad so no autograd graph is built, and
# the input is moved to whichever device the model's weights live on.
mirna_73G.eval()
encoder_device = next(mirna_73G.parameters()).device
with torch.no_grad():
  mirna_Tr73G_output = mirna_73G.encoder(train_glioma73_torch.to(encoder_device))
print(mirna_Tr73G_output.shape)
print(mirna_Tr73G_output)
# Bring the result back to host memory before handing it to NumPy.
mirna_Tr73G_output = mirna_Tr73G_output.cpu().numpy()
np.savetxt('mirna_Tr73G.csv', mirna_Tr73G_output, delimiter=',')

torch.Size([355, 128])
tensor([[ 6.4726e-02, -4.2632e-01, -3.5382e-01,  ..., -1.2889e-02,
          7.1031e-02, -1.0436e+00],
        [-9.2673e-03, -2.1383e-01, -2.4442e-01,  ..., -3.5325e-02,
          2.2565e-02, -6.8443e-01],
        [ 2.5302e-03, -2.6197e-01, -2.4555e-01,  ..., -1.5348e-02,
          7.6830e-02, -7.4028e-01],
        ...,
        [ 4.5974e-02, -3.4195e-01, -3.6260e-01,  ...,  1.1801e-02,
          8.5198e-02, -1.0136e+00],
        [ 3.7366e-02, -4.3328e-01, -4.0167e-01,  ...,  3.9748e-02,
          6.0322e-02, -1.0789e+00],
        [ 2.2317e-04, -3.4477e-01, -3.6741e-01,  ...,  6.3414e-02,
         -6.3524e-02, -9.5566e-01]], grad_fn=<AddmmBackward>)


In [None]:
# for test data only

# import the saved model
# Note: the download is written to 'mirna_73G.pt', the same file name the
# save cell uses, so an existing local copy is overwritten.
id = "1HfRnDTMEhfmRokSNRdnvSZQhDtVws4KX"
downloaded = drive.CreateFile({'id':id})
downloaded.GetContentFile('mirna_73G.pt')

# NOTE(security): torch.load unpickles the downloaded file, which can execute
# arbitrary code -- only load checkpoints from a trusted source.
# map_location="cpu" lets a checkpoint that was saved on a GPU load on a
# CPU-only runtime and keeps the model on the same device as the CPU tensors
# passed to its encoder.
mirna73G = torch.load("mirna_73G.pt", map_location="cpu")
mirna73G

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=791, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=791, bias=True)
    (3): Tanh()
  )
)

In [None]:
# for test data only
# Encode the 7:3 test tensor with the downloaded model and save the result.
# Inference runs in eval mode under no_grad so no autograd graph is built, and
# the input is moved to whichever device the model's weights live on.
mirna73G.eval()
encoder_device = next(mirna73G.parameters()).device
with torch.no_grad():
  mirna_Te73G_output = mirna73G.encoder(test_glioma73_torch.to(encoder_device))
print(mirna_Te73G_output.shape)
print(mirna_Te73G_output)
# Bring the result back to host memory before handing it to NumPy.
mirna_Te73G_output = mirna_Te73G_output.cpu().numpy()
np.savetxt('mirna_Test73G.csv', mirna_Te73G_output, delimiter=',')

torch.Size([153, 128])
tensor([[ 0.0949, -0.3832, -0.3728,  ...,  0.0486,  0.0377, -1.0439],
        [ 0.0031, -0.2848, -0.2804,  ..., -0.0015,  0.0326, -0.7668],
        [ 0.0182, -0.2588, -0.2258,  ..., -0.0125,  0.0437, -0.7629],
        ...,
        [ 0.0066, -0.2992, -0.2122,  ..., -0.0280,  0.0579, -0.7181],
        [ 0.0751, -0.4864, -0.5458,  ..., -0.0046,  0.0725, -1.4314],
        [ 0.0399, -0.3030, -0.3000,  ...,  0.0245,  0.0404, -0.8259]],
       grad_fn=<AddmmBackward>)


In [None]:
# for whole data only

## Mirna (No Cross validation)


In [None]:
# Read the table and discard its first column in a single expression.
data_miRnaSeq = pd.read_csv('data_miRnaSeq.csv').iloc[:, 1:]
npary_miRna = data_miRnaSeq.to_numpy()

#norm_miRna = normalize(array_miRnaSeq, norm = "l2")
# Min-max scaling, fitted on and applied to the full array.
scaler = MinMaxScaler().fit(npary_miRna)
scaled_miRna = scaler.transform(npary_miRna)

In [None]:
# float32 copy of the scaled matrix.
miRna_torch = torch.tensor(scaled_miRna, dtype=torch.float32)

# Fixed-order batches of 256 rows (no shuffling).
miRna_loader = torch.utils.data.DataLoader(
    dataset=miRna_torch, shuffle=False, batch_size=256
)

# Number of feature columns in the scaled matrix.
embed_dim = miRna_torch.shape[1]

In [None]:
# build up ae model for mirna
class autoencoder(nn.Module):
  """Fully connected autoencoder: input -> hidden -> latent -> hidden -> input.

  Args:
    input_dim: width of the input and of the reconstruction. When omitted,
      the notebook-level ``embed_dim`` global is read at construction time,
      so a plain ``autoencoder()`` call behaves as before. Passing it
      explicitly avoids depending on which cell last assigned ``embed_dim``.
    hidden_dim: width of the intermediate layer in both halves (default 256).
    latent_dim: width of the encoder output (default 128).
  """

  def __init__(self, input_dim=None, hidden_dim=256, latent_dim=128):
    super(autoencoder, self).__init__()
    if input_dim is None:
      # Backward-compatible fallback to the notebook global.
      input_dim = embed_dim
    self.encoder = nn.Sequential(
      nn.Linear(input_dim, hidden_dim),
      nn.ReLU(True),
      nn.Linear(hidden_dim, latent_dim))
    # The decoder mirrors the encoder; Tanh bounds the reconstruction to [-1, 1].
    self.decoder = nn.Sequential(
      nn.Linear(latent_dim, hidden_dim),
      nn.ReLU(True),
      nn.Linear(hidden_dim, input_dim),
      nn.Tanh())

  def forward(self, x):
    """Return the reconstruction of ``x`` (encoder followed by decoder)."""
    x = self.encoder(x)
    x = self.decoder(x)
    return x

In [None]:
# minmax scaler mirna
# Train the autoencoder on the min-max scaled matrix served by miRna_loader.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_mirna128 = autoencoder().to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model_mirna128.parameters(), lr=0.001)

epochs = 10
#batch_size = 256

for epoch in range(epochs):
  model_mirna128.train()
  for batch_features in miRna_loader:
    # The model lives on `device`, so each batch has to be moved there too;
    # otherwise the forward pass fails on a GPU runtime.
    batch_features = batch_features.to(device)
    output = model_mirna128(batch_features)
    loss = criterion(output, batch_features)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Reports the loss of the last batch of the epoch. .item() works for both
  # CPU and CUDA tensors, whereas .data.numpy() raises on CUDA tensors.
  print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, epochs, loss.item()))
  

epoch [1/10], loss:0.0781
epoch [2/10], loss:0.0417
epoch [3/10], loss:0.0226
epoch [4/10], loss:0.0185
epoch [5/10], loss:0.0176
epoch [6/10], loss:0.0162
epoch [7/10], loss:0.0154
epoch [8/10], loss:0.0149
epoch [9/10], loss:0.0146
epoch [10/10], loss:0.0145


In [None]:
# Encode the whole miRNA dataset with the trained encoder and save the
# embeddings to CSV.
model_mirna128.eval()

# Previously only the final batch yielded by the loader was kept and encoded,
# so the saved file covered just the tail of the dataset. Encode every batch
# and concatenate them in loader order instead.
encoder_device = next(model_mirna128.parameters()).device
encoded_batches = []
with torch.no_grad():  # inference only: no autograd graph needed
  for features in miRna_loader:
    encoded_batches.append(model_mirna128.encoder(features.to(encoder_device)))

result_mirna128 = torch.cat(encoded_batches, dim=0)

resultNp_mirna128 = result_mirna128.cpu().numpy()

# NOTE(review): file name kept unchanged for downstream readers; the "104" in it
# presumably referred to the row count of the old last-batch-only output.
np.savetxt('resultNp_mirna104-128.csv', resultNp_mirna128, delimiter=',')

array([[ 0.46319434, -0.13032623, -0.03938269, ...,  0.46743885,
        -0.22245201, -0.3501378 ],
       [ 0.795352  , -0.1801069 ,  0.00738584, ...,  0.70923835,
        -0.36854056, -0.5305142 ],
       [ 0.55283695, -0.13001405, -0.02533646, ...,  0.55947953,
        -0.2599942 , -0.42309454],
       ...,
       [ 0.62339664, -0.14806284, -0.05587918, ...,  0.6592212 ,
        -0.3343306 , -0.4683654 ],
       [ 0.65795475, -0.17544797, -0.02611945, ...,  0.63121736,
        -0.30837235, -0.466317  ],
       [ 0.7439706 , -0.14961256,  0.00157071, ...,  0.7285044 ,
        -0.32549438, -0.5925204 ]], dtype=float32)

### Preprocessing: normalized input

In [None]:
# normalized mirna
# Train a fresh autoencoder on the batches of `miRna_loader` (MSE loss, Adam).
# NOTE(review): this cell does not rebuild `miRna_loader`; confirm the loader
# was recreated from the normalized matrix before running it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = autoencoder().to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
batch_size = 256  # not used by this cell; the loader fixes the batch size

for epoch in range(epochs):
  for batch_features in miRna_loader:
    # Keep the batch on the same device as the model.
    batch_features = batch_features.to(device)
    output = model(batch_features)
    loss = criterion(output, batch_features)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Reported value is the loss of the last mini-batch of the epoch.
  # .item() works for CPU and CUDA tensors alike (unlike .data.numpy()).
  print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, epochs, loss.item()))

epoch [1/10], loss:0.0018
epoch [2/10], loss:0.0009
epoch [3/10], loss:0.0005
epoch [4/10], loss:0.0002
epoch [5/10], loss:0.0001
epoch [6/10], loss:0.0001
epoch [7/10], loss:0.0001
epoch [8/10], loss:0.0001
epoch [9/10], loss:0.0001
epoch [10/10], loss:0.0001


In [None]:

# Load the methylation table from the local CSV into a DataFrame.
data_methylation = pd.read_csv(filepath_or_buffer='data_methylation.csv')

In [None]:

# Collect the methylation values as a list of rows, skipping the first column
# (the original inner loop started at column 1).
# The previous nested loops used `range(0, )` / `range(1, )`, i.e. range(0) and
# range(1): the outer loop never executed and the list always stayed empty.
# A vectorised slice covers every row and every remaining column.
array_methylation = data_methylation.iloc[:, 1:].to_numpy().tolist()


In [None]:



# Disabled alternative: L2 normalisation of the methylation matrix.
#norm_methylation = normalize(array_methylation, norm = "l2")

# Fresh min-max scaler; this rebinds the notebook-wide name `scaler`.
scaler = MinMaxScaler()

# Disabled: scaling of `array_methylation` with the scaler created above.
#scaled_methylation = scaler.fit_transform(array_methylation)


# Disabled: shape check of the scaled matrix.
#print(scaled_methylation.shape)

(616, 823)


In [None]:

# methy_torch = torch.FloatTensor(scaled_methylation)
# methy_loader = torch.utils.data.DataLoader(
#     methy_torch, batch_size = 256, shuffle=True
# )
# print(methy_loader)


# TODO: research how the DataLoader batches the data and how the batches should be prepared before feeding them to the AE model.

<torch.utils.data.dataloader.DataLoader object at 0x7fd2b79e6610>


In [None]:
# build up ae model for methylation
class autoencoder(nn.Module):
  """Fully connected autoencoder for the methylation features.

  NOTE: this definition rebinds the name `autoencoder` used by the earlier
  miRNA model cell; cells run after this one get this class.

  Args:
    input_dim: number of input features (default 823, the width that was
      hard-coded before).
    hidden_dim: width of the hidden layer on both sides (default 256).
    latent_dim: size of the bottleneck embedding (default 128).
  """

  def __init__(self, input_dim=823, hidden_dim=256, latent_dim=128):
    super(autoencoder, self).__init__()
    self.encoder = nn.Sequential(
      nn.Linear(input_dim, hidden_dim),
      nn.ReLU(True),
      nn.Linear(hidden_dim, latent_dim),
      nn.ReLU(True),  # was `nn.ReLu`, which does not exist in torch.nn
      )
    self.decoder = nn.Sequential(
      nn.Linear(latent_dim, hidden_dim),
      nn.ReLU(True),
      nn.Linear(hidden_dim, input_dim),
      nn.Tanh())

  def getLowEmbeds(self, x):
    """Return the low-dimensional embedding of `x`: (batch, latent_dim)."""
    # The method previously had no body, which made the whole cell a syntax error.
    return self.encoder(x)

  def forward(self, x):
      """Encode then decode `x`; returns the reconstruction (batch, input_dim)."""
      x = self.encoder(x)
      x = self.decoder(x)
      return x

In [None]:

# Train the autoencoder defined in the cell above (MSE loss, Adam).
# NOTE(review): the loop iterates over `miRna_loader`, although the model above
# is the methylation autoencoder and the methylation loader cell is commented
# out - confirm which loader is intended (feature widths must match the model).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = autoencoder().to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
batch_size = 256  # not used by this cell; the loader fixes the batch size

for epoch in range(epochs):
  for batch_features in miRna_loader:
    # Keep the batch on the same device as the model.
    batch_features = batch_features.to(device)
    output = model(batch_features)
    loss = criterion(output, batch_features)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Reported value is the loss of the last mini-batch of the epoch.
  # .item() works for CPU and CUDA tensors alike (unlike .data.numpy()).
  print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, epochs, loss.item()))
  

In [None]:
# SCNV table: fetch the file from Google Drive via the PyDrive client,
# store it locally as 'data_scnv128.csv', then load it into a DataFrame.
id = "1siCbnbxD3Farp2QDoSE0EkvPtq5NAnxU"
downloaded = drive.CreateFile(dict(id=id))
downloaded.GetContentFile('data_scnv128.csv')

data_scnv = pd.read_csv(filepath_or_buffer='data_scnv128.csv')

In [None]:
# Keep columns 1..end of the SCNV table (position-based, so the first column
# is dropped whatever its label is), then show the frame.
scnv = data_scnv.iloc[:, 1:data_scnv.shape[1]]
scnv

Unnamed: 0,1q21.3,1q44,3p25.1,3q26.32,4q13.3,5p15.33,6p23,6q21,8p11.23,8p11.21,8q24.21,10p15.1,10q22.3,11p13,11q13.3,11q14.1,12p13.33,12q15,13q34,14q21.1,15q26.3,17p11.2,17q12,17q23.1,19p13.12,19q12,19q13.42,20q13.2,1p36.13,1p21.2,2q37.3,3p21.31,4p16.3,4q35.2,5q11.2,5q21.3,6p25.3,6q15,6q27,7p22.3,...,0.19,0.20,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.30,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.40,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.50,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58
0,3.6569,0.1874,-0.4576,0.2586,3.6569,0.1659,0.1470,-1.0883,0.1976,0.1976,0.2218,-0.4827,-0.4676,0.1843,2.0965,2.6037,0.1862,3.6569,-0.4534,-0.4279,0.1184,-0.5673,0.0783,0.0783,0.0000,0.0000,0.0000,1.8238,0.0690,0.2159,-0.3991,-0.4659,-0.4717,-0.3800,0.2476,0.2875,0.1470,-1.0883,-1.1190,0.2267,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0.3873,0.3858,0.4438,0.4765,-0.4365,0.0226,-0.0087,-0.4901,0.4473,0.4473,0.4650,-0.4742,-0.4608,0.0072,0.0072,0.0072,-0.0066,-0.0003,-0.4263,0.5003,-0.0162,-0.5305,3.6569,2.3088,-0.1543,0.0964,0.0964,0.7833,-0.5178,-0.4478,0.0000,-0.4765,-0.4365,0.0562,0.4820,-0.4407,-0.4952,-0.4352,-0.4529,0.4494,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0.0833,0.4121,0.0000,-0.1448,-0.3167,0.0127,1.1985,0.0375,3.6569,-0.7089,1.1520,0.0343,0.0588,0.0146,-0.3206,-0.3206,0.0229,0.3786,-0.3161,1.6240,-0.3274,-0.6558,1.0898,1.4188,-0.3210,0.0053,0.0053,3.6569,-0.3236,0.0833,-0.3907,0.0000,-0.3265,-0.3309,0.0250,0.0250,-0.1379,-1.0618,0.0314,0.2113,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.5887,0.6172,-0.0162,-0.0577,-0.0326,0.0231,0.0134,0.0134,0.0264,0.0264,0.0264,0.0142,0.0142,0.0179,0.0233,0.0233,-0.0022,0.0000,-0.0196,-0.0153,0.0253,-0.6440,2.9314,1.6935,-0.0312,0.0083,-0.0144,0.0316,-0.0233,-0.0233,-0.0069,-0.0577,-0.0247,-0.0286,0.0191,0.0188,0.0134,0.0134,0.0134,0.0114,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.0000,0.0000,0.0046,-0.0028,-0.0039,0.0338,-0.0193,-0.0102,0.0171,0.0171,0.9728,0.0085,0.0207,0.0018,0.0018,0.0121,-0.3438,-0.0149,0.0233,0.0357,0.1050,0.2890,0.2890,0.2890,1.1270,0.2952,0.2952,0.0288,0.0000,0.0000,0.0429,0.0089,0.0160,-0.0057,-0.6381,0.0212,-0.0193,-0.0150,-0.0102,-0.0036,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
611,1.8555,1.8555,-0.0856,-0.0856,-0.3846,0.0591,0.0146,0.0146,-0.1805,-0.1805,-0.2051,0.0335,0.0335,0.0247,0.0399,0.0399,0.0300,0.0122,-0.0791,0.3840,-0.7443,-0.1830,-0.1995,-0.2027,-0.2270,-0.2270,-0.2270,-0.1986,-0.7217,-0.7752,-0.0213,-0.0856,-0.3846,-0.4040,0.0000,0.0000,0.0146,0.0146,0.0146,0.3963,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
612,0.2754,0.2754,0.0011,0.0011,-0.0003,0.3554,0.0039,0.0039,2.2140,2.7946,0.2937,0.0000,0.0000,-0.0018,3.6569,3.6569,-0.0033,0.5011,-0.0116,-0.1958,0.0134,0.0083,0.0083,0.0083,-0.0003,-0.0003,-0.0003,0.0025,-0.0709,-0.0709,-0.0829,0.0011,-0.0003,-0.0003,0.0504,0.1360,0.0039,0.0039,0.0039,-0.0017,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
613,0.0122,0.0043,-0.2560,-0.2653,-0.0028,0.0085,-0.0004,0.0000,-0.5301,0.0118,0.0001,0.0146,0.0146,0.0163,3.6569,-0.5296,0.0093,0.0106,-0.5320,-0.2518,0.0272,-0.5123,-0.5123,-0.5134,1.5697,0.0624,0.0624,0.0320,-0.5115,-0.2595,-0.2542,-0.2560,-0.0028,-0.0028,0.0085,0.0036,-0.0004,0.0000,0.0000,0.0060,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
614,0.4855,0.4964,-0.1585,-0.1585,0.1377,0.1467,0.1528,0.1372,0.0849,0.0849,0.0849,-0.0042,-0.0042,-0.1563,-0.1563,-0.1563,0.1500,0.1394,-0.1629,-0.0008,-0.1504,0.0089,0.0089,0.0089,0.0086,0.0086,0.0086,0.0113,-0.1532,-0.1532,0.0057,-0.1585,0.1840,0.1377,0.1467,0.1467,0.1528,-0.1732,0.1474,-0.0080,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# NumPy view of the SCNV frame (no dtype conversion requested), then show it.
scnv_npary = scnv.to_numpy(dtype=None, copy=False)
scnv_npary

array([[3.6569, 0.1874, -0.4576, ..., 0, 0, 0],
       [0.3873, 0.3858, 0.4438, ..., 0, 0, 0],
       [0.0833, 0.4121, 0.0, ..., 0, 0, 0],
       ...,
       [0.0122, 0.0043, -0.256, ..., 0, 0, 0],
       [0.4855, 0.4964, -0.1585, ..., 0, 0, 0],
       [0.6693, 0.6614, -0.0109, ..., 0, 0, 0]], dtype=object)

In [None]:
# Locate the stray text marker 'Ω' in the SCNV array.
# (A bare `'Ω' in scnv_npary` statement used to sit here; its result was
# discarded, so it has been dropped.)
# `string` holds the (row indices, column indices) tuple returned by np.where.
string = np.where(scnv_npary == 'Ω')
print(string)
print(scnv_npary[string])

# Print two full rows for visual inspection of their raw contents.
print(scnv_npary[4])
print(scnv_npary[0])

(array([0]), array([69]))
['Ω']
[0.0 0.0 0.0046 -0.0028 -0.0039 0.0338 -0.0193 -0.0102 0.0171 0.0171
 0.9728 0.0085 0.0207 0.0018 0.0018 0.0121 -0.3438 -0.0149 0.0233 0.0357
 0.105 0.289 0.289 0.289 1.127 0.2952 0.2952 0.0288 0.0 0.0 0.0429 0.0089
 0.016 -0.0057 -0.6381 0.0212 -0.0193 -0.015 -0.0102 -0.0036 -0.0036
 -0.6626 0.3251 0.3251 0.3251 0.3259 0.0207 0.0372 0.0138 0.0018 -0.652
 -0.652 -0.3438 -0.0149 -0.0149 0.0048 0.0357 0.0357 0.0072 -0.6684
 -0.6687 0.289 0.0134 -0.7006 0.2952 0.0303 -0.0135 -0.0278 0.0128 '0' 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[3.6569 0.1874 -0.4576 0.2586 3.6569 0.1659 0.147 -1.0883 0.1976 0.1976
 0.2218 -0.4827 -0.4676 0.1843 2.0965 2.6037 0.1862 3.6569 -0.4534 -0.4279
 0.1184 -0.5673 0.0783 0.0783 0.0 0.0 0.0 1.8238 0.069 0.2159 -0.3991
 -0.4659 -0.4717 -0.38 0.2476 0.2875 0.147 -1.0883 -1.119 0.2267 0.2087
 0.1829 -0.4577 -0.488 -0.488 -0.488 -0.4676 -0.4676 0.1054 -0.5

In [None]:
# Min-max scale the SCNV matrix.
# The raw array has dtype=object because it contains text entries (the
# inspection above shows 'Ω' and '0' strings), which is most likely what made
# MinMaxScaler raise ValueError. Coerce every cell to a number first; entries
# that cannot be parsed become NaN.
scnv_numeric = pd.DataFrame(scnv_npary).apply(pd.to_numeric, errors='coerce')
print('non-numeric or missing SCNV entries:',
      int(scnv_numeric.isna().sum().sum()))
# NOTE(review): unparsable entries are imputed with 0.0 - confirm this is the
# intended treatment for the 'Ω' marker.
scnv_numeric = scnv_numeric.fillna(0.0)

scaler = MinMaxScaler()
# fit() returns the scaler object itself; fit_transform() returns the scaled data.
scaled_SCNV128 = scaler.fit_transform(scnv_numeric.to_numpy(dtype=float))
scaled_SCNV128

ValueError: ignored

In [None]:
class Encoder(nn.Module):
  """Two-layer MLP encoder: input_dim -> hidden_dim -> latent_dim.

  Args:
    input_dim: number of input features; defaults to the notebook-level
      global `embed_dim` when omitted (previous behaviour).
    hidden_dim: hidden layer width (default 256).
    latent_dim: embedding size (default 128).
  """

  def __init__(self, input_dim=None, hidden_dim=256, latent_dim=128):
    super(Encoder, self).__init__()
    if input_dim is None:
      input_dim = embed_dim
    self.encoder = nn.Sequential(
        nn.Linear(input_dim, hidden_dim),
        nn.ReLU(True),
        nn.Linear(hidden_dim, latent_dim)
    )

  def forward(self, x):
    """Return the embedding of `x` with shape (batch, latent_dim)."""
    output = self.encoder(x)
    return output

class Decoder(nn.Module):
  """Two-layer MLP decoder: latent_dim -> hidden_dim -> output_dim.

  Args:
    output_dim: number of reconstructed features; defaults to the
      notebook-level global `embed_dim` when omitted (previous behaviour).
    hidden_dim: hidden layer width (default 256).
    latent_dim: embedding size expected as input (default 128).
  """

  def __init__(self, output_dim=None, hidden_dim=256, latent_dim=128):
    super(Decoder, self).__init__()
    if output_dim is None:
      output_dim = embed_dim
    self.decoder = nn.Sequential(
        nn.Linear(latent_dim, hidden_dim),
        nn.ReLU(True),
        nn.Linear(hidden_dim, output_dim)
    )

  def forward(self, x):
    """Return the reconstruction of embedding `x`: (batch, output_dim)."""
    output = self.decoder(x)
    return output

class AutoEncoder(nn.Module):
  """Encoder followed by Decoder, sharing the same layer sizes.

  Args:
    input_dim: feature count of the data; None keeps the old behaviour of
      reading the global `embed_dim` (set from the miRNA matrix in this
      notebook), so pass it explicitly for other datasets.
    hidden_dim: hidden layer width of both halves (default 256).
    latent_dim: bottleneck size (default 128).
  """

  def __init__(self, input_dim=None, hidden_dim=256, latent_dim=128):
    super(AutoEncoder, self).__init__()
    self.encoder = Encoder(input_dim, hidden_dim, latent_dim)
    self.decoder = Decoder(input_dim, hidden_dim, latent_dim)

  def forward(self, x):
    """Encode then decode `x`; returns the reconstruction."""
    x = self.encoder(x)
    output = self.decoder(x)
    return output
    