# Import

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Read data and pre-process

In [None]:
def select_features(df, top_n=2000):
    """Keep the ``top_n`` rows of ``df`` with the largest row-wise variance.

    Args:
        df: DataFrame whose rows are ranked by ``df.var(axis=1)``.
        top_n: Maximum number of rows to keep.

    Returns:
        The selected rows of ``df``, ordered from highest to lowest variance.
        Rows whose variance is NaN rank last.
    """
    variances = df.var(axis=1).to_numpy(dtype=float, na_value=np.nan)
    # Select by position rather than by label: a label-based ``df.loc[...]``
    # lookup returns extra rows whenever the index holds duplicate labels.
    # The stable sort keeps tied rows in their original relative order.
    order = np.argsort(-variances, kind="stable")
    return df.iloc[order[:top_n]]

# Load the multi-omics tables (rename the files to match your data).
# Each table is read with its first column as the row index; the column
# labels are treated as sample identifiers below.
mrna = pd.read_csv("LUAD_FPKM.tsv", sep='\t', index_col=0)
mirna = pd.read_csv("LUAD_miRNA.tsv", sep='\t', index_col=0)
cnv = pd.read_csv("LUAD_CNV.tsv", sep='\t', index_col=0)
meth = pd.read_csv("LUAD_METHYL.tsv", sep='\t', index_col=0)

# Keep only the samples present in all four tables. Walk the mRNA columns
# instead of converting a set straight to a list, so the sample order is the
# same on every run (set iteration order of strings varies between runs).
shared = set(mrna.columns) & set(mirna.columns) & set(cnv.columns) & set(meth.columns)
sample_common = [s for s in dict.fromkeys(mrna.columns) if s in shared]
if not sample_common:
    # Fail here with a clear message rather than deep inside the scaler.
    raise ValueError("No sample is shared by all four omics tables")

# Restrict every table to the shared samples, then keep its most variable rows.
mrna = select_features(mrna[sample_common], 2000)
mirna = select_features(mirna[sample_common], 200)
cnv = select_features(cnv[sample_common], 500)
meth = select_features(meth[sample_common], 2000)

def scale_omics(df):
    """Transpose ``df`` and standardise it with a freshly fitted StandardScaler.

    Args:
        df: DataFrame to scale; it is transposed before scaling, so its
            columns become the rows of the result.

    Returns:
        A DataFrame indexed by ``df.columns`` with ``df.index`` as its columns,
        holding the scaler's output.
    """
    transposed = df.T
    standardized = StandardScaler().fit_transform(transposed)
    return pd.DataFrame(standardized, index=df.columns, columns=df.index)

# Standardise each omics table with the same helper.
mrna_scaled, mirna_scaled, cnv_scaled, meth_scaled = (
    scale_omics(table) for table in (mrna, mirna, cnv, meth)
)

# Put the four scaled tables side by side (column-wise concatenation).
scaled_tables = [mrna_scaled, mirna_scaled, cnv_scaled, meth_scaled]
df_all = pd.concat(scaled_tables, axis=1)
print(f"Shape input: {df_all.shape}")

# Autoencoder input (samples x features), cast to float32.
X = df_all.to_numpy().astype(np.float32)

# Define autoencoder model

In [None]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder: input -> 1024 -> 256 -> latent and back.

    Args:
        input_dim: Number of input features (also the output size).
        latent_dim: Size of the bottleneck produced by ``encoder``.
    """

    def __init__(self, input_dim, latent_dim=100):
        super().__init__()
        widths = [input_dim, 1024, 256, latent_dim]
        # The decoder mirrors the encoder, so it reuses the widths reversed.
        self.encoder = self._stack(widths)
        self.decoder = self._stack(widths[::-1])

    @staticmethod
    def _stack(widths):
        """Chain Linear layers over consecutive widths with ReLU in between."""
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.pop()  # the last Linear layer has no activation after it
        return nn.Sequential(*layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))

# Initialize model, optimizer, loss

In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The network width follows the number of columns of the input matrix.
latent_dim = 100
_, input_dim = X.shape

model = Autoencoder(input_dim, latent_dim)
model = model.to(device)

# Mean-squared reconstruction error, minimised with Adam.
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=5e-4)

# Train

In [None]:
# Move the whole input matrix to the training device once, up front.
X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
n_epochs = 200
batch_size = 32
n_samples = X_tensor.size(0)

losses = []
# Make sure the model is in training mode even if this cell is re-run after
# the model was switched to eval mode elsewhere.
model.train()
for epoch in range(n_epochs):
    # Shuffle on the same device as the data so indexing needs no transfer.
    perm = torch.randperm(n_samples, device=X_tensor.device)
    running_loss = 0.0
    for i in range(0, n_samples, batch_size):
        batch = X_tensor[perm[i:i + batch_size]]
        optimizer.zero_grad()
        output = model(batch)
        loss = loss_fn(output, batch)
        loss.backward()
        optimizer.step()
        # MSELoss averages within the batch; weight by the batch size so a
        # short final batch does not count as much as a full one.
        running_loss += loss.item() * batch.size(0)
    # Per-sample mean loss: comparable across batch sizes and dataset sizes,
    # unlike a plain sum of per-batch means.
    epoch_loss = running_loss / max(n_samples, 1)
    losses.append(epoch_loss)
    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch+1}, loss = {epoch_loss:.4f}")

# Plot the training curve (one point per epoch).
plt.plot(losses)
plt.title("Autoencoder Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()

# Export latent embedding for downstream clustering

In [None]:
# Switch to inference mode and encode every sample without tracking gradients.
model.eval()
with torch.no_grad():
    latent_tensor = model.encoder(X_tensor)

# Bring the embedding back to host memory as a NumPy array of shape
# (num_samples, latent_dim).
latent_emb = latent_tensor.cpu().numpy()

# Save the embedding with the row labels of the input table as its index.
latent_df = pd.DataFrame(latent_emb, index=df_all.index)
latent_df.to_csv("luad_omics_latent.csv")
print("Đã lưu file latent embedding shape:", latent_emb.shape)

# Encoder: maps a flattened input to a latent mean and log-variance

In [None]:
import torch

import torch.nn.functional as F

class Encoder(torch.nn.Module):
    """Map a flattened input to two ``latent_dim``-sized vectors.

    The two outputs come from separate linear heads on a shared 512-unit
    hidden layer and are returned as ``(z_mean, z_log_var)``.

    Args:
        latent_dim: Size of each output vector.
        input_dim: Number of features in the last dimension of the input.
            The default, ``128 * 128``, matches one flattened 128x128
            channel; a flattened (3, 128, 128) image would need
            ``3 * 128 * 128``.
    """

    def __init__(self, latent_dim=128, input_dim=128 * 128):
        super().__init__()
        self.enc1 = torch.nn.Linear(input_dim, 512)
        self.mean_x = torch.nn.Linear(512, latent_dim)
        self.logvar_x = torch.nn.Linear(512, latent_dim)

    def forward(self, inputs):
        """Return ``(z_mean, z_log_var)`` for ``inputs``.

        ``inputs`` is not reshaped here, so its last dimension must already
        equal ``input_dim``.
        """
        hidden = F.relu(self.enc1(inputs))
        z_mean = self.mean_x(hidden)
        z_log_var = self.logvar_x(hidden)
        return z_mean, z_log_var


# Decoder

In [None]:
class Decoder(torch.nn.Module):
    """Map a latent vector back to a flat output through one hidden layer.

    Args:
        latent: Size of the latent vector fed to the decoder.
        output_dim: Number of output features. The default, ``128 * 128``,
            matches one flattened 128x128 channel.
    """

    def __init__(self, latent=128, output_dim=128 * 128):
        super().__init__()
        self.dec1 = torch.nn.Linear(latent, 512)
        self.out = torch.nn.Linear(512, output_dim)

    def forward(self, z):
        """Return the flat output for latent ``z`` (no final activation)."""
        hidden = F.relu(self.dec1(z))
        return self.out(hidden)
