# Multilayer Perceptron

---
## General Imports

In [1]:
import torch
import numpy as np
import torch.nn as nn
import pandas as pd

In [4]:
df_HCC_tr = pd.read_csv("raw_data/HCC1806_SmartS_Filtered_Normalised_3000_Data_train.txt", delimiter="\ ",engine='python',index_col=0)
df_MCF_tr = pd.read_csv("raw_data/MCF7_SmartS_Filtered_Normalised_3000_Data_train.txt", delimiter="\ ",engine='python',index_col=0)
df_HCC_tr = df_HCC_tr.T
df_MCF_tr = df_MCF_tr.T

In [5]:
df_HCC_tr.head()

Unnamed: 0,"""DDIT4""","""ANGPTL4""","""CALML5""","""KRT14""","""CCNB1""","""IGFBP3""","""AKR1C2""","""KRT6A""","""NDRG1""","""KRT4""",...,"""MST1R""","""ZYG11A""","""NRG1""","""RBMS3""","""VCPIP1""","""LINC02693""","""OR8B9P""","""NEAT1""","""ZDHHC23""","""ODAD2"""
"""output.STAR.PCRPlate1G12_Normoxia_S32_Aligned.sortedByCoord.out.bam""",0,48,0,321,298,82,6250,634,0,0,...,78,10,136,0,0,29,0,29,0,0
"""output.STAR.PCRPlate1G1_Hypoxia_S102_Aligned.sortedByCoord.out.bam""",8739,2101,55,96,1824,1938,62,0,522,413,...,279,0,264,0,134,68,0,213,0,0
"""output.STAR.PCRPlate1G2_Hypoxia_S2_Aligned.sortedByCoord.out.bam""",13098,14032,0,0,1616,247,430,907,348,0,...,311,0,38,0,0,0,0,92,0,0
"""output.STAR.PCRPlate1G3_Hypoxia_S7_Aligned.sortedByCoord.out.bam""",2880,356,0,6211,3,3430,79,1953,592,176,...,125,0,16,0,4,1,0,1,0,0
"""output.STAR.PCRPlate1G4_Hypoxia_S107_Aligned.sortedByCoord.out.bam""",7777,5661,4383,0,145,4618,246,85,206,0,...,268,1,25,0,0,0,0,128,0,0


---
## MLP Model Definition

In [50]:
class MLP(nn.Module):
    def __init__(self, D_in, D_out, H):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            nn.Linear(H, D_out),
            nn.ReLU(),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

In [43]:
def Train(X, y, H, eta, epochs):
    D_in= X.shape[1]
    D_out = y.shape[1]
    model = MLP(D_in, D_out, H)
    optimizer = torch.optim.SGD(model.parameters(), lr=eta)
    for i in range(epochs):
        loss = torch.mean((y-model(X))**2)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    return model

---
## Temporary

In [13]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [14]:
#Metadata Files
filepath_HCC = "raw_data/HCC1806_SmartS_MetaData.tsv"
filepath_MCF = "raw_data/MCF7_SmartS_MetaData.tsv"
df_meta_HCC = pd.read_csv(filepath_HCC,delimiter="\t",engine='python',index_col=0)
df_meta_MCF = pd.read_csv(filepath_MCF,delimiter="\t",engine='python',index_col=0)

#Filtered Files
df_HCC_s_f = pd.read_csv("raw_data/HCC1806_SmartS_Filtered_Data.txt", delimiter="\ ",engine='python',index_col=0)
df_MCF_s_f = pd.read_csv("raw_data/MCF7_SmartS_Filtered_Data.txt", delimiter="\ ",engine='python',index_col=0)

#Transposition
df_HCC_F = df_HCC_s_f.T
df_MCF_F = df_MCF_s_f.T

In [16]:
steps = [('scaler', StandardScaler()), ('normalizer', MinMaxScaler())]
pipeline = Pipeline(steps)
df_HCC_F_N = pd.DataFrame(pipeline.fit_transform(df_HCC_F), columns=df_HCC_F.columns, index=df_HCC_F.index)
df_MCF_F_N = pd.DataFrame(pipeline.fit_transform(df_MCF_F), columns=df_MCF_F.columns, index=df_MCF_F.index)

In [17]:
def MainCols(df,n):
    #Given Dataframe df and number n, returns the first n columns of df in term of nonzero elements
    non_zero_counts = pd.DataFrame((df != 0).sum(axis=0), columns = ["Counts"]).T
    sorted_df = non_zero_counts.sort_values(by = 'Counts', axis = 1, ascending = False)
    columns = (sorted_df.iloc[:,0:n]).columns
    filtered = df.loc[:, df.columns.isin(columns)]
    return filtered
df_HCC = MainCols(df_HCC_F_N, 3000)
df_MCF = MainCols(df_MCF_F_N, 3000)

In [18]:
def split_train_test(data, test_ratio):
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]
df_HCC_train , df_HCC_test = split_train_test(df_HCC, 0.2)
df_MCF_train , df_MCF_test = split_train_test(df_MCF, 0.2)

In [19]:
def Label(df, meta):
    Names = [cell[1:-1] for cell in df.index]
    Hypo = [meta.loc[i,"Condition"] for i in Names]
    OH_Hypo = [1 if x=="Normo" else 0 for x in Hypo]
    result = pd.DataFrame(OH_Hypo, columns=["Condition"], index=df.index)
    return result
Y_HCC_train = Label(df_HCC_train, df_meta_HCC)
Y_HCC_test = Label(df_HCC_test, df_meta_HCC)
Y_MCF_train = Label(df_MCF_train, df_meta_MCF)
Y_MCF_test = Label(df_MCF_test, df_meta_MCF)

---
## Torch Tensor Conversion

In [29]:
HCC_tr_tensor = torch.from_numpy(df_HCC_train.values).to(torch.float32)
MCF_tr_tensor = torch.from_numpy(df_MCF_train.values).to(torch.float32)

Y_HCC_tr_tensor = torch.from_numpy(Y_HCC_train.values).to(torch.float32)
Y_MCF_tr_tensor = torch.from_numpy(Y_MCF_train.values).to(torch.float32)

HCC_ts_tensor = torch.from_numpy(df_HCC_test.values).to(torch.float32)
MCF_ts_tensor = torch.from_numpy(df_MCF_test.values).to(torch.float32)

Y_HCC_ts_tensor = torch.from_numpy(Y_HCC_test.values).to(torch.float32)
Y_MCF_ts_tensor = torch.from_numpy(Y_MCF_test.values).to(torch.float32)


---
## Training

In [51]:
N, D_in = HCC_tr_tensor.shape
D_out = 1
eta, epochs = 1e-3, 5000
model_HCC = Train(HCC_tr_tensor, Y_HCC_tr_tensor, 100, eta, epochs)

In [53]:
N, D_in = MCF_tr_tensor.shape
D_out = 1
eta, epochs = 1e-3, 5000
model_MCF = Train(MCF_tr_tensor, Y_MCF_tr_tensor, 100, eta, epochs)

---
## Model Evaluation

In [54]:
def Error(X,Y, model):
    return torch.mean((Y-model(X))**2).item()

print("Error HCC:", Error(HCC_ts_tensor,Y_HCC_ts_tensor, model_HCC)*100, "%")
print("Error MCF:", Error(MCF_ts_tensor,Y_MCF_ts_tensor, model_MCF)*100, "%")

Error HCC: 7.184535264968872 %
Error MCF: 0.0 %
