In [2]:
# Switch to the project's notebooks directory (path looks like a mounted Google Drive in Colab — adjust if running elsewhere).
%cd /content/drive/MyDrive/Colab\ Notebooks/COMP9417_MoA_Prediction/notebooks
!pwd
# Installs the package that provides `iterstrat`, imported further down for MultilabelStratifiedKFold.
!pip install iterative-stratification


/content/drive/MyDrive/Colab Notebooks/COMP9417_MoA_Prediction/notebooks
/content/drive/MyDrive/Colab Notebooks/COMP9417_MoA_Prediction/notebooks
Collecting iterative-stratification
  Downloading iterative_stratification-0.1.6-py3-none-any.whl (8.7 kB)
Installing collected packages: iterative-stratification
Successfully installed iterative-stratification-0.1.6


In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from matplotlib import *
from scipy.special import expit

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Kaggle boilerplate: print the path of every file under /kaggle/input.
# os.walk yields nothing when that directory does not exist, so this is a no-op outside Kaggle.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader

import matplotlib as plt
from pytorch_tabnet.multitask import TabNetMultiTaskClassifier
from pytorch_tabnet.tab_model import TabNetRegressor
from pytorch_tabnet.metrics import Metric


import sys
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold


In [5]:
class SimpleNet(nn.Module):
    """Fully connected network: two hidden ReLU layers and a linear output.

    Dropout is applied once, between the second hidden layer and the output
    layer. ``forward`` returns the raw output of the last linear layer; no
    sigmoid is applied inside the network.

    Args:
        input_size: number of input features.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        output_size: number of outputs.
        dropout: dropout probability used before the output layer.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2,
                 output_size, dropout=0.2):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map a batch of feature rows to one raw output per target."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(self.dropout(hidden))

In [6]:
class MoADataset:
    """Map-style dataset pairing each feature row with its target row.

    Both constructor arguments must expose ``.values`` (e.g. pandas
    DataFrames); the underlying arrays are stored and indexed by position.
    """

    def __init__(self, features, targets):
        self.features = features.values
        self.targets = targets.values

    def __len__(self):
        """Number of rows in the feature array."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``(features, targets)`` for row ``idx`` as float tensors."""
        row_features = self.features[idx]
        row_targets = self.targets[idx]
        return (
            torch.tensor(row_features, dtype=torch.float),
            torch.tensor(row_targets, dtype=torch.float),
        )


class TestMoADataset:
    """Map-style dataset over feature rows only (no targets).

    ``features`` must expose ``.values`` (e.g. a pandas DataFrame).
    """

    def __init__(self, features):
        self.features = features.values

    def __len__(self):
        """Number of rows in the feature array."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return row ``idx`` of the features as a float tensor."""
        row_features = self.features[idx]
        return torch.tensor(row_features, dtype=torch.float)

In [7]:
def seed_everything(seed, use_cuda=False):
    """Seed every random number generator this notebook can touch.

    Seeds Python's ``random`` module, NumPy and PyTorch, and configures cuDNN
    to prefer deterministic kernels so repeated runs are comparable.

    Args:
        seed: integer seed applied to all generators.
        use_cuda: when True, also seed the CUDA generators of all devices.
    """
    import random  # local import: `random` is not imported by the notebook's import cells

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    if use_cuda:
        # Cover every visible GPU, not only the current device.
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    # Auto-tuning may select different kernels between runs; disable it so
    # the deterministic flag above is not undermined.
    torch.backends.cudnn.benchmark = False

In [8]:
def preprocess(df):
    """Encode the categorical treatment columns and drop identifier columns.

    ``cp_dose`` is mapped ``D1 -> 0, D2 -> 1`` and ``cp_time`` is mapped
    ``24 -> 0, 48 -> 1, 72 -> 2``; values outside those mappings become NaN
    (pandas ``Series.map`` behaviour). ``cp_type`` and ``sig_id`` are removed.

    The input frame is left untouched: the encoding is applied to a copy, so
    calling this function twice on the same frame gives the same result
    instead of mapping already-encoded values to NaN.

    Args:
        df: DataFrame containing ``cp_dose``, ``cp_time``, ``cp_type`` and
            ``sig_id`` columns.

    Returns:
        A new DataFrame with the encoded columns and without the dropped ones.
    """
    # assign() returns a new frame, keeping the caller's data intact.
    encoded = df.assign(
        cp_dose=df['cp_dose'].map({'D1': 0, 'D2': 1}),
        cp_time=df['cp_time'].map({24: 0, 48: 1, 72: 2}),
    )
    return encoded.drop(columns=['cp_type', 'sig_id'])


def prepare_data(dir):
    """Load the MoA CSV files from ``dir`` and return model-ready frames.

    Reads ``train_features.csv``, ``train_targets_scored.csv``,
    ``test_features.csv`` and ``sample_submission.csv``. Training features
    and targets are joined on ``sig_id`` so their rows line up, rows whose
    ``cp_type`` is ``'ctl_vehicle'`` are removed from both the training and
    the test features, and the features are passed through ``preprocess``.

    Args:
        dir: directory containing the four CSV files.

    Returns:
        Tuple ``(X_train, Y_train, X_test, ss)``. ``Y_train`` has ``sig_id``
        removed. ``ss`` is the sample submission exactly as read, so after
        control rows are dropped ``X_test`` may have fewer rows than ``ss``.
    """
    X_train = pd.read_csv(f'{dir}/train_features.csv')
    Y_train = pd.read_csv(f'{dir}/train_targets_scored.csv')
    X_test = pd.read_csv(f'{dir}/test_features.csv')
    ss = pd.read_csv(f'{dir}/sample_submission.csv')

    # Join on sig_id, then split the merged frame back into its two column
    # groups so feature row i and target row i describe the same sample.
    train = X_train.merge(Y_train, on='sig_id')
    X_train = train.loc[:, X_train.columns]
    Y_train = train.loc[:, Y_train.columns]

    # Remove control samples (masks are True for the rows that are KEPT).
    keep_train = X_train['cp_type'] != 'ctl_vehicle'
    X_train = X_train[keep_train].reset_index(drop=True)
    Y_train = Y_train[keep_train].reset_index(drop=True)

    # The test set must be filtered with its own mask; using the training
    # mask here would silently drop the wrong test rows.
    keep_test = X_test['cp_type'] != 'ctl_vehicle'
    X_test = X_test[keep_test].reset_index(drop=True)

    X_train = preprocess(X_train)
    X_test = preprocess(X_test)

    Y_train = Y_train.drop('sig_id', axis=1)
    return X_train, Y_train, X_test, ss

In [None]:
def PCA_features(X_tra):
    """Placeholder for a PCA-based feature transform; not implemented.

    TODO: implement. Kept as a syntactically valid stub so the notebook can
    be run top to bottom; calling it fails loudly instead of silently doing
    nothing.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError('PCA_features is not implemented yet')

In [28]:
def my_log_loss(y_test, y_pred):
    """Per-column binary log loss between targets and predicted probabilities.

    Every row and every column contributes to the result, the computation is
    done in float64, and probabilities are clipped away from 0 and 1 so that
    exact 0/1 predictions yield a finite loss.

    Args:
        y_test: 2-D torch tensor of targets with shape ``(N, M)``.
        y_pred: torch tensor of predicted probabilities holding ``N * M``
            elements; it is reshaped to the shape of ``y_test``.

    Returns:
        List of ``M`` Python floats rounded to 8 decimals; entry ``m`` is the
        mean log loss of column ``m`` over the ``N`` rows.

    Raises:
        ValueError: if ``y_test`` is not 2-D or ``y_pred`` cannot be reshaped
            to match it.
    """
    y_true = y_test.cpu().detach().numpy().astype(np.float64)
    n_rows, n_cols = y_true.shape  # enforces a 2-D target array
    probs = y_pred.cpu().detach().numpy().astype(np.float64).reshape(n_rows, n_cols)

    # Keep log() finite for predictions that are exactly 0 or 1.
    eps = 1e-15
    probs = np.clip(probs, eps, 1.0 - eps)

    elementwise = y_true * np.log(probs) + (1.0 - y_true) * np.log(1.0 - probs)
    per_column = -elementwise.mean(axis=0)
    return [round(float(value), 8) for value in per_column]

In [8]:
class LogitsLogLoss(Metric):
    """Evaluation metric named ``"logits_ll"``: mean binary log loss on logits.

    ``_maximize`` is False, i.e. presumably lower values are treated as
    better by the library that consumes this metric (verify against the
    pytorch-tabnet ``Metric`` documentation).
    """

    def __init__(self):
        self._maximize = False
        self._name = "logits_ll"

    def __call__(self, y_true, y_pred):
        """Return the mean log loss of ``expit(y_pred)`` against ``y_true``.

        ``y_pred`` is passed through the sigmoid (``expit``) first; 1e-15 is
        added inside each logarithm to avoid ``log(0)``.
        """
        prob = expit(y_pred)
        negative_term = (1 - y_true) * np.log(1 - prob + 1e-15)
        positive_term = y_true * np.log(prob + 1e-15)
        return np.mean(-(negative_term + positive_term))

In [30]:
# Training hyper-parameters shared by the functions in this cell.
EPOCHS = 30  # epoch count: SimpleNet loop bound and TabNet max_epochs
BATCH_SIZE = 1024  # DataLoader batch size; also passed to TabNet's fit
LEARNING_RATE = 2e-2  # Adam learning rate for both models
WEIGHT_DECAY = 1e-5  # Adam weight decay for SimpleNet (train_tabnet hard-codes its own 1e-5)
SEED = 42  # passed to seed_everything by run_model
NUM_FOLDS = 5  # number of cross-validation folds requested by run_model


def train_fun(model, optimizer, loss_fun, train_loader, device, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is moved to ``device``, followed by one forward pass, one
    backward pass and one optimizer step.

    Args:
        model: network to train; switched to training mode here.
        optimizer: optimizer that updates ``model``'s parameters.
        loss_fun: callable ``loss_fun(outputs, targets)`` returning a tensor.
        train_loader: sized iterable of ``(inputs, targets)`` batches.
        device: device the batches are moved to.
        epoch: unused; accepted so the call signature stays stable.

    Returns:
        Mean of the per-batch loss values (sum divided by ``len(train_loader)``).
    """
    model.train()
    loss_total = 0

    for features, labels in train_loader:
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = loss_fun(model(features), labels)
        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()

    return loss_total / len(train_loader)


def validate_fun(model, loss_fun, val_loader, device, epoch):
    """Evaluate ``model`` on ``val_loader`` without updating it.

    Args:
        model: network to evaluate; switched to evaluation mode here.
        loss_fun: callable ``loss_fun(outputs, targets)`` returning a tensor.
        val_loader: sized iterable of ``(inputs, targets)`` batches.
        device: device the batches are moved to.
        epoch: unused; accepted so the call signature stays stable.

    Returns:
        Mean of the per-batch loss values (sum divided by ``len(val_loader)``).
    """
    model.eval()
    loss_total = 0

    # No gradients are needed anywhere in validation.
    with torch.no_grad():
        for features, labels in val_loader:
            features = features.to(device)
            labels = labels.to(device)
            loss_total += loss_fun(model(features), labels).item()

    return loss_total / len(val_loader)


def test_fun(model, test_loader, device):
    Y_pred_lst = []
    model.eval()

    for batch_idx, inputs in enumerate(test_loader):
        inputs = inputs.to(device)
        
        with torch.no_grad():
            outputs = model(inputs)

        Y_pred_lst.append(outputs.sigmoid().detach().cpu().numpy())

    Y_pred = np.concatenate(Y_pred_lst)

    return Y_pred


def train_simple_net(fold, X_test, train_loader, val_loader, in_size, out_size, device):
    """Train a SimpleNet for one fold and predict on the test features.

    The network is trained for ``EPOCHS`` epochs with Adam and
    ``BCEWithLogitsLoss``. Whenever the validation loss improves, the weights
    are saved to ``simple_fold_<fold + 1>.pth`` in the working directory;
    after training, that checkpoint is loaded into a fresh network which
    produces the test predictions.

    Args:
        fold: zero-based fold index, used only in the checkpoint file name.
        X_test: test features (must expose ``.values``).
        train_loader: batches used for training.
        val_loader: batches used to select the best epoch.
        in_size: number of input features.
        out_size: number of outputs.
        device: device used for training and prediction.

    Returns:
        Tuple ``(best_loss, Y_pred)``: lowest validation loss seen and the
        test predictions returned by ``test_fun``.
    """
    checkpoint_path = f'simple_fold_{fold + 1}.pth'

    model = SimpleNet(in_size, 2048, 1024, out_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    loss_fun = nn.BCEWithLogitsLoss()

    best_loss = np.inf
    for epoch in range(EPOCHS):
        train_fun(model, optimizer, loss_fun, train_loader, device, epoch)
        epoch_val_loss = validate_fun(model, loss_fun, val_loader, device, epoch)

        improved = epoch_val_loss < best_loss
        if improved:
            best_loss = epoch_val_loss
            torch.save(model.state_dict(), checkpoint_path)

    test_loader = DataLoader(TestMoADataset(X_test), batch_size=BATCH_SIZE, shuffle=False)

    # Predict with the best checkpoint rather than the last-epoch weights.
    best_model = SimpleNet(in_size, 2048, 1024, out_size).to(device)
    best_model.load_state_dict(torch.load(checkpoint_path))

    Y_pred = test_fun(best_model, test_loader, device)
    return best_loss, Y_pred

def train_tabnet(fold, X_test, trainX, trainY, valX, valY, in_size, out_size):
    """Fit a TabNetRegressor on one fold and predict on the test features.

    The model is trained with ``BCEWithLogitsLoss`` and evaluated on the
    validation split with the ``"logits_ll"`` metric, then saved under
    ``../fold_models/``.

    Args:
        fold: zero-based fold index, used only in the saved file name.
        X_test: test features (DataFrame; converted with ``to_numpy``).
        trainX, trainY: training features and targets (DataFrames).
        valX, valY: validation features and targets (DataFrames).
        in_size, out_size: unused; accepted so the call signature stays stable.

    Returns:
        Tuple ``(model.best_cost, Y_pred)`` where ``Y_pred`` is the output of
        ``model.predict`` on the test features (presumably raw logits, given
        the loss used; the caller applies the sigmoid).
    """
    model = TabNetRegressor(
        n_d=32,
        n_a=32,
        n_steps=1,
        gamma=1.3,
        lambda_sparse=0,
        optimizer_fn=torch.optim.Adam,
        optimizer_params={'lr': LEARNING_RATE, 'weight_decay': 1e-5},
        mask_type="entmax",
        verbose=10,
    )

    validation_pair = (valX.to_numpy(), valY.to_numpy())
    model.fit(
        X_train=trainX.to_numpy(),
        y_train=trainY.to_numpy(),
        eval_set=[validation_pair],
        eval_name=["val"],
        eval_metric=["logits_ll"],
        max_epochs=EPOCHS,
        patience=30,
        batch_size=BATCH_SIZE,
        virtual_batch_size=32,
        num_workers=1,
        drop_last=False,
        loss_fn=nn.BCEWithLogitsLoss(),
    )
    model.save_model(f'../fold_models/tabnet_fold_{fold + 1}.pth')

    Y_pred = model.predict(X_test.to_numpy())
    return model.best_cost, Y_pred


def run_msk_fold_cv(X_train, Y_train, X_test, num_folds, model_name, device):
    """Run multilabel-stratified k-fold CV and average the test predictions.

    For every fold a model is trained on the training split, scored on the
    validation split, and used to predict ``X_test``. The fold predictions
    are averaged; for ``'tabnet'`` the average is taken over the model's raw
    outputs and the sigmoid is applied afterwards.

    Args:
        X_train: training features (DataFrame).
        Y_train: training targets (DataFrame), one column per label.
        X_test: test features (DataFrame).
        num_folds: number of folds.
        model_name: ``'simple'`` or ``'tabnet'``.
        device: device passed to the SimpleNet training code.

    Returns:
        Array of shape ``(len(X_test), Y_train.shape[1])`` with the averaged
        predictions.

    Raises:
        ValueError: if ``model_name`` is not a supported model. Without this
            check an unknown name would return an all-zero prediction matrix
            and report a CV loss of 0.
    """
    if model_name not in ('simple', 'tabnet'):
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected 'simple' or 'tabnet'"
        )

    running_loss = 0
    # Accumulator for the predicted labels of X_test that is returned
    Y_pred = np.zeros((X_test.shape[0], Y_train.shape[1]))

    # Get indices to split into train and validation sets
    mskf = MultilabelStratifiedKFold(n_splits=num_folds, shuffle=False, random_state=None)

    for fold, (trn_idx, val_idx) in enumerate(mskf.split(X_train, Y_train)):
        # split() yields positional indices, so select rows with iloc; this
        # stays correct even if the frames do not carry a 0..n-1 index.
        fold_X_train = X_train.iloc[trn_idx, :]
        fold_Y_train = Y_train.iloc[trn_idx, :]
        fold_X_val = X_train.iloc[val_idx, :]
        fold_Y_val = Y_train.iloc[val_idx, :]

        print(f'Fold: {fold + 1}')

        if model_name == 'simple':
            # Only the SimpleNet path consumes DataLoaders.
            train_loader = DataLoader(MoADataset(fold_X_train, fold_Y_train),
                                      batch_size=BATCH_SIZE, shuffle=True)
            val_loader = DataLoader(MoADataset(fold_X_val, fold_Y_val),
                                    batch_size=BATCH_SIZE, shuffle=False)

            fold_loss, fold_Y_pred = train_simple_net(fold, X_test, train_loader,
                                                      val_loader, X_train.shape[1],
                                                      Y_train.shape[1], device)
            Y_pred += fold_Y_pred
        else:  # 'tabnet'
            fold_loss, fold_Y_pred = train_tabnet(fold, X_test,
                                                  fold_X_train, fold_Y_train,
                                                  fold_X_val, fold_Y_val,
                                                  X_train.shape[1], Y_train.shape[1])
            Y_pred += np.array(fold_Y_pred).astype('float64')

        running_loss += fold_loss

    # Take the average of all the predictions
    Y_pred /= num_folds
    if model_name == 'tabnet':
        Y_pred = expit(Y_pred)  # Apply sigmoid function

    # Overall loss score for this model (lower is better)
    cv_loss = running_loss / num_folds

    print(f'CV loss: {cv_loss:.6f}')
    return Y_pred




In [31]:
def run_model(mode):
    """Seed the RNGs, load the data and run the pipeline selected by ``mode``.

    Args:
        mode: ``'cv'`` runs stratified k-fold cross-validation with the
            TabNet model; any other value only seeds and loads the data.

    Returns:
        The averaged test predictions for ``mode == 'cv'``, otherwise None.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    use_cuda = device == 'cuda'

    seed_everything(SEED, use_cuda)

    X_train, Y_train, X_test, ss = prepare_data('../data/lish-moa')

    if mode != 'cv':
        return None

    # Pass 'simple' instead of 'tabnet' to cross-validate the SimpleNet model.
    Y_pred = run_msk_fold_cv(X_train, Y_train, X_test, NUM_FOLDS, 'tabnet', device)
    print(Y_pred)
    return Y_pred

In [32]:
# sub = run_model('cv')
# Run the cross-validation pipeline; Y_pred holds whatever run_model returns
# (the averaged test predictions for mode 'cv').
Y_pred = run_model('cv')




Fold: 1
train:  (17558, 874) (17558, 206) 
 val:  (4390, 874) (4390, 206) 
 test: (3677, 874)
Device used : cpu
epoch 0  | loss: 0.39229 | val_logits_ll: 0.04971 |  0:00:11s




epoch 10 | loss: 0.02013 | val_logits_ll: 0.01983 |  0:01:45s
epoch 20 | loss: 0.01826 | val_logits_ll: 0.02057 |  0:03:17s
Stop training because you reached max_epochs = 30 with best_epoch = 26 and best_val_logits_ll = 0.01805
Best weights from best epoch are automatically used!
Successfully saved model at tabnet_fold_1.pth.zip
Fold: 2
train:  (17559, 874) (17559, 206) 
 val:  (4389, 874) (4389, 206) 
 test: (3677, 874)
Device used : cpu
epoch 0  | loss: 0.39056 | val_logits_ll: 0.04739 |  0:00:09s




epoch 10 | loss: 0.01994 | val_logits_ll: 0.02013 |  0:01:41s
epoch 20 | loss: 0.01783 | val_logits_ll: 0.01815 |  0:03:13s
Stop training because you reached max_epochs = 30 with best_epoch = 27 and best_val_logits_ll = 0.01804
Best weights from best epoch are automatically used!
Successfully saved model at tabnet_fold_2.pth.zip
Fold: 3
train:  (17558, 874) (17558, 206) 
 val:  (4390, 874) (4390, 206) 
 test: (3677, 874)
Device used : cpu
epoch 0  | loss: 0.39185 | val_logits_ll: 0.04948 |  0:00:09s




epoch 10 | loss: 0.01977 | val_logits_ll: 0.02005 |  0:01:45s
epoch 20 | loss: 0.01789 | val_logits_ll: 0.02081 |  0:03:18s
Stop training because you reached max_epochs = 30 with best_epoch = 29 and best_val_logits_ll = 0.01783
Best weights from best epoch are automatically used!
Successfully saved model at tabnet_fold_3.pth.zip
Fold: 4
train:  (17558, 874) (17558, 206) 
 val:  (4390, 874) (4390, 206) 
 test: (3677, 874)
Device used : cpu
epoch 0  | loss: 0.39506 | val_logits_ll: 0.05143 |  0:00:09s




epoch 10 | loss: 0.02012 | val_logits_ll: 0.0198  |  0:01:44s
epoch 20 | loss: 0.01797 | val_logits_ll: 0.01914 |  0:03:20s
Stop training because you reached max_epochs = 30 with best_epoch = 25 and best_val_logits_ll = 0.0181
Best weights from best epoch are automatically used!
Successfully saved model at tabnet_fold_4.pth.zip
Fold: 5
train:  (17559, 874) (17559, 206) 
 val:  (4389, 874) (4389, 206) 
 test: (3677, 874)
Device used : cpu
epoch 0  | loss: 0.39397 | val_logits_ll: 0.04556 |  0:00:09s
epoch 10 | loss: 0.01971 | val_logits_ll: 0.0206  |  0:01:41s
epoch 20 | loss: 0.01794 | val_logits_ll: 0.02041 |  0:03:13s
Stop training because you reached max_epochs = 30 with best_epoch = 23 and best_val_logits_ll = 0.01813
Best weights from best epoch are automatically used!
Successfully saved model at tabnet_fold_5.pth.zip
CV loss: 0.018030
[[-6.39596319 -6.40720196 -6.27915697 ... -6.1568512  -5.97248392
  -5.93817139]
 [-6.99315453 -6.79087458 -6.05624523 ... -6.02879696 -5.47040272


**GIVEN A LIST OF PREDICTIONS, SHOW THE TOP MOAs**


In [89]:
def show_top_moa(preds):
    """Print the ten highest-scoring MoA labels for the first sample.

    For each prediction matrix in ``preds`` only row 0 is inspected: its
    values are sorted in descending order and the top ten are printed with
    the matching column names of the notebook-level ``Y_train`` frame.

    Args:
        preds: iterable of 2-D prediction arrays, one per model.
    """
    for n, Y_pred in enumerate(preds):
        first_row = pd.DataFrame(Y_pred).iloc[0, :]

        # Sort once; labels and scores are then read off the same ordering.
        ranked = first_row.sort_values(ascending=False)
        top_10_moa = ranked.index[0:10]
        top_10_prob = ranked.values[0:10]

        print(f"========MODEL {n}========")
        moa_names = Y_train.columns[top_10_moa]
        for i, (moa, prob) in enumerate(zip(moa_names, top_10_prob)):
            print(i, "-->", round(prob, 5), "-->", moa)
        print("\n")

# Reload the data at notebook level: show_top_moa reads the global Y_train
# for its column names, and later cells use X_train.
X_train, Y_train, X_test, ss = prepare_data('../data/lish-moa')
# Load the saved fold-1 TabNet model and predict on the test features.
# NOTE(review): absolute Drive path, while train_tabnet saves to the relative
# '../fold_models/' directory — consider sharing one configurable location.
model1 = TabNetRegressor()
model1.load_model('/content/drive/MyDrive/Colab Notebooks/COMP9417_MoA_Prediction/fold_models/tabnet_fold_1.pth.zip')
Y_pred1 = model1.predict(X_test.to_numpy())

# Same for the fold-2 model.
model2 = TabNetRegressor()
model2.load_model('/content/drive/MyDrive/Colab Notebooks/COMP9417_MoA_Prediction/fold_models/tabnet_fold_2.pth.zip')
Y_pred2 = model2.predict(X_test.to_numpy())

# Compare the top MoAs each model assigns to the first test sample.
show_top_moa([Y_pred1, Y_pred2])



Device used : cpu
Device used : cpu
Device used : cpu
Device used : cpu
0 --> 0.03694 --> cyclooxygenase_inhibitor
1 --> 0.0273 --> adrenergic_receptor_antagonist
2 --> 0.02559 --> dna_inhibitor
3 --> 0.02466 --> dopamine_receptor_antagonist
4 --> 0.02435 --> serotonin_receptor_antagonist
5 --> 0.02393 --> glutamate_receptor_antagonist
6 --> 0.02181 --> adrenergic_receptor_agonist
7 --> 0.02084 --> acetylcholine_receptor_antagonist
8 --> 0.01913 --> phosphodiesterase_inhibitor
9 --> 0.0181 --> acetylcholine_receptor_agonist


0 --> 0.03104 --> cyclooxygenase_inhibitor
1 --> 0.0305 --> adrenergic_receptor_antagonist
2 --> 0.02923 --> dna_inhibitor
3 --> 0.02843 --> acetylcholine_receptor_antagonist
4 --> 0.02744 --> dopamine_receptor_antagonist
5 --> 0.02626 --> glutamate_receptor_antagonist
6 --> 0.02187 --> serotonin_receptor_antagonist
7 --> 0.02072 --> phosphodiesterase_inhibitor
8 --> 0.02007 --> serotonin_receptor_agonist
9 --> 0.02001 --> histamine_receptor_antagonist




In [90]:
from sklearn.decomposition import PCA
# A float n_components asks PCA to keep enough components to explain that
# fraction of the variance (here 95%).
# NOTE(review): this PCA on the full feature frame is fitted but not used in
# the cells shown here.
pca = PCA(.95)
pca.fit(X_train)
# Positional column slices: the displayed output shows 2:774 selecting the
# g-0 .. g-771 columns; 774: is presumably the remaining (cell) feature
# block — verify against the column names.
X_train_gene = X_train.iloc[:,2:774]
X_train_cell = X_train.iloc[:,774:]
pca_gene = PCA(.95)
pca_cell = PCA(.95)

# Fit one PCA per feature group and report how many components each keeps.
pca_gene.fit(X_train_gene)
pca_cell.fit(X_train_cell)
print(pca_gene.n_components_)
print(pca_cell.n_components_)
# NOTE(review): a bare frame as the last expression renders the whole table; .head() would keep the output small.
X_train_gene

507
44


Unnamed: 0,g-0,g-1,g-2,g-3,g-4,g-5,g-6,g-7,g-8,g-9,g-10,g-11,g-12,g-13,g-14,g-15,g-16,g-17,g-18,g-19,g-20,g-21,g-22,g-23,g-24,g-25,g-26,g-27,g-28,g-29,g-30,g-31,g-32,g-33,g-34,g-35,g-36,g-37,g-38,g-39,...,g-732,g-733,g-734,g-735,g-736,g-737,g-738,g-739,g-740,g-741,g-742,g-743,g-744,g-745,g-746,g-747,g-748,g-749,g-750,g-751,g-752,g-753,g-754,g-755,g-756,g-757,g-758,g-759,g-760,g-761,g-762,g-763,g-764,g-765,g-766,g-767,g-768,g-769,g-770,g-771
0,1.0620,0.5577,-0.2479,-0.6208,-0.1944,-1.0120,-1.0220,-0.0326,0.5548,-0.0921,1.1830,0.1530,0.5574,-0.4015,0.1789,-0.6528,-0.7969,0.6342,0.1778,-0.3694,-0.5688,-1.1360,-1.1880,0.6940,0.4393,0.2664,0.1907,0.1628,-0.2853,0.5819,0.2934,-0.5584,-0.0916,-0.3010,-0.1537,0.2198,0.2965,-0.5055,-0.5119,-0.2162,...,-0.9041,-0.8089,0.6598,-0.4772,0.1289,-0.0297,0.4564,-0.1447,0.4683,-0.1722,0.9998,1.2860,-0.6166,0.0963,1.6340,-0.8744,0.4931,-0.0283,-0.3110,-0.1117,-0.4528,1.3240,-0.0503,-1.2750,-0.1750,-0.6972,-0.2720,0.2931,1.5280,0.3453,-0.5055,-0.3167,1.0930,0.0084,0.8611,-0.5582,0.3008,1.6490,0.2968,-0.0224
1,0.0743,0.4087,0.2991,0.0604,1.0190,0.5207,0.2341,0.3372,-0.4047,0.8507,-1.1520,-0.4201,-0.0958,0.4590,0.0803,0.2250,0.5293,0.2839,-0.3494,0.2883,0.9449,-0.1646,-0.2657,-0.3372,0.3135,-0.4316,0.4773,0.2075,-0.4216,-0.1161,-0.0499,-0.2627,0.9959,-0.2483,0.2655,-0.2102,0.1656,0.5300,-0.2568,-0.0455,...,3.1050,0.0204,-0.0961,0.2919,-0.4584,0.0556,0.9844,-0.6653,-0.3092,-0.1507,-0.9481,-0.1966,0.8554,0.5839,-1.6840,0.0648,0.5845,0.0891,-0.5025,-0.1047,0.3764,-1.4800,-0.2947,0.2745,-0.5234,0.1575,-0.1214,0.0476,0.1657,1.0310,-0.5338,0.0224,-0.4831,0.2128,-0.6999,-0.1214,-0.1626,-0.3340,-0.3289,-0.2718
2,0.6280,0.5817,1.5540,-0.0764,-0.0323,1.2390,0.1715,0.2155,0.0065,1.2300,-0.4797,-0.5631,-0.0366,-1.8300,0.6057,-0.3278,0.6042,-0.3075,-0.1147,-0.0570,-0.0799,-0.8181,-1.5320,0.2307,0.4901,0.4780,-1.3970,4.6240,-0.0437,1.2870,-1.8530,0.6069,0.4290,0.1783,0.0018,-1.1800,0.1256,-0.1219,5.4470,1.0310,...,-0.4954,-0.1840,-1.1110,-0.5649,-0.2617,2.9120,2.7720,0.1523,-1.1230,-0.5340,-0.7937,0.3457,-0.6136,-0.0706,-0.7474,0.1795,-0.4674,-0.1552,0.6338,0.5130,-0.3625,0.0873,0.2689,-0.2174,0.7069,-0.6456,0.6822,0.2237,-0.3601,1.2780,2.5770,0.2356,1.3230,-1.3730,-0.2682,0.8427,0.5797,0.3143,0.8133,0.7923
3,-0.5138,-0.2491,-0.2656,0.5288,4.0620,-0.8095,-1.9590,0.1792,-0.1321,-1.0600,-0.8269,-0.3584,-0.8511,-0.5844,-2.5690,0.8183,-0.0532,-0.8554,0.1160,-2.3520,2.1200,-1.1580,-0.7191,-0.8004,-1.4670,-0.0107,-0.8995,0.2406,-0.2479,-1.0890,-0.7575,0.0881,-2.7370,0.8745,0.5787,-1.6740,-1.6720,-1.2690,3.0900,-0.3814,...,0.2239,0.4428,-0.2822,-0.8175,1.4930,-0.0711,-1.6250,-0.4317,-0.0239,0.5046,-0.8061,0.9736,1.2540,2.0730,0.6604,-0.4758,-0.1766,-0.8959,0.6560,-0.0875,0.1082,-0.7158,0.0712,-0.1939,0.4286,-1.0220,0.9017,1.1750,-0.5760,0.7801,-0.1292,3.4310,1.2720,-0.4733,-2.0560,0.5699,0.1996,0.4374,0.1588,-0.0343
4,-0.3254,-0.4009,0.9700,0.6919,1.4180,-0.8244,-0.2800,-0.1498,-0.8789,0.8630,-0.2219,-0.5121,-0.9577,1.1750,0.2042,0.1970,0.1244,-1.7090,-0.3543,-0.5160,-0.3330,-0.2685,0.7649,0.2057,1.3720,0.6835,0.8056,-0.3754,-1.2090,0.2965,-0.0712,0.6389,0.6674,-0.0783,1.1740,-0.7110,-1.4470,1.0620,0.7888,-0.0848,...,-0.2094,1.1270,0.1874,-0.0465,1.0560,-0.6324,-0.3651,0.9485,-0.6745,0.0479,-1.0470,-1.2440,-0.7675,0.2327,0.3698,1.4900,0.3632,1.4380,-0.5047,-0.2092,-0.4386,-1.1780,0.8236,1.0540,0.6745,-0.6354,-0.6745,-1.3780,0.7521,-0.9456,-0.6904,2.0540,-0.3131,-0.0809,0.3910,1.7660,-1.0020,-0.7534,0.5000,-0.6269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21943,0.1608,-1.0500,0.2551,-0.2239,-0.2431,0.4256,-0.1166,-0.1777,-0.7480,0.1368,0.5493,-0.5083,0.4112,0.1181,0.7861,-0.1877,0.2476,0.1956,0.0273,0.1182,-0.5037,-0.5939,0.4825,-0.5187,0.1618,-0.2253,0.3153,0.4839,0.0520,1.3850,0.6502,-0.4198,0.2723,-0.3629,1.0820,-0.0352,0.1911,1.0900,0.0008,0.5474,...,2.2970,0.9437,-0.8612,0.2207,1.2770,0.3657,-0.0851,0.4182,-0.1292,0.4951,0.1072,-0.0222,0.1819,-0.3353,0.2156,-0.1052,0.0857,-0.0398,0.0452,-0.0881,-0.3713,-0.0897,0.1063,0.4550,0.0000,0.0829,-0.1449,0.8754,0.2555,0.4796,-0.0481,-0.0098,-0.0345,-0.1156,0.3262,0.5243,-0.0003,0.1715,0.8418,-0.5982
21944,0.1394,-0.0636,-0.1112,-0.5080,-0.4713,0.7201,0.5773,0.3055,-0.4726,0.1269,0.2531,0.1730,-0.4532,-1.0790,0.2474,-0.4550,0.3588,0.1600,-0.7362,-0.1103,0.8550,-0.4139,0.5541,0.2310,-0.5573,-0.4397,-0.9260,-0.2424,-0.6686,0.2326,0.6456,0.0136,-0.5141,-0.6320,0.7166,-0.1736,0.3686,-0.1565,-0.7362,0.1318,...,0.3982,-0.1537,-0.3957,1.0370,0.6222,1.5470,-0.0624,0.1023,0.2465,0.6802,0.5289,0.1255,-0.2395,-0.1544,0.6257,0.3508,-0.7258,-0.6351,-1.0200,0.1589,-0.3008,0.0423,0.3369,-0.6222,0.3070,0.3007,0.1275,-0.0176,0.8832,1.1980,0.7790,0.5393,0.4112,-0.5059,0.0240,-0.2297,0.7221,0.5099,-0.1423,0.3806
21945,-1.3260,0.3478,-0.3743,0.9905,-0.7178,0.6621,-0.2252,-0.5565,0.5112,0.6727,-0.1851,2.8650,-0.2140,-0.6153,0.8362,0.5584,-0.2589,0.1292,0.0148,0.0949,-0.2182,-0.9235,0.0749,-1.5910,-0.8359,-0.9217,0.3013,0.1716,0.0880,0.1842,0.1835,0.5436,-0.0533,-0.0491,0.9543,0.4626,0.0819,0.1586,1.2050,0.0384,...,0.9713,0.5073,0.2552,0.2894,-0.3094,2.9370,-0.7162,0.4444,-0.4089,-0.2635,0.2614,0.8788,-0.8276,-2.4130,0.1505,0.3372,-2.5970,0.2835,-0.7155,-0.2361,-0.3432,0.6308,-0.2580,2.7150,-0.4739,0.1783,0.4858,-0.2523,-0.5757,0.5157,-0.0858,0.3606,-0.0248,0.0672,-0.5901,-0.1022,0.5247,0.5438,-0.1875,-0.4751
21946,0.6660,0.2324,0.4392,0.2044,0.8531,-0.0343,0.0323,0.0463,0.4299,-0.7985,0.5742,0.1421,2.2700,0.2046,0.5363,-1.7330,0.1450,0.6097,0.2024,0.9865,-0.7805,0.9608,0.3440,2.7650,0.4925,0.6698,0.2374,-0.3372,0.8771,-2.6560,-0.2000,-0.2043,0.6797,-0.0248,-0.0927,1.8480,-2.7180,-5.3160,-1.0750,-0.1437,...,-0.4677,-0.5125,0.6954,-2.7690,0.7210,0.3261,-1.3160,-0.9186,1.3630,-0.9480,-0.2286,0.7634,1.0010,1.9820,-0.3600,0.1271,0.8032,-1.9900,0.7946,-1.0310,0.9842,2.2030,-0.3180,0.5809,0.0158,0.8857,-2.3030,2.4360,0.4753,-5.5030,-0.1410,1.9590,0.8224,1.2500,-3.1930,-2.8720,0.1794,0.3109,-0.3491,-0.4741


In [60]:
# Project each feature group onto its fitted principal components.
# NOTE(review): both names are rebound from the raw column slices to the
# transformed arrays, so this cell is not idempotent — re-running it feeds
# already-transformed data back into transform().
X_train_gene = pca_gene.transform(X_train_gene)
X_train_cell = pca_cell.transform(X_train_cell)

In [62]:
# Wrap the transformed gene features in a DataFrame so they render as a table.
pd.DataFrame(X_train_gene)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506
0,-8.801571,-0.110224,-4.775375,5.740293,-5.092321,0.079119,-4.581671,4.569360,-2.220581,3.229858,0.307681,0.323891,-0.362956,-1.690279,0.316105,0.071146,0.944297,-0.407727,-0.141816,-0.469436,1.271800,0.730119,-1.900868,0.666005,0.134983,-0.905064,0.390108,-1.054379,1.546574,0.756859,0.588082,-1.404761,0.619890,0.948596,-1.015201,-0.158795,-0.153142,-1.309409,1.025162,0.579594,...,0.265974,0.118413,0.647886,-0.229335,-0.126353,0.041571,-0.007788,-0.073045,0.741075,0.169784,0.851208,0.322156,-0.288170,-0.535408,0.047394,0.275840,-0.161841,0.022631,-0.130706,0.307234,-0.410784,-0.161836,-0.602962,-0.318735,-0.171522,1.062897,0.487265,0.191254,-0.123982,0.391751,-0.489211,0.362042,-0.510727,0.343212,0.013989,-0.769580,0.209346,0.966534,-0.161289,0.790775
1,-7.095146,3.194623,8.760277,2.028411,4.171620,-2.315913,4.937990,-0.240936,-0.016856,-1.382512,1.328528,-1.857137,-0.720878,0.432118,1.164496,-3.918499,-1.544309,-0.586394,-1.871653,-0.178097,-0.107885,2.174996,-0.943342,-0.052661,0.668574,1.746645,-1.360129,-1.672614,-0.559384,1.917088,1.632861,-0.470237,-1.008136,0.198623,-0.511623,-1.066452,0.815428,1.990728,0.575184,-1.408280,...,0.031809,-0.104867,-0.374793,0.441283,0.106275,-0.449777,-0.921877,-0.495742,0.233825,-0.231076,0.988574,0.114282,0.109234,0.122619,0.009006,0.564856,-0.173643,-0.275468,0.040274,-0.195402,0.425928,0.104555,-0.252082,0.527755,-0.149249,0.378336,-0.837119,0.218998,0.448931,-0.306303,0.014845,-0.180356,0.643898,-0.585759,-0.077638,-0.376561,-0.061955,0.621070,-0.632337,-0.301287
2,-2.293737,2.378255,0.825913,-7.014346,0.025333,2.366353,2.304250,-0.976516,1.735624,4.224225,-2.158271,-0.055494,0.198617,-1.727841,-2.328875,1.187965,-0.782494,-2.462820,0.222689,0.001431,-2.963629,0.652963,-1.758197,-0.090843,1.699949,0.864386,1.433050,2.657583,-0.007202,-2.210904,1.832152,-1.227357,-0.325539,2.128810,-1.484173,2.076480,1.604306,2.055541,0.425812,1.595687,...,-0.627921,-0.689151,-0.909448,-0.035729,-0.413294,0.060713,0.856852,0.491090,0.437650,1.686688,-0.283537,-0.131664,0.603236,0.121263,0.144356,0.633330,0.141849,-0.679154,-0.094202,0.820242,1.102149,-0.028880,0.139637,-0.451188,-0.385231,-0.267178,0.266668,-0.616806,-0.790245,0.156064,0.281996,0.295810,-0.297578,-0.829559,-1.265369,-0.449682,-0.790321,-1.111710,-0.096573,-0.381175
3,8.624553,-4.658414,0.496497,-11.856244,-3.680356,10.624315,2.449112,1.109058,-1.620285,-6.008610,-0.306941,-0.567916,-0.749717,1.309068,-3.210693,3.023685,1.542434,-1.804737,-2.311504,-1.054502,-3.242078,1.932085,-1.442209,2.400794,2.696716,1.623712,2.804409,1.047036,-3.035882,3.726886,0.869491,2.350723,-2.592513,-2.798051,1.833634,1.455737,2.634185,-1.511189,-0.367191,0.487433,...,0.252582,0.725853,0.652856,-0.012638,-0.135691,-0.306216,-0.073453,-0.583809,0.859315,0.251674,-0.247981,0.783725,-0.238848,-0.261377,-0.280293,-0.185231,0.040642,-0.433780,0.977585,-0.783082,-0.695794,-0.478376,0.222003,-1.343941,0.282264,-0.346528,0.682415,-0.188685,0.271984,0.641831,0.234229,0.475770,0.556034,-0.264327,-0.389666,-0.290892,0.572233,0.215685,0.170257,0.099028
4,-7.774184,0.256054,9.275677,-5.628110,0.018114,2.822920,-0.889123,3.895648,5.885709,-6.523851,2.504682,-1.223599,0.899444,0.778259,-3.304558,2.330241,2.021165,2.888224,-1.812681,2.521775,-1.505429,1.002254,1.790367,-0.650507,-0.951464,0.879710,-0.511380,-1.419433,-1.126345,1.917843,-0.044108,-1.567449,-1.158376,-0.152953,1.206618,0.212090,0.072425,0.054314,0.413320,-2.127177,...,0.691422,-0.585895,0.523041,-0.765447,-0.356290,1.118297,-0.211937,-0.356230,1.185586,0.386421,-0.135937,0.212025,-0.325168,-0.873402,0.488655,-0.268066,0.875568,-0.715745,-0.858075,-0.124679,1.772993,0.345891,0.430650,0.227191,-0.606646,-0.246695,0.447244,0.261642,-0.686421,0.430584,-0.308125,-0.445838,0.066628,0.008191,0.452233,-0.251486,0.220012,0.796417,-0.511540,-0.812453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21943,-7.875455,0.723755,-0.155925,-2.179285,0.118749,-2.111379,-1.085439,-0.528821,1.485256,1.921900,-2.499822,-2.938257,-0.647248,-0.330328,1.743401,2.675442,1.200589,-1.458456,-0.827006,-0.301593,-0.432503,-0.993369,-1.062862,-1.921968,1.982247,0.354128,-1.209327,-0.063234,-0.447565,0.061528,-0.724729,-0.931111,-0.763327,0.472862,0.081578,-0.317295,-1.743068,-0.125983,0.559531,-0.018919,...,-0.124289,-0.636147,-0.723936,0.378606,-0.399862,0.317501,0.327223,-1.166478,-0.565330,-0.504360,-0.271616,-0.456721,-0.713237,-0.192506,0.463110,0.117609,0.051207,0.161163,0.369689,0.207449,-0.111918,0.370328,-0.007971,-0.243961,-0.286126,0.220779,0.162940,-0.071202,-0.375178,-0.596120,-0.215212,0.034915,-0.167432,0.059452,-0.127701,0.085780,0.221739,-0.031740,0.179057,-0.619595
21944,-6.627655,3.016185,0.297970,-3.814237,-0.021969,0.858877,1.397244,-0.375828,-2.474766,1.954896,-1.083019,1.680076,0.459352,-0.573916,-0.039044,0.587736,-0.963713,-0.834558,1.686916,-2.244586,-0.291151,0.938181,-0.929700,-2.504637,2.331897,-0.327388,-0.454023,-0.100767,0.735067,-1.181559,0.772384,-2.041961,1.698360,0.034925,-2.798430,0.636190,1.203959,-0.552302,-0.204377,0.711177,...,0.174703,1.015460,-0.450574,0.605199,-0.909255,0.746850,0.129588,0.009204,-0.121813,-0.169597,0.422142,-0.464049,0.314208,0.255886,1.006835,0.924013,-0.773922,0.196372,-0.031181,0.751717,0.465591,0.422170,0.000755,-0.888206,-0.488318,0.200326,-0.182423,0.435035,0.379685,0.016121,-0.540082,-0.204855,-0.127511,0.041390,-0.272063,0.086296,0.683474,0.180253,0.595378,-0.135514
21945,-5.538995,-0.282439,-0.711410,0.316338,-4.518251,-0.363144,-1.059758,-3.561267,1.077187,-1.673326,-0.441722,-0.409452,0.101489,1.301627,-0.609844,0.325402,-1.338878,-2.010184,0.954911,-0.512790,1.885170,0.146592,1.212375,-1.409873,-1.469146,-0.624154,0.033915,0.205869,0.748143,-0.324220,0.469367,-1.371387,-1.019811,2.348036,0.690983,1.581205,-2.063673,-0.227202,-1.829658,1.297843,...,0.098729,-0.184355,0.209866,-0.339298,0.057476,0.194303,0.293188,-0.689099,0.169543,0.208773,-0.039863,-1.295434,0.698013,-0.061012,0.063175,0.828972,-0.278470,-0.281644,0.483517,-0.784411,-0.884565,0.990227,0.085391,0.407197,0.747233,-0.115955,0.255860,-0.639364,-0.443932,-0.045178,0.143510,1.061799,-0.109451,-0.615020,-0.015360,0.600484,0.161916,0.580240,-0.484387,0.760575
21946,6.981182,-20.714410,-5.926650,11.427097,6.771255,0.102444,-1.232914,7.688153,-6.735363,-2.883369,10.710505,1.392055,-2.769971,0.063029,-1.415436,0.269760,-9.734261,-2.511549,-1.350957,4.277634,1.511877,-3.936868,0.206103,-1.962527,2.209678,-1.812902,2.357621,0.054319,5.646960,0.070553,1.232973,-0.501844,2.103874,1.141835,-0.119142,0.269256,-1.105391,0.704347,-0.477942,-1.197448,...,-0.318858,-0.354088,-0.124584,0.105376,-1.068188,0.320869,0.746915,-0.144875,0.179715,-0.378604,-0.930933,0.377684,0.188588,0.349424,0.023615,0.291557,-0.816022,-0.792951,-0.141118,-0.839540,-0.217833,-0.058198,-0.687026,-0.219666,0.117651,0.541395,-0.607197,0.346902,-0.222800,-0.865174,0.243457,0.089208,0.714271,-0.600403,0.221068,-0.646843,0.333925,1.019278,0.752646,-0.419203
