In [1]:
import os

import pandas as pd
import sklearn
import sklearn.metrics  # explicit: `import sklearn` alone does not guarantee the submodule is loaded
import xgboost as xgb

from datasets import OrganoidDataset
# Build the organoid dataset with device="cpu", then unpack its train and
# validation splits into (features, targets) pairs.
data = OrganoidDataset(device="cpu")

(X_train, y_train), (X_val, y_val) = data.train, data.val

In [2]:
# Feature matrices wrapped as DataFrames for XGBoost.
X_train_xgb = pd.DataFrame(X_train)
X_val_xgb = pd.DataFrame(X_val)

# Targets: take column 1 of y, cast to int and subtract 1.
# NOTE(review): presumably this turns 1-based class ids into 0-based labels —
# confirm against OrganoidDataset.
y_train_xgb = pd.DataFrame(y_train)[1].astype('int') - 1
y_val_xgb = pd.DataFrame(y_val)[1].astype('int') - 1

In [3]:
# Baseline: gradient-boosted trees trained directly on the raw features.
# The target is a class label, so use the classifier wrapper instead of
# XGBRegressor with a hand-set multi-class objective. XGBClassifier infers the
# number of classes from y_train_xgb and selects the multi-class objective
# itself; predict() returns the predicted class label.
# NOTE(review): tree_method="gpu_hist" requires a CUDA device and is
# deprecated in XGBoost >= 2.0 (tree_method="hist", device="cuda") — confirm
# the installed version before changing it.
reg = xgb.XGBClassifier(tree_method="gpu_hist")
reg.fit(X_train_xgb, y_train_xgb)
y_pred = reg.predict(X_val_xgb)
print("Accuracy score", sklearn.metrics.accuracy_score(y_val_xgb, y_pred))

Accuracy score 0.4340263118616602


In [4]:
# Directory holding the BetaVAE grid-search runs. It defaults to the original
# absolute location; set CYTOF_BETAVAE_GRID_DIR to run the notebook against a
# different checkout without editing the cell.
_BETAVAE_GRID_DIR = os.environ.get(
    "CYTOF_BETAVAE_GRID_DIR",
    "/data/PycharmProjects/cytof_benchmark/logs/BetaVAE/grid",
)

# One checkpoint per latent size (2, 5, 10), all taken from run_9.
model_2_path = f"{_BETAVAE_GRID_DIR}/latent2/run_9/model.pth"
model_5_path = f"{_BETAVAE_GRID_DIR}/latent5/run_9/model.pth"
model_10_path = f"{_BETAVAE_GRID_DIR}/latent10/run_9/model.pth"

In [5]:
import ml_collections
from ml_collections import config_dict
from configs import beta_vae
# Base BetaVAE configuration, as returned by the project's configs.beta_vae module.
config_2 = beta_vae.get_config()

In [6]:
# Shared hyper-parameters are set on the base config first, so the configs
# built from it afterwards start with the same values.
config_2.batch_size = 16384
config_2.epochs = 3000
config_2.learning_rate = 0.005
config_2.hidden_dims = (128,) * 5

# New ConfigDicts initialised from config_2; only latent_dim is overridden.
# NOTE(review): config_2 keeps whatever latent_dim get_config() returned —
# presumably 2; confirm against configs.beta_vae.
config_5, config_10 = (config_dict.ConfigDict(config_2) for _ in range(2))
config_5.latent_dim, config_10.latent_dim = 5, 10

In [7]:
from models.beta_vae import BetaVAE
import torch
# One BetaVAE per latent size. The models are only used for inference in this
# notebook, so each is put in eval mode up front (eval() returns the module);
# this keeps any dropout / batch-norm layers from altering the latents.
model_2, model_5, model_10 = (
    BetaVAE(cfg).eval() for cfg in (config_2, config_5, config_10)
)

# map_location='cpu' lets checkpoints that were saved from a GPU load on a
# machine without one.
# NOTE(review): assumes the models stay on the CPU, matching the
# device='cpu' dataset — confirm.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model_2.load_state_dict(torch.load(model_2_path, map_location='cpu'))
model_5.load_state_dict(torch.load(model_5_path, map_location='cpu'))
model_10.load_state_dict(torch.load(model_10_path, map_location='cpu'))

<All keys matched successfully>

In [8]:
# Chunk both splits with torch.split, using config_2.batch_size as the chunk
# size, so the encoders can be run piece by piece.
X_train_batches, X_val_batches = (
    torch.split(split, config_2.batch_size) for split in (X_train, X_val)
)

In [9]:
def get_latents(batches, model):
    """Encode every batch with ``model.latent`` and return a single CPU tensor.

    Args:
        batches: Iterable of input tensors; each one is passed to
            ``model.latent`` unchanged.
        model: Object exposing ``latent(batch)`` that returns a tensor. If it
            has a truthy ``training`` attribute (e.g. a ``torch.nn.Module`` in
            training mode), it is switched to eval mode for the duration of
            the call and put back in training mode afterwards, so dropout /
            batch-norm layers do not perturb the latents.

    Returns:
        The per-batch outputs, moved to the CPU and concatenated with
        ``torch.cat`` (along dim 0).

    Raises:
        Whatever ``torch.cat`` raises for an empty list when ``batches`` is
        empty.
    """
    # getattr keeps duck-typed objects without a `training` flag working.
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()
    try:
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            latents = [model.latent(X_batch).to('cpu') for X_batch in batches]
    finally:
        # Leave the model in the mode the caller had it in.
        if was_training:
            model.train()
    return torch.cat(latents)

In [10]:
# Encode the training batches with each model (latent sizes 2, 5, 10) ...
latent_train_2, latent_train_5, latent_train_10 = (
    get_latents(X_train_batches, vae) for vae in (model_2, model_5, model_10)
)
# ... and then the validation batches, in the same model order.
latent_val_2, latent_val_5, latent_val_10 = (
    get_latents(X_val_batches, vae) for vae in (model_2, model_5, model_10)
)

In [11]:
# Train a classifier on the VAE latents alone, once per latent size
# (2, 5, 10 in that order), and report validation accuracy.
# The target is a class label, so use XGBClassifier instead of XGBRegressor
# with a hand-set multi-class objective; the classifier infers the number of
# classes from y_train_xgb and predict() returns the class label.
# NOTE(review): tree_method="gpu_hist" requires a CUDA device and is
# deprecated in XGBoost >= 2.0 (tree_method="hist", device="cuda").
for latent_train, latent_val in [
    (latent_train_2, latent_val_2),
    (latent_train_5, latent_val_5),
    (latent_train_10, latent_val_10),
]:
    reg = xgb.XGBClassifier(tree_method="gpu_hist")
    reg.fit(pd.DataFrame(latent_train), y_train_xgb)
    y_pred = reg.predict(pd.DataFrame(latent_val))
    print("Accuracy score", sklearn.metrics.accuracy_score(y_val_xgb, y_pred))

Accuracy score 0.31770826670078256
Accuracy score 0.34182818396980746
Accuracy score 0.36614000298513827


In [12]:
# Train a classifier on the VAE latents concatenated column-wise with the raw
# features, once per latent size (2, 5, 10 in that order).
# The target is a class label, so use XGBClassifier instead of XGBRegressor
# with a hand-set multi-class objective; the classifier infers the number of
# classes from y_train_xgb and predict() returns the class label.
# NOTE(review): tree_method="gpu_hist" requires a CUDA device and is
# deprecated in XGBoost >= 2.0 (tree_method="hist", device="cuda").
for latent_train, latent_val in [
    (latent_train_2, latent_val_2),
    (latent_train_5, latent_val_5),
    (latent_train_10, latent_val_10),
]:
    # .to_numpy() hands XGBoost a plain array.
    # NOTE(review): presumably this sidesteps the duplicate column labels the
    # two frames would otherwise share after concat — confirm.
    train_features = pd.concat([pd.DataFrame(latent_train), X_train_xgb], axis=1).to_numpy()
    val_features = pd.concat([pd.DataFrame(latent_val), X_val_xgb], axis=1).to_numpy()

    reg = xgb.XGBClassifier(tree_method="gpu_hist")
    reg.fit(train_features, y_train_xgb)
    y_pred = reg.predict(val_features)
    print("Accuracy score", sklearn.metrics.accuracy_score(y_val_xgb, y_pred))

Accuracy score 0.43543359133457005
Accuracy score 0.43546770720058
Accuracy score 0.4355828482483635


In [13]:
from sklearn.decomposition import PCA
# PCA baselines with 2, 5, 10 and 41 components.
# NOTE(review): 41 is presumably the full feature dimension — confirm.
# random_state is pinned so the fit is repeatable when scikit-learn selects a
# randomized SVD solver; it has no effect with the exact solvers.
pca_2, pca_5, pca_10, pca_41 = (
    PCA(n_components=k, random_state=0) for k in (2, 5, 10, 41)
)
# fit() returns the estimator, so the tuple is displayed as the cell output.
pca_2.fit(X_train), pca_5.fit(X_train), pca_10.fit(X_train), pca_41.fit(X_train)

(PCA(n_components=2),
 PCA(n_components=5),
 PCA(n_components=10),
 PCA(n_components=41))

In [14]:
# Train a classifier on the PCA projections, once per fitted PCA
# (2, 5, 10, 41 components in that order), and report validation accuracy.
# The target is a class label, so use XGBClassifier instead of XGBRegressor
# with a hand-set multi-class objective; the classifier infers the number of
# classes from y_train_xgb and predict() returns the class label.
# NOTE(review): tree_method="gpu_hist" requires a CUDA device and is
# deprecated in XGBoost >= 2.0 (tree_method="hist", device="cuda").
for pca in [pca_2, pca_5, pca_10, pca_41]:
    reg = xgb.XGBClassifier(tree_method="gpu_hist")
    reg.fit(pca.transform(X_train), y_train_xgb)
    y_pred = reg.predict(pca.transform(X_val))
    print("Accuracy score", sklearn.metrics.accuracy_score(y_val_xgb, y_pred))

Accuracy score 0.29515341478496343
Accuracy score 0.330032623296872
Accuracy score 0.3578626409944775
Accuracy score 0.4083498582059319
