# Deconvolve simulated data with periodic trajectory functions, vary number of samples

In [None]:
import torch
from ternadecov.simulator import *
from ternadecov.time_deconv import *
from ternadecov.evaluation import *
import matplotlib.pyplot as plt

# Configure

In [None]:
# Run on the first CUDA device when one is available; otherwise fall back to
# the CPU so the notebook still executes (more slowly) on GPU-less machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Floating-point precision used on the torch side and on the numpy side.
dtype = torch.float32
dtype_np = np.float32

# Load data

In [None]:
# Locations of the EBOV AnnData files (.h5ad) on the author's machine.
# The single-cell file is the one loaded in the next cell.
sc_anndata_path = "/home/nbarkas/disk1/work/deconvolution_method/datasets/ebov/load_data_python/ebov_sc.h5ad"
bulk_anndata_path = "/home/nbarkas/disk1/work/deconvolution_method/datasets/ebov/load_data_python/ebov_bulk.h5ad"

In [None]:
# Read the single-cell AnnData object. read_h5ad is documented to take the
# file path, so pass it directly and let the reader open and close the file
# itself instead of handing it an already-open binary handle.
sc_anndata = anndata.read_h5ad(sc_anndata_path)

# Simulate data

In [None]:
# Wrap the single-cell AnnData in a SingleCellDataset, taking cell-type labels
# from the "Subclustering_reduced" column and using the dtypes/device
# configured above.
ebov_sc_dataset = SingleCellDataset(
    sc_anndata,
    sc_celltype_col="Subclustering_reduced",
    dtype_np=dtype_np,
    dtype=dtype,
    device=device,
)

In [None]:
# Kind of trajectory-generating function, and the numbers of simulated
# samples to sweep over (100, 200, ..., 900).
trajectory_type = 'periodic'
n_sample_range = [100 * step for step in range(1, 10)]
n_sample_range

In [None]:
# Sample the trajectory coefficients once so that every sample-size setting
# below is evaluated against the same trajectory. The number of cell types is
# read from the second dimension of the dataset's w_hat_gc.
trajectory_coef = sample_trajectories(
    type=trajectory_type,
    num_cell_types=ebov_sc_dataset.w_hat_gc.shape[1],
)

In [None]:
# Iteration count forwarded to evaluate_with_trajectory as n_iters.
n_iters = 3_000

# Metrics collected from each run, one entry per value in n_sample_range.
n_samples = []
l1_error_norm = []
fit_time = []

for n in n_sample_range:
    # Evaluate with n simulated samples, reusing the shared trajectory
    # coefficients so that only the sample count varies between runs.
    eval_result = evaluate_with_trajectory(
        sc_dataset=ebov_sc_dataset,
        n_samples=n,
        trajectory_type=trajectory_type,
        trajectory_coef=trajectory_coef,
        n_iters=n_iters,
        dtype_np=dtype_np,
        dtype=dtype,
        device=device,
    )

    n_samples.append(eval_result['n_samples'])
    l1_error_norm.append(eval_result['l1_error_norm'])
    fit_time.append(eval_result['fit_time'])

# The results DataFrame is built from these lists in the following cell; it is
# not duplicated here, so the table can be rebuilt without re-running the fits.

In [None]:
# Tabulate the collected metrics, one row per evaluated sample count.
# Each stored L1 error is converted with its .item() method before tabulation.
evaluation_resuts_df = pd.DataFrame(
    {
        'n_samples': n_samples,
        'l1_error_norm': [err.item() for err in l1_error_norm],
        'fit_time': fit_time,
    }
)

In [None]:
# Display the results table (columns: n_samples, l1_error_norm, fit_time).
evaluation_resuts_df

## Plot

In [None]:
# Plot run-time against the number of simulated samples.
# Draw on an explicitly created figure so this plot never shares axes with
# another one when the cells are executed outside an inline notebook backend.
fig, ax = plt.subplots()
ax.plot(evaluation_resuts_df.n_samples.to_numpy(), evaluation_resuts_df.fit_time.to_numpy())
ax.set_xlabel("Number of simulated bulk samples")
ax.set_ylabel("Time (s)")
# Derive the iteration label from n_iters ("3k" for 3_000) so that the title
# and the output file name stay in sync with the configured run.
iter_label = f"{n_iters // 1000}k" if n_iters >= 1000 and n_iters % 1000 == 0 else str(n_iters)
# NOTE(review): "polynomial-3" is hard-coded while trajectory_type is
# 'periodic'; presumably it names the fitted model rather than the simulated
# trajectory - confirm, and adjust the title/file name if it is stale.
ax.set_title(f"Run time against number of samples \n ({iter_label} iter; polynomial-3)")
fig.savefig(f"../results/runtime_vs_bulk-vary_n-{iter_label}_iter-polynomial_3.png", dpi=300)

In [None]:
# Plot the normalized L1 error against the number of simulated samples.
# Draw on an explicitly created figure so this plot never shares axes with
# another one when the cells are executed outside an inline notebook backend.
fig, ax = plt.subplots()
ax.plot(evaluation_resuts_df.n_samples.to_numpy(), evaluation_resuts_df.l1_error_norm.to_numpy())
ax.set_xlabel("Number of simulated samples")
ax.set_ylabel("Normalized L1 error")
# Derive the iteration label from n_iters ("3k" for 3_000) so that the title
# and the output file name stay in sync with the configured run.
iter_label = f"{n_iters // 1000}k" if n_iters >= 1000 and n_iters % 1000 == 0 else str(n_iters)
# NOTE(review): "polynomial-3" is hard-coded while trajectory_type is
# 'periodic'; presumably it names the fitted model rather than the simulated
# trajectory - confirm, and adjust the title/file name if it is stale.
ax.set_title(f"Normalized L1 error against number of samples \n ({iter_label} iter; polynomial-3)")
# Fixed y-range, as in the original plot.
ax.set_ylim(0.75, 1.25)
fig.savefig(f"../results/NormL1_vs_bulk-vary_n-{iter_label}_iter-polynomial_3.png", dpi=300)