In [1]:
import os
# Move the working directory three levels up from where the notebook was
# launched -- presumably the project root, so that the relative 'data/...'
# paths and the `commons` package used further down resolve.
#
# The target is computed only once per kernel session.  An unconditional
# version would climb three *more* levels every time this cell is re-run,
# silently breaking every later relative path.
if 'current_dir' not in globals():
    current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
os.chdir(current_dir)

import numpy as np
import matplotlib.pyplot as plt

# 연산

In [2]:
import pandas as pd

def get_files_in_dir(directory):
    """Return the names of the regular files directly inside ``directory``.

    Sub-directories are skipped and the search is not recursive.  Names (not
    full paths) are returned in sorted order: ``os.listdir`` yields entries in
    arbitrary order, so sorting keeps the processing order reproducible
    between runs and machines.

    Parameters
    ----------
    directory : str or path-like
        Directory to scan.

    Returns
    -------
    list of str
        Sorted file names; empty if the directory contains no files.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``directory`` cannot be listed
        (e.g. it does not exist).
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )

# Names of all LOR input files, held as a NumPy array of strings.
filenames = np.asarray(get_files_in_dir('data/Topas - single cell/raw/LOR'))

In [3]:
# Evaluation grid: Nr equally spaced time stamps from tm1 to tm2 (inclusive),
# i.e. 0, 300, 600, ..., 149700 -- 500 points in total.
tm1 = 0
tm2 = 149700
Nr = 500
times500 = np.linspace(start=tm1, stop=tm2, num=Nr)

def get_MSE(X, x, y, z):
    """Mean squared 3-D position error of a B-spline trajectory.

    ``X`` is split into x/y/z coefficient rows, one B-spline per coordinate is
    built from them and evaluated on ``times500``, and the squared Euclidean
    distance to the ground truth is averaged over the evaluated time points.

    Besides its arguments, this function reads the module-level names
    ``knots``, ``k``, ``times500`` and ``BSpline`` at call time, so they must
    all be defined before it is called.

    Parameters
    ----------
    X : ndarray
        Control points; reshaped to ``(3, -1)`` where the rows are the x, y
        and z spline coefficients.
    x, y, z : array-like
        Ground-truth coordinates, one value per entry of ``times500``.

    Returns
    -------
    float
        Mean over the time points of
        ``(x_pred - x)**2 + (y_pred - y)**2 + (z_pred - z)**2``.
    """
    coef_x, coef_y, coef_z = X.reshape((3, -1))

    x_predict = BSpline(knots, coef_x, k)(times500)
    y_predict = BSpline(knots, coef_y, k)(times500)
    z_predict = BSpline(knots, coef_z, k)(times500)

    squared_error = (x_predict - x) ** 2 + (y_predict - y) ** 2 + (z_predict - z) ** 2

    # Divide by the actual number of samples instead of a hard-coded 500 so the
    # result stays a true mean if the size of the time grid is ever changed.
    return squared_error.sum() / squared_error.size

In [4]:
import timeit
from scipy.optimize import minimize
from commons.loss import objf3D
from scipy.interpolate import BSpline
from commons.optimizers import single_cellGPS

import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)



# Benchmark: for every LOR file, fit the trajectory once with `single_cellGPS`
# ("Backpropagation") and once with SciPy's BFGS without an analytic gradient
# ("Numerical Gradient"), recording wall-clock time and MSE for both.
# Each row appended to `Comparisons` is
#   [filename, backprop time, numerical time, backprop MSE, numerical MSE].

# Settings shared by every file (loop-invariant, so defined once up front).
k = 3                      # B-spline degree
knot_space = 20            # about one basis function per `knot_space` LOR rows
tc1, tc2 = 0, 150000       # first / last value of the knot grid
lambda0 = 0.005            # forwarded unchanged to both optimizers
dmax = 4                   # forwarded unchanged to both optimizers
options = {'maxiter': 2000, 'disp': False}

Comparisons = []

for iteration_num, filename in enumerate(filenames, start=1):
    Comparison = [filename]

    # Read files: LOR measurements and the ground truth stored under the same name.
    LORs_dir = 'data/Topas - single cell/raw/LOR/' + filename
    ground_truths_dir = 'data/Topas - single cell/raw/GT/' + filename
    LORs = pd.read_csv(LORs_dir).values
    ground_truths = pd.read_csv(ground_truths_dir).values

    # Ground-truth columns are unpacked as time, x, y, z.
    t, x, y, z = ground_truths.T

    # LOR columns: 0-2 -> P1, 3 -> time stamp, 4-6 -> P2
    # (presumably the two end points of each line of response -- TODO confirm).
    A = LORs.T
    P1 = A[0:3, :]
    P2 = A[4:7, :]
    times = A[3, :]

    # Normalised P1 -> P2 direction; not used by the calls below, kept so the
    # names this cell leaves in the namespace are unchanged.
    v = P2 - P1
    norms = np.linalg.norm(v, axis=0)
    v = v / norms

    # Knot vector: a uniform grid on [tc1, tc2] with each end value repeated
    # k extra times, and a zero initial guess with one column per basis function.
    N = int(np.round(times.size / knot_space)) + 1  # number of basis functions
    T = np.linspace(tc1, tc2, np.maximum(2, N + 1 - k))
    knots = np.r_[[T[0]] * k, T, [T[-1]] * k]
    a0 = np.zeros((3, N))

    # Optimization.
    # Each optimizer receives its own copy of the start point and of the
    # options dict, so any in-place change made by one of them cannot leak
    # into the other run.

    ## Backpropagation
    a_start = a0.copy()
    start_time = timeit.default_timer()
    a = single_cellGPS(a_start, LORs, N, k, knots, lambda0, dmax,
                       loss_tracking=False, options=dict(options))
    Comparison.append(timeit.default_timer() - start_time)

    ## Numerical Gradient
    start_time = timeit.default_timer()
    # `minimize` expects a 1-D x0: newer SciPy releases raise ValueError for a
    # (3, N) array, while older ones flattened it themselves.  Flattening here
    # (C order, matching the reshape((3, -1)) in get_MSE) works with both.
    b = a0.flatten()
    result = minimize(lambda s: objf3D(k, s, knots, P1, P2, times, dmax, lambda0),
                      b, method='BFGS', options=dict(options))
    b = result.x
    Comparison.append(timeit.default_timer() - start_time)

    # MSE of both fitted trajectories against the ground truth.
    Comparison.append(get_MSE(a, x, y, z))
    Comparison.append(get_MSE(b, x, y, z))

    Comparisons.append(Comparison)
    print(f'{iteration_num}/{len(filenames)}', Comparison)

1/148 ['0.0012.csv', 0.04559809993952513, 1.1107249000342563, 0.23935486795162925, 0.2372348775658338]
2/148 ['0.0019.csv', 0.033591600018553436, 0.908733700052835, 0.24294881363506848, 0.24133564680250424]
3/148 ['0.0021.csv', 0.020469399983994663, 0.7186293000122532, 0.4381598542872116, 0.43506752385696756]
4/148 ['0.0041.csv', 0.03181559999939054, 0.7414137000450864, 0.23125193919172046, 0.23027722402507267]
5/148 ['0.0046.csv', 0.022143300040625036, 0.9220059999497607, 0.27320776450146267, 0.2709640901928781]
6/148 ['0.0056.csv', 0.022829500027000904, 0.9709618999622762, 0.6554471032050901, 0.6540296556730307]
7/148 ['0.0065.csv', 0.02776110009290278, 0.7938634000020102, 0.17370095478542633, 0.17206598503474393]
8/148 ['0.0072.csv', 0.020783000043593347, 0.8605847000144422, 0.23355288812321542, 0.23169535563539534]
9/148 ['0.0085.csv', 0.029081500018946826, 1.380609599989839, 0.16336304141357363, 0.1615824613954635]
10/148 ['0.0088.csv', 0.02271269995253533, 0.885443800012581, 0.43

80/148 ['0.0512.csv', 0.020025899983011186, 0.83126650005579, 0.5880833529456437, 0.5890845069299268]
81/148 ['0.0521.csv', 0.028057600022293627, 1.0243529999861494, 0.6565269352034343, 0.6578552935381238]
82/148 ['0.0526.csv', 0.018920299946330488, 0.7029697000980377, 0.602381153819073, 0.6015303288072927]
83/148 ['0.0532.csv', 0.018240700010210276, 0.6596104999771342, 0.6850158134290457, 0.6786042359040201]
84/148 ['0.0534.csv', 0.021007300005294383, 0.814244500012137, 0.7680879225179483, 0.7752823840993037]
85/148 ['0.0539.csv', 0.03502080007456243, 1.4923426999012008, 0.551121374365779, 0.5480176018123254]
86/148 ['0.0542.csv', 0.02141629997640848, 0.6716480000177398, 0.5957215617230516, 0.5956513514397723]
87/148 ['0.0544.csv', 0.03803850000258535, 1.3838840000098571, 0.4351960359319646, 0.4388822526709558]
88/148 ['0.0551.csv', 0.030608699889853597, 0.7829353000270203, 0.5349815612992985, 0.5376715273156362]
89/148 ['0.0568.csv', 0.020118500106036663, 0.8238276999909431, 0.711244

위 실행 결과의 소요 시간(time) 값에는 outlier가 2-3개 존재한다.

# 분석

In [5]:
# Turn the per-file result rows into a table and label its five columns.
Comparisons = pd.DataFrame(Comparisons).set_axis(
    [
        'filename',
        'Backpropagation time',
        'Numerical time',
        'Backpropagation MSE',
        'Numerical MSE',
    ],
    axis=1,
)

In [6]:
import pickle
# Persist the comparison table so the analysis cells can be re-run later
# without repeating the benchmark.
filename = "data/Comparisons.pkl"

with open(filename, mode="wb") as pkl_out:
    pickle.dump(Comparisons, pkl_out)

In [7]:
import pickle
# Reload the stored comparison table.  Unpickling can execute arbitrary code,
# so only load a file that this notebook (or another trusted source) produced.
filename = "data/Comparisons.pkl"

with open(filename, mode="rb") as pkl_in:
    Comparisons = pickle.load(pkl_in)

In [8]:
# Show the comparison table (the notebook renders the cell's last expression).
Comparisons

Unnamed: 0,filename,Backpropagation time,Numerical time,Backpropagation MSE,Numerical MSE
0,0.0012.csv,0.045598,1.110725,0.239355,0.237235
1,0.0019.csv,0.033592,0.908734,0.242949,0.241336
2,0.0021.csv,0.020469,0.718629,0.438160,0.435068
3,0.0041.csv,0.031816,0.741414,0.231252,0.230277
4,0.0046.csv,0.022143,0.922006,0.273208,0.270964
...,...,...,...,...,...
143,0.0992.csv,0.019416,0.811273,0.898715,0.898694
144,0.0993.csv,0.024234,0.681697,0.860893,0.858348
145,0.101.csv,0.027496,1.150662,0.748005,0.738600
146,0.1048.csv,0.020388,0.791643,1.012177,1.013524


In [9]:
# Ratio of the mean 'Numerical time' to the mean 'Backpropagation time':
# a value above 1 means the numerical-gradient runs took longer on average.
Comparisons['Numerical time'].mean() / Comparisons['Backpropagation time'].mean()

47.62145835965662

In [10]:
# Ratio of the mean 'Backpropagation MSE' to the mean 'Numerical MSE':
# a value close to 1 means both approaches reach about the same average error.
Comparisons['Backpropagation MSE'].mean() / Comparisons['Numerical MSE'].mean()

1.0004668950388034