In [3]:
import numpy as np
from scipy.integrate import odeint
from scipy.optimize import minimize
from scipy.optimize import differential_evolution  
from scipy.interpolate import interp1d
import matplotlib.pylab as plt
from matplotlib.pyplot import savefig
from matplotlib.lines import Line2D
from scipy.integrate import solve_ivp
import pandas as pd
import math 
from datetime import date
from scipy.integrate import solve_ivp
import sys
import os
import copy 
#!pip install lifelines
import seaborn as sns
import statistics
from lifelines import KaplanMeierFitter, CoxPHFitter
from lifelines.statistics import logrank_test
from scipy import stats
from matplotlib.patches import Patch
import matplotlib.ticker as ticker



In [5]:
### Read the input data, including individual tumour growth rates.
# All four CSV files are looked up relative to the notebook's working directory.
df_all = pd.read_csv("clean_erlotinib.csv")  # full cleaned table, kept to inspect markers etc.
total_tumors_df = pd.read_csv("total_tumors.csv")
single_tumors_df = pd.read_csv("single_tumors.csv")
no_first_point_df = pd.read_csv("no_first_point.csv")

# Only the last expression of a cell is rendered, so a single preview at the
# end is enough (an earlier mid-cell .head() call was computed and discarded).
total_tumors_df.head()

Unnamed: 0,ID,days,Sum_tumor_lengths,Sum_tumor_volume,Dose_mg,ctDNA_copies_ml,dose_proportion,Sum_tumor_vol_cm
0,A1002,0,58.7,43559.570202,0.0,0.0,0.0,43.55957
1,A1002,62,30.3,5243.630727,150.0,0.0,1.0,5.243631
2,A1002,110,24.1,4106.008715,150.0,0.0,1.0,4.106009
3,A1002,152,26.0,4417.875141,150.0,0.0,1.0,4.417875
4,A1002,187,26.0,4417.875141,150.0,0.0,1.0,4.417875


#### Functions

In [6]:
# Logistic population model
def system_ode_logistic(time, populations, params):
    """Right-hand side of the two-population logistic growth ODE system.

    Both populations share one logistic growth term that depends on their
    combined size; only the first population is additionally reduced by the
    medication-induced death rate.

    Parameters
    ----------
    time : float
        Current time. Not used by the equations; present because ODE solvers
        call the right-hand side as ``f(t, y, ...)``.
    populations : sequence
        ``populations[0]`` is Spop and ``populations[1]`` is Rpop
        (presumably drug-sensitive / drug-resistant cells -- confirm).
    params : sequence
        ``params[0]`` growth rate, ``params[1]`` carrying capacity,
        ``params[2]`` death rate due to medication.

    Returns
    -------
    list
        ``[dSpop/dt, dRpop/dt]``.
    """
    s_pop, r_pop = populations[0], populations[1]
    growth_rate, carrying_capacity, drug_death_rate = params[0], params[1], params[2]

    # Logistic per-capita growth, shared by both populations and limited by
    # the combined population size relative to the carrying capacity.
    per_capita_growth = growth_rate * (1 - (s_pop + r_pop) / carrying_capacity)

    # Only the first population pays the medication death rate.
    return [
        s_pop * (per_capita_growth - drug_death_rate),
        r_pop * per_capita_growth,
    ]

# Cost function
def cost_func_logistic(params, time, data, loss="rmse"):
    """Misfit between the logistic two-population model and the measurements.

    Integrates ``system_ode_logistic`` over ``[time[0], time[-1]]`` with
    ``solve_ivp``, evaluates the solution at every entry of ``time``, sums the
    two populations and compares that sum with ``data``.

    Parameters
    ----------
    params : sequence
        ``params[:3]`` is forwarded to ``system_ode_logistic`` as its
        parameter vector; ``params[3:]`` is used as the initial populations.
    time : array-like
        Measurement times. Flattened to 1-D, so lists, arrays and pandas
        Series are all accepted.
    data : array-like
        Measured total size at each entry of ``time`` (same length).
    loss : {"rmse", "mse", "mae", "huber"}, optional
        Error metric to return. Defaults to ``"rmse"``, which is what this
        function returned before the keyword existed.

    Returns
    -------
    float
        The selected loss, or the penalty ``1e30`` when the solver did not
        return a value for every time point or the loss is not finite.

    Raises
    ------
    ValueError
        If ``loss`` is not a supported name or ``time`` and ``data`` differ
        in length.
    """
    penalty = 1e30      # returned for unusable parameter sets
    huber_delta = 100   # residual size at which Huber switches from quadratic to linear

    if loss not in ("rmse", "mse", "mae", "huber"):
        raise ValueError(
            "loss must be one of 'rmse', 'mse', 'mae', 'huber'; got %r" % (loss,)
        )

    # Flatten once; positional indexing on the array also works for inputs
    # (e.g. a default-indexed pandas Series) where time[-1] would fail.
    t_points = np.ravel(np.asarray(time, dtype=float))
    observed = np.ravel(np.asarray(data, dtype=float))
    if t_points.shape != observed.shape:
        raise ValueError("time and data must have the same number of points")

    solution = solve_ivp(
        system_ode_logistic,
        (t_points[0], t_points[-1]),
        params[3:],
        t_eval=t_points,
        args=(params[:3],),
    )

    # If the integration stopped early there are fewer columns than requested
    # time points, so the model output cannot be compared with the data.
    if solution.y.shape[1] != t_points.shape[0]:
        return penalty

    residuals = (solution.y[0] + solution.y[1]) - observed
    n_points = observed.shape[0]

    if loss == "mae":
        value = np.sum(np.abs(residuals)) / n_points
    elif loss == "huber":
        # Standard Huber loss: quadratic for small residuals, linear beyond delta.
        abs_residuals = np.abs(residuals)
        value = np.sum(
            np.where(
                abs_residuals < huber_delta,
                0.5 * residuals**2,
                huber_delta * (abs_residuals - 0.5 * huber_delta),
            )
        ) / n_points
    else:
        value = np.sum(residuals**2) / n_points
        if loss == "rmse":
            value = np.sqrt(value)

    # Never hand NaN/inf back to the optimizer; treat it like a failed solve.
    if not np.isfinite(value):
        return penalty
    return float(value)
   

In [7]:
#### use total tumors df
df = total_tumors_df
unique_IDs = df.ID.unique()
# Patient ID -> [r, K, kd, spop0, rpop0, cost]: the optimized parameters
# followed by the final cost function value.
master_dictionary = {}

# differential_evolution is a stochastic search; a fixed seed makes the fitted
# parameters reproducible under Restart Kernel -> Run All.
DE_SEED = 42

# Find optimal parameters for each patient
# NOTE: the loop variable `id` shadows the builtin of the same name; it is kept
# unchanged here in case later cells reuse the name.
for id in unique_IDs:

    # Select this patient's rows once and read both columns from the selection.
    patient_rows = df[df.ID == id]
    days = patient_rows.days.tolist()
    data = patient_rows.Sum_tumor_volume.tolist()

    # Carrying-capacity bounds scale with the larger of the first and last
    # measurement (intermediate measurements are not considered).
    max_value = max(data[0], data[-1])

    print("Working on patient:", id, " Number of data points:", len(days))

    # Search box for the optimizer, one (low, high) pair per parameter.
    # The initial-population bounds are fractions of the first measurement.
    bounds = [
        (1e-4, 5e-1),                      # r
        (1.1*max_value, 2*max_value),      # K
        (5e-3, 1e-1),                      # kd
        (0.8*data[0], data[0]-1e-2),       # spop0
        (1e-2, 0.2*data[0]),               # rpop0
    ]

    # Use differential_evolution to perform a random search
    opt_result_log = differential_evolution(cost_func_logistic, bounds,
                                            args=(days, data), seed=DE_SEED)
    # store the optimal parameters and the cost function value (last entry in list)
    master_dictionary[id] = opt_result_log.x.tolist() + [opt_result_log.fun]

Working on patient: A1002  Number of data points: 16
Working on patient: A1004  Number of data points: 5
Working on patient: A1006  Number of data points: 8
Working on patient: A1007  Number of data points: 13
Working on patient: A1008  Number of data points: 12
Working on patient: A1009  Number of data points: 6
Working on patient: A1011  Number of data points: 6
Working on patient: A1012  Number of data points: 9
Working on patient: A1013  Number of data points: 7
Working on patient: A1014  Number of data points: 18
Working on patient: A1016  Number of data points: 14
Working on patient: A1017  Number of data points: 7
Working on patient: E1008  Number of data points: 6
Working on patient: E1012  Number of data points: 16


In [None]:
# One row per patient ID (the dictionary keys become the index); the columns
# name the entries of each stored list: the five fitted values and the final cost.
df_log = pd.DataFrame.from_dict(
    master_dictionary,
    columns=['r', 'Kmax', 'kd', 'spop_0', 'rpop_0', 'error'],
    orient='index',
)