<a href="https://colab.research.google.com/github/achett/clinical_trial_simulation/blob/main/Trial_Cost_Sim_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
##############
# INSTALL PACKAGES
##############
!pip install simpy



In [None]:
##############
# IMPORT PACKAGES
##############
import simpy
import random
import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd
# from datetime import datetime, timedelta
import datetime
import pymc as pm
from dateutil.relativedelta import relativedelta

In [None]:
##############
# DATA LOAD
##############
file_path = '/content/eac_spread.csv'
eac_spread = pd.read_csv(file_path)

file_path = '/content/accrual_period.csv'
accrual_period = pd.read_csv(file_path)

file_path = '/content/subjects.csv'
subjects = pd.read_csv(file_path)

file_path = '/content/trial_info.csv'
trial_info = pd.read_csv(file_path)

In [None]:
##############
# VARIABLE LOAD
##############
trials2sim = ['2215-CL-0201', '8951-CL-0302']
sim_start_date = datetime.datetime(2012, 1, 1) # Date of earliest active trial start
data_end_date = datetime.datetime(2023, 10, 1) # Date at which actuals end, this is used for adding uncertainty

In [None]:
##############
# SAMPLE
##############
eac_spread = eac_spread[eac_spread['ISN'].isin(trials2sim)]

In [None]:
##############
# TAG
##############
# Concatenate 'CAB' and 'Activity Type' to create 'Cost Account'
accrual_period['Cost Account'] = accrual_period['CAB'] + accrual_period['Activity Type']
eac_spread['Cost Account'] = eac_spread['CAB'] + eac_spread['Activity Type']

In [None]:
##############
# SITE COSTS
##############

def site_inputs(eac_spread_df, trial_info_df, trial_name):
  start_date = eac_spread_df[(eac_spread_df['ISN']==trial_name) & (eac_spread_df['Methodology']=='Site Curve')]['Start Date'].iloc[0]
  end_date = eac_spread_df[(eac_spread_df['ISN']==trial_name) & (eac_spread_df['Methodology']=='Site Curve')]['End Date'].iloc[0]
  total_eac = eac_spread_df[(eac_spread_df['ISN']==trial_name) & (eac_spread_df['Methodology']=='Site Curve')]['TotalEAC'].iloc[0]
  cost_account = eac_spread_df[(eac_spread_df['ISN']==trial_name) & (eac_spread_df['Methodology']=='Site Curve')]['Cost Account'].iloc[0]
  planned_sites = trial_info_df[(trial_info_df['ISN']==trial_name)]['Total # of planned sites'].iloc[0]

  return start_date, end_date, total_eac, cost_account, planned_sites

def calculate_site_costs(i, trial_name, cost_account, start_date, end_date, total_cost, planned_sites, events_df):
    # Check if start_date and end_date are not datetime objects
    if not isinstance(start_date, datetime.datetime):
        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    if not isinstance(end_date, datetime.datetime):
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    # Calculate the total duration in months
    duration_months = (end_date.year - start_date.year) * 12 + end_date.month - start_date.month

    # Monthly cost per site
    monthly_cost_per_site = (total_cost / planned_sites) / duration_months

    # Initialize monthly costs dictionary
    monthly_costs = {}

    # Initialize number of open sites
    open_sites = 0

    # Filter for relevant events
    events_df = events_df[events_df['trial_name']==trial_name]

    # Iterate over each month
    for month in range(duration_months):
        current_month = (start_date.month + month - 1) % 12 + 1
        current_year = start_date.year + (start_date.month + month - 1) // 12

        # Check for events (opening/closing sites) in this month
        for index, row in events_df.iterrows():
            event_date, change = row['event_date'], row['change']
            if event_date.month == current_month and event_date.year == current_year:
                open_sites += change

        # Calculate and store the cost for the current month
        monthly_costs[(current_year, current_month)] = open_sites * monthly_cost_per_site

    # Convert the monthly_costs dictionary to a DataFrame
    df = pd.DataFrame(list(monthly_costs.items()), columns=['ds', 'value'])
    df['ds'] = df['ds'].apply(lambda x: datetime.date(x[0], x[1], 1))
    df.sort_values(by='ds', inplace=True)

    # Add additional columns
    df['sim'] = i
    df['trial_name'] = trial_name
    df['cost_account'] = cost_account

    return df

In [None]:
##############
# TRIAL DURATION FUNCTIONS
##############

# Function to get a random number of months - replace with your distribution
def get_random_months():
    return np.random.randint(1, 6)  # Randomly choose between 1 to 5 months

def month_diff(d1, d2):
    return (d1.year - d2.year) * 12 + d1.month - d2.month

def create_trial_durations_df(inputs, sim_start_date, data_end_date, month_diff_function, get_random_months_function):
    """
    Create a trial durations DataFrame.

    :param inputs: DataFrame containing initial data.
    :param sim_start_date: The simulation start date for time calculations.
    :param data_end_date: The end date for filtering.
    :param month_diff_function: Function to calculate the month difference.
    :param get_random_months_function: Function to generate a random number of months.
    :return: DataFrame with trial durations and updated dates.
    """
    # Convert the date columns to datetime
    inputs['Start Date'] = pd.to_datetime(inputs['Start Date'])
    inputs['End Date'] = pd.to_datetime(inputs['End Date'])

    # Calculating the month differences and creating a new DataFrame
    q1 = inputs.copy()[['ISN', 'Start Spread', 'Start Date']].drop_duplicates()
    q1.columns = ['ISN', 'milestone', 'ds']
    q2 = inputs.copy()[['ISN', 'End Spread', 'End Date']].drop_duplicates()
    q2.columns = ['ISN', 'milestone', 'ds']
    trial_durations = pd.concat([q1, q2]).drop_duplicates()

    # Calculate time2milestone
    trial_durations['time2milestone_orig'] = trial_durations['ds'].apply(lambda x: month_diff(x, sim_start_date))

    # Sort dataframe
    trial_durations = trial_durations.sort_values(by=['ISN', 'ds'], ascending=[True, True]).reset_index(drop=True)

    # Track the total months added for each ISN
    total_months_added = {isn: 0 for isn in trial_durations['ISN'].unique()}

    # Iterate over the DataFrame rows
    for index, row in trial_durations.iterrows():
        if row['ds'] > data_end_date:
            # Draw random months and add to total for this ISN
            additional_months = get_random_months()
            total_months_added[row['ISN']] += additional_months

            # Update the date
            trial_durations.at[index, 'ds'] = row['ds'] + pd.DateOffset(months=total_months_added[row['ISN']])

    # Adjust subsequent dates
    for index, row in trial_durations.iterrows():
        if row['ds'] > data_end_date:
            trial_durations.at[index, 'ds'] += pd.DateOffset(months=total_months_added[row['ISN']])

    # Calculate new time2milestone
    trial_durations['time2milestone'] = trial_durations['ds'].apply(lambda x: month_diff(x, sim_start_date))

    return trial_durations

In [None]:
##############
# COST FUNCTION
##############
def create_eac_df(inputs, trial_durations, month_diff_function):
  """
  Create an EAC DataFrame.

  :param inputs: DataFrame containing initial data.
  :param trial_durations: DataFrame with trial duration information.
  :param month_diff_function: Function to calculate the month difference.
  :return: EAC DataFrame.
  """
  # Create EAC df
  eac = inputs.copy()[['Cost Account', 'ISN', 'CAB', 'Activity Type', 'Start Spread', 'End Spread', 'TotalEAC']].drop_duplicates()

  # Add trial milestone dates
  eac = eac.merge(trial_durations[['ISN', 'milestone', 'ds']], how='left', left_on=['ISN', 'Start Spread'], right_on=['ISN', 'milestone'])
  eac = eac.merge(trial_durations[['ISN', 'milestone', 'ds']], how='left', left_on=['ISN', 'End Spread'], right_on=['ISN', 'milestone']).drop_duplicates()

  # Rename columns
  eac.rename(columns={'ds_x': 'Start Date', 'ds_y': 'End Date'}, inplace=True)

  # Find months to spread the costs
  eac['duration2spread'] = eac.apply(lambda row: month_diff(row['End Date'], row['Start Date']), axis=1)

  # Find monthly costs
  eac['monthly_costs'] = eac['TotalEAC'] / eac['duration2spread']

  # Subset
  eac = eac[['Cost Account', 'ISN', 'CAB', 'Activity Type', 'Start Spread', 'End Spread', 'TotalEAC', 'Start Date', 'End Date', 'duration2spread', 'monthly_costs']]

  return eac

In [None]:
##############
# DURATION SIMULATION FUNCTIONS
##############
# Function to check if a cost type is active based on the current time
def is_cost_active(cost_type, current_time, milestone_schedule, accrual_period):
    start_milestone = accrual_period[accrual_period['Cost Account']==cost_type]['Start Milestone'].values[0]
    end_milestone = accrual_period[accrual_period['Cost Account']==cost_type]['End Milestone'].values[0]
    start_time = milestone_schedule[milestone_schedule['milestone']==start_milestone]['time2milestone'].values[0]
    end_time = milestone_schedule[milestone_schedule['milestone']==end_milestone]['time2milestone'].values[0]
    return start_time <= current_time < end_time

# Process for a clinical trial
def calculate_durations(env, trial_name, milestone_schedule, monthly_costs, results, sim_n):
    # Filter for trial
    milestone_schedule = milestone_schedule[milestone_schedule['ISN']==trial_name]
    monthly_costs = monthly_costs[monthly_costs['ISN']==trial_name]

    # Cost account list
    ca_list = accrual_period['Cost Account'].tolist()

    current_time = 0  # Start from the beginning of the trials

    while current_time <= milestone_schedule['time2milestone'].max():
        # Update costs for each month
        costs = {}
        for cost_type in ca_list:
            if is_cost_active(cost_type, current_time, milestone_schedule, accrual_period):
                costs[cost_type] = monthly_costs[monthly_costs['Cost Account']==cost_type]['monthly_costs'].values[0]
            else:
                costs[cost_type] = 0

        # Store the costs for this month
        results.append({**{'sim': sim_n, 'trial_name': trial_name, 'month': current_time}, **costs})

        # Move to the next month
        yield env.timeout(1)  # Wait for one month
        current_time += 1

def process_output(results, start_date):
    """
    Convert the collected data to a DataFrame and melt it for analysis.

    Parameters:
    results (list or DataFrame): The raw results data.

    Returns:
    DataFrame: The processed DataFrame after melting.
    """
    # Convert the collected data to a DataFrame
    df = pd.DataFrame(results)

    # List of columns to keep as they are (identifiers) and to melt into 'cost_account'
    id_vars = ['sim', 'trial_name', 'month']
    value_vars = df.columns.difference(id_vars)

    # Melting the DataFrame
    melted_df = df.melt(id_vars=id_vars, value_vars=value_vars, var_name='cost_account', value_name='value')

    # Update the 'ds' column
    melted_df.rename(columns={'month': 'ds'}, inplace=True)
    melted_df['ds'] = melted_df['ds'].apply(lambda x: start_date + relativedelta(months=x))

    return melted_df

In [69]:
##############
# SUBJECT ENROLLMENT FUNCTIONS
##############
# Posterior: subjects_per_month = subjects_site_month / sites
# subjects_site_month = normal(6.844214489963478, 11.589503153381225), bounded at 0
# sites = x
def process_dates(subjects, trial_durations, trial_name):
    """
    Process dates and calculate time points and prediction horizon.
    """
    enrollments = subjects[subjects['Study'] == trial_name]['Enrollment'].values
    date_strings = subjects[subjects['Study'] == trial_name]['Helix Mth'].values
    date_strings = [datetime.datetime.strptime(date, '%m/%d/%Y').strftime('%Y-%m-%d') for date in date_strings]

    time_points = np.array(date_strings, dtype='datetime64[M]')
    start_date = time_points[0]
    time_points_numeric = (time_points - start_date).astype(int)

    lsft_date = trial_durations[(trial_durations['ISN']==trial_name) & (trial_durations['milestone']=='LSFT')]['ds'].values[0]
    lsft_date_py = (lsft_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
    lsft_date_py = datetime.datetime.utcfromtimestamp(lsft_date_py).date()
    lsft_first_of_month = lsft_date_py.replace(day=1)

    latest_time_point = max(time_points)

    latest_time_point_py = pd.to_datetime(str(latest_time_point)).date()

    pred_horizon = max(0, (lsft_first_of_month.year - latest_time_point_py.year) * 12 + lsft_first_of_month.month - latest_time_point_py.month)

    return enrollments, date_strings, time_points_numeric, start_date, pred_horizon

def extend_time_points(latest_date, periods, start_date):
    """
    Extend the time points by a given number of periods.
    """
    new_dates = pd.date_range(start=latest_date, periods=periods, freq='M')
    new_dates = [date + datetime.timedelta(days=1) for date in new_dates]
    new_dates = pd.DatetimeIndex(new_dates)
    new_time_points = np.array(new_dates, dtype='datetime64[M]')
    new_time_points_numeric = (new_time_points - start_date).astype(int)
    return new_dates, new_time_points, new_time_points_numeric

def train_and_predict_enrollments(K, enrollments, time_points_numeric, new_time_points_numeric, trial_name, new_dates, lever):
    """
    Train a probabilistic model for enrollment prediction and predict future enrollments.

    Parameters:
    K (int): Target number of enrollments.
    enrollments (np.array): Array of historical enrollments.
    time_points_numeric (np.array): Numeric representation of time points for historical data.
    new_time_points_numeric (np.array): Numeric representation of future time points.
    trial_name (str): Name of the trial.
    new_dates (pd.DatetimeIndex): Dates for future predictions.
    lever (float): Rate at which to decrease or increase subject enrollment

    Returns:
    pd.DataFrame: DataFrame with predicted future enrollments.
    """
    with pm.Model() as model:
        # Define priors

        # LOGNORMAL
        # mu = 0.009619759326080623
        # sigma=1.65731844788873

        # Update mu
        # mu = mu * (1+lever)
        # r = pm.LogNormal('r', mu=mu, sigma=sigma)

        # NORMAL
        mu = 116.09264856917414
        sigma=760.2183426451379

        # Update mu
        mu = mu * (1+lever)
        r = pm.Normal('r', mu=mu, sigma=sigma)


        # Logistic growth model
        expected_enrollment = pm.Deterministic('expected_enrollment',
                                               K / (1 + ((K - enrollments[0]) / enrollments[0]) * pm.math.exp(-r * time_points_numeric)))

        # Observational model with the actual enrollment data
        observation = pm.Normal('observation', mu=expected_enrollment, sigma=1, observed=enrollments)

        # Sample from the posterior
        trace = pm.sample(1000, return_inferencedata=True)

        # Predict future enrollments
        future_enrollment = pm.Deterministic('future_enrollment',
                                             K / (1 + ((K - enrollments[0]) / enrollments[0]) * pm.math.exp(-r * new_time_points_numeric)))

        # Generate posterior predictive samples for future enrollments
        future_prediction = pm.sample_posterior_predictive(trace, var_names=['future_enrollment'])

    # Future predictions as a NumPy array
    future_predictions_array = future_prediction.posterior_predictive['future_enrollment'].values

    # Process predictions
    if future_predictions_array.size > 0:
        # Reshape to combine the chains and samples
        combined_samples = future_predictions_array.reshape(-1, future_predictions_array.shape[-1])
        mean_enrollments = combined_samples.mean(axis=0).round(0)
        enr_results = pd.DataFrame({'ISN': trial_name, 'start_date': new_dates, 'enrollments': mean_enrollments})
    else:
        enr_results = pd.DataFrame()

    return enr_results

def adjust_enrollments(df, K):
    """
    Adjust the 'enrollments' column in the DataFrame based on the sum reaching or exceeding a value K.

    Parameters:
    df (pd.DataFrame): DataFrame with columns ['enrollments', 'start_date', 'trial_name'].
    K (int): The target sum for the 'enrollments' column.

    Returns:
    pd.DataFrame: Modified DataFrame with adjusted 'enrollments'.
    """
    # Convert start_date to datetime
    df['start_date'] = pd.to_datetime(df['start_date'])

    # Sort by start_date
    sorted_df = df.sort_values(by='start_date').reset_index(drop=True)

    # Calculate cumulative sum
    sorted_df['cum_sum'] = sorted_df['enrollments'].cumsum()

    # Apply the logic
    for i, row in sorted_df.iterrows():
        if row['cum_sum'] == K:
            sorted_df.loc[i + 1:, 'enrollments'] = 0
            break
        elif row['cum_sum'] > K:
            sorted_df.loc[i, 'enrollments'] = K - (row['cum_sum'] - row['enrollments'])
            sorted_df.loc[i + 1:, 'enrollments'] = 0
            break

    # Drop the cum_sum column if not needed
    sorted_df.drop(columns=['cum_sum'], inplace=True)

    return sorted_df

In [68]:
##############
# SUBJECT ENROLLMENT COSTS FUNCTIONS
##############
def calculate_subject_enrollment_costs(trial_info, trial_name, enr_results, eac_spread, K):
    """
    Calculate the subject enrollment costs.

    Parameters:
    trial_info (pd.DataFrame): DataFrame containing trial information.
    trial_name (str): Name of the trial.
    enr_results (pd.DataFrame): DataFrame with enrollment results.
    eac_spread (pd.DataFrame): DataFrame with EAC spread data.
    K (int): The target number of enrollments.

    Returns:
    pd.DataFrame: DataFrame with calculated subject enrollment costs.
    """
    def add_business_days(start_date, num_days):
        # Convert to a date-only format if start_date includes time
        start_date = np.datetime64(start_date, 'D')
        return np.busday_offset(start_date, num_days, roll='forward')

    # Calculate patient costs accrual end date
    tdays = trial_info[trial_info['ISN'] == trial_name]['Treatment Duration'].iloc[0]
    fdays = trial_info[trial_info['ISN'] == trial_name]['Follow-up Duration'].iloc[0]
    enr_results['days'] = tdays + fdays

    # Ensure 'start_date' is in the correct datetime format and apply the function to each row
    enr_results['start_date'] = pd.to_datetime(enr_results['start_date']).dt.date
    enr_results['end_date'] = enr_results.apply(lambda row: add_business_days(row['start_date'], row['days']), axis=1)

    # Calculate per patient costs per month
    eac_pc = eac_spread[(eac_spread['ISN']==trial_name) & (eac_spread['Methodology']=='Patient Curve')]
    eac_pc['EAC_per_patient'] = eac_pc['TotalEAC'] / K
    eac_pc['EAC_per_pat_per_day'] = eac_pc['EAC_per_patient']/(tdays + fdays)

    return eac_pc

def business_days_in_month(date_str):
    """
    Count the number of business days in the month of a given date.

    Parameters:
    date_str (str): A date string in the format 'YYYY-MM-DD'.

    Returns:
    int: Number of business days in the month of the given date.
    """
    # Parse the date string
    date = pd.to_datetime(date_str)

    # Extract the year and month
    year, month = date.year, date.month

    # Define the start and end date of the month
    start_date = pd.Timestamp(year, month, 1)
    end_date = start_date + pd.offsets.MonthEnd(0)

    # Count business days
    business_days = np.busday_count(start_date.date(), end_date.date())

    return business_days

def calculate_monthly_eac_values(enr_results, eac_pc):
    """
    Calculate the EAC values for each month, cost account, and trial.

    Parameters:
    enr_results (pd.DataFrame): DataFrame containing enrollment information.
    eac_pc (pd.DataFrame): DataFrame containing EAC per patient for each cost account.

    Returns:
    pd.DataFrame: DataFrame with columns ['ISN', 'Cost Account', 'month', 'eac_value'].
    """
    # Step 1: Expand enr_results to have a row for each month per subject
    expanded_rows = []
    for _, row in enr_results.iterrows():
        start_date = pd.to_datetime(row['start_date'])
        end_date = pd.to_datetime(row['end_date']) + MonthEnd(0)  # Ensuring end of month is included
        num_months = (end_date.year - start_date.year) * 12 + end_date.month - start_date.month + 1
        monthly_dates = pd.date_range(start_date, periods=num_months, freq='MS')

        for month in monthly_dates:
            expanded_rows.append({'ISN': row['ISN'], 'enrollments': row['enrollments'], 'month': month})

    expanded_enr = pd.DataFrame(expanded_rows)

    # Step 2: Calculate business days
    expanded_enr['bus_days'] = expanded_enr['month'].apply(business_days_in_month)

    # Step 3: Cross Join with eac_pc
    expanded_enr['key'] = 1
    eac_pc['key'] = 1
    cross_joined_df = expanded_enr.merge(eac_pc, on='key').drop('key', axis=1)

    # Step 4: Calculate the Accrued EAC Value
    cross_joined_df['eac_value'] = cross_joined_df['enrollments'] * cross_joined_df['EAC_per_pat_per_day'] * cross_joined_df['bus_days']

    # Step 4: Group and Summarize
    final_df = cross_joined_df.groupby(['ISN', 'Cost Account', 'month']).agg(eac_value=('eac_value', 'sum')).reset_index()

    return final_df

def calculate_subject_costs(trial_info, subjects, trial_name, sim, trial_durations, lever):

  # Calculate planned subjects
  K = trial_info[trial_info['ISN']==trial_name]['Planned # of subjects entered treatment'].iloc[0]

  # Predict any new enrollments
  enrollments, date_strings, time_points_numeric, start_date, pred_horizon = process_dates(subjects, trial_durations, trial_name)
  new_dates, new_time_points, new_time_points_numeric = extend_time_points(date_strings[-1], pred_horizon, start_date)
  enr_results = train_and_predict_enrollments(K, enrollments, time_points_numeric, new_time_points_numeric, trial_name, new_dates, lever)

  # Add historical enrollment data
  enr_results_old = pd.DataFrame({'ISN': trial_name, 'start_date':date_strings, 'enrollments': enrollments})
  enr_results = pd.concat([enr_results_old, enr_results])

  # Ensure no extra subjects are enrolled
  enr_results = adjust_enrollments(enr_results, K)

  # Calculate monthly costs
  eac_pc = calculate_subject_enrollment_costs(trial_info, trial_name, enr_results, eac_spread, K)
  pc_costs = calculate_monthly_eac_values(enr_results, eac_pc[['Cost Account','EAC_per_patient', 'EAC_per_pat_per_day']])

  # Rename columns and add sim number
  pc_costs.columns = ['trial_name', 'cost_account', 'ds', 'value']

  pc_costs['sim'] = sim

  return pc_costs


In [70]:
##############
# RUN MONTE CARLO SIMULATION
##############
sims=1
site_events = pd.DataFrame({
    'trial_name': ['2215-CL-0201', '2215-CL-0201'],
    'event_date': [datetime.datetime(2016, 5, 1), datetime.datetime(2023, 4, 1)],
    'change': [154, -10]
})

subject_lever=.10


def run_monte_carlo_trial(n, trial_name):
  duration_results = []  # List to collect results
  subject_results = pd.DataFrame()
  site_results = pd.DataFrame()

  # Run a for-loop for n_trajectories samples with dummy variable t
  for i in range(n):
    print(i)

    # Update trial milestone uncertainty
    trial_durations = create_trial_durations_df(eac_spread, sim_start_date, data_end_date, month_diff, get_random_months)

    # Update eac uncertainty
    eac = create_eac_df(eac_spread, trial_durations, month_diff)

    # Duration Simulation
    env = simpy.Environment()
    env.process(calculate_durations(env, trial_name, trial_durations, eac, duration_results, i))
    env.run()

    # Subject Enrollment Simulation
    subject_costs = calculate_subject_costs(trial_info, subjects, trial_name, i, trial_durations, subject_lever)
    subject_results = pd.concat([subject_results, subject_costs])

    # Site Simulation
    start_date, end_date, total_eac, cost_account, planned_sites = site_inputs(eac_spread, trial_info, trials2sim[0])
    site_costs = calculate_site_costs(i, trials2sim[0], cost_account, start_date, end_date, total_eac, planned_sites, site_events)
    site_results = pd.concat([site_results, site_costs])

  # Process duration results
  duration_results = process_output(duration_results, sim_start_date)

  # Combine results
  mc_output = pd.concat([duration_results, subject_results, site_results])

  return mc_output, trial_durations, eac

mc_results, trial_durations, eac = run_monte_carlo_trial(sims, trials2sim[0])

0


  lsft_date_py = (lsft_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eac_pc['EAC_per_patient'] = eac_pc['TotalEAC'] / K
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eac_pc['EAC_per_pat_per_day'] = eac_pc['EAC_per_patient']/(tdays + fdays)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eac_pc['key'] = 1


In [None]:
##############
# VALIDATION
##############
# Group by 'cost_account' and find the max 'value' for each group
ca_sums = mc_results[mc_results['trial_name']=='2215-CL-0201'].groupby('cost_account')['value'].sum()

# Sum up these maximum values
total_sum = ca_sums.sum()

print("Sum of maximum values by cost account:", total_sum) # 89,065,632

Sum of maximum values by cost account: 89034536.62625934


In [None]:
import arviz as az
import matplotlib.pyplot as plt

# with model:
#     summary = pm.summary(trace)
#     print(summary)

with model:
    az.plot_trace(trace, figsize=(12, 10))

# with model:
#     az.plot_posterior(trace, figsize=(2, 2))



NameError: ignored