In [62]:
# Create dictionaries for storing volume ratios
# and gradients for changes in volume ratios over time.
# Both map an organ name to a list of values pooled over all patients.
ratio_dict = {}
gradient_dict = {}

# A patient/organ combination is analysed only if its MV rows contain
# exactly this many non-null 'model volume' values.
N_MODEL_VOLUMES_REQUIRED = 30

# Extract information on volume ratios and their time dependence
# (gradient from linear regression) for each organ of each patient.
#
# Group on the column *name*, not on a one-element list: from pandas 2.0 on,
# iterating over groupby(['organ']) yields 1-tuples such as ('left parotid',),
# which would become the dictionary keys and break lookups by organ name.
groupby_id = df.groupby('id')
for patient_id, df_id in groupby_id:
    groupby_organ = df_id.groupby('organ')
    for organ, df_organ in groupby_organ:
        df_organ_mv = df_organ[df_organ['mv dose'].notna()]
        df_organ_kv = df_organ[df_organ['planned clinical ct dose'].notna()]

        n_val = df_organ_mv['model volume'].count()
        if n_val != N_MODEL_VOLUMES_REQUIRED:
            continue

        # The value for "planning scan volume" is the structure volume within the scan.
        # The kV scan always contains all of the structure.
        # The MV scan may be missing a part of the structure.
        kv_volumes = df_organ_kv['planning scan volume']
        if kv_volumes.empty:
            # Fail with the offending keys instead of a bare IndexError.
            raise ValueError(
                f"No kV row found for id={patient_id!r}, organ={organ!r}"
            )
        v0 = kv_volumes.iloc[0]
        r = df_organ_mv['planning scan volume'] / v0
        ratio_dict.setdefault(organ, []).extend(r.to_list())

        # Fit a straight line to the ratio of model volume to planning scan
        # volume as a function of fraction number.
        # (Rows in the dataframe are ordered by time.)
        # NOTE(review): x has n_val points while y has one point per MV row;
        # the two lengths agree only if no MV row lacks a 'model volume'.
        x = range(1, n_val + 1)
        y = df_organ_mv['model volume'] / df_organ_mv['planning scan volume']
        p = np.polyfit(x, y, deg=1)
        gradient_dict.setdefault(organ, []).append(p[0])

In [97]:
# Total number of volume-ratio values accumulated for the left parotid across all patients.
len(ratio_dict['left parotid'])

5160

In [153]:
import pandas as pd

# Read the structure table from disk.
df = pd.read_csv("structure_time_dependent.csv")

# Separate the rows by which dose column is populated:
# rows with a 'planned clinical ct dose' form the kV table (indexed by
# patient id and organ), rows with an 'mv dose' form the MV table.
has_kv_dose = df['planned clinical ct dose'].notna()
has_mv_dose = df['mv dose'].notna()
df_kv = df[has_kv_dose].set_index(['id', 'organ']).copy()
df_mv = df[has_mv_dose].copy()

# Keep only the patient/organ combinations that have exactly 30 non-null
# 'model volume' values; every other combination is dropped.
df_mv = df_mv.groupby(['id', 'organ']).filter(
    lambda grp: grp['model volume'].count() == 30
)

# Compute volume ratio for each MV entry
def get_volume_ratio(row):
    """Return a row's planning scan volume relative to the matching kV volume.

    Parameters
    ----------
    row : pandas.Series
        One row of the MV table. Its 'id', 'organ' and
        'planning scan volume' entries are read.

    Returns
    -------
    scalar
        ``row['planning scan volume']`` divided by the 'planning scan volume'
        found in the module-level ``df_kv`` (indexed by ('id', 'organ')) for
        the same patient and organ.

    Raises
    ------
    KeyError
        If ``df_kv`` has no entry for this (id, organ) combination.
    """
    v0 = df_kv.loc[(row['id'], row['organ']), 'planning scan volume']
    # When df_kv holds more than one row for this (id, organ), .loc returns a
    # Series instead of a scalar. Use the first value so the result stays a
    # scalar and the caller's column assignment keeps working.
    if isinstance(v0, pd.Series):
        v0 = v0.iloc[0]
    return row['planning scan volume'] / v0
# Evaluate the helper once per MV row and store the results as a new column.
df_mv['volume ratio'] = df_mv.apply(get_volume_ratio, axis='columns')

# Make DataFrame of gradients for each patient and organ
def get_gradient(group):
    """Return the slope of a straight-line fit for one patient/organ group.

    The quantity fitted is 'model volume' divided by 'planning scan volume',
    taken row by row from ``group``. The abscissa is the 1-based position of
    each row within ``group``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one group; must provide the 'model volume' and
        'planning scan volume' columns.

    Returns
    -------
    The first-order coefficient of the degree-1 ``np.polyfit`` fit.
    """
    positions = range(1, len(group) + 1)
    volume_ratio = group['model volume'] / group['planning scan volume']
    slope, _intercept = np.polyfit(positions, volume_ratio, deg=1)
    return slope
# One row per (id, organ) group: the group keys become ordinary columns and
# the unnamed result column (label 0) is renamed to 'gradient'.
df_gradients = (
    df_mv.groupby(['id', 'organ'])
    .apply(get_gradient)
    .reset_index()
    .rename(columns={0: 'gradient'})
)