# Bar plot of max change during feature trajectory

### Using feature values from all timepoints, find the pair of timepoints with the greatest change between them

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the rescaled feature table.
df_all = pd.read_csv('../Output/Submission-Long/features/Features_Rescaled.csv')

# Keep only manually contoured rows and discard the first-order min/max
# features; both conditions are applied as a single row mask.
df_all = df_all.loc[
    (df_all['ContourType'] == 'Manual')
    & ~df_all['Feature'].isin(['firstorder_Minimum', 'firstorder_Maximum'])
]
df_result = pd.DataFrame()

# Patient and feature identifiers that remain after filtering.
patIDs = pd.unique(df_all['PatID'])
fts = pd.unique(df_all['Feature'])

# Reduce to the columns used below and keep the first row for every
# (patient, feature, fraction) combination.
df_all = df_all[['PatID', 'Feature', 'Fraction', 'FeatureValue']].drop_duplicates(
    subset=['PatID', 'Feature', 'Fraction'], keep='first'
)

In [None]:
# Loop through all patients and feature trajectories and calculate the
# absolute change in feature value relative to the trajectory's baseline.
#
# The baseline is the value at the lowest fraction present for that
# patient/feature (rows are sorted by 'Fraction' and the first one is used).
# Per-trajectory frames are collected in a list and concatenated once, which
# avoids re-copying the growing result on every iteration.

frac1_frames = []

for patID in patIDs:
    df_pat = df_all[df_all['PatID'] == patID]
    for ft in fts:
        df_ft = df_pat[df_pat['Feature'] == ft].sort_values('Fraction')
        if df_ft.empty:
            # patIDs x fts is a full cross product, so a patient may have no
            # rows for a given feature; there is no baseline to compare to.
            continue
        baseline = df_ft['FeatureValue'].iloc[0]
        df_ft = df_ft.assign(AbsChange=(df_ft['FeatureValue'] - baseline).abs())
        frac1_frames.append(df_ft)

if frac1_frames:
    df_frac1_comp = pd.concat(frac1_frames)
else:
    # Nothing to compare: keep the expected columns so later cells still work.
    df_frac1_comp = pd.DataFrame(columns=list(df_all.columns) + ['AbsChange'])

# Drop the fraction-1 rows; only changes at the other fractions are of interest.
df_frac1_comp = df_frac1_comp[df_frac1_comp['Fraction'] != 1]


df_frac1_comp

In [None]:
# Loop over all patients and features and keep the timepoint (row) with the
# highest absolute change.
#
# Trajectories with no rows (or only NaN changes) contribute nothing instead
# of raising. When several timepoints tie for the maximum, the first one in
# df_frac1_comp row order is kept, so the result is deterministic.
max_change_rows = []

for patID in patIDs:
    df_pat = df_frac1_comp[df_frac1_comp['PatID'] == patID]
    for ft in fts:
        df_ft = df_pat[df_pat['Feature'] == ft]
        # max() of an empty or all-NaN column is NaN, which matches no row,
        # so the selection below is simply empty in that case.
        max_change = df_ft['AbsChange'].max()
        df_max_row = df_ft[df_ft['AbsChange'] == max_change].head(1)
        if not df_max_row.empty:
            max_change_rows.append(df_max_row)

if max_change_rows:
    # Single concat instead of growing the frame inside the loop.
    df_max_abs_change = pd.concat(max_change_rows)
else:
    df_max_abs_change = pd.DataFrame(columns=df_frac1_comp.columns)
        

In [None]:
# Display the rows holding the highest absolute change per patient and feature.
df_max_abs_change

In [None]:
# Summary of the max-change table: number of distinct patients, features and
# fractions, followed by the table's (rows, columns) shape.
for column in ('PatID', 'Feature', 'Fraction'):
    print(df_max_abs_change[column].nunique())
print(df_max_abs_change.shape)

In [None]:
# Tally how many times each fraction is the max-change timepoint; value_counts
# lists the most frequent fraction first.
fraction_counts = df_max_abs_change['Fraction'].value_counts()
print(fraction_counts)

In [None]:
# Loop over all patients and features and, for every pair of fractions
# (fracA < fracB), record the absolute change in feature value between them.
#
# Pairs are only formed from fractions that actually exist for the given
# patient/feature, so a trajectory with a missing fraction no longer raises.
# Rows are gathered in a list and turned into a DataFrame once, rather than
# concatenating inside the innermost loop.
fracAB_rows = []
fractions = df_all['Fraction'].unique()

for patID in patIDs:
    df_pat = df_all[df_all['PatID'] == patID]
    for ft in fts:
        df_ft = df_pat[df_pat['Feature'] == ft]
        # Fraction -> feature value lookup; if a fraction appears more than
        # once, the first occurrence is used.
        df_ft = df_ft.drop_duplicates(subset='Fraction', keep='first')
        frac_values = dict(zip(df_ft['Fraction'], df_ft['FeatureValue']))

        for fracA in fractions:
            if fracA not in frac_values:
                continue
            fracA_val = frac_values[fracA]

            for fracB in fractions:
                if fracB > fracA and fracB in frac_values:
                    fracB_val = frac_values[fracB]
                    abs_change = abs(fracA_val - fracB_val)
                    fracAB_rows.append({
                        'PatID': patID,
                        'Feature': ft,
                        'Fractions': f'{fracA}-{fracB}',
                        'AbsChange': abs_change,
                    })

# Explicit columns keep the frame well-formed even when no pair was found.
df_fracAB_comp = pd.DataFrame(
    fracAB_rows, columns=['PatID', 'Feature', 'Fractions', 'AbsChange']
)

df_fracAB_comp

In [None]:
# For each PatID and Feature, get the fraction pair with the highest abs change.
#
# transform('max') broadcasts every group's maximum back onto its rows, so the
# selection is a plain row mask and the PatID/Feature columns are never passed
# through groupby.apply (operating on grouping columns there is deprecated).
group_max = df_fracAB_comp.groupby(['PatID', 'Feature'])['AbsChange'].transform('max')
# Compare as arrays so the mask does not depend on the frame's index labels.
is_group_max = df_fracAB_comp['AbsChange'].to_numpy() == group_max.to_numpy()
df_max_changeAB = (
    df_fracAB_comp[is_group_max]
    # Stable sort: groups ordered by key, original row order kept within a group.
    .sort_values(['PatID', 'Feature'], kind='stable')
    # On ties, keep the first pair encountered for that patient/feature.
    .drop_duplicates(subset=['PatID', 'Feature'], keep='first')
    .reset_index(drop=True)
)

In [None]:
# Count how often each fraction pair is the maximum-change pair.
df_max_changeAB['Fractions'].value_counts()

In [None]:
# Bar plot of how often each fraction pair shows the highest abs change.
df_bar = df_max_changeAB['Fractions'].value_counts().reset_index()
df_bar.columns = ['Fractions', 'Count']
# Split the 'A-B' label into its two fractions and convert them to numbers so
# the ordering below is numeric ('2' before '10') rather than lexicographic.
df_bar['fracA'] = pd.to_numeric(df_bar['Fractions'].apply(lambda x: x.split('-')[0]))
df_bar['fracB'] = pd.to_numeric(df_bar['Fractions'].apply(lambda x: x.split('-')[1]))

# Bars are drawn in row order, so sort by first then second fraction.
df_bar = df_bar.sort_values(['fracA', 'fracB'], ascending=[True, True])
sns.set(style='whitegrid')
plt.figure(figsize=(10, 6))
# barplot horizontal
sns.barplot(x='Count', y='Fractions', data=df_bar, color='#31D574')

plt.xlabel('Count', fontsize=20)
plt.ylabel('Fraction Comparison', fontsize=20)
plt.yticks(fontsize=15)
plt.xticks(fontsize=15)
plt.title('Maximum Change in Feature Value Between Fractions', fontsize=24, pad=20)
# Save before show(): the figure may be cleared once it has been displayed.
plt.savefig('./bar_max_change.png', dpi=300)
plt.show()