In [76]:
import config
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
import datetime
from utils.data_exploration_utils import drop_unnamedcolumn,  investigate_data, plot_hist, scatterplot, missing_from_df

In [77]:
# Run date; used to name the default output directory
today = datetime.date.today()

base_dir = config.RAW_DATA_PATH
proc_dir = config.PROC_DATA_PATH

# Set to a sub-folder name to write into that directory instead of a dated one
folder = None

# Output directory: explicit folder if given, otherwise "<date>_data_exploration"
save_dir = os.path.join(
    proc_dir,
    folder if folder is not None else f"{today}_data_exploration",
)
os.makedirs(save_dir, exist_ok=True)

# Raw inputs: questionnaire export and the knee annotation table
file_name = 'questionnaires_raw.csv'
df = pd.read_csv(os.path.join(base_dir, file_name))
kl = pd.read_csv(os.path.join(base_dir, 'brul_knee_annotations.csv'))

# Data Cleaning

In [78]:
# Best-effort clean-up via the project helper drop_unnamedcolumn
# (presumably removes 'Unnamed: N' index columns left by a CSV export — confirm in utils).
# On any failure the error is only printed and df is left as loaded.
try:
    df = drop_unnamedcolumn(df)
except Exception as e:
    print(f"Error dropping unnamed columns: {e}")

In [79]:
# Derive the visit number from the event name prefix ('first_...' -> 1, 'second_...' -> 2).
event_prefix = df['redcap_event_name'].str.split('_').str[0]
visit = event_prefix.map({'first': 1, 'second': 2})

# Series.map yields NaN for anything not in the dict; that would silently turn the
# column into floats and produce keys such as 'IM0001_nan' or 'IM0001_1.0'.
# Fail loudly instead so that id_visit stays a reliable key.
unmapped_events = df.loc[visit.isna(), 'redcap_event_name'].unique()
if len(unmapped_events) > 0:
    raise ValueError(
        f"Unexpected redcap_event_name values: {sorted(map(str, unmapped_events))}"
    )

df['visit'] = visit.astype(int)

# Patient-visit key, e.g. 'IM0001_1'
df['id_visit'] = df['record_id'].astype(str) + '_' + df['visit'].astype(str)

In [81]:
# Run the project's data-quality helper on the full questionnaire table, keyed by id_visit.
# Judging by the printed output it reports per-column counts, shows the rows that
# contain NaN and checks for duplicate id_col values.
# NOTE(review): what exactly is returned into df_nanids is not visible here — confirm in utils.
df_nanids = investigate_data(df, id_col='id_visit')

Column 'comi_knee_pain' has missing values: 209.0 out of 244
Column 'comi_knee_2' has missing values: 209.0 out of 244
Column 'comi_knee_sport' has missing values: 209.0 out of 244
Column 'comi_knee_3' has missing values: 209.0 out of 244
Column 'comi_knee_4' has missing values: 209.0 out of 244
Column 'comi_knee_5' has missing values: 209.0 out of 244
Column 'comi_knee_6' has missing values: 209.0 out of 244
Column 'comi_knee_7a' has missing values: 143.0 out of 244
Column 'comi_knee_compldescr' has missing values: 8 out of 244
Column 'comi_knee_7b' has missing values: 8.0 out of 244
Column 'comi_knee_8' has missing values: 143.0 out of 244
Column 'comi_knee_otherjoint' has missing values: 7 out of 244
Column 'comi_knee_9' has missing values: 143.0 out of 244
Column 'comi_knee_10' has missing values: 143.0 out of 244
Column 'comi_knee_painkiller' has missing values: 171.0 out of 244
Column 'oks_q1' has missing values: 221.0 out of 244
Column 'oks_q2' has missing values: 221.0 out of 2

Unnamed: 0,record_id,redcap_event_name,comi_knee_pain,comi_knee_2,comi_knee_sport,comi_knee_3,comi_knee_4,comi_knee_5,comi_knee_6,comi_knee_7a,...,koos_sp3,koos_sp4,koos_sp5,koos_q1,koos_q2,koos_q3,koos_q4,proms_complete,visit,id_visit
0,IM0001,first_visit_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,1,IM0001_1
1,IM0001,second_visit_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,2,IM0001_2
2,IM0002,first_visit_arm_1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,1,IM0002_1
3,IM0002,second_visit_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,2,IM0002_2
4,IM0004,first_visit_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2,1,IM0004_1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,IM3021,second_visit_arm_1,,,,,,,,,...,,,,,,,,0,2,IM3021_2
240,IM3022,first_visit_arm_1,5.0,7.5,10.0,10.0,7.5,10.0,0.0,,...,4.0,4.0,4.0,3.0,3.0,4.0,4.0,2,1,IM3022_1
241,IM3022,second_visit_arm_1,2.0,2.5,5.0,10.0,5.0,10.0,0.0,,...,3.0,3.0,4.0,4.0,2.0,2.0,2.0,2,2,IM3022_2
242,IM3023,first_visit_arm_1,7.0,7.5,7.5,10.0,5.0,7.5,999.0,,...,4.0,3.0,4.0,3.0,4.0,3.0,3.0,2,1,IM3023_1



No duplicate rows based on id_visit.


In [82]:
# Frequency of each proms_complete status value
# (presumably the REDCap form-completion code — TODO confirm what 0 and 2 stand for).
df['proms_complete'].value_counts()

proms_complete
2    220
0     24
Name: count, dtype: int64

In [83]:
# All PROM item columns, in questionnaire order: COMI, OKS, UCLA, FJS, KOOS.
# COMI item names are irregular and therefore spelled out; the other instruments
# use numbered items and are generated.
l = (
    [
        'comi_knee_pain', 'comi_knee_2', 'comi_knee_sport',
        'comi_knee_3', 'comi_knee_4', 'comi_knee_5', 'comi_knee_6',
        'comi_knee_7a', 'comi_knee_compldescr', 'comi_knee_7b',
        'comi_knee_8', 'comi_knee_otherjoint',
        'comi_knee_9', 'comi_knee_10', 'comi_knee_painkiller',
    ]
    + [f'oks_q{item}' for item in range(1, 13)]
    + ['ucla']
    + [f'fjs_q{item}' for item in range(1, 13)]
    + [
        f'koos_{prefix}{item}'
        # (item prefix, number of items with that prefix)
        for prefix, n_items in (('s', 7), ('p', 9), ('a', 17), ('sp', 5), ('q', 4))
        for item in range(1, n_items + 1)
    ]
)

# Rows in which every PROM column is NaN
no_prom_mask = df[l].isna().all(axis=1)
print(f"Number of samples where no PROM values available: {int(no_prom_mask.sum())}")

Number of samples where no PROM values available: 22


In [84]:
# Remove samples where no PROM values are available.
# .copy() makes the result an independent frame, so the column assignments done on
# df further down do not raise SettingWithCopyWarning.
all_prom_missing = df[l].isna().all(axis=1)
df = df.loc[~all_prom_missing].copy()

# Questionnaire

## Core Outcome Measures Index (COMI)

In [88]:
# Identifier / bookkeeping columns carried into every per-questionnaire subset
base_col = ['record_id', 'redcap_event_name', 'proms_complete', 'id_visit']

# COMI item columns
# NOTE(review): comi_knee_6 shows 999.0 in some rows of the outputs — presumably a
# sentinel code rather than a real answer; verify before using the column.
comi_col = [
    'comi_knee_pain', 'comi_knee_2', 'comi_knee_sport',
    'comi_knee_3', 'comi_knee_4', 'comi_knee_5', 'comi_knee_6',
    'comi_knee_7a', 'comi_knee_compldescr', 'comi_knee_7b',
    'comi_knee_8', 'comi_knee_otherjoint',
    'comi_knee_9', 'comi_knee_10', 'comi_knee_painkiller',
]

# Independent copy so that edits to the subset never touch df
comi = df.loc[:, [*base_col, *comi_col]].copy()
display(comi.head())

Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,comi_knee_pain,comi_knee_2,comi_knee_sport,comi_knee_3,comi_knee_4,comi_knee_5,comi_knee_6,comi_knee_7a,comi_knee_compldescr,comi_knee_7b,comi_knee_8,comi_knee_otherjoint,comi_knee_9,comi_knee_10,comi_knee_painkiller
0,IM0001,first_visit_arm_1,2,IM0001_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
1,IM0001,second_visit_arm_1,2,IM0001_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
2,IM0002,first_visit_arm_1,2,IM0002_1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
3,IM0002,second_visit_arm_1,2,IM0002_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
4,IM0004,first_visit_arm_1,2,IM0004_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,


In [89]:
# Answer frequencies for COMI item 7a; value_counts leaves NaN rows out by default
comi['comi_knee_7a'].value_counts()

comi_knee_7a
0.0    135
1.0      8
Name: count, dtype: int64

In [90]:
# Data-quality report (project helper) for the COMI subset, keyed by id_visit;
# the returned value is not needed and is discarded into `_`.
_ = investigate_data(comi, id_col='id_visit')

Column 'comi_knee_pain' has missing values: 209.0 out of 222
Column 'comi_knee_2' has missing values: 209.0 out of 222
Column 'comi_knee_sport' has missing values: 209.0 out of 222
Column 'comi_knee_3' has missing values: 209.0 out of 222
Column 'comi_knee_4' has missing values: 209.0 out of 222
Column 'comi_knee_5' has missing values: 209.0 out of 222
Column 'comi_knee_6' has missing values: 209.0 out of 222
Column 'comi_knee_7a' has missing values: 143.0 out of 222
Column 'comi_knee_compldescr' has missing values: 8 out of 222
Column 'comi_knee_7b' has missing values: 8.0 out of 222
Column 'comi_knee_8' has missing values: 143.0 out of 222
Column 'comi_knee_otherjoint' has missing values: 7 out of 222
Column 'comi_knee_9' has missing values: 143.0 out of 222
Column 'comi_knee_10' has missing values: 143.0 out of 222
Column 'comi_knee_painkiller' has missing values: 171.0 out of 222

Columns with NaN values:  ['comi_knee_pain', 'comi_knee_2', 'comi_knee_sport', 'comi_knee_3', 'comi_kn

Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,comi_knee_pain,comi_knee_2,comi_knee_sport,comi_knee_3,comi_knee_4,comi_knee_5,comi_knee_6,comi_knee_7a,comi_knee_compldescr,comi_knee_7b,comi_knee_8,comi_knee_otherjoint,comi_knee_9,comi_knee_10,comi_knee_painkiller
0,IM0001,first_visit_arm_1,2,IM0001_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
1,IM0001,second_visit_arm_1,2,IM0001_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
2,IM0002,first_visit_arm_1,2,IM0002_1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
3,IM0002,second_visit_arm_1,2,IM0002_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
4,IM0004,first_visit_arm_1,2,IM0004_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,IM3021,first_visit_arm_1,2,IM3021_1,4.0,5.0,10.0,5.0,5.0,10.0,0.0,,,,,,,,1.0
240,IM3022,first_visit_arm_1,2,IM3022_1,5.0,7.5,10.0,10.0,7.5,10.0,0.0,,,,,,,,2.0
241,IM3022,second_visit_arm_1,2,IM3022_2,2.0,2.5,5.0,10.0,5.0,10.0,0.0,,,,,,,,1.0
242,IM3023,first_visit_arm_1,2,IM3023_1,7.0,7.5,7.5,10.0,5.0,7.5,999.0,,,,,,,,1.0



No duplicate rows based on id_visit.


## Oxford Knee Score (OKS)

In [91]:
# OKS item columns: oks_q1 ... oks_q12
oks_col = [f'oks_q{item}' for item in range(1, 13)]

# Independent copy of the identifier columns plus the OKS items
oks = df.loc[:, [*base_col, *oks_col]].copy()
display(oks.head())

Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,oks_q1,oks_q2,oks_q3,oks_q4,oks_q5,oks_q6,oks_q7,oks_q8,oks_q9,oks_q10,oks_q11,oks_q12
0,IM0001,first_visit_arm_1,2,IM0001_1,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
1,IM0001,second_visit_arm_1,2,IM0001_2,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
2,IM0002,first_visit_arm_1,2,IM0002_1,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
3,IM0002,second_visit_arm_1,2,IM0002_2,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
4,IM0004,first_visit_arm_1,2,IM0004_1,4.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,4.0


In [92]:
# Data-quality report (project helper) for the OKS subset, keyed by id_visit.
# NOTE(review): the helper prints "has missing values: 221.0 out of 222" while its own
# NaN table lists 1 per column — the first figure looks like the non-null count.
# Confirm in utils.data_exploration_utils before quoting these numbers.
_ = investigate_data(oks, id_col='id_visit')

Column 'oks_q1' has missing values: 221.0 out of 222
Column 'oks_q2' has missing values: 221.0 out of 222
Column 'oks_q3' has missing values: 221.0 out of 222
Column 'oks_q4' has missing values: 221.0 out of 222
Column 'oks_q5' has missing values: 221.0 out of 222
Column 'oks_q6' has missing values: 221.0 out of 222
Column 'oks_q7' has missing values: 221.0 out of 222
Column 'oks_q8' has missing values: 221.0 out of 222
Column 'oks_q9' has missing values: 221.0 out of 222
Column 'oks_q10' has missing values: 221.0 out of 222
Column 'oks_q11' has missing values: 221.0 out of 222
Column 'oks_q12' has missing values: 221.0 out of 222

Columns with NaN values:  ['oks_q1', 'oks_q2', 'oks_q3', 'oks_q4', 'oks_q5', 'oks_q6', 'oks_q7', 'oks_q8', 'oks_q9', 'oks_q10', 'oks_q11', 'oks_q12']

NaN values per column:
oks_q1     1
oks_q2     1
oks_q3     1
oks_q4     1
oks_q5     1
oks_q6     1
oks_q7     1
oks_q8     1
oks_q9     1
oks_q10    1
oks_q11    1
oks_q12    1
dtype: int64


Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,oks_q1,oks_q2,oks_q3,oks_q4,oks_q5,oks_q6,oks_q7,oks_q8,oks_q9,oks_q10,oks_q11,oks_q12
224,IM3012,first_visit_arm_1,2,IM3012_1,,,,,,,,,,,,



No duplicate rows based on id_visit.


## UCLA Activity Score (Aktivitätsindex)

In [93]:
# Independent copy of the identifier columns plus the single UCLA score column
ucla_view_cols = [*base_col, 'ucla']
ucla = df.loc[:, ucla_view_cols].copy()
display(ucla.head())

Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,ucla
0,IM0001,first_visit_arm_1,2,IM0001_1,9.0
1,IM0001,second_visit_arm_1,2,IM0001_2,8.0
2,IM0002,first_visit_arm_1,2,IM0002_1,9.0
3,IM0002,second_visit_arm_1,2,IM0002_2,8.0
4,IM0004,first_visit_arm_1,2,IM0004_1,7.0


In [94]:
# Data-quality report (project helper) for the UCLA subset, keyed by id_visit;
# the returned value is discarded.
_ = investigate_data(ucla, id_col='id_visit')

Column 'ucla' has missing values: 221.0 out of 222

Columns with NaN values:  ['ucla']

NaN values per column:
ucla    1
dtype: int64


Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,ucla
224,IM3012,first_visit_arm_1,2,IM3012_1,



No duplicate rows based on id_visit.


## Forgotten Joint Score (FJS-12)

In [95]:
# FJS-12 item columns: fjs_q1 ... fjs_q12
fjs_col = [f'fjs_q{item}' for item in range(1, 13)]

# Independent copy of the identifier columns plus the FJS items
fjs = df.loc[:, [*base_col, *fjs_col]].copy()
display(fjs.head())

Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,fjs_q1,fjs_q2,fjs_q3,fjs_q4,fjs_q5,fjs_q6,fjs_q7,fjs_q8,fjs_q9,fjs_q10,fjs_q11,fjs_q12
0,IM0001,first_visit_arm_1,2,IM0001_1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0
1,IM0001,second_visit_arm_1,2,IM0001_2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,IM0002,first_visit_arm_1,2,IM0002_1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0
3,IM0002,second_visit_arm_1,2,IM0002_2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0
4,IM0004,first_visit_arm_1,2,IM0004_1,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0


In [96]:
# Data-quality report (project helper) for the FJS subset, keyed by id_visit;
# the returned value is discarded.
_ = investigate_data(fjs, id_col='id_visit')


Columns with NaN values:  []

NaN values per column:
Series([], dtype: int64)
No duplicate rows based on id_visit.


## Knee Injury and Osteoarthritis Outcome Score (KOOS)

In [97]:
# KOOS item columns, generated per item prefix: (prefix, number of items)
koos_item_groups = (('s', 7), ('p', 9), ('a', 17), ('sp', 5), ('q', 4))
koos_col = [
    f'koos_{prefix}{item}'
    for prefix, n_items in koos_item_groups
    for item in range(1, n_items + 1)
]

# Independent copy of the identifier columns plus the KOOS items
koos = df.loc[:, [*base_col, *koos_col]].copy()
display(koos.head())

Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,koos_s1,koos_s2,koos_s3,koos_s4,koos_s5,koos_s6,...,koos_a17,koos_sp1,koos_sp2,koos_sp3,koos_sp4,koos_sp5,koos_q1,koos_q2,koos_q3,koos_q4
0,IM0001,first_visit_arm_1,2,IM0001_1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,IM0001,second_visit_arm_1,2,IM0001_2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,IM0002,first_visit_arm_1,2,IM0002_1,0.0,0.0,0.0,4.0,4.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,IM0002,second_visit_arm_1,2,IM0002_2,0.0,1.0,0.0,4.0,4.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,IM0004,first_visit_arm_1,2,IM0004_1,0.0,3.0,0.0,4.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [98]:
# Data-quality report (project helper) for the KOOS subset, keyed by id_visit;
# the returned value is discarded.
_ = investigate_data(koos, id_col='id_visit')

Column 'koos_p1' has missing values: 221.0 out of 222
Column 'koos_p9' has missing values: 221.0 out of 222
Column 'koos_a4' has missing values: 221.0 out of 222
Column 'koos_a13' has missing values: 221.0 out of 222

Columns with NaN values:  ['koos_p1', 'koos_p9', 'koos_a4', 'koos_a13']

NaN values per column:
koos_p1     1
koos_p9     1
koos_a4     1
koos_a13    1
dtype: int64


Unnamed: 0,record_id,redcap_event_name,proms_complete,id_visit,koos_s1,koos_s2,koos_s3,koos_s4,koos_s5,koos_s6,...,koos_a17,koos_sp1,koos_sp2,koos_sp3,koos_sp4,koos_sp5,koos_q1,koos_q2,koos_q3,koos_q4
224,IM3012,first_visit_arm_1,2,IM3012_1,3.0,3.0,0.0,4.0,0.0,2.0,...,1.0,4.0,4.0,4.0,3.0,4.0,4.0,3.0,3.0,3.0



No duplicate rows based on id_visit.


# KL Scores

The issue is that the questionnaire is recorded per patient and per visit, but not per knee. A patient with one healthy and one unhealthy knee may have answered the questionnaire with only the bad knee in mind; if we then use those answers for the healthy knee as well, we introduce a bias into the data. Therefore we need to investigate:
* How many patients have a large gap between the KL scores of their left and right knee?
* What effect does this have on their questionnaire data?
* What do we do about this?
* Was this already an issue in our low-granularity data?

In [100]:
# Build the patient-visit key from the first two '_'-separated parts of the
# annotation name (e.g. 'IM1578_1_right' -> 'IM1578_1')
name_parts = kl['name'].str.split('_')
kl['id_visit'] = name_parts.str[0] + '_' + name_parts.str[1]

In [110]:
# Get id_visit where KL-Score difference is larger than 2
kl_diffs = kl.groupby('id_visit')['KL-Score'].agg(['min', 'max'])
kl_diffs['diff'] = kl_diffs['max'] - kl_diffs['min']
kl_diffs_index = kl_diffs[kl_diffs['diff'] > 2].index

kl[kl['id_visit'].isin(kl_diffs_index)]

Unnamed: 0,name,KL-Score,Osteophytes,Joint-Space-Narrowing,id_visit
159,IM1578_1_right,3,2,3,IM1578_1
160,IM1578_1_left,0,0,1,IM1578_1
220,IM2058_1_right,3,2,1,IM2058_1
221,IM2058_1_left,0,0,0,IM2058_1
222,IM2058_2_right,3,2,1,IM2058_2
223,IM2058_2_left,0,0,0,IM2058_2
253,IM2523_1_right,0,0,0,IM2523_1
254,IM2523_1_left,3,2,1,IM2523_1
255,IM2523_2_right,0,0,1,IM2523_2
256,IM2523_2_left,3,2,1,IM2523_2


In [113]:
# Visits with a KL-Score difference of at least 2, largest first.
# NOTE(review): this uses >= 2, whereas kl_diffs_index was built with > 2.
at_least_two = kl_diffs['diff'].ge(2)
kl_diffs.loc[at_least_two].sort_values(by='diff', ascending=False)

Unnamed: 0_level_0,min,max,diff
id_visit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
IM3007_2,0,4,4
IM2576_2,0,3,3
IM1578_1,0,3,3
IM2058_1,0,3,3
IM2523_1,0,3,3
IM2058_2,0,3,3
IM2576_1,0,3,3
IM3016_2,0,3,3
IM3010_2,0,3,3
IM3022_2,0,3,3


In [114]:
# Questionnaire row for IM3007_2, the visit with the largest KL-Score difference (4)
df.loc[df['id_visit'].eq('IM3007_2')]

Unnamed: 0,record_id,redcap_event_name,comi_knee_pain,comi_knee_2,comi_knee_sport,comi_knee_3,comi_knee_4,comi_knee_5,comi_knee_6,comi_knee_7a,...,koos_sp3,koos_sp4,koos_sp5,koos_q1,koos_q2,koos_q3,koos_q4,proms_complete,visit,id_visit
217,IM3007,second_visit_arm_1,3.0,2.5,2.5,2.5,5.0,2.5,999.0,,...,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2,2,IM3007_2


# Prep & Save Dataframe

In [68]:
# Remove COMI columns for now.
# Done before the fill so that -1 is only written into columns that are kept
# (the COMI block appears to contain free-text fields such as comi_knee_compldescr —
# confirm). errors='ignore' keeps the cell re-runnable once the columns are gone.
df = df.drop(columns=comi_col, errors='ignore')

# Replace NaN values with -1 in the remaining PROM columns
kept_prom_cols = [col for col in l if col in df.columns]
df[kept_prom_cols] = df[kept_prom_cols].fillna(-1)



In [69]:
# Show the prepared frame (COMI columns removed, remaining PROM NaNs set to -1)
df

Unnamed: 0,record_id,redcap_event_name,oks_q1,oks_q2,oks_q3,oks_q4,oks_q5,oks_q6,oks_q7,oks_q8,...,koos_sp1,koos_sp2,koos_sp3,koos_sp4,koos_sp5,koos_q1,koos_q2,koos_q3,koos_q4,proms_complete
0,IM0001,first_visit_arm_1,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
1,IM0001,second_visit_arm_1,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,IM0002,first_visit_arm_1,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3,IM0002,second_visit_arm_1,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,IM0004,first_visit_arm_1,4.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,IM3021,first_visit_arm_1,1.0,4.0,4.0,3.0,3.0,3.0,2.0,4.0,...,3.0,4.0,4.0,4.0,4.0,3.0,2.0,2.0,3.0,2
240,IM3022,first_visit_arm_1,1.0,4.0,2.0,4.0,3.0,0.0,1.0,3.0,...,4.0,4.0,4.0,4.0,4.0,3.0,3.0,4.0,4.0,2
241,IM3022,second_visit_arm_1,2.0,3.0,3.0,4.0,3.0,3.0,1.0,2.0,...,1.0,3.0,3.0,3.0,4.0,4.0,2.0,2.0,2.0,2
242,IM3023,first_visit_arm_1,0.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,...,3.0,4.0,4.0,3.0,4.0,3.0,4.0,3.0,3.0,2
