# Creating Functional Measures Groups

Large swaths of what was measured in OAI were functional measures.  This notebook will, piece by piece, sort out each measure and tag the questions associated with that measure.

In [None]:
import pickle
import numpy as np
import pandas as pd

In [None]:
# Created in 'Parse the VG_Form.pdf for OAI variable categories and sources' notebook
# NOTE: unpickling can execute arbitrary code; only load pickles produced by that notebook.
# The `with` blocks close each file handle as soon as its frame has been loaded.
with open('oai_vars_labels_sources.pkl', 'rb') as labels_file:
    vars_df = pickle.load(labels_file)
with open('oai_vars_categories_subcategories.pkl', 'rb') as categories_file:
    vars_cat_df = pickle.load(categories_file)

In [None]:
# Utility functions
def get_variables_by_category(category, subcategory=None):
    """Return the rows of vars_cat_df that belong to a Category (and optionally a Subcategory).

    category: value matched exactly against the Category column.
    subcategory: value matched exactly against the Subcategory column; any
        falsy value (None, '') means "do not filter on subcategory".
    """
    selected = vars_cat_df.Category == category
    if subcategory:
        selected = selected & (vars_cat_df.Subcategory == subcategory)
    return vars_cat_df[selected]

def get_variables_by_category_list(categories):
    """Return the concatenated vars_cat_df rows for every category in `categories`.

    Each category is looked up with get_variables_by_category (no subcategory
    filter) and the resulting frames are stacked in the order given.
    """
    per_category_frames = []
    for category_name in categories:
        per_category_frames.append(get_variables_by_category(category_name))
    return pd.concat(per_category_frames)


def print_variables(cat, subcat):
    """Print the first occurrence of each variable label within a cat/subcat.

    Prints a 'cat: subcat' header, then the Label column obtained by joining the
    unique Variable names of that cat/subcat onto vars_df. 'Left '/'Right ' in
    the labels are rewritten to 'L/R ' so that paired left/right questions
    collapse into one line when duplicate labels are dropped.
    """
    print('\n' + cat + ': ' + subcat)
    unique_vars = get_variables_by_category(cat, subcat).Variable.drop_duplicates()
    labelled = unique_vars.to_frame().set_index('Variable').join(vars_df.set_index('Variable'))
    # Normalise the side wording before de-duplicating on the label text
    labelled['Label'] = labelled.Label.str.replace('Left ', 'L/R ').str.replace('Right ', 'L/R ')
    labelled = labelled.drop_duplicates(subset=['Label'])
    print(labelled['Label'])

def metric_series(label, count):
    """Return [label + '1', label + '2', ..., label + str(count)].

    A count of zero (or less) yields an empty list.
    """
    numbered = []
    for suffix in range(1, count + 1):
        numbered.append(label + str(suffix))
    return numbered

def swap_l2r(base, char_idx):
    """Return `base` with the character at position `char_idx` replaced by 'R'."""
    before = base[0:char_idx]
    after = base[char_idx + 1:]
    return before + 'R' + after

def left_right_series(char_idx, left_label, count):
    """Create numbered left and right variable-name series from one base name.

    char_idx: position of the side letter ('L' or 'R') inside the base name.
    left_label: base variable name. The left-side spelling is expected, but the
        right-side spelling is accepted too: an 'R' at `char_idx` is turned into
        'L' for the left series, so the two returned lists never coincide just
        because the caller passed the right-hand name.
    count: number of items; names are suffixed 1..count.

    Returns (left, right): two lists of `count` names each.
    """
    prefix = left_label[0:char_idx]
    suffix = left_label[char_idx + 1:]
    # Slice (not index) so an out-of-range char_idx behaves as it always has
    if left_label[char_idx:char_idx + 1] == 'R':
        left_base = prefix + 'L' + suffix
    else:
        left_base = left_label
    right_base = prefix + 'R' + suffix
    left = [left_base + str(i) for i in range(1, count + 1)]
    right = [right_base + str(i) for i in range(1, count + 1)]
    return left, right

# Collect WOMAC and KOOS questions

## Find variable names
### WOMAC

In [None]:
# Print the de-duplicated labels for the WOMAC total-score and pain subcategories
print_variables('WOMAC/KOOS', 'WOMAC total score')
print_variables('WOMAC/KOOS', 'WOMAC pain')

This fits with the traditional 5 pain questions in WOMAC.

In [None]:
# Print the de-duplicated labels for the WOMAC stiffness subcategory
print_variables('WOMAC/KOOS', 'WOMAC stiffness')

This fits with the traditional 2 stiffness questions in WOMAC.

In [None]:
# Print the de-duplicated labels for the WOMAC disability subcategory
print_variables('WOMAC/KOOS', 'WOMAC disability')

This fits with the traditional 17 physical function questions in WOMAC.

In [None]:
# Create WOMAC dataframe
womac_df = get_variables_by_category('WOMAC/KOOS')
# Keep only WOMAC rows (drops the KOOS questions). .copy() makes womac_df an independent
# frame, so the column assignments below never write into a slice of vars_cat_df.
womac_df = womac_df[womac_df.Subcategory.str.contains('WOMAC')].copy()
# First 3 characters of a variable name are the visit prefix, the rest is the base name
womac_df['Visit'] = womac_df.Variable.str[:3].astype('category')
womac_df['VariableBase'] = womac_df.Variable.str[3:].astype('category')

# Label all the scores
womac_right_scores = ['WOMTSR', 'WOMKPR', 'WOMSTFR', 'WOMADLR']
womac_left_scores = ['WOMTSL', 'WOMKPL', 'WOMSTFL', 'WOMADLL']
womac_all_scores = womac_right_scores + womac_left_scores

womac_df['ValueType'] = 'question'
womac_df.loc[womac_df.VariableBase.isin(womac_all_scores), 'ValueType'] = 'score'
womac_df.ValueType = womac_df.ValueType.astype('category')

# Questions
# Pass the LEFT-knee base name (side letter 'L' at index 2); the right-knee name is derived
# from it. Passing the right-knee spelling here would make the *_left lists hold right names.
womac_pain_questions_left, womac_pain_questions_right = left_right_series(2, 'WPLKN', 5)
womac_stiffness_questions_left, womac_stiffness_questions_right = left_right_series(2, 'WSLKN', 2)
womac_disability_questions_left, womac_disability_questions_right = left_right_series(2, 'DILKN', 17)

# Label which knee: default everything to 'left', then overwrite the right-knee variables
womac_df['Side'] = 'left'
right_knee_vars = womac_right_scores + womac_pain_questions_right + womac_stiffness_questions_right + womac_disability_questions_right
womac_df.loc[womac_df.VariableBase.isin(right_knee_vars), 'Side'] = 'right'
womac_df.Side = womac_df.Side.astype('category')
# 12 collection dates X 56 questions = 672 variables
womac_df

### KOOS

In [None]:
# Print the de-duplicated labels for the KOOS pain subcategory
print_variables('WOMAC/KOOS', 'KOOS pain')

The 4 additional KOOS pain questions (beyond the WOMAC 5).

In [None]:
# Print the de-duplicated labels for the KOOS symptoms subcategory
print_variables('WOMAC/KOOS', 'KOOS symptoms')

The 5 additional KOOS symptom questions (beyond the 2 WOMAC stiffness).

In [None]:
# Print the de-duplicated labels for the KOOS function subcategory
print_variables('WOMAC/KOOS', 'KOOS function')

The standard 5 KOOS sports/recreational functional questions.

In [None]:
# Print the de-duplicated labels for the KOOS QOL subcategory
print_variables('WOMAC/KOOS', 'KOOS QOL')

The standard 4 KOOS QOL questions.

In [None]:
# Create KOOS dataframe
# Starts from every WOMAC/KOOS row: the WOMAC questions are kept because the KOOS
# question lists below reuse them; only the WOMAC score variables are removed.
koos_df = get_variables_by_category('WOMAC/KOOS').copy()
koos_df['Visit'] = koos_df.Variable.str[:3].astype('category')
koos_df['VariableBase'] = koos_df.Variable.str[3:].astype('category')
# .copy() after filtering so the column assignments below act on an independent frame
koos_df = koos_df[~koos_df.VariableBase.isin(womac_all_scores)].copy()

# Label all the scores
koos_right_scores = ['KOOSKPR', 'KOOSYMR']
koos_left_scores = ['KOOSKPL', 'KOOSYML']
koos_function_score = ['KOOSFSR']
koos_qol_score = ['KOOSQOL']
# NOTE: OAI never seems to have calculated an ADL/disability score in the KOOS normalized form
koos_all_scores = koos_right_scores + koos_left_scores + koos_function_score + koos_qol_score

koos_df['ValueType'] = 'question'
koos_df.loc[koos_df.VariableBase.isin(koos_all_scores), 'ValueType'] = 'score'
koos_df.ValueType = koos_df.ValueType.astype('category')

# Questions
# Pass the LEFT-knee base name; left_right_series derives the right-knee name from it.
# KOOS pain = 3 KOOS-only items + the 5 WOMAC pain items + the 7-day pain frequency item
koos_pain_questions_left, koos_pain_questions_right = left_right_series(2, 'KPLKN', 3)
koos_pain_questions_left += womac_pain_questions_left + ['P7LKFR']
koos_pain_questions_right += womac_pain_questions_right + ['P7RKFR']

# KOOS symptoms = 5 KOOS-only items + the 2 WOMAC stiffness items
# (the 'syptoms' spelling of these names is kept as-is in case other cells reference them)
koos_syptoms_questions_left, koos_syptoms_questions_right = left_right_series(3, 'KSXLKN', 5)
koos_syptoms_questions_left += womac_stiffness_questions_left
koos_syptoms_questions_right += womac_stiffness_questions_right

# The disability questions are the WOMAC disability lists, reused unchanged
koos_disability_questions_left = womac_disability_questions_left
koos_disability_questions_right = womac_disability_questions_right

koos_function_questions = metric_series('KOOSFX', 5)
koos_qol_questions = metric_series('KQOL', 4)
koos_all_questions = koos_qol_questions + koos_function_questions \
                    + koos_disability_questions_left + koos_syptoms_questions_left + koos_pain_questions_left \
                    + koos_disability_questions_right + koos_syptoms_questions_right + koos_pain_questions_right


# Label which knee: default 'left', then overwrite right-knee, either-knee and QOL variables
koos_df['Side'] = 'left'
right_knee_vars = koos_right_scores + koos_pain_questions_right + koos_syptoms_questions_right + koos_disability_questions_right
koos_df.loc[koos_df.VariableBase.isin(right_knee_vars), 'Side'] = 'right'
either_knee_vars = koos_function_score + koos_function_questions
koos_df.loc[koos_df.VariableBase.isin(either_knee_vars), 'Side'] = 'either'
# QOL variables are given no side at all
qol_vars = koos_qol_score + koos_qol_questions
koos_df.loc[koos_df.VariableBase.isin(qol_vars), 'Side'] =  np.nan
koos_df.Side = koos_df.Side.astype('category')

# 12 collection dates X 81 questions = 972 variables
koos_df

In [None]:
# TODO: 
# Confirm WOMAC scores are as expected
# Confirm KOOS scores are as expected

## ICOAP
Measure of Intermittent and Constant Osteoarthritis Pain:  ICOAP

Constant pain subscale: To calculate the constant pain subscale, sum the scores for items 1 through 5. If subject did not report constant pain, assign a score of 0. This score can be transformed to a score out of 100 using the following formula:   (Total pain score / 20) x 100
* how intense
* affect sleep
* affect QOL
* how frustrated
* how upset

Intermittent pain subscale: To calculate the intermittent pain subscale, sum the scores for items 6 through 11. If subject did not report intermittent pain, assign a score of 0. This score can be transformed to a score out of 100 using the following formula:   (Total pain score / 24) x 100
* severity most intense pain
* frequency
* affect sleep
* affect QOL
* how frustrated
* how upset


Total pain score: To calculate the total pain score, sum the constant and intermittent pain subscales.  The total pain score ranges from 0 to 44.  This score can be transformed to a score out of 100 using the following formula:   (Total pain score / 44) x 100

### Knee
Only asked on V06-V10

3 additional questions asking about pain in the past 7 days (each knee)
* KPN[L/R]7 - any (wasn't asked on V10)
* CKPN[L/R]7 - constant
* IKPN[L/R]7 - intermittent

Two questions were added to the intermittent list during V10:
* IP[L/R]KN7 - how often come on without warning
* IP[L/R]KN8 - how often occur after specific trigger

In [None]:
# Create Knee ICOAP dataframe
icoap_df = vars_cat_df[vars_cat_df.Category.str.contains('Knee symptoms') & vars_cat_df.Subcategory.str.contains('ICOAP')]
# .copy() so the column assignments below act on an independent frame, not a slice of vars_cat_df
icoap_df = icoap_df.drop_duplicates(subset='Variable').copy()
icoap_df['Visit'] = icoap_df.Variable.str[:3].astype('category')
icoap_df['VariableBase'] = icoap_df.Variable.str[3:].astype('category')

# Label all the scores
icoap_constant_score_left, icoap_constant_score_right = ['CPSKL'], ['CPSKR']
icoap_intermittent_score_left, icoap_intermittent_score_right = ['IPSKL'], ['IPSKR']
icoap_total_score_left, icoap_total_score_right = ['ICPTSKL'], ['ICPTSKR']
# Each per-side list is built from the matching *_left / *_right names. Crossing them
# would tag the score rows with the opposite knee in the 'Side' column below.
icoap_left_scores = icoap_constant_score_left + icoap_intermittent_score_left + icoap_total_score_left
icoap_right_scores = icoap_constant_score_right + icoap_intermittent_score_right + icoap_total_score_right
icoap_all_scores = icoap_left_scores + icoap_right_scores

icoap_df['ValueType'] = 'question'
icoap_df.loc[icoap_df.VariableBase.isin(icoap_all_scores), 'ValueType'] = 'score'
icoap_df.ValueType = icoap_df.ValueType.astype('category')

# Questions - any/const/intermittent past 7 days
icoap_any_left, icoap_any_right = ['KPNL7'], ['KPNR7']   # any knee pain last 7 days - not asked V10
icoap_constant_left, icoap_constant_right = ['CKPNL7'], ['CKPNR7'] # const knee pain last 7 days
icoap_intermittent_left, icoap_intermittent_right = ['IKPNL7'], ['IKPNR7']  # intermittent knee pain last 7 days

icoap_constant_questions_left, icoap_constant_questions_right = left_right_series(2, 'CPLKN', 5)
icoap_intermittent_questions_left, icoap_intermittent_questions_right = left_right_series(2, 'IPLKN', 8) # Q7,8 only asked during V10

# Label which knee: default 'left', then overwrite the right-knee variables
icoap_df['Side'] = 'left'
right_knee_vars = icoap_right_scores + icoap_any_right + icoap_constant_right + icoap_intermittent_right + icoap_constant_questions_right + icoap_intermittent_questions_right
icoap_df.loc[icoap_df.VariableBase.isin(right_knee_vars), 'Side'] = 'right'
icoap_df.Side = icoap_df.Side.astype('category')
# (4 collection dates X 34 questions) + (1 collection dates X 36 questions) = 172 variables
icoap_df

### Hip
Only asked on V08, V10

3 additional questions asking about pain in the past 7 days (each hip)
* HPN[L/R]7 - any (wasn't asked on V10)
* CHPN[L/R]7 - constant
* IHPN[L/R]7 - intermittent

Two questions were added to the intermittent list during V10:
* IP[L/R]HP7 - intermittent pain: how often come on without warning
* IP[L/R]HP8 - intermittent pain: how often occur after specific trigger

In [None]:
# Create Hip ICOAP dataframe
# NOTE: this cell rebinds the icoap_*_score_* / icoap_*_left / icoap_*_right names that the
# knee ICOAP cell also assigns; after it runs they hold the hip variable names.
icoap_hip_df = vars_cat_df[vars_cat_df.Category.str.contains('Hip symptoms') & vars_cat_df.Subcategory.str.contains('ICOAP')]
# .copy() so the column assignments below act on an independent frame, not a slice of vars_cat_df
icoap_hip_df = icoap_hip_df.drop_duplicates(subset='Variable').copy()
icoap_hip_df['Visit'] = icoap_hip_df.Variable.str[:3].astype('category')
icoap_hip_df['VariableBase'] = icoap_hip_df.Variable.str[3:].astype('category')

# Label all the scores
icoap_constant_score_left, icoap_constant_score_right = ['CPSHL'], ['CPSHR']
icoap_intermittent_score_left, icoap_intermittent_score_right = ['IPSHL'], ['IPSHR']
icoap_total_score_left, icoap_total_score_right = ['ICPTSHL'], ['ICPTSHR']
# Each per-side list is built from the matching *_left / *_right names. Crossing them
# would tag the score rows with the opposite hip in the 'Side' column below.
icoap_left_scores = icoap_constant_score_left + icoap_intermittent_score_left + icoap_total_score_left
icoap_right_scores = icoap_constant_score_right + icoap_intermittent_score_right + icoap_total_score_right
icoap_all_scores = icoap_left_scores + icoap_right_scores

icoap_hip_df['ValueType'] = 'question'
icoap_hip_df.loc[icoap_hip_df.VariableBase.isin(icoap_all_scores), 'ValueType'] = 'score'
icoap_hip_df.ValueType = icoap_hip_df.ValueType.astype('category')

# Questions - any/const/intermittent past 7 days
icoap_any_left, icoap_any_right = ['HPNL7'], ['HPNR7']   # any hip pain last 7 days - not asked V10
icoap_constant_left, icoap_constant_right = ['CHPNL7'], ['CHPNR7'] # const hip pain last 7 days
icoap_intermittent_left, icoap_intermittent_right = ['IHPNL7'], ['IHPNR7']  # intermittent hip pain last 7 days

icoap_constant_questions_left, icoap_constant_questions_right = left_right_series(2, 'CPLHP', 5)
icoap_intermittent_questions_left, icoap_intermittent_questions_right = left_right_series(2, 'IPLHP', 8) # Q7,8 only asked during V10

# Label which hip: default 'left', then overwrite the right-hip variables
icoap_hip_df['Side'] = 'left'
right_hip_vars = icoap_right_scores + icoap_any_right + icoap_constant_right + icoap_intermittent_right + icoap_constant_questions_right + icoap_intermittent_questions_right
icoap_hip_df.loc[icoap_hip_df.VariableBase.isin(right_hip_vars), 'Side'] = 'right'
icoap_hip_df.Side = icoap_hip_df.Side.astype('category')
# (1 collection dates X 34 questions) + (1 collection dates X 36 questions) = 70 variables
icoap_hip_df

## Outcomes

In [None]:
# All rows in the 'Outcomes' category (any subcategory), copied so later edits cannot touch vars_cat_df
outcomes_df = get_variables_by_category('Outcomes').copy()
# 89 variables, all V99
len(outcomes_df)

In [None]:
# Rows of the 'Outcomes' / 'Knee replacement' subcategory, followed by their labels
knee_replacement_df = get_variables_by_category('Outcomes', 'Knee replacement').copy()
print(len(knee_replacement_df))
# Variables in this subcategory (names are shown for the right knee):
# V99RNTCNT                               Most recent OAI contact
# 14 L/R knee questions
# V99ERKVSRP    OAI visit follow-up knee replacement self-reported at
# V99ERKRPCF    follow-up knee replacement adjudication/confirmation status
# V99ERKRPSN    knee replacement seen on follow-up OAI x-ray
# V99ERKDATE    date of follow-up knee replacement
# V99ERKFLDT    date flag, date of follow-up knee replacement from self report or adjudicated from medical records
# V99ERKTLPR    total or partial follow-up knee replacement (calc)
# V99ERKTPPR    type of partial follow-up knee replacement
# V99ERKPODX    primary pre-operative diagnosis
# V99ERKBLRP    knee replacement seen on baseline OAI x-ray
# V99ERKDAYS    days between enrollment visit and follow-up knee replacement
# V99ERKVSPR    closest OAI contact prior to follow-up knee replacement
# V99ERKVSAF    closest OAI contact after follow-up knee replacement
# V99ERKXRPR    closest OAI visit with knee x-ray prior to follow-up knee replacement
# V99ERKXRAF    closest OAI visit with knee x-ray after follow-up knee replacement
# Show the full label text instead of truncating it.
# NOTE(review): set_option is global, so this stays in effect for every later cell in the session.
pd.set_option('display.max_colwidth', None)
print_variables('Outcomes', 'Knee replacement')

In [None]:
# Rows of the 'Outcomes' / 'Hip replacement' subcategory; the cell displays how many there are
hip_replacement_df = get_variables_by_category('Outcomes', 'Hip replacement').copy()
len(hip_replacement_df)

In [None]:
# Rows of the 'Outcomes' / 'Death' subcategory; the cell displays how many there are
death_df = get_variables_by_category('Outcomes', 'Death').copy()
len(death_df)

In [None]:
# Rows of the 'Outcomes' / 'X-ray outcomes' subcategory; the cell displays how many there are
xray_df = get_variables_by_category('Outcomes', 'X-ray outcomes').copy()
len(xray_df)

In [None]:
# NOTE(review): same call as at the end of the knee-replacement cell earlier in this
# section, so it prints the same listing a second time.
print_variables('Outcomes', 'Knee replacement')

In [None]:
# What about perf measures, strength measures, and knee exam?
# Look into lifetime activity questions (swimming, ping pong)