In [567]:
#Packages
import pandas as pd

#Display options: render floats with thousands separators and two decimals
pd.set_option('display.float_format', '{:,.2f}'.format)


In [568]:


#Column name -> dtype mapping passed to the 'dtype' parameter of pd.read_excel.
#Keys must match the spreadsheet headers exactly (including repeated spaces).
#Measurements are read as float; several coded columns (e.g. Y=1; N=2) are read as str, others as float.
#TODO: can some of the coded columns be converted into boolean or enum/categorical types?
varus_valgus_dtypes = {
    'Study ID':int,
    'Laterality (Right =1; Left = 2)':float,
    'Team Member Med Hist Review':str,
    'Team Member XRay Review':str,
    'Meets Inclusion: (Y=1; N=2)':str,
    'Nav or Conventional (Nav=1; Con=2)':float,
    'Nav #':float,
    'MD (Phillips = 1; Rachala = 2)':str,
    'Age at Surgery':float,
    'Length of FU (Days)':float,
    'Sex (M=1; F=2)':float,
    'Ht (in)':float,
    'Wt (lbs)':float,
    'BMI (kg/m2)':float,
    'Osteoporosis/Osteopenia (Y=1; N=2)':str,
    'Smoker (Current=1; Former=2; Never=3)':str,
    'Diabetes (Y=1; N=2)':str,
    #NOTE(review): the header below asks for the text "Less"/"More" but the column is read as float - confirm what the sheet actually contains
    'Comorbitities (Less than or equal to two = 1; Three or more = 2) WRITE "Less" or "More"':float,
    'Diagnosis (OA = 1; IA = 2; PTOA = 3; OA + IA = 4; OA + PTOA = 5)':str,
    'Presence of PreOp Flexion Contracture (Y=1; N=2)':str,
    'Nav PreOp Measurement (degree; Valgus is negative)':float,
    'Nav PostOp Measurement (degree; Valgus is negative)':float,
    'Nav PreOp Check':str,
    'Nav PostOp Check':str,
    'Group (Control=1; Outlier=2)':str,
    'PreOp Extension (degrees; hyperextension is negative)':float,
    'PostOp Extension at Final FU (degrees; hyperextension is negative)':float,
    'PreOp Flexion (degrees)':float,
    'PostOp Flexion (degrees)':float,
    'Tourniquet time (min)':float,
    'OR time (min)':float,
    'Estimated Blood Loss (ml)':float,
    'Tibial plateau fracture (Y=1; N=2)':str,
    'Tibial component size':float,
    'Femoral component size':float,
    'Length of Stay (days)':float,
    'Discharge destination (1=home; 2=rehab)':str,
    'Readmission w/in 90 days (Y=1; N=2)':str,
    'Reason for Readmission':str,
    'Reoperation (Y=1; N=2)':str,
    'Reason for reoperation':str,
    'PostOp Infection  (Y=1; N=2)':str,
    'PreOp Mechanical axis':float,
    '3mo PostOp Mechanical axis':float,
    'Final FU Mechanical axis':float,
    'PreOp Posterior Tibial Slope':float,
    'PostOp Posterior Tibial Slope':float,
    'PreOp Proximal Medial Tibial Angle':float,
    'PostOp Proximal Medial Tibial Angle':float,
    'PreOp Lateral Distal Femoral Angle':float,
    '3 mo PostOp Lateral Distal Femoral Angle':float,
    'Final FU Lateral Distal Femoral Angle':float,
    'PostOp Patellar Tilt':float,
    'Evidence of Hardware Failure (Y=1; N=2)':str,
    'Fracture   (Y=1; N=2)':str,
    'Subsidence   (Y=1; N=2)':str,
    'Loosening   (Y=1; N=2)':str,
    'Polyethylene Wear (Y=1; N=2)':str,
    '3mo PostOp Radiolucent lines (Y=1; N=2)':str,
    'Final FU Radiolucent lines (Y=1; N=2)':str,
    '3mo PostOp Stress shielding (Y=1; N=2)':str,
    'Final FU Stress shielding (Y=1; N=2)':str,
    'UBMD General Function VAS Score 2 wk':float,
    'UBMD General Function VAS Score 6 wk':float,
    'UBMD General Function VAS Score 3 mo':float,
    'UBMD General Function VAS Score 1 yr':float,
    'UBMD General Pain VAS Score 2 wk':float,
    'UBMD General Pain VAS Score 6 wk':float,
    'UBMD General Pain VAS Score 3 mo':float,
    'UBMD General Pain VAS Score 1 yr':float,
    'SF-12 Physical Health Score 6 wk':float,
    'SF-12 Physical Health Score 3 mo':float,
    'SF-12 Physical Health Score 1 yr':float,
    'VR-12 Physical Score 6 wk':float,
    'VR-12 Physical Score 3 mo':float,
    'VR-12 Physical Score 1 yr':float,
    'VR6D Score 6 wk':float,
    'VR6D Score 3 mo':float,
    'VR6D Score 1 yr':float,
    'KSS-Functional Knee Score 2 wk':float,
    'KSS-Functional Knee Score 6 wk':float,
    'KSS-Functional Knee Score 3 mo':float,
    'KSS-Functional Knee Score 1 yr':float,
    'KSS-Objective Knee Score (Pain) Score 2 wk':float,
    'KSS-Objective Knee Score (Pain) Score 6 wk':float,
    'KSS-Objective Knee Score (Pain) Score 3 mo':float,
    'KSS-Objective Knee Score (Pain) Score 1 yr':float,
    'LEAS Score 2 wk':float,
    'LEAS Score 6 wk':float,
    'LEAS Score 3 mo':float,
    'LEAS Score 1 yr':float
}

#Read in the data file (path is relative to the notebook's working directory),
#applying the column dtypes declared above
varus_valgus_original = pd.read_excel('varus_valgus.xlsx', dtype=varus_valgus_dtypes)


#Questions for the group:
# Will we treat varus and valgus separately?
# Will we look at Rachala and Phillips separately or as a group?

In [569]:
#Consider a dual index for outliers (use zip command and hier_index)
#Index the rows by 'Study ID'. Rebinding the name (instead of inplace=True) together with
#the index-name check keeps this cell safe to re-run: a second in-place call would raise
#KeyError because 'Study ID' is no longer a column once it has become the index.
if varus_valgus_original.index.name != 'Study ID':
    varus_valgus_original = varus_valgus_original.set_index('Study ID')

In [570]:
#TODO: data cleaning step (handle missing data, etc.)
#e.g. df.dropna(axis=1) to drop columns with missing values, or df.fillna() to fill them in

In [571]:
#Select the outliers from the original data:
#rows whose pre-op mechanical axis has an absolute value of 10 or more,
#according to either the XR or the Nav measurement
xr_outlier_mask = varus_valgus_original['PreOp Mechanical axis'].abs().ge(10)
nav_outlier_mask = varus_valgus_original['Nav PreOp Measurement (degree; Valgus is negative)'].abs().ge(10)
varus_valgus_outliers = varus_valgus_original.loc[xr_outlier_mask | nav_outlier_mask]
varus_valgus_outliers

Unnamed: 0_level_0,Laterality (Right =1; Left = 2),Team Member Med Hist Review,Team Member XRay Review,Meets Inclusion: (Y=1; N=2),Nav or Conventional (Nav=1; Con=2),Nav #,MD (Phillips = 1; Rachala = 2),Age at Surgery,Length of FU (Days),Sex (M=1; F=2),...,KSS-Functional Knee Score 3 mo,KSS-Functional Knee Score 1 yr,KSS-Objective Knee Score (Pain) Score 2 wk,KSS-Objective Knee Score (Pain) Score 6 wk,KSS-Objective Knee Score (Pain) Score 3 mo,KSS-Objective Knee Score (Pain) Score 1 yr,LEAS Score 2 wk,LEAS Score 6 wk,LEAS Score 3 mo,LEAS Score 1 yr
Study ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7,2.00,Hailley,Andrey,1,1.00,3056.00,1,69.00,123.00,2.00,...,,,,,,,,,,
15,2.00,Hailley,Andrey,1,1.00,3066.00,1,71.00,106.00,1.00,...,,,,,,,,,,
17,1.00,Hailley,Andrey,1,1.00,3068.00,1,58.00,169.00,1.00,...,,,,,,,,,,
22,2.00,Hailley,Andrey,1,1.00,3074.00,1,54.00,,2.00,...,,,,,,,,,,
29,1.00,Hailley,Andrey,1,1.00,3082.00,1,77.00,,1.00,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,1.00,Andrey,,1,1.00,3050.00,1,51.00,,2.00,...,,,,,,,,,,
2,1.00,Andrey,,1,1.00,3051.00,1,63.00,,1.00,...,,,,,,,,,,
9,2.00,Andrey,,1,1.00,3058.00,1,74.00,,2.00,...,,,,,,,,,,
308,1.00,Andrey,,1,1.00,3433.00,1,62.00,,1.00,...,,,,,,,,,,


In [572]:
#Verify varus/valgus discrepancy between Nav and manual (XR) measurements
#Keep rows where either measurement is +10 or more (the Nav column records valgus as negative).
#Only varus_outliers is bound here: a chained assignment to varus_valgus_outliers as well
#would overwrite the combined outlier frame with this subset.
varus_outliers = varus_valgus_outliers[(varus_valgus_outliers['PreOp Mechanical axis'] >= 10) |
                                       (varus_valgus_outliers['Nav PreOp Measurement (degree; Valgus is negative)'] >= 10)]

#Display discrepancies where XR measurement of mechanical axis does not equal NAV data
varus_outliers.loc[
    varus_outliers['PreOp Mechanical axis'] != varus_outliers['Nav PreOp Measurement (degree; Valgus is negative)'],
    ['PreOp Mechanical axis', 'Nav PreOp Measurement (degree; Valgus is negative)'],
]

Unnamed: 0_level_0,PreOp Mechanical axis,Nav PreOp Measurement (degree; Valgus is negative)
Study ID,Unnamed: 1_level_1,Unnamed: 2_level_1
7,13.00,10.00
17,15.00,11.50
22,12.00,10.00
29,12.00,10.00
38,12.00,10.50
...,...,...
497,12.00,10.00
498,13.00,10.00
1,11.00,9.00
2,10.00,8.00


In [573]:
#Recompute the combined outlier set directly from the original data:
#absolute pre-op mechanical axis of 10 or more, by either the XR or the Nav measurement
xr_outlier_mask = varus_valgus_original['PreOp Mechanical axis'].abs().ge(10)
nav_outlier_mask = varus_valgus_original['Nav PreOp Measurement (degree; Valgus is negative)'].abs().ge(10)
varus_valgus_outliers = varus_valgus_original.loc[xr_outlier_mask | nav_outlier_mask]

In [574]:
#Favor XR measurements over Nav: keep only the outliers whose XR pre-op
#mechanical axis is 10 or more, ignoring the Nav reading
xr_axis_ge_10 = varus_valgus_outliers['PreOp Mechanical axis'].ge(10)
varus_outliers = varus_valgus_outliers.loc[xr_axis_ge_10]
varus_outliers

Unnamed: 0_level_0,Laterality (Right =1; Left = 2),Team Member Med Hist Review,Team Member XRay Review,Meets Inclusion: (Y=1; N=2),Nav or Conventional (Nav=1; Con=2),Nav #,MD (Phillips = 1; Rachala = 2),Age at Surgery,Length of FU (Days),Sex (M=1; F=2),...,KSS-Functional Knee Score 3 mo,KSS-Functional Knee Score 1 yr,KSS-Objective Knee Score (Pain) Score 2 wk,KSS-Objective Knee Score (Pain) Score 6 wk,KSS-Objective Knee Score (Pain) Score 3 mo,KSS-Objective Knee Score (Pain) Score 1 yr,LEAS Score 2 wk,LEAS Score 6 wk,LEAS Score 3 mo,LEAS Score 1 yr
Study ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7,2.00,Hailley,Andrey,1,1.00,3056.00,1,69.00,123.00,2.00,...,,,,,,,,,,
15,2.00,Hailley,Andrey,1,1.00,3066.00,1,71.00,106.00,1.00,...,,,,,,,,,,
17,1.00,Hailley,Andrey,1,1.00,3068.00,1,58.00,169.00,1.00,...,,,,,,,,,,
22,2.00,Hailley,Andrey,1,1.00,3074.00,1,54.00,,2.00,...,,,,,,,,,,
29,1.00,Hailley,Andrey,1,1.00,3082.00,1,77.00,,1.00,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,2.00,Hailley,,1,1.00,3661.00,1,57.00,362.00,2.00,...,,,,,,,,,,
498,1.00,Hailley,,1,1.00,3663.00,1,74.00,328.00,1.00,...,20.00,,,,50.00,,,,6.00,
1,1.00,Andrey,,1,1.00,3050.00,1,51.00,,2.00,...,,,,,,,,,,
2,1.00,Andrey,,1,1.00,3051.00,1,63.00,,1.00,...,,,,,,,,,,


In [575]:
#Helper: descriptive statistics for selected columns of a df
def get_descr_stats(df, column_names):
    """Return describe() statistics for the chosen columns, one row per column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.
    column_names : list of str
        Columns of ``df`` to include.

    Returns
    -------
    pandas.DataFrame
        The ``describe()`` output transposed, so each selected column becomes
        a row and each statistic (count, mean, std, ...) a column.
    """
    selected = df[column_names]
    summary = selected.describe()
    return summary.T


#Column groups for the continuous varus data
#NOTE(review): the nav/misc/xr groups are picked by column POSITION, so they silently
#shift if columns are added, removed or reordered in the spreadsheet - confirm the
#positions still line up with the intended columns whenever the sheet changes
basic_columns = ["Age at Surgery", "Length of FU (Days)", "BMI (kg/m2)"]
all_columns = varus_outliers.columns
nav_columns = list(all_columns[19:21])
misc_columns = list(all_columns[24:31])
xr_columns = list(all_columns[41:52])
column_lst = [*basic_columns, *nav_columns, *misc_columns, *xr_columns]


#Descriptive statistics for the selected columns (one row per column)
get_descr_stats(varus_outliers, column_lst)



Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age at Surgery,94.0,67.28,8.88,51.0,61.0,66.5,73.0,89.0
Length of FU (Days),79.0,553.01,582.06,43.0,208.5,372.0,714.0,4068.0
BMI (kg/m2),94.0,34.84,9.21,20.94,27.37,33.36,40.33,63.41
Nav PreOp Measurement (degree; Valgus is negative),94.0,12.09,3.42,-11.5,10.5,12.0,13.5,22.0
Nav PostOp Measurement (degree; Valgus is negative),94.0,1.42,1.06,-1.0,0.5,1.5,2.0,4.0
PreOp Extension (degrees; hyperextension is negative),93.0,6.78,7.0,-12.0,2.5,7.5,10.5,23.5
PostOp Extension at Final FU (degrees; hyperextension is negative),93.0,2.28,1.32,-2.0,1.5,2.5,3.0,5.0
PreOp Flexion (degrees),93.0,133.65,9.3,107.5,129.0,134.5,139.5,162.5
PostOp Flexion (degrees),92.0,132.45,9.23,113.0,126.5,131.75,136.75,160.5
Tourniquet time (min),69.0,11.77,12.38,4.0,5.0,7.0,12.0,71.0
