# Comparing the SphygmoCor v8 and v9

* 1821 participants measured by OLD (v8) machine TWICE
* 786 participants measured by NEW (v9) machine TWICE
    * 1656 participants measured by NEW (v9) machine at least ONCE
* 73 participants measured by both NEW and OLD machine
* Goal: Paired t-test among the 73 overlapping participants to ascertain whether the machines are calibrated.

# Load SardiNIA Database

In [1]:
library(readr)

In [2]:
full_db <- read_tsv( '20171017_SardiNIA_WaveIV_only.txt')

Parsed with column specification:
cols(
  .default = col_double(),
  id_individual = col_integer(),
  Wave = col_integer(),
  FirstVisitDate = col_date(format = ""),
  SecondVisitDate = col_date(format = ""),
  ThirdVisitDate = col_date(format = ""),
  FourthVisitDate = col_date(format = ""),
  labsHbA1Cdx = col_character(),
  labsG6PD = col_character(),
  labsZnPP = col_character(),
  labsBilirubinad = col_character(),
  labsBilirubinat = col_character(),
  labsSodiemia = col_character(),
  labsPotassiemia = col_character(),
  labsPCR = col_character(),
  labsTie = col_character(),
  disMIname = col_character(),
  disMIwhen = col_date(format = ""),
  disAPname = col_character(),
  disAPwhen = col_date(format = ""),
  disHFname = col_character()
  # ... with 192 more columns
)
See spec(...) for full column specifications.


In [3]:
# print # rows, # cols
dim(full_db)

In [4]:
head(full_db)

id_individual,id_sir,id_mad,Wave,Visit,Age,Sex,Education,Occupation,MaritalStatus,⋯,SphPWVSecondM_DT_DIST,SphPWVSecondM_N_Measurements,SphPWVSecondM_PP_DEVIATION,SphPWVSecondM_PP_MDT,SphPWVSecondM_PWV,SphPWVSecondM_PWVERR,SphPWVSecondM_PWV_DIST,SphPWVSecondM_PX_DIST,SphPWVSecondM_SP,SphPWVSecondM_date
2,1573,1,4,3,36.8,1,5,71,1,⋯,,,,,,,,,,
7,6756,16525,4,4,72.8,1,3,92,2,⋯,,,,,,,,,,
8,7,8546,4,3,43.6,1,5,140,3,⋯,,,,,,,,,,
10,7,8546,4,3,37.3,0,4,72,3,⋯,,,,,,,,,,
12,7,8546,4,1,22.9,0,4,97,0,⋯,,,,,,,,,,
13,16137,15362,4,4,76.3,1,2,70,1,⋯,570.0,3.0,4.0651,62.90355,7.708743,0.5383492,480.0,90.0,106.0,2015-12-01


# Count the number of participants measured by the various machines

In [5]:
library(tidyr)
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



## How many measured by OLD machine at least once?

In [6]:
# Number of rows with a non-missing first OLD-machine SP reading
sum( !is.na( full_db[['SphfirstM_SP']] ) )

## How many measured by OLD machine twice?

In [7]:
# Number of rows with a non-missing second OLD-machine SP reading
sum( !is.na( full_db[['SphsecondM_SP']] ) )

## How many measured by NEW machine at least once?

In [8]:
# Number of rows with a non-missing first NEW-machine SP reading
sum( !is.na( full_db[['SphPWAFirstM_SP']] ) )

## How many measured by NEW machine twice?

In [9]:
# Number of rows with a non-missing second NEW-machine SP reading
sum( !is.na( full_db[['SphPWASecondM_SP']] ) )

# Get the trait names for the OLD SphygmoCor machine

Grab all the column names starting with the prefix "SphfirstM_"

In [10]:
library(stringr)

In [11]:
# Anchor the pattern with ^ so only columns that START with the prefix are kept;
# the later prefix-stripping step assumes the prefix occupies characters 1-10.
old_names <- sort( str_subset( names(full_db), "^SphfirstM_" ) )

In [12]:
head(old_names)

Drop the prefix "SphfirstM_" (10 characters) from each variable name by keeping the substring that starts at the 11th character.

In [13]:
# str_sub() is vectorised, so no sapply() wrapper is needed; this always yields
# a character vector, even when old_names is empty.
old_names <- sort( str_sub( old_names, 11 ) )

In [14]:
head( old_names )

# Get the trait names for the NEW SphygmoCor machine

Grab all the column names starting with the prefix "SphPWAFirstM_"

In [15]:
# Anchor the pattern with ^ so only columns that START with the prefix are kept;
# the later prefix-stripping step assumes the prefix occupies characters 1-13.
new_names <- sort( str_subset( names(full_db), "^SphPWAFirstM_" ) )

In [16]:
length( new_names )

In [17]:
head(new_names)

In [18]:
# Drop the 13-character prefix "SphPWAFirstM_" by keeping everything from the
# 14th character on. str_sub() is vectorised, so no sapply() wrapper is needed.
new_names <- sort( str_sub( new_names, 14 ) )

In [19]:
head( new_names )

# Get the intersection of traits names measured by BOTH new and old machines

In [20]:
# Trait names present under both prefixes (after stripping them), sorted.
# compare() below reads this global as its list of traits.
intersection <- sort( intersect( new_names, old_names ) )

In [21]:
intersection

# Read Database Description file to know what traits we're comparing

In [22]:
library(readxl)

In [23]:
# Load the database description workbook and keep, for rows whose TABLE is
# 'SPHIGMOCOR_PWA', only the FIELD and DESCRIPTION columns.
# NOTE(review): the path is hard-coded under the author's home directory;
# adjust it when running on another machine.
descr_path<- '~/projects/david/sardiNIA_database/latest/20171006_Wave5_Update/DbDescription5.xlsx'
descriptions <- read_excel( descr_path ) %>% 
                    filter(TABLE == 'SPHIGMOCOR_PWA') %>% 
                    select( FIELD, DESCRIPTION )

In [24]:
head( descriptions)

FIELD,DESCRIPTION
ID_INDIVIDUAL,KEY
DATETIME,End Date & Time of Study
SP,Entered Systolic Pressure
DP,Entered Diastolic Pressur
HR,Heart Rate
ED,Ejection Duration (ms)


In [25]:
# Rename FIELD/DESCRIPTION to name/description so the key column is `name`,
# which is the column compare() joins on.
names( descriptions) <- c('name', 'description')

# For each trait, do a paired t-test

In [26]:
# Map a p-value to the marker shown in the `sig` column:
#   '***' p < 5e-4, '**' p < 5e-3, '*' p < 5e-2, '.' p < 0.1, '' otherwise.
# A missing p-value yields ''.
sig_marker <- function( p_value ) {
    if( is.na( p_value ) ) {
        return( '' )
    }
    cutoffs <- c( 5e-4, 5e-3, 5e-2, 0.1 )
    markers <- c( '***', '**', '*', '.', '' )
    # findInterval() counts how many cutoffs are <= p_value (0..4)
    markers[ findInterval( p_value, cutoffs ) + 1 ]
}

# Paired t-test of two sets of measurement columns, one row per trait.
#
# For every trait, the columns paste0(prefix1, trait) and paste0(prefix2, trait)
# are taken from `db`, rows with a missing id_individual or a missing value in
# either column are dropped, and a paired t-test is run on what remains.
#
# Arguments:
#   db                  data frame holding `id_individual` and the measurement
#                       columns
#   prefix1, prefix2    column-name prefixes of the two measurement sets
#   traits              character vector of trait names (column names without
#                       prefix); defaults to the global `intersection`
#   trait_descriptions  data frame with columns `name` and `description`;
#                       defaults to the global `descriptions`
#
# Returns a tibble with columns description, name, n_obs (integer) and the
# pre-formatted character columns mean1, mean2, meandiff (mean of
# prefix1 - prefix2 differences), std1, std2, p_value, sig.
#
# If the t-test cannot be computed for a trait (e.g. fewer than two complete
# pairs), a warning is raised and that trait's p_value is reported as "NA"
# instead of aborting the whole table.
compare <- function( db, prefix1, prefix2,
                     traits=intersection, trait_descriptions=descriptions ) {
    # Zero-row template pins the column types, so the result has the same
    # columns even when `traits` is empty.
    template <- tibble( name=character(),
                        n_obs=integer(),
                        mean1=character(),
                        mean2=character(),
                        meandiff=character(),
                        std1=character(),
                        std2=character(),
                        p_value=character(),
                        sig=character() )

    # Build one single-row tibble per trait. Assigning a mixed c(...) vector
    # into a preallocated row would coerce n_obs to character, which current
    # tibble refuses to write into an integer column.
    rows <- lapply( traits, function( trait ) {
        col1 <- paste0( prefix1, trait )
        col2 <- paste0( prefix2, trait )
        # Keep only participants with an id and both measurements present
        paired <- db[ , unique( c( 'id_individual', col1, col2 ) ) ] %>% drop_na()
        x1 <- paired[[col1]]
        x2 <- paired[[col2]]
        p_value <- tryCatch(
            t.test( x1, x2, paired=TRUE )$p.value,
            error=function( e ) {
                warning( "t-test skipped for ", trait, ": ",
                         conditionMessage( e ), call.=FALSE )
                NA_real_
            }
        )
        tibble( name=trait,
                n_obs=length( x1 ),
                mean1=sprintf( "%0.1f", mean( x1 ) ),
                mean2=sprintf( "%0.1f", mean( x2 ) ),
                # same quantity as the paired t-test estimate
                meandiff=sprintf( "%0.1f", mean( x1 - x2 ) ),
                std1=sprintf( "%0.1f", sd( x1 ) ),
                std2=sprintf( "%0.1f", sd( x2 ) ),
                p_value=sprintf( "%0.3f", p_value ),
                sig=sig_marker( p_value ) )
    } )
    results <- bind_rows( c( list( template ), rows ) )

    # left_join keeps every trait row, in the order of `traits`
    results <- left_join( results, trait_descriptions, by='name' )
    # Shuffle order of columns
    results[ c( 'description', 'name', 'n_obs', 'mean1', 'mean2', 'meandiff',
                'std1', 'std2', 'p_value', 'sig' ) ]
}

# Results

## Comparison 1: OLD Sph FIRST Measurement vs. OLD Sph SECOND Measurement

In [27]:
compare( full_db, 'SphfirstM_', 'SphsecondM_')

description,name,n_obs,mean1,mean2,meandiff,std1,std2,p_value,sig
Central Aug/PH %,C_AGPH,1821,30.2,30.7,-0.5,234.2,234.2,0.954,
Central Augmentation Index,C_AI,1821,143.3,144.4,-1.1,232.7,232.8,0.885,
Central Augmented Pressure,C_AP,1821,15.0,15.2,-0.2,234.2,234.2,0.983,
Central Diastolic Duration,C_DD,1821,598.4,596.0,2.4,119.9,119.3,0.017,*
Period-ED/Period %,C_DD_PERIOD,1821,64.4,64.4,0.1,4.0,4.0,0.06,.
Central Diastolic Pressure,C_DP,1821,73.3,73.3,-0.0,10.0,10.0,0.422,
Central Diastolic Time Index,C_DTI,1821,3230.5,3229.7,0.8,483.7,483.4,0.726,
Central ED/Period %,C_ED_PERIOD,1821,35.6,35.7,-0.1,4.0,4.0,0.058,.
Central End Systolic Pressure,C_ESP,1821,99.1,99.2,-0.1,15.6,15.5,0.309,
Central Mean Pressure,C_MEANP,1821,89.0,89.0,-0.1,12.3,12.3,0.064,.


## Comparison 2: NEW Sph FIRST Measurement vs. NEW Sph SECOND Measurement

In [28]:
compare( full_db, 'SphPWAFirstM_', 'SphPWASecondM_')

description,name,n_obs,mean1,mean2,meandiff,std1,std2,p_value,sig
Central Aug/PH %,C_AGPH,786,31.0,35.8,-4.8,177.9,356.2,0.735,
Central Augmentation Index,C_AI,786,144.0,147.9,-3.9,177.1,352.9,0.784,
Central Augmented Pressure,C_AP,786,16.4,21.8,-5.4,178.1,356.4,0.702,
Central Diastolic Duration,C_DD,786,607.7,580.4,27.3,123.3,114.9,0.0,***
Period-ED/Period %,C_DD_PERIOD,786,64.7,63.9,0.8,4.0,3.9,0.0,***
Central Diastolic Pressure,C_DP,786,76.1,75.0,1.1,11.6,10.9,0.07,.
Central Diastolic Time Index,C_DTI,786,3362.9,3268.4,94.5,533.3,513.4,0.0,***
Central ED/Period %,C_ED_PERIOD,786,35.3,36.1,-0.8,4.0,3.9,0.0,***
Central End Systolic Pressure,C_ESP,786,102.9,100.6,2.3,16.4,16.0,0.004,**
Central Mean Pressure,C_MEANP,786,92.4,90.8,1.5,13.3,12.9,0.022,*


## Comparison 3: OLD Sph FIRST Measurement vs. NEW Sph FIRST Measurement

In [29]:
compare( full_db, 'SphfirstM_', 'SphPWAFirstM_')

description,name,n_obs,mean1,mean2,meandiff,std1,std2,p_value,sig
Central Aug/PH %,C_AGPH,73,23.9,26.0,-2.2,13.8,14.7,0.022,*
Central Augmentation Index,C_AI,73,136.0,141.9,-5.9,26.5,31.5,0.017,*
Central Augmented Pressure,C_AP,73,8.3,10.4,-2.1,6.0,7.8,0.0,***
Central Diastolic Duration,C_DD,73,574.9,599.2,-24.4,127.0,132.7,0.042,*
Period-ED/Period %,C_DD_PERIOD,73,63.8,64.3,-0.5,4.1,3.8,0.195,
Central Diastolic Pressure,C_DP,73,74.0,77.6,-3.7,10.1,11.2,0.002,**
Central Diastolic Time Index,C_DTI,73,3211.4,3403.9,-192.5,504.9,533.1,0.0,***
Central ED/Period %,C_ED_PERIOD,73,36.2,35.7,0.5,4.2,3.8,0.162,
Central End Systolic Pressure,C_ESP,73,98.1,104.5,-6.4,14.5,15.9,0.0,***
Central Mean Pressure,C_MEANP,73,88.8,93.7,-5.0,12.0,12.8,0.0,***


## Comparison 4: OLD Sph SECOND Measurement vs. NEW Sph FIRST Measurement

In [30]:
compare( full_db, 'SphsecondM_', 'SphPWAFirstM_')

description,name,n_obs,mean1,mean2,meandiff,std1,std2,p_value,sig
Central Aug/PH %,C_AGPH,71,24.8,26.0,-1.2,14.7,14.6,0.22,
Central Augmentation Index,C_AI,71,138.5,141.9,-3.4,30.1,31.6,0.198,
Central Augmented Pressure,C_AP,71,8.6,10.4,-1.8,6.3,7.9,0.001,**
Central Diastolic Duration,C_DD,71,570.8,602.5,-31.6,127.3,133.0,0.012,*
Period-ED/Period %,C_DD_PERIOD,71,63.7,64.4,-0.7,4.1,3.8,0.097,.
Central Diastolic Pressure,C_DP,71,74.2,77.8,-3.6,10.2,11.2,0.003,**
Central Diastolic Time Index,C_DTI,71,3215.4,3411.3,-195.9,507.8,538.6,0.0,***
Central ED/Period %,C_ED_PERIOD,71,36.3,35.6,0.7,4.1,3.8,0.087,.
Central End Systolic Pressure,C_ESP,71,98.3,104.5,-6.2,14.7,16.1,0.0,***
Central Mean Pressure,C_MEANP,71,89.0,93.8,-4.8,12.2,13.0,0.0,***
