_analysis.ipynb (R): generate figures_

Motor phenotypes in 16p11.2 deletion

# Setup

In [None]:
library(here)

## Load functions

In [None]:
source(here('_functions.r'))

## Initialize environment

In [None]:
# 0_initialize.r
# created 20201113

library(conflicted) # loads first so it can deal with namespace collisions
library(here)

library(arm)
library(broom)
library(data.table)
library(fs)
library(janitor)
library(skimr)
library(tidymodels)
library(tidyverse)

# Wide console output so printed tables are not wrapped.
options(width = 200)

# Resolve namespace collisions reported by {conflicted}.
conflict_prefer("filter", "dplyr", quiet = TRUE)
conflict_prefer("select", "dplyr", quiet = TRUE)
conflict_prefer("rescale", "arm", quiet = TRUE)
conflict_prefer("chisq.test", "stats")

# Locations of the raw datasets (relative to the project root) and of the
# directory that receives the generated tables.
BASE_PATH <- path("data", "raw")
DIR_16P <- path("Simons_Searchlight_Phase1_16p11.2_Dataset_v11.0")
DIR_SEARCHLIGHT <- path("Simons_Searchlight_Dataset_v9.2", "all_cohorts")
DIR_TABLE_OUTPUT <- here("tables", "raw")

# Key columns shared by the 16p11.2 phenotype tables.
# NOTE(review): not referenced in this notebook; presumably read by the
# join helpers in _functions.r -- confirm before removing.
JOIN_BY <- c(
    "individual",
    "individual.family",
    "family_type",
    "collection"
)

# Key columns for the Searchlight comparison cohort (same caveat as above).
JOIN_BY_COMPARISON <- c(
    "sfari_id",
    "genetic_status",
    "genetic_status_source",
    "inheritance_status",
    "inheritance_status_source",
    "sex",
    "relationship_to_iip",
    "additional_variants",
    "has_potential_confound",
    "deceased"
)

message("Initialized environment")

## Import

In [None]:
# 1_import.r
# 20201113

# Load the 16p11.2 dataset directory (project root / BASE_PATH / DIR_16P).
# load_data() is defined outside this cell; the code below accesses its result
# as a named list of tables (e.g. `$svip_subjects_16p11.2`).
phenotype_dataframes <- load_data(here(path(
    BASE_PATH, DIR_16P)))

message("Imported data")

## Merge

In [None]:
# 2_preprocess.r
# created 20220826

# 16p data

## prepare for merge
# Rename identifier columns so the subject, diagnosis and D-KEFS tables use
# the same key names (`individual`, `individual.family`).
phenotype_dataframes$subjects <- phenotype_dataframes$`svip_subjects_16p11.2` |> 
    rename(individual = sfari_id, 
           individual.family = family)

phenotype_dataframes$diagnoses <- phenotype_dataframes$diagnosis_summary |> 
    rename(individual.family = family)

phenotype_dataframes$dkefs <- phenotype_dataframes$d_kefs |> 
    rename(individual.family = family)

# Start from the subject table and attach each instrument in turn.
# strict_left_join() is a project helper (not defined in this notebook);
# presumably it joins on JOIN_BY and guards against row duplication -- TODO confirm.
df <- phenotype_dataframes$subjects |> 
    strict_left_join(phenotype_dataframes$diagnoses) |>
    strict_left_join(phenotype_dataframes$dkefs) |>
    strict_left_join(phenotype_dataframes$srs_parent) |>
    strict_left_join(phenotype_dataframes$srs_adult) |>
    strict_left_join(phenotype_dataframes$vineland_ii) |>
    strict_left_join(phenotype_dataframes$wasi) |>
    strict_left_join(phenotype_dataframes$purdue_pegboard) |>
    strict_left_join(phenotype_dataframes$bapq) |>
    strict_left_join(phenotype_dataframes$movement_abc_2) |>
    strict_left_join(phenotype_dataframes$adi_r) |>
    strict_left_join(phenotype_dataframes$svip_neuro_exam) |>
    strict_left_join(phenotype_dataframes$svip_background_history) |>
    strict_left_join(clean_df(phenotype_dataframes$previous_diagnosis))

message("Merged")

## Transform

In [None]:
# Derive every analysis variable in a single mutate(); later entries may use
# columns created by earlier ones.
df <- df |>
    mutate(
        genetic_status = factor(genetic_status_16p),
        sex_factor = factor(sex),
        age_years = age_months / 12,

        # 0/1 genotype indicators; a missing genotype is coded 0
        duplication = if_else(genetic_status_16p == "duplication", 1, 0, missing = 0),
        deletion = if_else(genetic_status_16p == "deletion", 1, 0, missing = 0),
        triplication = if_else(genetic_status_16p == "triplication", 1, 0, missing = 0),
        control = if_else(
            duplication == 0 & deletion == 0 & triplication == 0,
            1, 0, missing = 0),

        # 1 = initially identified proband, 0 = anyone else (or unknown)
        iip = if_else(
            relationship_to_iip == "Initially identified proband",
            1, 0, missing = 0),

        # evaluated while coordination_disorder is still in its raw coding
        movement_disorder = if_else(
            stereotype_motor_disorder == 1 | coordination_disorder == 1,
            1, 0, missing = 0),

        # SRS coordination item: adult form first, parent form as fallback
        srs_coordinated = coalesce(
            srs_adult.srs_adult_raw.q14_coordinated,
            srs_parent.srs_parent_raw.q14_well_coordinated),

        # SRS total: parent form first, adult form as fallback
        srs_raw = coalesce(srs_parent.total, srs_adult.total),

        # MABC-2 percentiles flipped (100 - percentile)
        mabc_percentile_transformed =
            100 - as.numeric(movement_abc_2.movement_abc_2_ss.total_percentile),
        mabc_aim_catch_percentile_transformed =
            100 - as.numeric(movement_abc_2.movement_abc_2_ss.aimcatch_percentile),
        mabc_balance_percentile_transformed =
            100 - as.numeric(movement_abc_2.movement_abc_2_ss.balance_percentile),

        sex_numeric = recode(sex, female = 1, male = 0),
        coordination_disorder = as_factor(coordination_disorder)
    )

# Keep deletion / duplication / negative participants with a known age.
df <- df |>
    filter(genetic_status %in% c("deletion", "duplication", "negative")) |>
    filter(!is.na(age_years))

# Analysis subsets.
df_control <- df |> filter(deletion == 0, duplication == 0)
df_cnv <- df |> filter(duplication == 1 | deletion == 1) #|> filter(age_years<=18)
df_duplication <- df_cnv |> filter(duplication == 1)
df_deletion <- df_cnv |> filter(deletion == 1)


message("Transformed")

In [None]:
# setup
# Re-level genetic status as an ordered factor (deletion < negative < duplication
# < triplication). This only affects `df`: df_cnv / df_deletion / df_duplication /
# df_control were created earlier and keep the original unordered factor.
# NOTE(review): df was already filtered to deletion/duplication/negative, so the
# 'triplication' level is empty here; functions that keep unused levels
# (e.g. xtabs) will report an all-zero group.
df$genetic_status <- factor(df$genetic_status, 
    ordered = TRUE, 
    levels = c('deletion', 'negative', 'duplication', 'triplication'))

In [None]:
# Display labels keyed by genetic_status level.
genetic_status_labels <- c(
    deletion = '16p11.2 deletion', 
    negative = 'No CNV',
    duplication = '16p11.2 duplication')

In [None]:
# Labelled copy of genetic_status; the named label vector is spliced in as old = new pairs.
df$genetic_status_labelled_no_n <- recode_factor(df$genetic_status, !!!genetic_status_labels)

# Table 1

In [None]:
# Total number of participants in the filtered analysis set.
df |> count()

In [None]:
# Participants per genetic status group.
df |> group_by(genetic_status) |> count()

## SRS coordination

In [None]:
# SRS coordination item among deletion carriers (summary helper defined outside this notebook).
df_deletion |> summarize_continuous_variable(srs_coordinated)

In [None]:
# SRS coordination item among non-carriers, split by proband status (iip).
df_control |> group_by(iip) |> summarize_continuous_variable(srs_coordinated)

In [None]:
# SRS coordination item among duplication carriers, split by proband status.
df_duplication |> group_by(iip) |> summarize_continuous_variable(srs_coordinated)

In [None]:
# SRS coordination item among deletion carriers, split by proband status.
df_deletion |> group_by(iip) |> summarize_continuous_variable(srs_coordinated)

In [None]:
# SRS coordination item: overall and per-genotype summaries, then a one-way
# ANOVA across genetic status followed by Tukey HSD pairwise comparisons.
df |> summarize_continuous_variable(srs_coordinated)

df |> group_by(genetic_status) |>
    summarize_continuous_variable(srs_coordinated)

srs_aov <- aov(srs_coordinated ~ genetic_status, data=df)
srs_aov |> tidy()
TukeyHSD(srs_aov)

## Age

### Comparison with familial controls

In [None]:
# Age in years: overall and per-genotype summaries, then a one-way ANOVA
# across genetic status with Tukey HSD pairwise comparisons.
df |> summarize_continuous_variable(age_years)

df |> group_by(genetic_status) |>
    summarize_continuous_variable(age_years)

age_aov <- aov(age_years ~ genetic_status, data = df)
age_aov |> tidy()
TukeyHSD(age_aov)

### Comparison within CNV

In [None]:
# Age within CNV carriers: pooled summary stacked on top of the per-genotype
# summaries (df_cnv holds deletion and duplication carriers only).
age_cnv <- df_cnv |> 
    summarize_continuous_variable(age_years)
age_dup_del <- df_cnv |> 
    group_by(genetic_status) |> 
    summarize_continuous_variable(age_years)
age <- bind_rows(age_cnv, age_dup_del)
age

# Two-sample t-test, deletion vs duplication.
# - Tested on age_years so the estimate and confidence interval are in the same
#   unit as the summary table above (the t statistic and p-value are identical
#   to a test on age_months, which is a constant multiple).
# - `order` makes the direction of the difference explicit (deletion minus
#   duplication) instead of relying on the default with a warning.
age_t <- df_cnv |> 
    t_test(age_years ~ genetic_status,
           order = c("deletion", "duplication")) |> 
    add_column("variable" = "age", .before = 1)
age_t

In [None]:
# Persist the CNV age summary and t-test under <DIR_TABLE_OUTPUT>/table1/.
age |> write_csv(here(DIR_TABLE_OUTPUT,"table1","age.csv"))
age_t |> write_csv(here(DIR_TABLE_OUTPUT,"table1","age_t.csv"))

## IQ

### Comparison with familial controls

In [None]:
# Full-scale IQ: overall and per-genotype summaries, then a one-way ANOVA
# across genetic status with Tukey HSD pairwise comparisons.
df |> summarize_continuous_variable(best_full_scale_iq)

df |> group_by(genetic_status) |>
    summarize_continuous_variable(best_full_scale_iq)

iq_aov <- aov(best_full_scale_iq ~ genetic_status, data=df)
iq_aov |> tidy()
TukeyHSD(iq_aov)

### Comparison within CNV

In [None]:
# Full-scale IQ within CNV carriers: pooled summary stacked on top of the
# per-genotype summaries.
iq_cnv <- df_cnv |>
    summarize_continuous_variable(best_full_scale_iq)
iq_dup_del <- df_cnv |>
    group_by(genetic_status) |>
    summarize_continuous_variable(best_full_scale_iq)
iq <- bind_rows(iq_cnv, iq_dup_del)
iq

# Two-sample t-test, deletion vs duplication. `order` states the direction of
# the difference explicitly (deletion minus duplication) rather than relying
# on the default, which emits a warning.
iq_t <- df_cnv |>
    t_test(best_full_scale_iq ~ genetic_status,
           order = c("deletion", "duplication")) |>
    add_column("variable" = "iq", .before = 1)
iq_t

In [None]:
# Persist the CNV IQ summary and t-test under <DIR_TABLE_OUTPUT>/table1/.
iq |> write_csv(here(DIR_TABLE_OUTPUT,"table1","iq.csv"))
iq_t |> write_csv(here(DIR_TABLE_OUTPUT,"table1","iq_t.csv"))

## Age of first independent walking

In [None]:
# ADI-R item q05a (age walked unaided): overall and per-genotype summaries,
# then a one-way ANOVA across genetic status with Tukey HSD comparisons.
df |> summarize_continuous_variable(adi_r.adi_r.q05a_walked_unaideda)

df |> group_by(genetic_status) |>
    summarize_continuous_variable(adi_r.adi_r.q05a_walked_unaideda)

aow_aov <- aov(adi_r.adi_r.q05a_walked_unaideda ~ genetic_status, data=df)
aow_aov |> tidy()
TukeyHSD(aow_aov)

## Sex

In [None]:
# Sex distribution per genetic status (all three groups) and the omnibus
# chi-square test of sex by genetic status.
# NOTE: `sex` and `sex_chi` are overwritten by the CNV-only versions in the next cell.
sex <- df |> group_by(genetic_status) |> summarize_discrete_variable(sex)
sex

sex_chi <- stats::chisq.test(df$genetic_status, df$sex) |> 
    tidy() |> 
    add_column("variable" = "sex", .before = 1)
sex_chi

In [None]:
# Sex distribution within CNV carriers: pooled rows stacked on per-genotype rows.
sex_cnv <- df_cnv |> summarize_discrete_variable(sex)
sex_dup_del <- df_cnv |> group_by(genetic_status) |> summarize_discrete_variable(sex)
sex <- bind_rows(sex_cnv, sex_dup_del)
sex

# Chi-square test of sex by deletion indicator (deletion vs duplication).
sex_chi <- stats::chisq.test(df_cnv$deletion, df_cnv$sex) |> 
    tidy() |> 
    add_column("variable" = "sex", .before = 1)
sex_chi

In [None]:
# Pairwise chi-square comparisons of sex between genetic status groups,
# Bonferroni-adjusted (groups are in the columns, hence popsInRows = FALSE).
# drop.unused.levels = TRUE removes factor levels with no observations (the
# 'triplication' level of df$genetic_status is empty after filtering), so no
# all-zero group enters the pairwise comparisons.
chisq.post.hoc(
    xtabs(~ sex + genetic_status, data = df, drop.unused.levels = TRUE),
    test = "chisq.test",
    popsInRows = FALSE,
    control = "bonferroni")

In [None]:
# Persist the current `sex` summary and `sex_chi` test under <DIR_TABLE_OUTPUT>/table1/.
sex |> write_csv(here(DIR_TABLE_OUTPUT,"table1","sex.csv"))
sex_chi |> write_csv(here(DIR_TABLE_OUTPUT,"table1","sex_chi.csv"))

## ASD

In [None]:
# Latest clinical ASD diagnosis per genetic status group.
df |> group_by(genetic_status) |> summarize_discrete_variable(latest_clinical_asd_dx)

In [None]:
# Omnibus chi-square test of ASD diagnosis by genetic status (all groups).
# The result row is labelled "asd" (it was mislabelled "sex" by copy-paste).
asd_chi <- stats::chisq.test(df$genetic_status, df$latest_clinical_asd_dx) |> 
    tidy() |> 
    add_column("variable" = "asd", .before = 1)
asd_chi

In [None]:
# ASD diagnosis within CNV carriers: pooled rows stacked on per-genotype rows.
asd_cnv <- df_cnv |> summarize_discrete_variable(latest_clinical_asd_dx)
asd_dup_del <- df_cnv |>
    group_by(genetic_status) |>
    summarize_discrete_variable(latest_clinical_asd_dx)
asd <- bind_rows(asd_cnv, asd_dup_del)
asd

# Chi-square test of ASD diagnosis by deletion indicator (deletion vs
# duplication). Labelled "asd" so the exported asd_chi.csv identifies the
# right variable (it was mislabelled "sex" by copy-paste).
asd_chi <- stats::chisq.test(df_cnv$deletion, df_cnv$latest_clinical_asd_dx) |> 
    tidy() |> 
    add_column("variable" = "asd", .before = 1)
asd_chi

In [None]:
# Pairwise chi-square comparisons of ASD diagnosis between genetic status
# groups, Bonferroni-adjusted (groups are in the columns).
# drop.unused.levels = TRUE removes the empty 'triplication' level so no
# all-zero group enters the pairwise comparisons.
# NOTE(review): this uses `clinical_asd_dx`, whereas the summaries and omnibus
# test in this section use `latest_clinical_asd_dx` -- confirm which is intended.
chisq.post.hoc(
    xtabs(~ clinical_asd_dx + genetic_status, data = df, drop.unused.levels = TRUE),
    test = "chisq.test",
    popsInRows = FALSE,
    control = "bonferroni")

In [None]:
# Persist the CNV ASD summary and chi-square test under <DIR_TABLE_OUTPUT>/table1/.
asd |> write_csv(here(DIR_TABLE_OUTPUT,"table1","asd.csv"))
asd_chi |> write_csv(here(DIR_TABLE_OUTPUT,"table1","asd_chi.csv"))

## SRS coordination

In [None]:
# Deletion carriers: SRS coordination item by proband status (iip is 0/1).
srs_del_aov <- aov(srs_coordinated ~ iip, data = df_deletion)
srs_del_aov |> tidy()

In [None]:
# Duplication carriers: SRS coordination item by proband status (iip is 0/1).
srs_dup_aov <- aov(srs_coordinated ~ iip, data = df_duplication)
srs_dup_aov |> tidy()

In [None]:
library(effectsize)

In [None]:
# Effect size (eta squared, not partial) for the deletion-carrier model.
eta_squared(srs_del_aov, partial = FALSE)

In [None]:
# Effect size (eta squared, not partial) for the duplication-carrier model.
eta_squared(srs_dup_aov, partial = FALSE)

In [None]:
# Descriptives behind the deletion-carrier ANOVA: SRS coordination by proband status.
df_deletion |> group_by(iip) |> summarize_continuous_variable(srs_coordinated)

In [None]:
# Descriptives behind the duplication-carrier ANOVA: SRS coordination by proband status.
df_duplication |> group_by(iip) |> summarize_continuous_variable(srs_coordinated)

In [None]:
# SRS coordination item among non-carriers (reference values).
df_control |> summarize_continuous_variable(srs_coordinated)

## Trails 5

In [None]:
# Distinct Trail Making condition 5 values among non-carriers (data check).
df_control$trail_making_condition5 |> unique()

In [None]:
# Trail Making condition 5 among non-carriers, split by proband status.
df_control |> group_by(iip) |> summarize_continuous_variable(trail_making_condition5)

In [None]:
# Deletion carriers: Trail Making condition 5 by proband status.
trails_del_aov <- aov(trail_making_condition5 ~ iip, data = df_deletion)
trails_del_aov |> tidy()

In [None]:
# Duplication carriers: Trail Making condition 5 by proband status.
trails_dup_aov <- aov(trail_making_condition5 ~ iip, data = df_duplication)
trails_dup_aov |> tidy()

In [None]:
# Effect size (eta squared, not partial) for the duplication-carrier Trails model.
eta_squared(trails_dup_aov, partial = FALSE)

In [None]:
# Trail Making condition 5 among duplication carriers, split by proband status.
df_duplication |> group_by(iip) |> summarize_continuous_variable(trail_making_condition5)

In [None]:
# Trail Making condition 5 among deletion carriers, split by proband status.
df_deletion |> group_by(iip) |> summarize_continuous_variable(trail_making_condition5)

## Coordination disorder

In [None]:
# NOTE(review): this cell repeats the all-group sex summary and chi-square test
# from the "Sex" section, although it sits under "Coordination disorder".
# It also overwrites `sex` and `sex_chi` (previously the CNV-only versions
# written to CSV) -- confirm whether it is a copy-paste leftover.
sex <- df |> group_by(genetic_status) |> summarize_discrete_variable(sex)
sex

sex_chi <- stats::chisq.test(df$genetic_status, df$sex) |> 
    tidy() |> 
    add_column("variable" = "sex", .before = 1)
sex_chi

In [None]:
# Coordination disorder among duplication carriers with a recorded value, by proband status.
df_duplication |> filter(!is.na(coordination_disorder)) |> group_by(iip) |> summarize_discrete_variable(coordination_disorder)

In [None]:
# Number of deletion carriers.
df_deletion |> nrow()

In [None]:
# Deletion carriers: chi-square test of coordination disorder by proband status.
chi <- stats::chisq.test(df_deletion$coordination_disorder, df_deletion$iip) |> 
    tidy()
# Display the result; the assignment alone prints nothing and `chi` is
# overwritten by the duplication-carrier test further down.
chi

In [None]:
# Number of duplication carriers.
df_duplication |> nrow()

In [None]:
# Duplication carriers: chi-square test of coordination disorder by proband status.
chi <- stats::chisq.test(df_duplication$coordination_disorder, df_duplication$iip) |> 
    tidy()
# Display the result; the assignment alone prints nothing.
chi

In [None]:
# Coordination disorder among deletion carriers with a recorded value, by proband status.
df_deletion |> filter(!is.na(coordination_disorder)) |> group_by(iip) |> summarize_discrete_variable(coordination_disorder)

In [None]:
# Same summary without dropping missing values first.
df_deletion |> group_by(iip) |> summarize_discrete_variable(coordination_disorder)

## Tremor

In [None]:
# Neuro-exam tremor among deletion carriers, by proband status.
df_deletion |> group_by(iip) |> summarize_discrete_variable(svip_neuro_exam.movement.motor_tremor)

In [None]:
# Neuro-exam tremor among duplication carriers, by proband status.
df_duplication |> group_by(iip) |> summarize_discrete_variable(svip_neuro_exam.movement.motor_tremor)

In [None]:
# Neuro-exam tremor among non-carriers.
df_control |> summarize_discrete_variable(svip_neuro_exam.movement.motor_tremor)

In [None]:
# Recode tremor in df_cnv to a "0"/"1" factor: "no" -> "0", "yes" -> "1".
# NOTE(review): missing values are turned into "0", i.e. carriers without a
# recorded tremor item are counted as "no tremor" -- confirm this is intended.
# Only df_cnv is modified; df_deletion / df_duplication keep the raw yes/no values.
# The same recode is repeated in the Table 2 "Tremor" section.
df_cnv$svip_neuro_exam.movement.motor_tremor <- 
    df_cnv$svip_neuro_exam.movement.motor_tremor |>
        as_factor() |>
        fct_explicit_na(na_level = "0") |>
        fct_collapse(
            "0" = "no",
            "1" = "yes") 

## Purdue

In [None]:
# Purdue Pegboard both-hands raw score among deletion carriers with a score, by proband status.
df_deletion |> filter(!is.na(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw)) |>
    group_by(iip) |>
    summarize_continuous_variable(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw)

In [None]:
# Deletion carriers: Purdue both-hands raw score by proband status.
purdue_del_aov <- aov(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw ~ iip, data = df_deletion)
purdue_del_aov |> tidy()

In [None]:
# Effect size (eta squared, not partial) for the deletion-carrier Purdue model.
eta_squared(purdue_del_aov, partial = FALSE)

In [None]:
# NOTE: identical to the deletion-carrier Purdue ANOVA fitted two cells above.
purdue_del_aov <- aov(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw ~ iip, data = df_deletion)
purdue_del_aov |> tidy()

In [None]:
# Duplication carriers: Purdue both-hands raw score by proband status.
purdue_dup_aov <- aov(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw ~ iip, data = df_duplication)
purdue_dup_aov |> tidy()

In [None]:
# Purdue Pegboard both-hands raw score among duplication carriers with a score, by proband status.
df_duplication |> filter(!is.na(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw)) |>
    group_by(iip) |>
    summarize_continuous_variable(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw)

## MABC

In [None]:
# Flipped MABC-2 total percentile (100 - percentile) per genetic status group.
df |> group_by(genetic_status) |> summarize_continuous_variable(mabc_percentile_transformed)

In [None]:
# Flipped MABC-2 total percentile among deletion carriers, by proband status.
df_deletion |>     
    group_by(iip) |>
    summarize_continuous_variable(mabc_percentile_transformed)

In [None]:
# NOTE: repeats the previous deletion-carrier MABC summary unchanged.
df_deletion |>     
    group_by(iip) |>
    summarize_continuous_variable(mabc_percentile_transformed)

In [None]:
# NOTE: repeats the previous deletion-carrier MABC summary unchanged.
df_deletion |>     
    group_by(iip) |>
    summarize_continuous_variable(mabc_percentile_transformed)

In [None]:
# Flipped MABC-2 total percentile among duplication carriers, by proband status.
df_duplication |>     
    group_by(iip) |>
    summarize_continuous_variable(mabc_percentile_transformed)

In [None]:
# Flipped MABC-2 total percentile among non-carriers, by proband status.
df_control |>     
    group_by(iip) |>
    summarize_continuous_variable(mabc_percentile_transformed)

# Table 2

## Developmental coordination disorder diagnosis

### Simple regression

In [None]:
# Standardized age for the adjusted models, computed with arm::rescale()
# over the CNV carriers only.
df_cnv <- df_cnv |> mutate(age_years_scaled = arm::rescale(age_years))

In [None]:
# Number of deletion carriers.
df_deletion |> nrow()

In [None]:
# Number of duplication carriers.
df_duplication |> nrow()

In [None]:
# Number of CNV carriers (deletion + duplication).
df_cnv |> nrow()

In [None]:
# Split CNV carriers by proband status (iip: 1 = initially identified proband).
# These are snapshots of df_cnv; they are rebuilt later after df_cnv is recoded.
df_cnv_non_probands <- df_cnv |> filter(iip==0)
df_cnv_probands <- df_cnv |> filter(iip==1)

In [None]:
# Logistic regression, non-probands only: coordination disorder ~ deletion
# (reference: duplication carriers).
simple_dcd_non_probands <- glm(
    coordination_disorder ~ 
        deletion,
    family = "binomial",
    data = df_cnv_non_probands)

simple_dcd_non_probands |> summary()
# Odds ratios with CIs; `exponentiate` is spelled out instead of relying on
# partial matching of `exp`.
simple_dcd_non_probands |> tidy(conf.int = TRUE, exponentiate = TRUE)

In [None]:
# Logistic regression, probands only: coordination disorder ~ deletion
# (reference: duplication carriers).
simple_dcd_probands <- glm(
    coordination_disorder ~ 
        deletion,
    family = "binomial",
    data = df_cnv_probands)

simple_dcd_probands |> summary()
# Odds ratios with CIs; `exponentiate` is spelled out instead of relying on
# partial matching of `exp`.
simple_dcd_probands |> tidy(conf.int = TRUE, exponentiate = TRUE)

In [None]:
# Distinct coordination_disorder values among CNV carriers (data check).
df_cnv$coordination_disorder |> unique()

In [None]:
# `df_cnv_no_na` is not created anywhere earlier in this notebook, so this
# cell failed on a fresh run. Build it here if it does not exist yet.
# NOTE(review): assumed to mean "CNV carriers with a recorded
# coordination_disorder" -- confirm the intended definition.
if (!exists("df_cnv_no_na")) {
    df_cnv_no_na <- df_cnv |> filter(!is.na(coordination_disorder))
}
df_cnv_no_na$coordination_disorder |> unique()

In [None]:
# Logistic regression, all CNV carriers: coordination disorder ~ deletion
# (reference: duplication carriers).
simple_dcd <- glm(
    coordination_disorder ~ 
        deletion,
    family = "binomial",
    data = df_cnv)

simple_dcd |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
simple_dcd |> tidy(conf.int = TRUE, exponentiate = TRUE)

In [None]:
# NOTE: same model as the previous cell (coordination disorder ~ deletion,
# all CNV carriers), refitted unchanged.
simple_dcd <- glm(
    coordination_disorder ~ 
        deletion,
    family = "binomial",
    data = df_cnv)

simple_dcd |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
simple_dcd |> tidy(conf.int = TRUE, exponentiate = TRUE)

### Multiple regression

In [None]:
# Mean age (years) of CNV carriers.
df_cnv$age_years |> mean(na.rm=TRUE)

In [None]:
# Adjusted logistic regression: coordination disorder ~ deletion + standardized
# age + sex, all CNV carriers.
multiple_dcd <- glm(
    coordination_disorder ~ 
        deletion +
        age_years_scaled + 
        sex,
    family = "binomial",
    data = df_cnv)

multiple_dcd |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
multiple_dcd |> tidy(conf.int = TRUE, exponentiate = TRUE)

In [None]:
# Same adjusted model with age in years (unscaled); overwrites `multiple_dcd`.
multiple_dcd <- glm(
    coordination_disorder ~ 
        deletion +
        age_years + 
        sex,
    family = "binomial",
    data = df_cnv)

multiple_dcd |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
multiple_dcd |> tidy(conf.int = TRUE, exponentiate = TRUE)

## Age at first walking (ADI-R)

In [None]:
# ADI-R q05a (age walked unaided) per genetic status group, all participants.
df |> 
    group_by(genetic_status) |>
    summarize_continuous_variable(adi_r.adi_r.q05a_walked_unaideda)

### Simple regression

In [None]:
# ADI-R q05a (age walked unaided) per genotype, CNV carriers only.
df_cnv |> 
    group_by(genetic_status) |>
    summarize_continuous_variable(adi_r.adi_r.q05a_walked_unaideda)

In [None]:
# Linear model, probands only: age walked unaided ~ deletion
# (reference: duplication carriers).
model_delayed_walking <- lm(
    adi_r.adi_r.q05a_walked_unaideda ~ deletion,
    data = df_cnv_probands
)

summary(model_delayed_walking)
tidy(model_delayed_walking, conf.int = TRUE)

In [None]:
# Linear model, all CNV carriers: age walked unaided ~ deletion.
# Overwrites the proband-only fit of the same name.
model_delayed_walking <- lm(
    adi_r.adi_r.q05a_walked_unaideda ~ deletion,
    data = df_cnv
)

summary(model_delayed_walking)
tidy(model_delayed_walking, conf.int = TRUE)

### Multiple regression

In [None]:
# Adjusted linear model: age walked unaided ~ deletion + standardized age + sex.
model_delayed_walking_mult <- lm(
    adi_r.adi_r.q05a_walked_unaideda ~ deletion + age_years_scaled + sex,
    data = df_cnv
)

summary(model_delayed_walking_mult)
tidy(model_delayed_walking_mult, conf.int = TRUE)

In [None]:
# Same adjusted model with age in years (unscaled).
model_delayed_walking_mult <- lm(
    adi_r.adi_r.q05a_walked_unaideda ~ 
        deletion +
        age_years +
        sex,
    data = df_cnv)

model_delayed_walking_mult |> summary()
# Linear-model coefficients are reported on their natural scale. The previous
# `exp = TRUE` (copied from the logistic cells) exponentiated them, which is
# meaningless without a log/logit link.
model_delayed_walking_mult |> tidy(conf.int = TRUE)

## SRS coordination item

### Simple regression

In [None]:
# Number of duplication carriers who are probands.
df_cnv |> filter(duplication==1 & iip==1) |> nrow()

In [None]:
# Linear model, non-probands only: SRS coordination item ~ deletion
# (reference: duplication carriers).
simple_srs <- lm(srs_coordinated ~ deletion, data = df_cnv_non_probands)

summary(simple_srs)
tidy(simple_srs, conf.int = TRUE)

In [None]:
# Linear model, all CNV carriers: SRS coordination item ~ deletion.
# Overwrites the non-proband fit of the same name.
simple_srs <- lm(srs_coordinated ~ deletion, data = df_cnv)

summary(simple_srs)
tidy(simple_srs, conf.int = TRUE)

### Multiple regression

In [None]:
# Adjusted linear model: SRS coordination item ~ deletion + standardized age + sex.
multiple_srs <- lm(
    srs_coordinated ~ deletion + age_years_scaled + sex,
    data = df_cnv
)

summary(multiple_srs)
tidy(multiple_srs, conf.int = TRUE)

In [None]:
# Same adjusted model with age in years (unscaled).
multiple_srs <- lm(
    srs_coordinated ~ deletion + age_years + sex,
    data = df_cnv
)

summary(multiple_srs)
tidy(multiple_srs, conf.int = TRUE)

## MABC percentile

### Simple regression

In [None]:
# Linear model, all CNV carriers: flipped MABC-2 balance percentile ~ deletion.
simple_mabc <- lm(mabc_balance_percentile_transformed ~ deletion, data = df_cnv)

summary(simple_mabc)
tidy(simple_mabc, conf.int = TRUE)

### Simple regression: aiming & catching subscale and total score

In [None]:
# Linear model, all CNV carriers: flipped MABC-2 aiming & catching
# percentile ~ deletion. No confidence intervals are requested here.
simple_mabc <- lm(mabc_aim_catch_percentile_transformed ~ deletion, data = df_cnv)

summary(simple_mabc)
tidy(simple_mabc)

In [None]:
# Linear model, all CNV carriers: flipped MABC-2 total percentile ~ deletion.
simple_mabc <- lm(mabc_percentile_transformed ~ deletion, data = df_cnv)

summary(simple_mabc)
tidy(simple_mabc, conf.int = TRUE)

In [None]:
# Number of non-proband deletion carriers with an MABC-2 total percentile.
df_cnv_non_probands |> filter(!is.na(mabc_percentile_transformed)) |> filter(deletion==1) |> nrow()

In [None]:
# Linear model, non-probands only: flipped MABC-2 total percentile ~ deletion.
simple_mabc <- lm(mabc_percentile_transformed ~ deletion, data = df_cnv_non_probands)

summary(simple_mabc)
tidy(simple_mabc, conf.int = TRUE)

### Multiple regression

In [None]:
# Adjusted linear model: flipped MABC-2 total percentile ~ deletion +
# standardized age + sex.
multiple_mabc <- lm(
    mabc_percentile_transformed ~ deletion + age_years_scaled + sex,
    data = df_cnv
)

summary(multiple_mabc)
tidy(multiple_mabc, conf.int = TRUE)

In [None]:
# Same adjusted total-percentile model with age in years (unscaled).
multiple_mabc <- lm(
    mabc_percentile_transformed ~ deletion + age_years + sex,
    data = df_cnv
)

summary(multiple_mabc)
tidy(multiple_mabc, conf.int = TRUE)

In [None]:
# Adjusted linear model: flipped MABC-2 aiming & catching percentile ~
# deletion + age (years) + sex.
multiple_mabc <- lm(
    mabc_aim_catch_percentile_transformed ~ deletion + age_years + sex,
    data = df_cnv
)

summary(multiple_mabc)
tidy(multiple_mabc, conf.int = TRUE)

In [None]:
# Adjusted linear model: flipped MABC-2 balance percentile ~
# deletion + age (years) + sex.
multiple_mabc <- lm(
    mabc_balance_percentile_transformed ~ deletion + age_years + sex,
    data = df_cnv
)

summary(multiple_mabc)
tidy(multiple_mabc, conf.int = TRUE)

## Sensitivity analysis: remove outliers

### Age outliers?

#### Median absolute deviation

In [None]:
# Age outliers via the median-absolute-deviation rule (outliersMAD is not defined in this notebook).
outliersMAD(df$age_months)

#### Modified Z score

In [None]:
# Age outliers via the modified Z score (iglewicz_hoaglin is not defined in this notebook).
iglewicz_hoaglin(df$age_months)

#### Generalized extreme Studentized deviate test

In [None]:
# Generalized ESD (Rosner) test allowing up to k = 3 outliers; show the per-candidate statistics.
# NOTE(review): rosnerTest is not provided by any package attached above -- presumably EnvStats; confirm it is loaded.
rosnerTest(df$age_months, k = 3)$all.stats

## Tremor

### Simple regression

In [None]:
# Recode tremor in df_cnv to a "0"/"1" factor: "no" -> "0", "yes" -> "1".
# NOTE(review): missing values are turned into "0" (treated as "no tremor") -- confirm.
# NOTE: the same recode already ran in the Table 1 "Tremor" section; when both
# cells run in order, the levels here are already "0"/"1".
df_cnv$svip_neuro_exam.movement.motor_tremor <- 
    df_cnv$svip_neuro_exam.movement.motor_tremor |>
        as_factor() |>
        fct_explicit_na(na_level = "0") |>
        fct_collapse(
            "0" = "no",
            "1" = "yes") 

In [None]:
# Convert the "0"/"1" factor to numeric 0/1. Going through as.character()
# yields the labels rather than the internal integer codes.
df_cnv$svip_neuro_exam.movement.motor_tremor <- 
    df_cnv$svip_neuro_exam.movement.motor_tremor |>
        as.character() |>
        as.numeric()

In [None]:
# Rebuild the proband / non-proband subsets so they pick up the recoded tremor column.
df_cnv_non_probands <- df_cnv |> filter(iip==0)
df_cnv_probands <- df_cnv |> filter(iip==1)

In [None]:
# Logistic regression, non-probands only: tremor (0/1) ~ deletion
# (reference: duplication carriers).
model_tremor <- glm(
    svip_neuro_exam.movement.motor_tremor ~ 
        deletion,
    family = "binomial",
    data = df_cnv_non_probands)

model_tremor |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
model_tremor |> tidy(conf.int = TRUE, exponentiate = TRUE)

In [None]:
# Logistic regression, all CNV carriers: tremor (0/1) ~ deletion.
# Overwrites the non-proband fit of the same name.
model_tremor <- glm(
    svip_neuro_exam.movement.motor_tremor ~ 
        deletion,
    family = "binomial",
    data = df_cnv)

model_tremor |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
model_tremor |> tidy(conf.int = TRUE, exponentiate = TRUE)

### Multiple regression

In [None]:
# Adjusted logistic regression: tremor ~ deletion + standardized age + sex
# (IQ deliberately left out of this variant).
model_tremor_multiple <- glm(
    svip_neuro_exam.movement.motor_tremor ~ 
        deletion +
        age_years_scaled +
#        best_full_scale_iq +
        sex,
    family = "binomial",
    data = df_cnv)

model_tremor_multiple |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
model_tremor_multiple |> tidy(conf.int = TRUE, exponentiate = TRUE)

In [None]:
# Adjusted logistic regression: tremor ~ deletion + age (years) + full-scale IQ + sex.
model_tremor_multiple <- glm(
    svip_neuro_exam.movement.motor_tremor ~ 
        deletion +
        age_years +
        best_full_scale_iq +
        sex,
    family = "binomial",
    data = df_cnv)

model_tremor_multiple |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
model_tremor_multiple |> tidy(conf.int = TRUE, exponentiate = TRUE)

## Tandem gait

In [None]:
# Recode tandem gait in df_cnv: 'not-tested' -> NA, "normal" -> "0",
# "abnormal" -> "1".
# as_factor() orders levels by first appearance in the data, and glm() with a
# factor response treats the FIRST level as "failure". Without an explicit
# order, which outcome the odds ratios describe would depend on row order,
# so "0" (normal) is pinned as the reference level.
df_cnv$svip_neuro_exam.gait_station.motor_tandem <- 
    df_cnv$svip_neuro_exam.gait_station.motor_tandem |>
        na_if('not-tested') |>
        as_factor() |>
        fct_collapse(
            "0" = "normal",
            "1" = "abnormal") |>
        fct_relevel("0", "1")

In [None]:
# Rebuild the proband / non-proband subsets so they pick up the recoded tandem-gait column.
df_cnv_non_probands <- df_cnv |> filter(iip==0)
df_cnv_probands <- df_cnv |> filter(iip==1)

### Simple regression

In [None]:
# Logistic regression, non-probands only: tandem gait ~ deletion
# (reference: duplication carriers).
model_tandem <- glm(
    svip_neuro_exam.gait_station.motor_tandem ~ 
        deletion,
    family = "binomial",
    data = df_cnv_non_probands)

model_tandem |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
model_tandem |> tidy(conf.int = TRUE, exponentiate = TRUE)

In [None]:
# Logistic regression, all CNV carriers: tandem gait ~ deletion.
# Overwrites the non-proband fit of the same name.
model_tandem <- glm(
    svip_neuro_exam.gait_station.motor_tandem ~ 
        deletion,
    family = "binomial",
    data = df_cnv)

model_tandem |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
model_tandem |> tidy(conf.int = TRUE, exponentiate = TRUE)

### Multiple regression

In [None]:
# Adjusted logistic regression: tandem gait ~ deletion + standardized age + sex.
model_tandem_multiple <- glm(
    svip_neuro_exam.gait_station.motor_tandem ~ 
        deletion +
        age_years_scaled + 
        sex,
    family = "binomial",
    data = df_cnv)

model_tandem_multiple |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
model_tandem_multiple |> tidy(conf.int = TRUE, exponentiate = TRUE)

In [None]:
# Same adjusted model with age in years (unscaled).
model_tandem_multiple <- glm(
    svip_neuro_exam.gait_station.motor_tandem ~ 
        deletion +
        age_years + 
        sex,
    family = "binomial",
    data = df_cnv)

model_tandem_multiple |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
model_tandem_multiple |> tidy(conf.int = TRUE, exponentiate = TRUE)

## Trails part 5

### Simple regression

In [None]:
# Linear model, non-probands only: Trail Making condition 5 ~ deletion
# (reference: duplication carriers).
model_trails_5 <- lm(trail_making_condition5 ~ deletion, data = df_cnv_non_probands)

summary(model_trails_5)
tidy(model_trails_5, conf.int = TRUE)

In [None]:
# Linear model, all CNV carriers: Trail Making condition 5 ~ deletion.
# Overwrites the non-proband fit of the same name.
model_trails_5 <- lm(trail_making_condition5 ~ deletion, data = df_cnv)

summary(model_trails_5)
tidy(model_trails_5, conf.int = TRUE)

### Multiple regression

In [None]:
# Scratch calculation: a hand-entered value rounded to two decimals.
round(0.00959, 2)

In [None]:
# Adjusted linear model: Trail Making condition 5 ~ deletion + standardized age + sex.
model_trails_5_multiple <- lm(
    trail_making_condition5 ~ deletion + age_years_scaled + sex,
    data = df_cnv
)

summary(model_trails_5_multiple)
tidy(model_trails_5_multiple, conf.int = TRUE)

In [None]:
# Same adjusted model with age in years (unscaled).
model_trails_5_multiple <- lm(
    trail_making_condition5 ~ deletion + age_years + sex,
    data = df_cnv
)

summary(model_trails_5_multiple)
tidy(model_trails_5_multiple, conf.int = TRUE)

## Purdue pegboard

### Simple regression

In [None]:
# Linear model, all CNV carriers: Purdue both-hands raw score ~ deletion
# (reference: duplication carriers).
model_pegboard_both <- lm(
    purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw ~ deletion,
    data = df_cnv
)

summary(model_pegboard_both)
tidy(model_pegboard_both, conf.int = TRUE)

In [None]:
# Simple Purdue Pegboard models on all CNV carriers, one per raw score,
# each with deletion as the only predictor.

# both hands
model_pegboard_both <- lm(
    purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw ~ deletion,
    data = df_cnv
)

summary(model_pegboard_both)
tidy(model_pegboard_both, conf.int = TRUE)

# dominant hand
model_pegboard_dom <- lm(
    purdue_pegboard.purdue_pegboard_raw.dom_hand_score_raw ~ deletion,
    data = df_cnv
)

summary(model_pegboard_dom)
tidy(model_pegboard_dom, conf.int = TRUE)

# non-dominant hand
model_pegboard_nondom <- lm(
    purdue_pegboard.purdue_pegboard_raw.nondom_hand_score_raw ~ deletion,
    data = df_cnv
)

summary(model_pegboard_nondom)
tidy(model_pegboard_nondom, conf.int = TRUE)

### Multiple regression

In [None]:
# Adjusted Purdue Pegboard models with STANDARDIZED age (the next cell refits
# all three with age in years). The dominant / non-dominant models previously
# used unscaled `age_years` here, making them exact duplicates of the next
# cell; all three now use `age_years_scaled` like the both-hands model.
# Rescaling age is a linear change of units, so only the age coefficient and
# the intercept change; the deletion and sex coefficients are unaffected.

# both hands
model_pegboard_both <- lm(
    purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw ~ 
        deletion +
        age_years_scaled +
        sex,
    data = df_cnv)

model_pegboard_both |> summary()
model_pegboard_both |> tidy(conf.int = TRUE)

# dominant hand
model_pegboard_dom <- lm(
    purdue_pegboard.purdue_pegboard_raw.dom_hand_score_raw ~ 
        deletion +
        age_years_scaled +
        sex,
    data = df_cnv)

model_pegboard_dom |> summary()
model_pegboard_dom |> tidy(conf.int = TRUE)

# non-dominant hand
model_pegboard_nondom <- lm(
    purdue_pegboard.purdue_pegboard_raw.nondom_hand_score_raw ~ 
        deletion +
        age_years_scaled +
        sex,
    data = df_cnv)

model_pegboard_nondom |> summary()
model_pegboard_nondom |> tidy(conf.int = TRUE)

In [None]:
# Adjusted Purdue Pegboard models with age in years (unscaled), one per raw
# score: score ~ deletion + age + sex on all CNV carriers.

# both hands
model_pegboard_both <- lm(
    purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw ~ deletion + age_years + sex,
    data = df_cnv
)

summary(model_pegboard_both)
tidy(model_pegboard_both, conf.int = TRUE)

# dominant hand
model_pegboard_dom <- lm(
    purdue_pegboard.purdue_pegboard_raw.dom_hand_score_raw ~ deletion + age_years + sex,
    data = df_cnv
)

summary(model_pegboard_dom)
tidy(model_pegboard_dom, conf.int = TRUE)

# non-dominant hand
model_pegboard_nondom <- lm(
    purdue_pegboard.purdue_pegboard_raw.nondom_hand_score_raw ~ deletion + age_years + sex,
    data = df_cnv
)

summary(model_pegboard_nondom)
tidy(model_pegboard_nondom, conf.int = TRUE)

# Table 3

## Overall motor impairment

In [None]:
# Number of deletion carriers.
df_deletion |> nrow()

In [None]:
# Mean Trail Making condition 5 score among deletion carriers.
df_deletion$trail_making_condition5 |> mean(na.rm=TRUE)

In [None]:
# TWO standard deviations of the Trails score (the pipe binds tighter than `*`, so this is sd(...) * 2).
df_deletion$trail_making_condition5 |> sd(na.rm=TRUE)*2

In [None]:
# Deletion carriers scoring more than 2 SD below the deletion-group mean on
# Trails condition 5. The cut-off is computed from the data instead of being
# hand-copied from the two cells above.
df_deletion |>
    filter(trail_making_condition5 <
        (mean(trail_making_condition5, na.rm = TRUE) -
            2 * sd(trail_making_condition5, na.rm = TRUE))) |>
    nrow()

In [None]:
# Mean Purdue both-hands raw score among deletion carriers.
df_deletion$purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw |> mean(na.rm=TRUE)

In [None]:
# SD of the Purdue both-hands raw score among deletion carriers.
df_deletion$purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw |> sd(na.rm=TRUE)

In [None]:
# Mean flipped MABC-2 total percentile among deletion carriers.
df_deletion$mabc_percentile_transformed |> mean(na.rm=TRUE)

In [None]:
# SD of the flipped MABC-2 total percentile among deletion carriers.
df_deletion$mabc_percentile_transformed |> sd(na.rm=TRUE)

In [None]:
# Flag deletion carriers with any sign of motor impairment: MABC or Purdue
# more than 1 SD below the deletion-group mean, abnormal tandem gait, tremor,
# or a coordination-disorder diagnosis.
# The cut-offs are computed from the data instead of being hand-copied from
# printed output, so they stay correct if the dataset or filters change.
# NOTE(review): mabc_percentile_transformed is 100 - percentile, so LOWER
# values mean BETTER performance; `< mean - SD` therefore selects the best
# performers -- confirm whether `> mean + SD` was intended.
# NOTE(review): unlike the duplication definition, no Trails criterion is used here.
# Rows where every criterion is FALSE-or-missing (and at least one is missing)
# get NA rather than 0.
df_deletion <- df_deletion |> 
    mutate(overall_impairment =
        if_else(
            mabc_percentile_transformed <
                (mean(mabc_percentile_transformed, na.rm = TRUE) -
                    sd(mabc_percentile_transformed, na.rm = TRUE)) |
            purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw <
                (mean(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE) -
                    sd(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE)) |
            svip_neuro_exam.gait_station.motor_tandem == 'abnormal' |
            svip_neuro_exam.movement.motor_tremor == 'yes' |
            coordination_disorder == 1, 1, 0))

In [None]:
# Number of deletion carriers flagged as impaired (rows with NA are not counted).
df_deletion |> filter(overall_impairment==1) |> nrow()

In [None]:
# Cross-check: count deletion carriers meeting any impairment criterion by
# filtering directly. Cut-offs (1 SD below the deletion-group mean) are
# computed from the data rather than hand-copied constants.
# NOTE(review): on the flipped MABC scale lower = better, so the MABC
# criterion may be inverted -- confirm.
df_deletion |> filter(
    mabc_percentile_transformed <
        (mean(mabc_percentile_transformed, na.rm = TRUE) -
            sd(mabc_percentile_transformed, na.rm = TRUE)) |
    purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw <
        (mean(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE) -
            sd(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE)) |
    svip_neuro_exam.gait_station.motor_tandem == 'abnormal' |
    svip_neuro_exam.movement.motor_tremor == 'yes' |
    coordination_disorder == 1
    ) |> nrow()

In [None]:
# Deletion carriers scoring more than 1 SD below the deletion-group mean on
# the Purdue both-hands score (cut-off computed from the data).
df_deletion |>
    filter(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw <
        (mean(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE) -
            sd(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE))) |>
    nrow()

In [None]:
# Inspect the coordination_disorder factor for deletion carriers.
df_deletion$coordination_disorder

In [None]:
# Mean Trail Making condition 5 score among duplication carriers.
df_duplication$trail_making_condition5 |> mean(na.rm=TRUE)

In [None]:
# SD of the Trails condition 5 score among duplication carriers (1 SD here, unlike the deletion cell).
df_duplication$trail_making_condition5 |> sd(na.rm=TRUE)

In [None]:
# Duplication carriers scoring more than 1 SD below the duplication-group
# mean on Trails condition 5 (cut-off computed from the data).
df_duplication |>
    filter(trail_making_condition5 <
        (mean(trail_making_condition5, na.rm = TRUE) -
            sd(trail_making_condition5, na.rm = TRUE))) |>
    nrow()

In [None]:
# Mean Purdue both-hands raw score among duplication carriers.
df_duplication$purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw |> mean(na.rm=TRUE)

In [None]:
# SD of the Purdue both-hands raw score among duplication carriers.
df_duplication$purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw |> sd(na.rm=TRUE)

In [None]:
# Duplication carriers scoring more than 1 SD below the duplication-group
# mean on the Purdue both-hands score (cut-off computed from the data).
df_duplication |>
    filter(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw <
        (mean(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE) -
            sd(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE))) |>
    nrow()

In [None]:
# Mean flipped MABC-2 total percentile among duplication carriers.
df_duplication$mabc_percentile_transformed |> mean(na.rm=TRUE)

In [None]:
# SD of the flipped MABC-2 total percentile among duplication carriers.
df_duplication$mabc_percentile_transformed |> sd(na.rm=TRUE)

In [None]:
# Duplication carriers more than 1 SD below the duplication-group mean on the
# flipped MABC-2 total percentile (cut-off computed from the data).
# NOTE(review): on this flipped scale lower = better -- confirm the direction.
df_duplication |>
    filter(mabc_percentile_transformed <
        (mean(mabc_percentile_transformed, na.rm = TRUE) -
            sd(mabc_percentile_transformed, na.rm = TRUE))) |>
    nrow()

In [None]:
# Flag duplication carriers with any sign of motor impairment: MABC, Trails
# or Purdue more than 1 SD below the duplication-group mean, abnormal tandem
# gait, tremor, or a coordination-disorder diagnosis.
# The cut-offs are computed from the data instead of being hand-copied from
# printed output, so they stay correct if the dataset or filters change.
# NOTE(review): mabc_percentile_transformed is 100 - percentile, so LOWER
# values mean BETTER performance; `< mean - SD` may be inverted -- confirm.
# Rows where every criterion is FALSE-or-missing (and at least one is missing)
# get NA rather than 0.
df_duplication <- df_duplication |> 
    mutate(overall_impairment =
        if_else(
            mabc_percentile_transformed <
                (mean(mabc_percentile_transformed, na.rm = TRUE) -
                    sd(mabc_percentile_transformed, na.rm = TRUE)) |
            trail_making_condition5 <
                (mean(trail_making_condition5, na.rm = TRUE) -
                    sd(trail_making_condition5, na.rm = TRUE)) |
            purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw <
                (mean(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE) -
                    sd(purdue_pegboard.purdue_pegboard_raw.both_hand_score_raw, na.rm = TRUE)) |
            svip_neuro_exam.gait_station.motor_tandem == 'abnormal' |
            svip_neuro_exam.movement.motor_tremor == 'yes' |
            coordination_disorder == 1, 1, 0))

In [None]:
# Number of duplication carriers flagged as impaired (rows with NA are not counted).
df_duplication |> filter(overall_impairment==1) |> nrow()

## SRS coordination as predictor of ASD in deletion carriers

### Simple regression

In [None]:
# Logistic regression: ASD diagnosis ~ overall motor impairment flag.
# NOTE(review): the section heading refers to SRS coordination in DELETION
# carriers, but this model uses `overall_impairment` on df_duplication, and
# its result is overwritten by the SRS model two cells below -- confirm intent.
asd_as_outcome_srs <- glm(
    latest_clinical_asd_dx ~
        overall_impairment, 
    family = "binomial",
    data = df_duplication)

asd_as_outcome_srs |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
asd_as_outcome_srs |> tidy(conf.int=TRUE, exponentiate = TRUE)

In [None]:
# Print the full analysis data frame (inspection only).
df

In [None]:
# Logistic regression, deletion carriers: ASD diagnosis ~ SRS coordination item.
asd_as_outcome_srs <- glm(
    latest_clinical_asd_dx ~
        srs_coordinated, 
    family = "binomial",
    data = df_deletion)

asd_as_outcome_srs |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
asd_as_outcome_srs |> tidy(conf.int=TRUE, exponentiate = TRUE)

### Multiple regression

In [None]:
# Adjusted logistic regression, deletion carriers:
# ASD diagnosis ~ SRS coordination item + age (years) + sex.
asd_as_outcome_srs_multiple <- glm(
    latest_clinical_asd_dx ~
        srs_coordinated +
        age_years +
        sex, 
    family = "binomial",
    data = df_deletion)

asd_as_outcome_srs_multiple |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
asd_as_outcome_srs_multiple |> tidy(conf.int=TRUE, exponentiate = TRUE)

## SRS coordination as predictor of ASD in duplication carriers

### Simple regression

In [None]:
# Logistic regression, duplication carriers: ASD diagnosis ~ SRS coordination item.
asd_as_outcome_srs_dup <- glm(
    latest_clinical_asd_dx ~
        srs_coordinated, 
    family = "binomial",
    data = df_duplication)

asd_as_outcome_srs_dup |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
asd_as_outcome_srs_dup |> tidy(conf.int=TRUE, exponentiate = TRUE)

### Multiple regression

In [None]:
# Adjusted logistic regression, duplication carriers:
# ASD diagnosis ~ SRS coordination item + full-scale IQ + age (years).
# NOTE(review): the deletion-carrier counterpart adjusts for age + sex instead
# of IQ + age -- confirm the covariate sets are meant to differ.
asd_as_outcome_srs_dup_mult <- glm(
    latest_clinical_asd_dx ~
        srs_coordinated +
        best_full_scale_iq +
        age_years,
    family = "binomial",
    data = df_duplication)

asd_as_outcome_srs_dup_mult |> summary()
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
asd_as_outcome_srs_dup_mult |> tidy(conf.int=TRUE, exponentiate = TRUE)

# Correct for multiple comparisons

In [None]:
# Hand-entered p-values for the neurological outcomes.
# NOTE(review): copied from model output; they will not update if the models are refitted.
p_vals_neurological <- c(0.00246, 0.19286)

In [None]:
# Benjamini-Hochberg adjustment across the neurological p-values.
p.adjust(p_vals_neurological, method = "BH")

In [None]:
# Hand-entered p-values for the behavioral outcomes.
# NOTE(review): copied from model output; they will not update if the models are refitted.
p_vals_behavioral <- c(0.00959, 0.0191, 0.00316)

In [None]:
# Benjamini-Hochberg adjustment across the behavioral p-values.
p.adjust(p_vals_behavioral, method = "BH")

# Predictors of coordination disorder

In [None]:
# Logistic regression, all CNV carriers: coordination disorder ~ deletion + ASD diagnosis.
# NOTE(review): uses `clinical_asd_dx`, while the ASD analyses above use
# `latest_clinical_asd_dx` -- confirm which column is intended.
predictors_of_coordination_disorder <- glm(
    coordination_disorder ~
        deletion +
        clinical_asd_dx,
    family = "binomial",
    data = df_cnv)

summary(predictors_of_coordination_disorder)
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
tidy(predictors_of_coordination_disorder, conf.int=TRUE, exponentiate=TRUE)

In [None]:
# Logistic regression, all CNV carriers: coordination disorder ~ deletion +
# age walked unaided (ADI-R q05a). Overwrites the previous model of the same name.
predictors_of_coordination_disorder <- glm(
    coordination_disorder ~
        deletion +
        adi_r.adi_r.q05a_walked_unaideda,
    family = "binomial",
    data = df_cnv)

summary(predictors_of_coordination_disorder)
# Odds ratios with CIs; `exponentiate` spelled out (no partial matching).
tidy(predictors_of_coordination_disorder, conf.int=TRUE, exponentiate=TRUE)