In [None]:
# load R packages
library(readxl)
packageVersion('readxl')
library(dplyr)
packageVersion('dplyr')
library(stringr)
packageVersion('stringr')
library(fastDummies)
packageVersion('fastDummies')
library(tidyr)
packageVersion('tidyr')
library(lubridate)
packageVersion('lubridate')
library(ggplot2)
packageVersion('ggplot2')

In [None]:
# locations of the project, the data, and the RPDR inputs
project.dir <- '...'
data.dir <- '...'
regeps.dir <- '...'
# the RPDR folders below are all resolved relative to regeps.dir
raw.rpdr.dir <- file.path(regeps.dir, '...')
cleaned.rpdr.dir <- file.path(regeps.dir, '...')
Qingwen.data.dir <- file.path(regeps.dir, '...')

In [None]:
# count the raw lines of the medication export, to compare with the number of
# rows obtained once the file is parsed
med.lines <- file.path(raw.rpdr.dir, "Med.txt") %>%
  readLines()
length(med.lines)

In [None]:
# parse the pipe-delimited medication export; quoting is switched off and
# fill = FALSE makes a line with a missing field an error instead of padding it
med.data <- read.delim(
  file.path(raw.rpdr.dir, "Med.txt"),
  sep = '|',
  quote = "",
  fill = FALSE
)
dim(med.data)                   # rows x columns of the parsed table
length(unique(med.data$EMPI))   # number of distinct EMPI values
head(med.data)

In [None]:
# parse Medication_Date (month/day/year text) into Date objects
med.data$Medication_Date <- as.Date(med.data$Medication_Date, format = "%m/%d/%Y")
# class() reports "Date" once the conversion has happened; typeof() returns
# "double" for any Date and therefore cannot confirm it
class(med.data$Medication_Date)
# values that do not match the format are turned into NA -- count them
sum(is.na(med.data$Medication_Date))

In [None]:
# frequency of each Medication_Date_Detail value before filtering
table(med.data$Medication_Date_Detail)

In [None]:
# 'Removed' denotes a medication that was removed from a patient's medication
# list, so those rows are excluded
# NOTE(review): `!=` inside filter() also drops rows whose Medication_Date_Detail is NA -- confirm that is intended
med.listed <- med.data %>%
  filter(Medication_Date_Detail != 'Removed')
dim(med.listed) # from 1463941 to 1415384

In [None]:
# same tabulation after filtering: 'Removed' should no longer appear
table(med.listed$Medication_Date_Detail)

In [None]:
# subject lookup table read from the cleaned RPDR folder
data.id <- read.csv(file.path(cleaned.rpdr.dir, 'Subject_Id.csv'))
dim(data.id)
# attach Subject_Id to the retained medication rows via EMPI;
# merge() keeps only EMPI values present in both tables
med.data.ID <- merge(
  x = med.listed,
  y = data.id[, c('Subject_Id', 'EMPI')],
  by = 'EMPI'
)
dim(med.data.ID)

In [None]:
head(med.data.ID)

## 1. SSRIs: Selective serotonin reuptake inhibitors

- Citalopram (Celexa).
- Escitalopram (Lexapro).
- Fluoxetine (Prozac).
- Paroxetine (Paxil).
- Sertraline (Zoloft).
- Fluvoxamine (Luvox)

In [None]:
# string: Citalopram or Celexa
# 'escitalopram' contains 'citalopram', so escitalopram entries are excluded in
# both capitalisations; excluding only 'Escitalopram' let lowercase entries
# through, and they were then labelled as Citalopram as well as Escitalopram
citalopram.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Citalopram|citalopram|Celexa|celexa')) %>%
  filter(!str_detect(Medication, 'Escitalopram|escitalopram')) %>%
  # mutate() also works on zero rows, where a scalar `$<-` assignment errors
  mutate(Medication_name = 'Citalopram')
length(unique(citalopram.string$Medication))

In [None]:
# string: escitalopram
# In each cell below Medication_name is added with mutate(), so the cell still
# runs when no row matches (a scalar `$<-` assignment errors on a zero-row
# data frame)
escitalopram.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Escitalopram|escitalopram|Lexapro|lexapro')) %>%
  mutate(Medication_name = 'Escitalopram')
length(unique(escitalopram.string$Medication))

In [None]:
# string: fluoxetine
fluoxetine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Fluoxetine|fluoxetine|Prozac|prozac')) %>%
  mutate(Medication_name = 'Fluoxetine')
length(unique(fluoxetine.string$Medication))

In [None]:
# string: Paroxetine
paroxetine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'paroxetine|Paxil|paxil|Paroxetine')) %>%
  mutate(Medication_name = 'Paroxetine')
length(unique(paroxetine.string$Medication))

In [None]:
# string: sertraline
# 'Zolof'/'zolof' are prefixes, so they also match 'Zoloft'/'zoloft'
sertraline.string <- med.data.ID %>%
  filter(str_detect(Medication, 'sertraline|Sertraline|Zolof|zolof')) %>%
  mutate(Medication_name = 'Sertraline')
length(unique(sertraline.string$Medication))

In [None]:
# string: fluvoxamine
fluvoxamine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'fluvoxamine|Fluvoxamine|Luvox|luvox')) %>%
  mutate(Medication_name = 'Fluvoxamine')
length(unique(fluvoxamine.string$Medication))

In [None]:
# rows x columns of each SSRI table
dim(fluvoxamine.string)
dim(sertraline.string)
dim(paroxetine.string)
dim(fluoxetine.string)
dim(escitalopram.string)
dim(citalopram.string)

In [None]:
# stack the six SSRI tables and tag every row with the drug class
srri.med <- do.call(
  rbind,
  list(fluvoxamine.string, sertraline.string, paroxetine.string,
       fluoxetine.string, escitalopram.string, citalopram.string)
)
srri.med[['Medication_group']] <- 'Selective_serotonin_reuptake_inhibitors'
dim(srri.med)
head(srri.med)

## 2. Serotonin-norepinephrine reuptake inhibitors (SNRIs)

- Desvenlafaxine (Pristiq)
- Duloxetine (Cymbalta) — also approved to treat anxiety and certain types of chronic pain
- Levomilnacipran (Fetzima)
- Venlafaxine (Effexor XR) — also approved to treat certain anxiety disorders and panic disorder
- Milnacipran (Savella)

In [None]:
# string: Desvenlafaxine
# In each cell below Medication_name is added with mutate(), so the cell still
# runs when no row matches (a scalar `$<-` assignment errors on a zero-row
# data frame)
desvenlafaxine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'desvenlafaxine|Desvenlafaxine|Pristiq|pristiq')) %>%
  mutate(Medication_name = 'Desvenlafaxine')
length(unique(desvenlafaxine.string$Medication))
unique(desvenlafaxine.string$Medication)

In [None]:
# string: duloxetine
duloxetine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Duloxetine|duloxetine|Cymbalta|cymbalta')) %>%
  mutate(Medication_name = 'Duloxetine')
length(unique(duloxetine.string$Medication))
unique(duloxetine.string$Medication)

In [None]:
# string: Levomilnacipran
levomilnacipran.string <- med.data.ID %>%
  filter(str_detect(Medication, 'levomilnacipran|Levomilnacipran|fetzima|Fetzima')) %>%
  mutate(Medication_name = 'Levomilnacipran')
length(unique(levomilnacipran.string$Medication))
unique(levomilnacipran.string$Medication)

In [None]:
# string: Venlafaxine
# 'desvenlafaxine' contains 'venlafaxine', so it is excluded in both capitalisations
# NOTE(review): only the listed brand spellings are matched; an entry named just
# 'Effexor ...' without the generic name would not be picked up -- confirm
# against the unique() output
venlafaxine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Venlafaxine|venlafaxine|Effexor XR|EffexorXR|effecxor|Effecxor|Effexor-oncall|Effexorxr|EffexorXr|Effexor Xr')) %>%
  filter(!str_detect(Medication, 'Desvenlafaxine|desvenlafaxine')) %>%
  mutate(Medication_name = 'Venlafaxine')
length(unique(venlafaxine.string$Medication))
unique(venlafaxine.string$Medication)

In [None]:
# string: Milnacipran
# 'levomilnacipran' contains 'milnacipran', so it is excluded in both
# capitalisations; excluding only 'Levomilnacipran' would let lowercase entries
# be labelled as Milnacipran as well as Levomilnacipran
milnacipran.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Milnacipran|milnacipran|Savella|savella')) %>%
  filter(!str_detect(Medication, 'Levomilnacipran|levomilnacipran')) %>%
  # mutate() also works on zero rows, where a scalar `$<-` assignment errors
  mutate(Medication_name = 'Milnacipran')
length(unique(milnacipran.string$Medication))
unique(milnacipran.string$Medication)

In [None]:
# stack the five SNRI tables and tag every row with the drug class
snri.med <- do.call(
  rbind,
  list(desvenlafaxine.string, duloxetine.string,
       levomilnacipran.string, venlafaxine.string,
       milnacipran.string)
)
snri.med[['Medication_group']] <- 'Serotonin_norepinephrine_reuptake_inhibitors'
dim(snri.med)
head(snri.med)

## 3. Atypical antidepressants

- Agomelatine (not available in the United States)
- Bupropion (Aplenzin; Forfivo XL; Wellbutrin SR; Wellbutrin XL)
- Mirtazapine (Remeron)

In [None]:
# string: Bupropion
# Entries that also name naltrexone are excluded; the exclusion now covers both
# capitalisations, like the drug-name patterns themselves
# NOTE(review): 'Fent ' does not look like a bupropion name -- confirm in the
# unique() output which Medication values it is meant to capture
bupropion.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Bupropion|bupropion|Aplenzin|aplenzin|Forfivo|forfivo|Wellbutrin|wellbutrin|Fent |Zyban-oncall')) %>%
  filter(!str_detect(Medication, 'Naltrexone|naltrexone')) %>%
  # mutate() also works on zero rows, where a scalar `$<-` assignment errors
  mutate(Medication_name = 'Bupropion')
length(unique(bupropion.string$Medication))
unique(bupropion.string$Medication)

In [None]:
# string: Mirtazapine
mirtazapine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Mirtazapine|mirtazapine|Remeron|remeron')) %>%
  mutate(Medication_name = 'Mirtazapine')
length(unique(mirtazapine.string$Medication))
unique(mirtazapine.string$Medication)

In [None]:
# stack the two tables and tag every row with the drug class
atypical.anti.med <- rbind(bupropion.string, mirtazapine.string)
atypical.anti.med$Medication_group <- 'Atypical_antidepressants'
dim(atypical.anti.med)
head(atypical.anti.med)

## 4. Serotonin modulators

- Nefazodone (Serzone)
- Trazodone (Desyrel)
- Vilazodone (Viibryd)
- Vortioxetine (Trintellix)

In [None]:
# string: Nefazodone
# In each cell below Medication_name is added with mutate(), so the cell still
# runs when no row matches (a scalar `$<-` assignment errors on a zero-row
# data frame)
nefazodone.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Nefazodone|nefazodone|Serzone|serzone')) %>%
  mutate(Medication_name = 'Nefazodone')
length(unique(nefazodone.string$Medication))
unique(nefazodone.string$Medication)

In [None]:
# string: Trazodone
trazodone.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Trazodone|trazodone|Desyrel|desyrel')) %>%
  mutate(Medication_name = 'Trazodone')
length(unique(trazodone.string$Medication))
unique(trazodone.string$Medication)

In [None]:
# string: Vilazodone
vilazodone.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Vilazodone|vilazodone|Viibryd|viibryd')) %>%
  mutate(Medication_name = 'Vilazodone')
length(unique(vilazodone.string$Medication))
unique(vilazodone.string$Medication)

In [None]:
# Vortioxetine (Trintellix)
vortioxetine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Vortioxetine|vortioxetine|Trintellix|trintellix')) %>%
  mutate(Medication_name = 'Vortioxetine')
length(unique(vortioxetine.string$Medication))
unique(vortioxetine.string$Medication)

In [None]:
# stack the four tables and tag every row with the drug class
sero.mod.med <- rbind(nefazodone.string, trazodone.string,
                      vilazodone.string, vortioxetine.string)
sero.mod.med$Medication_group <- 'Serotonin_modulators'
dim(sero.mod.med)
head(sero.mod.med)

## 5. Tricyclic and tetracyclic antidepressants ("TCAs")

- Amitriptyline (Elavil)
- Amoxapine
- Clomipramine (Anafranil)
- Desipramine (Norpramin)
- Doxepin (Silenor)
- Imipramine (Tofranil)
- Nortriptyline (Pamelor)
- Protriptyline
- Trimipramine (Surmontil)

In [None]:
# Amitriptyline (Elavil)
# In every cell of this section Medication_name is added with mutate(), which
# also works when a filter matches no rows (a scalar `$<-` assignment errors on
# a zero-row data frame). That lets every listed drug be labelled and stacked
# below, instead of being left out of the class table.
amitriptyline.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Amitriptyline|amitriptyline|Elavil|elavil')) %>%
  # this one exact Medication entry is excluded by name
  filter(Medication != 'Amitriptyline Halftab 12.5 mg Tablet Cmpd BWF') %>%
  mutate(Medication_name = 'Amitriptyline')
length(unique(amitriptyline.string$Medication))
unique(amitriptyline.string$Medication)

In [None]:
# Amoxapine
amoxapine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Amoxapine|amoxapine')) %>%
  mutate(Medication_name = 'Amoxapine')
length(unique(amoxapine.string$Medication))
unique(amoxapine.string$Medication)

In [None]:
# Clomipramine (Anafranil)
clomipramine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Clomipramine|clomipramine|Anafranil|anafranil')) %>%
  mutate(Medication_name = 'Clomipramine')
length(unique(clomipramine.string$Medication))
unique(clomipramine.string$Medication)

In [None]:
# Desipramine (Norpramin)
desipramine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Desipramine|desipramine|Norpramin|norpramin')) %>%
  mutate(Medication_name = 'Desipramine')
length(unique(desipramine.string$Medication))
unique(desipramine.string$Medication)

In [None]:
# Doxepin (Silenor)
doxepin.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Doxepin|doxepin|Silenor|silenor')) %>%
  # the topical cream entry is excluded by its exact name
  filter(Medication != 'Doxepin 5 % Topical Cream') %>%
  mutate(Medication_name = 'Doxepin')
length(unique(doxepin.string$Medication))
unique(doxepin.string$Medication)

In [None]:
# Imipramine (Tofranil)
imipramine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Imipramine|imipramine|Tofranil|tofranil')) %>%
  mutate(Medication_name = 'Imipramine')
length(unique(imipramine.string$Medication))
unique(imipramine.string$Medication)

In [None]:
# Nortriptyline (Pamelor)
nortriptyline.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Nortriptyline|nortriptyline|Pamelor|pamelor')) %>%
  mutate(Medication_name = 'Nortriptyline')
length(unique(nortriptyline.string$Medication))
unique(nortriptyline.string$Medication)

In [None]:
# Protriptyline
protriptyline.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Protriptyline|protriptyline')) %>%
  mutate(Medication_name = 'Protriptyline')
length(unique(protriptyline.string$Medication))
unique(protriptyline.string$Medication)

In [None]:
# Trimipramine (Surmontil)
trimipramine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Trimipramine|trimipramine|Surmontil|surmontil')) %>%
  mutate(Medication_name = 'Trimipramine')
length(unique(trimipramine.string$Medication))
unique(trimipramine.string$Medication)

In [None]:
# Tricyclic_and_tetracyclic_antidepressants
# amoxapine and trimipramine are appended last: they contribute nothing while
# they have no rows, and are no longer silently dropped if rows ever appear
tca.med <- rbind(protriptyline.string, nortriptyline.string,
                 imipramine.string, doxepin.string,
                 desipramine.string, clomipramine.string,
                 amitriptyline.string,
                 amoxapine.string, trimipramine.string)
tca.med$Medication_group <- 'Tricyclic_and_tetracyclic_antidepressants'
dim(tca.med)
head(tca.med)

## 6. Monoamine oxidase inhibitors ("MAOIs") 

- Isocarboxazid (Marplan)
- Moclobemide (not available in the United States)
- Phenelzine (Nardil)
- Selegiline (oral and transdermal formulations) (Emsam; Zelapar)
- Tranylcypromine (Parnate)

In [None]:
# Isocarboxazid (Marplan)
# In every cell of this section Medication_name is added with mutate(), which
# also works when a filter matches no rows (a scalar `$<-` assignment errors on
# a zero-row data frame). That lets every listed drug be labelled and stacked
# below, instead of being left out of the class table.
isocarboxazid.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Isocarboxazid|isocarboxazid|Marplan|marplan')) %>%
  mutate(Medication_name = 'Isocarboxazid')
length(unique(isocarboxazid.string$Medication))
unique(isocarboxazid.string$Medication)

In [None]:
# Phenelzine (Nardil)
phenelzine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Phenelzine|phenelzine|Nardil|nardil')) %>%
  mutate(Medication_name = 'Phenelzine')
length(unique(phenelzine.string$Medication))
unique(phenelzine.string$Medication)

In [None]:
# Selegiline (Emsam; Zelapar)
selegiline.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Selegiline|selegiline|Emsam|emsam|Zelapar|zelapar')) %>%
  mutate(Medication_name = 'Selegiline')
length(unique(selegiline.string$Medication))
unique(selegiline.string$Medication)

In [None]:
# Tranylcypromine (Parnate)
tranylcypromine.string <- med.data.ID %>%
  filter(str_detect(Medication, 'Tranylcypromine|tranylcypromine|Parnate|parnate')) %>%
  mutate(Medication_name = 'Tranylcypromine')
length(unique(tranylcypromine.string$Medication))
unique(tranylcypromine.string$Medication)

In [None]:
# Monoamine oxidase inhibitors ("MAOIs") 
# isocarboxazid is appended last: it contributes nothing while it has no rows,
# and is no longer silently dropped if rows ever appear
maois.med <- rbind(tranylcypromine.string, selegiline.string, phenelzine.string,
                   isocarboxazid.string)
maois.med$Medication_group <- 'Monoamine_oxidase_inhibitors'
dim(maois.med)
head(maois.med)

In [None]:
## merge all files
# rows x columns of each drug-class table before they are stacked
dim(maois.med)
dim(tca.med)
dim(sero.mod.med)
dim(atypical.anti.med)
dim(snri.med)
dim(srri.med)

In [None]:
# one long table holding every antidepressant record from the six classes
anti.depress.med <- do.call(
  rbind,
  list(maois.med, tca.med, sero.mod.med, atypical.anti.med, snri.med, srri.med)
)
dim(anti.depress.med)
head(anti.depress.med)

# Remove duplicates

In [None]:
names(anti.depress.med)

In [None]:
# keep only the columns used downstream, ordered by subject and then by date
anti.depress.med.selected.cols <- anti.depress.med %>%
  select(
    Subject_Id, Medication_Date, Medication,
    Additional_Info, Medication_name, Medication_group
  ) %>%
  arrange(Subject_Id, Medication_Date)
dim(anti.depress.med.selected.cols)
length(unique(anti.depress.med.selected.cols$Subject_Id)) # 772
head(anti.depress.med.selected.cols, 10)

In [None]:
# pull the "ROUTE=<value>" token (everything up to the next ';') out of
# Additional_Info; rows without such a token get NA
anti.depress.med.selected.cols <- anti.depress.med.selected.cols %>%
  mutate(Route = str_extract(Additional_Info, "ROUTE=[^;]+"))
# frequency of each extracted route value
table(anti.depress.med.selected.cols$Route)

In [None]:
# rows that repeat an earlier row in every column
dim(subset(anti.depress.med.selected.cols,
           duplicated(anti.depress.med.selected.cols)))

In [None]:
# drop exact duplicate rows, keeping the first occurrence
dim(anti.depress.med.selected.cols)
anti.depress.no.dup.rows <- subset(anti.depress.med.selected.cols,
                                   !duplicated(anti.depress.med.selected.cols))
dim(anti.depress.no.dup.rows)

In [None]:
# reduce to subject / date / drug class and count the repeats at that level
anti.depress.med.group <- anti.depress.no.dup.rows %>%
  select(Subject_Id, Medication_Date, Medication_group)
dim(subset(anti.depress.med.group, duplicated(anti.depress.med.group)))

In [None]:
# keep one row per subject / date / drug class
dim(anti.depress.med.group)
anti.depress.no.dup <- subset(anti.depress.med.group,
                              !duplicated(anti.depress.med.group))
dim(anti.depress.no.dup)

In [None]:
anti.depress.no.dup[['Prescription']] <- 1 # every remaining row counts as one prescription
head(anti.depress.no.dup)

In [None]:
# Total number of prescriptions per subject:
# sum the per-row Prescription flag within each Subject_Id
anti.depress.prescr.counts.per.ind <- anti.depress.no.dup %>%
  group_by(Subject_Id) %>%
  summarise(
    Antidepressants_total_number_of_prescriptions = sum(Prescription),
    .groups = 'drop'
  )
dim(anti.depress.prescr.counts.per.ind)

In [None]:
# distribution of the per-subject totals
summary(anti.depress.prescr.counts.per.ind$Antidepressants_total_number_of_prescriptions)
hist(anti.depress.prescr.counts.per.ind$Antidepressants_total_number_of_prescriptions)

In [None]:
# look up the subject(s) whose total is exactly 780
anti.depress.prescr.counts.per.ind %>%
  filter(Antidepressants_total_number_of_prescriptions == 780)

In [None]:
# attach each subject's plasma collection date, needed for the date differences
anti.depress.group.w.date <- merge(
  x = anti.depress.no.dup,
  y = data.id[, c('Subject_Id', 'Plasma_collect_date')],
  by = 'Subject_Id'
)
dim(anti.depress.group.w.date)

In [None]:
# make sure the medication date is a Date (month/day/year text is parsed;
# a column that is already a Date is returned unchanged by as.Date())
anti.depress.group.w.date$Medication_Date <- as.Date(anti.depress.group.w.date$Medication_Date, format = "%m/%d/%Y")
# class() shows "Date" after conversion; typeof() returns "double" for any
# Date and so cannot confirm it
class(anti.depress.group.w.date$Medication_Date)
head(anti.depress.group.w.date$Medication_Date)

# plasma collect date
# whitespace in the format matches zero or more blanks in the input, so both
# "YYYY-MM-DD" and "YYYY -MM -DD" are accepted
anti.depress.group.w.date$Plasma_collect_date <- as.Date(anti.depress.group.w.date$Plasma_collect_date, format = "%Y -%m -%d")
class(anti.depress.group.w.date$Plasma_collect_date)
head(anti.depress.group.w.date$Plasma_collect_date)

In [None]:
# signed gap in days: plasma collection date minus medication date
# (positive when the medication date is before the collection date)
anti.depress.group.w.date$Days_Difference <- as.numeric(
  difftime(anti.depress.group.w.date$Plasma_collect_date,
           anti.depress.group.w.date$Medication_Date,
           units = "days")
)
head(anti.depress.group.w.date$Days_Difference, 10)
# unsigned gap in days
anti.depress.group.w.date$Days_Difference_Abs <- as.numeric(
  abs(anti.depress.group.w.date$Days_Difference)
)
head(anti.depress.group.w.date$Days_Difference_Abs, 10)

In [None]:
# storage type of the two difference columns
typeof(anti.depress.group.w.date$Days_Difference)
typeof(anti.depress.group.w.date$Days_Difference_Abs)

In [None]:
# keep prescriptions dated within 1826 days (5 x 365 + 1) of the sample
# collection date
# NOTE(review): the filter uses the absolute difference, so prescriptions up to
# 5 years AFTER collection are kept too -- confirm a two-sided window is intended
anti.depress.5y <- anti.depress.group.w.date %>%
  filter(Days_Difference_Abs <= 1826)
dim(anti.depress.5y)

# number of individuals with at least one antidepressant prescription in that window
length(unique(anti.depress.5y$Subject_Id)) # 704 individuals

In [None]:
# range of the signed differences that were kept
summary(as.numeric(anti.depress.5y$Days_Difference))

In [None]:
# per-subject number of prescriptions inside the 5-year window:
# sum the per-row Prescription flag within each Subject_Id
# NOTE(review): the output column is spelled 'Antidpressants_...' (missing 'e');
# it is kept as is because later cells and the exported file use this name
anti.depress.5y.counts.per.ind <- anti.depress.5y %>%
  group_by(Subject_Id) %>%
  summarise(
    Antidpressants_total_number_of_prescriptions_within_5y = sum(Prescription),
    .groups = 'drop'
  )
dim(anti.depress.5y.counts.per.ind)

In [None]:
# distribution of the per-subject 5-year counts
summary(anti.depress.5y.counts.per.ind$Antidpressants_total_number_of_prescriptions_within_5y)
plot(anti.depress.5y.counts.per.ind$Antidpressants_total_number_of_prescriptions_within_5y)

In [None]:
# look up the subject(s) whose 5-year count is exactly 467
anti.depress.5y.counts.per.ind %>%
  filter(Antidpressants_total_number_of_prescriptions_within_5y == 467)

In [None]:
# list one subject's records in date order
anti.depress.no.dup %>%
  filter(Subject_Id == '10028225') %>%
  arrange(Medication_Date)

## Check oral

In [None]:
# keep only records whose extracted route is PO or Oral
antidepress.oral <- anti.depress.med.selected.cols %>%
  filter(Route %in% c('ROUTE=PO', 'ROUTE=Oral'))

In [None]:
# drop exact duplicate rows, keeping the first occurrence
dim(antidepress.oral)
antidepress.oral.no.dup.rows <- subset(antidepress.oral, !duplicated(antidepress.oral))
dim(antidepress.oral.no.dup.rows)

In [None]:
# reduce to subject / date / drug class and count the repeats at that level
antidepress.oral.med.group <- antidepress.oral.no.dup.rows %>%
  select(Subject_Id, Medication_Date, Medication_group)
dim(subset(antidepress.oral.med.group, duplicated(antidepress.oral.med.group)))

In [None]:
# keep one row per subject / date / drug class
dim(antidepress.oral.med.group)
antidepress.oral.no.dup <- subset(antidepress.oral.med.group,
                                  !duplicated(antidepress.oral.med.group))
dim(antidepress.oral.no.dup)

In [None]:
antidepress.oral.no.dup[['Prescription']] <- 1 # every remaining row counts as one prescription
head(antidepress.oral.no.dup)

In [None]:
# Total number of oral prescriptions per subject:
# sum the per-row Prescription flag within each Subject_Id
antidepress.oral.prescr.counts.per.ind <- antidepress.oral.no.dup %>%
  group_by(Subject_Id) %>%
  summarise(
    Antidepressants_oral_total_number_of_prescriptions = sum(Prescription),
    .groups = 'drop'
  )
dim(antidepress.oral.prescr.counts.per.ind)

In [None]:
# distribution of the per-subject totals
summary(antidepress.oral.prescr.counts.per.ind$Antidepressants_oral_total_number_of_prescriptions)
hist(antidepress.oral.prescr.counts.per.ind$Antidepressants_oral_total_number_of_prescriptions)

In [None]:
# attach each subject's plasma collection date, needed for the date differences
antidepress.oral.group.w.date <- merge(
  x = antidepress.oral.no.dup,
  y = data.id[, c('Subject_Id', 'Plasma_collect_date')],
  by = 'Subject_Id'
)
dim(antidepress.oral.group.w.date)

In [None]:
# make sure the medication date is a Date (month/day/year text is parsed;
# a column that is already a Date is returned unchanged by as.Date())
antidepress.oral.group.w.date$Medication_Date <- as.Date(antidepress.oral.group.w.date$Medication_Date, format = "%m/%d/%Y")
# class() shows "Date" after conversion; typeof() returns "double" for any
# Date and so cannot confirm it
class(antidepress.oral.group.w.date$Medication_Date)
head(antidepress.oral.group.w.date$Medication_Date)

# plasma collect date
# whitespace in the format matches zero or more blanks in the input, so both
# "YYYY-MM-DD" and "YYYY -MM -DD" are accepted
antidepress.oral.group.w.date$Plasma_collect_date <- as.Date(antidepress.oral.group.w.date$Plasma_collect_date, format = "%Y -%m -%d")
class(antidepress.oral.group.w.date$Plasma_collect_date)
head(antidepress.oral.group.w.date$Plasma_collect_date)

In [None]:
# signed gap in days: plasma collection date minus medication date
# (positive when the medication date is before the collection date)
antidepress.oral.group.w.date$Days_Difference <- as.numeric(
  difftime(antidepress.oral.group.w.date$Plasma_collect_date,
           antidepress.oral.group.w.date$Medication_Date,
           units = "days")
)
head(antidepress.oral.group.w.date$Days_Difference, 10)
# unsigned gap in days
antidepress.oral.group.w.date$Days_Difference_Abs <- as.numeric(
  abs(antidepress.oral.group.w.date$Days_Difference)
)
head(antidepress.oral.group.w.date$Days_Difference_Abs, 10)

In [None]:
# storage type of the two difference columns
typeof(antidepress.oral.group.w.date$Days_Difference)
typeof(antidepress.oral.group.w.date$Days_Difference_Abs)

In [None]:
# keep oral prescriptions dated within 1826 days (5 x 365 + 1) of the sample
# collection date
# NOTE(review): the filter uses the absolute difference, so prescriptions up to
# 5 years AFTER collection are kept too -- confirm a two-sided window is intended
antidepress.oral.5y <- antidepress.oral.group.w.date %>%
  filter(Days_Difference_Abs <= 1826)
dim(antidepress.oral.5y)

# number of individuals with at least one oral antidepressant prescription in that window
length(unique(antidepress.oral.5y$Subject_Id)) # 641 individuals

In [None]:
# range of the signed differences that were kept
summary(as.numeric(antidepress.oral.5y$Days_Difference))

In [None]:
# per-subject number of oral prescriptions inside the 5-year window:
# sum the per-row Prescription flag within each Subject_Id
antidepress.oral.5y.counts.per.ind <- antidepress.oral.5y %>%
  group_by(Subject_Id) %>%
  summarise(
    Antidepressants_oral_total_number_of_prescriptions_within_5y = sum(Prescription),
    .groups = 'drop'
  )
dim(antidepress.oral.5y.counts.per.ind)

In [None]:
# distribution of the per-subject 5-year counts
summary(antidepress.oral.5y.counts.per.ind$Antidepressants_oral_total_number_of_prescriptions_within_5y)
plot(antidepress.oral.5y.counts.per.ind$Antidepressants_oral_total_number_of_prescriptions_within_5y)

## Depression PPV

In [None]:
# PPV export from the biobank portal; the file is named for diabetes but the
# columns assigned in the next cell include a depression PPV column as well
depression.biobank <- file.path(data.dir, 'Diabetes_PPV.csv') %>%
  read.csv()
dim(depression.biobank)
head(depression.biobank)

In [None]:
# replace the column names with analysis-friendly ones; the vector has eleven
# names, assigned to the columns in order
names(depression.biobank) <- c(
  'Subject_Id', 'Gender', 'Age', 'Race', 'Ethnicity', 'Vital_status',
  'Depression_current_or_past_history_PPV_090_Existence_Yes_No',
  'T1DM_current_or_past_history_PPV_080_Existence_Yes_No',
  'T1DM_current_or_past_history_PPV_090_Existence_Yes_No',
  'T2DM_current_or_past_history_PPV_080_Existence_Yes_No',
  'T2DM_current_or_past_history_PPV_090_Existence_Yes_No'
)
head(depression.biobank)

In [None]:
# frequency of each value of the depression PPV flag
table(depression.biobank$Depression_current_or_past_history_PPV_090_Existence_Yes_No)

In [None]:
# subjects flagged 'Yes' for depression
depression.ppv.yes <- depression.biobank %>%
  filter(Depression_current_or_past_history_PPV_090_Existence_Yes_No == 'Yes')

In [None]:
# flagged subjects who also have an oral antidepressant record in the 5-year window
depression.ppv.yes %>%
  filter(Subject_Id %in% antidepress.oral.5y.counts.per.ind$Subject_Id) # 358 of 388

In [None]:
# preview both per-subject 5-year count tables
head(antidepress.oral.5y.counts.per.ind)
head(anti.depress.5y.counts.per.ind)

## Antidepressant oral: Count number of prescription of each medication group in 5 years

In [None]:
# append one indicator column per Medication_group value to antidepress.oral.5y
antidepress.oral.category.no.dup.dummy <- antidepress.oral.5y %>%
  dummy_cols(select_columns = "Medication_group")
dim(antidepress.oral.category.no.dup.dummy)

In [None]:
head(antidepress.oral.category.no.dup.dummy)

In [None]:
# position of one of the generated indicator columns
which(names(antidepress.oral.category.no.dup.dummy) == 'Medication_group_Atypical_antidepressants')

In [None]:
# check how many counts in each indicator column
# The indicator columns are picked by their 'Medication_group_' name prefix
# rather than by position (previously 8:ncol), so the code keeps working if the
# columns of antidepress.oral.5y change
antidepress.oral.category.cols <- antidepress.oral.category.no.dup.dummy %>%
  select(starts_with('Medication_group_')) %>%
  colnames()
# iterating over the names directly is also safe when there are none
for (category.col in antidepress.oral.category.cols) {
  print(category.col)
  print(table(antidepress.oral.category.no.dup.dummy[[category.col]]))
}

In [None]:
# group by Subject_Id and sum all indicator columns
# keep Subject_Id plus the indicator columns, selected by name (previously the
# other columns were removed by position with -2:-7)
antidepress.oral.category.no.dup.dummy.group <- antidepress.oral.category.no.dup.dummy %>%
  select(Subject_Id, starts_with('Medication_group_'))
sum.antidepress.oral.category <- antidepress.oral.category.no.dup.dummy.group %>%
  group_by(Subject_Id) %>%
  summarise(across(everything(), sum), .groups = 'drop') %>%
  as.data.frame()
dim(sum.antidepress.oral.category)
head(sum.antidepress.oral.category)

In [None]:
# add column: Yes or No for each medication type
exist.sum.antidepress.oral.category <- sum.antidepress.oral.category
# `[-1]` keeps a data frame even when there is a single medication-group
# column; `[, -1]` would drop to a plain vector there and lose the column names
exist.sum.antidepress.oral.category[-1] <- lapply(
  exist.sum.antidepress.oral.category[-1],
  function(n.prescriptions) ifelse(n.prescriptions > 0, 'Yes', 'No')
)
colnames(exist.sum.antidepress.oral.category)[-1] <- paste0(
  colnames(exist.sum.antidepress.oral.category)[-1], '_Existence_5Y_Yes_No'
)

In [None]:
# add suffix count after each medication
colnames(sum.antidepress.oral.category)[-1] <- paste0(
  colnames(sum.antidepress.oral.category)[-1], "_total_prescription_5Y"
)

In [None]:
# put the per-group counts and the per-group Yes/No flags side by side
sum.antidepress.oral.category.final <- merge(
  x = sum.antidepress.oral.category,
  y = exist.sum.antidepress.oral.category,
  by = 'Subject_Id'
)
dim(sum.antidepress.oral.category.final)

In [None]:
# strip the "Medication_group_" prefix that dummy_cols() put on the column names
names(sum.antidepress.oral.category.final) <- gsub(
  "Medication_group_", "", names(sum.antidepress.oral.category.final)
)
dim(sum.antidepress.oral.category.final)

In [None]:
# every subject in this table has an oral antidepressant record in the window,
# so the overall flag is 'Yes' on every row
sum.antidepress.oral.category.final[['Any_antidepress.oral_Medication_Existence_Yes_No']] <- 'Yes' # yes for antidepress.oral prescp
head(sum.antidepress.oral.category.final)

In [None]:
# sizes of the tables about to be combined
dim(sum.antidepress.oral.category.final)
dim(anti.depress.5y.counts.per.ind)
dim(antidepress.oral.5y.counts.per.ind)
dim(depression.biobank)

In [None]:
# start from every biobank subject and add the prescription summaries
# NOTE(review): left joins leave NA (not 0 / 'No') in the count and Yes/No
# columns for subjects without a matching record -- confirm that is what
# downstream analyses expect
depression.data <- depression.biobank %>%
  select(Subject_Id, Depression_current_or_past_history_PPV_090_Existence_Yes_No) %>%
  left_join(antidepress.oral.5y.counts.per.ind, by = 'Subject_Id') %>%
  left_join(anti.depress.5y.counts.per.ind, by = 'Subject_Id') %>%
  left_join(sum.antidepress.oral.category.final, by = 'Subject_Id')
head(depression.data)

In [None]:
dim(depression.data)

In [None]:
# export the combined per-subject summary as CSV, without row names
met.dir <- '...'
depression.data %>%
  write.csv(file.path(met.dir, 'Antidepressants_prescription_summary.csv'), row.names = FALSE)