### UKB phenotyping of patients with serious ADRs and WES available

In [None]:
# Reload edited project modules automatically (IPython autoreload, mode 2).
%load_ext autoreload
%autoreload 2

# wide notebook display in browser

from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import hail as hl

# Project helpers: annotation loading plus the phenotyping steps called in
# the cells below.
from analysis.utils.annotations import (
    load_annotations,
    annotate_adr_patients,
    annotate_date_codes_dict,
    annotate_intent
)
# Relatedness filter applied to the final table (step 6).
from analysis.ukb200.adr.kinship_pca_matches import remove_related

from bokeh.io import show, output_notebook
from bokeh.layouts import gridplot
# Send Bokeh output to the notebook.
output_notebook()

### 1. Load annotation fields from UKB

In [None]:
# Annotation table that every later step in this notebook builds on.
ht = load_annotations('data/ukb-annotations.ht')

# Tab-separated data-coding lookup (presumably UKB coding 19 — confirm).
# NOTE(review): `codes` is not referenced by the cells visible here.
codes = hl.import_table(
    'raw/dataset/data-codings/coding19.tsv',
    delimiter='\t',
)

### 2. Annotate the table with diagnostic codes:

additionally:

    - only samples that have WES are kept [note: should later be changed for WGS]
    
    - withdrawn samples are removed

In [None]:
# Tag samples with the ADR diagnostic-code annotations; `overwrite=True` is
# forwarded to the project helper (presumably replaces a previously written
# result — confirm against the helper).
ht = annotate_adr_patients(annots=ht, overwrite=True)

#### How many controls vs cases?

In [None]:
# Number of rows per distinct value of the `group` field; the resulting
# mapping is shown as the cell output.
ht.aggregate(
    hl.agg.counter(ht.group)
)

### 3. Annotate the table with a dictionary containing dates and diagnostic codes

Here two fields are added: date_code_zip and date_code_dict. These are used to manually explore the data.

In [None]:
# Rebind `ht` to the table returned by the project helper (expected to carry
# the date/code fields used for manual exploration — confirm against the helper).
ht = annotate_date_codes_dict(ht)

### 4. Annotate the table with antidepressant toxicity relevant phenotypes:

#### Intent column:

x 'therapy' - with therapeutic dose, no toxicity codes for ADs, NO history of self-harm with drugs

x 'accidental' - accidental toxicity, may be mixed with other drugs, NO history of self-harm with drugs

x 'intentional' - any toxicity codes with any self harm history with drugs

#### Self-harm history:

x 'no_self_harm' - no codes related to self-harm (Z915, X6, X7, X80, X81, X82, X83, X84)

x 'with_drugs' - any code with 'X6'

x 'without_drugs' - other self harm codes

#### Two other columns based on most common codes:

x mental_health_inpatient:

    # depression F329, F339, F322, F321, F323, F331, F334, F251, F315, F338, F328, F313, F330, F316, F332
    
    # anxiety  F419, F410, F411, F418, F606, F408, F409, F413
    
    # both F412 or depression & anxiety codes

x drug_abuse:

    # Z864
    
#### Death column:
 
     # from ADR
     
     # different cause
     
     # still alive


In [None]:
# Run the project phenotyping helper; the `intention` and `self_harm` fields
# read below are presumably added by it (confirm against the helper).
ht = annotate_intent(ht)

# Cross-tabulation: for each `intention` value, count rows per `self_harm`
# value. The nested mapping is shown as the cell output.
ht.aggregate(
    hl.agg.group_by(
        ht.intention,
        hl.agg.counter(ht.self_harm),
    )
)

In [None]:
# For each `group` value, count rows per `death` value; the nested mapping is
# shown as the cell output.
ht.aggregate(
    hl.agg.group_by(
        ht.group,
        hl.agg.counter(ht.death),
    )
)

### 5. Table export after the initial phenotyping step:

In [None]:
# Persist the phenotyped table; step 6 reads it back from this same path.
# NOTE(review): no `overwrite` argument is passed, so re-running this cell
# with the output already on disk is expected to fail — confirm this is intended.
ht.write('data/hospital-codes-phenotyped.ht')

### 6. Remove related individuals from the final table

In [None]:
# Reload the phenotyped table from disk (the path written in step 5) so the
# following steps start from the stored copy.
ht = hl.read_table('data/hospital-codes-phenotyped.ht')

In [None]:
# Filter the table with the project relatedness helper.
# NOTE(review): kin_cut=0.125 is presumably a kinship-coefficient threshold
# (around second-degree relatedness) — confirm against remove_related.
ht = remove_related(ht, kin_cut=0.125)

#### export final annotations for burden analysis and save the table

In [None]:
# Write the final table used as input for the burden analysis.
# NOTE(review): this path has no 'data/' prefix, unlike the other tables in
# this notebook, and no `overwrite` argument — confirm both are intended.
ht.write('full-annots-for-burden.ht')