## Import TRE tools

In [1]:
import tretools

In [2]:

# Classes used throughout the walkthrough, imported together in a single cell
# so the notebook runs cleanly from a fresh kernel.
from tretools.codelists.codelist import Codelist
from tretools.datasets.raw_dataset import RawDataset
from tretools.datasets.processed_dataset import ProcessedDataset

## Codelists

In [3]:
# Load the Disease A codelist from its CSV file and tag it as SNOMED.
snomed_codelist = Codelist(
    "Codelists/disease_a_snomed.csv",
    "SNOMED",
)

In [4]:
snomed_codelist.codes

{'100000001', '100000002'}

In [5]:
snomed_codelist.codelist_type

'SNOMED'

In [6]:
# Load the Disease A codelist from its CSV file and tag it as ICD10.
icd_codelist = Codelist(
    "Codelists/disease_a_icd.csv",
    "ICD10",
)

In [8]:
icd_codelist.codes

{'A01', 'A02'}

In [10]:
icd_codelist.data

[{'code': 'A01', 'term': 'Disease A - 1'},
 {'code': 'A02', 'term': 'Disease A - 2'}]

Now let's pass in `add_x_codes=True` to see the difference.

In [11]:
# Same ICD10 file as before, this time with add_x_codes=True. In the output
# shown for `.data`, every code gains a second entry with an "X" suffix and
# the same term.
icd_codelist_with_x = Codelist(
    "Codelists/disease_a_icd.csv",
    "ICD10",
    add_x_codes=True,
)

In [12]:
icd_codelist_with_x.data

[{'code': 'A01', 'term': 'Disease A - 1'},
 {'code': 'A01X', 'term': 'Disease A - 1'},
 {'code': 'A02', 'term': 'Disease A - 2'},
 {'code': 'A02X', 'term': 'Disease A - 2'}]

## Datasets

In [14]:
# Load the primary-care CSV as a raw dataset whose codes are SNOMED.
raw_data = RawDataset(
    path="Datasets/primary_care_data.csv",
    coding_system="SNOMED",
    dataset_type="primary_care",
)

In [16]:
raw_data.data.head()

pseudo_nhs_number,clinical_effective_date,original_code,original_term,extra_col
str,str,i64,str,i64
"""84950DE0614A5C…","""2018-10-05 12:…",100000001,"""Disease A - 1""",1
"""84950DE0614A5C…","""05/11/2018""",100000001,"""Disease A - 1""",1
"""84950DE0614A5C…","""12-02-2019""",100000002,"""Disease A - 2""",1
"""84950DE0614A5C…","""2020-05-22T08:…",200000001,"""Disease B - 1""",1
"""73951AB0712D6E…","""""",100000001,"""Disease A - 1""",1


The dates are in several different formats, some values are missing, and there is an extra column. Let's clean these up.

In [17]:
# Rename the source columns to the short names used in the rest of the
# notebook (column_maps is {source name: new name}).
# NOTE(review): deduplication_options lists renamed columns, presumably the key
# used to drop duplicate rows. The processed output also has no `extra_col`
# and a proper `date` dtype, so process_dataset appears to drop unmapped
# columns and parse dates as well; confirm against the tretools docs.
processed_data = raw_data.process_dataset(
    deduplication_options=["nhs_number", "code", "date"],
    column_maps={
        "original_code": "code",
        "original_term": "term",
        "clinical_effective_date": "date",
        "pseudo_nhs_number": "nhs_number",
    },
)

In [18]:
processed_data.data.head()

nhs_number,code,term,date
str,i64,str,date
"""84950DE0614A5C…",100000002,"""Disease A - 2""",2019-02-12
"""73951AB0712D6E…",100000001,"""Disease A - 1""",2013-06-03
"""84950DE0614A5C…",100000001,"""Disease A - 1""",2018-10-05
"""84950DE0614A5C…",100000001,"""Disease A - 1""",2018-11-05
"""53952EF0503F7F…",200000001,"""Disease B - 1""",2016-08-20


## Phenotype Report

In [19]:
from tretools.phenotype_report.report import PhenotypeReport

In [20]:
# Create the report object for the phenotype labelled "Disease A".
report = PhenotypeReport("Disease A")

In [21]:
# Count events in the processed primary-care data that match the SNOMED
# codelist. The result is stored in report.counts under the "primary_care"
# key (patient_count, event_count, matching rows and a log).
report.add_count("primary_care", snomed_codelist, processed_data)

In [22]:
report.counts

{'primary_care': {'code': [100000002, 100000001],
  'patient_count': 2,
  'event_count': 4,
  'nhs_numbers': shape: (2, 4)
  ┌───────────────────────────────────┬───────────┬───────────────┬────────────┐
  │ nhs_number                        ┆ code      ┆ term          ┆ date       │
  │ ---                               ┆ ---       ┆ ---           ┆ ---        │
  │ str                               ┆ i64       ┆ str           ┆ date       │
  ╞═══════════════════════════════════╪═══════════╪═══════════════╪════════════╡
  │ 73951AB0712D6E241E8222EDCCF28AE8… ┆ 100000001 ┆ Disease A - 1 ┆ 2013-06-03 │
  │ 84950DE0614A5C241F7223FBCCD27BE8… ┆ 100000001 ┆ Disease A - 1 ┆ 2018-10-05 │
  └───────────────────────────────────┴───────────┴───────────────┴────────────┘,
  'codelist_type': 'SNOMED',
  'dataset_type': 'primary_care',
  'log': ['2023-10-31 14:16:58.522533: There are 7 events in the dataset',
   '2023-10-31 14:16:58.526656: Counting events for codelist primary_care',
   '2023-10-31