# HMGCR Yogev et al. (2023)

Data derived from [Yogev Y, et al. (2023) Limb girdle muscular disease caused by HMGCR mutation and statin myopathy treatable with mevalonolactone. Proc Natl Acad Sci U S A.;120(7):e2217831120. PMID:36745799](https://pubmed.ncbi.nlm.nih.gov/36745799/)

Six individuals of a single consanguineous Bedouin kindred (Fig. 1A) were affected with apparently autosomal recessive progressive limb girdle muscle disease. The disease initially manifested during the fourth decade of life with pain on exertion, followed by muscle fatigue and weakness, affecting mostly the proximal and axial muscles, and culminating with involvement of respiratory muscles. 

g.5:75359992G>A (GRCh38/hg38); NM_000859.3:c.2465G>A; p.(G822D) in HMGCR

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from IPython.display import HTML, display
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import importlib.metadata
__version__ = importlib.metadata.version("pyphetools")
print(f"Using pyphetools version {__version__}")

Using pyphetools version 0.9.26


In [2]:
PMID="PMID:36745799"
title = "Limb girdle muscular disease caused by HMGCR mutation and statin myopathy treatable with mevalonolactone"
cite = Citation(pmid=PMID, title=title)
parser = HpoParser(hpo_json_file="../hp.json")
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199", citation=cite)
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2024-01-16


In [3]:
df = pd.read_excel("input/HMGCR_Yogev_HMGCR.xlsx")
dft = df.transpose()
dft.columns = dft.iloc[0]
dft.drop(dft.index[0], inplace=True)
dft['individual_id'] = dft.index  # Set the new column 'patient_id' to be identical to the contents of the index
dft.head() # check the transposed table

INDIVIDUAL,SEX,AGE AT EXAMINATION,AGE AT ONSET,PROXIMAL STRENGTH UPPER LIMB,PROXIMAL STRENGTH LOWER LIMB,ATROPHY UPPER LIMB,ATROPHY LOWER LIMB,DEEP TENDON REFLEXES,PAIN ON EXERTION,AMBULATORY,...,TRIGLYCERIDES (RECOMMENDED<150MG/DL),HDL (RECOMMENDED >60MG/DL),LDL (RECOMMENDED <100MG/DL),VLDL,FASTING BLOOD SUGAR,ABNORMAL BRAIN IMAGING,MYOPATHIC CHANGERS IN EMG,"MUSCLE BIOPSY-NORMAL DYSTROPHIN, NADH, SDH, COX, ATPASES, ELECTRON MICROSCOPY",COMORBIDITIES,individual_id
V:2,F,49,P31Y,0/5,0/5,Marked,Marked,Absent,+,-,...,87.0,49,80.0,17,390,-,+,Mild type 2 fiber deficiency,"Insulin-dependent diabetes mellitus, onset age 19",V:2
V:5,M,58,P39Y,3/5,2/5,Marked,Marked,Diminished,+,-,...,123.0,49,87.0,25,123,-,+,+,"COPD, diastolic dysfunction, ICRBBB, lymphocytosis",V:5
V:8,M,37,P24Y,5/5,5/5,-,-,+,+,+,...,95.5,38,77.0,19,127,,,,-,V:8
V:9,M,42,P33Y,5/5,4/5,-,-,+,+,+,...,108.0,45,67.0,22,111,-,,,ICRBBB,V:9
V:12,F,51,P31Y,2/5,2/5,Evident,Evident,Diminished,+,-,...,149.0,55,82.5,30,124,-,+,+,single kidney,V:12


In [4]:
generator = SimpleColumnMapperGenerator(df=dft, observed="+", excluded="-", hpo_cr=hpo_cr)
column_mapper_list = generator.try_mapping_columns()
display(HTML(generator.to_html()))

Result,Columns
Mapped,RESPIRATORY DIFFICULTIES; DYSPHAGIA; ABNORMAL BRAIN IMAGING
Unmapped,"SEX; AGE AT EXAMINATION; AGE AT ONSET; PROXIMAL STRENGTH UPPER LIMB; PROXIMAL STRENGTH LOWER LIMB; ATROPHY UPPER LIMB; ATROPHY LOWER LIMB; DEEP TENDON REFLEXES; PAIN ON EXERTION; AMBULATORY; ABULATORY MOBILITY RESTRICTION; ECHOCARDIOGRAPHY; CPK (reference 20-180 U/L); MAXIMAL TROPONIN T (0-14NG/L); CREATININE; AST (REFERENCE 0-35 U/L); ALT (REFERENCE 0-45 U/L); ALKALINE PHOSPHATASE (REFERENCE 30-120 U/L); TOTAL CHOLESTEROL (RECOMMENDED <200 MG/DL); TRIGLYCERIDES (RECOMMENDED<150MG/DL); HDL (RECOMMENDED >60MG/DL); LDL (RECOMMENDED <100MG/DL); VLDL; FASTING BLOOD SUGAR; MYOPATHIC CHANGERS IN EMG; MUSCLE BIOPSY-NORMAL DYSTROPHIN, NADH, SDH, COX, ATPASES, ELECTRON MICROSCOPY; COMORBIDITIES; individual_id"


In [7]:
#res = OptionColumnMapper.autoformat(df=dft, concept_recognizer=hpo_cr, omit_columns=generator.get_mapped_columns())
#print(res)

In [9]:
proximal_strength_upper_limb_d = {'0/5': 'Proximal muscle weakness in upper limbs',
 '3/5': 'Proximal muscle weakness in upper limbs',
 '2/5': 'Proximal muscle weakness in upper limbs'}
excluded = {'5/5': 'Proximal muscle weakness in upper limbs',}
proximalMapper = OptionColumnMapper(column_name="PROXIMAL STRENGTH UPPER LIMB", 
                                    concept_recognizer=hpo_cr, 
                                    option_d=proximal_strength_upper_limb_d,
                                   excluded_d=excluded)
column_mapper_list.append(proximalMapper)
proximalMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""0/5"" -> HP: Proximal muscle weakness in upper limbs (HP:0008997) (observed)",1
1,"original value: ""3/5"" -> HP: Proximal muscle weakness in upper limbs (HP:0008997) (observed)",2
2,"original value: ""5/5"" -> HP: Proximal muscle weakness in upper limbs (HP:0008997) (excluded)",2
3,"original value: ""2/5"" -> HP: Proximal muscle weakness in upper limbs (HP:0008997) (observed)",1


In [11]:
proximal_strength_lower_limb_d = {'0/5': 'Proximal muscle weakness in lower limbs',
 '2/5': 'Proximal muscle weakness in lower limbs',
 '4/5': 'Proximal muscle weakness in lower limbs'}
excluded = { '5/5': 'Proximal muscle weakness in lower limbs',}
proximal_strength_lower_limbMapper = OptionColumnMapper(column_name="PROXIMAL STRENGTH LOWER LIMB", 
                                                        concept_recognizer=hpo_cr, 
                                                        option_d=proximal_strength_lower_limb_d,
                                                       excluded_d=excluded)
column_mapper_list.append(proximal_strength_lower_limbMapper)
proximal_strength_lower_limbMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""0/5"" -> HP: Proximal muscle weakness in lower limbs (HP:0008994) (observed)",1
1,"original value: ""2/5"" -> HP: Proximal muscle weakness in lower limbs (HP:0008994) (observed)",2
2,"original value: ""5/5"" -> HP: Proximal muscle weakness in lower limbs (HP:0008994) (excluded)",1
3,"original value: ""4/5"" -> HP: Proximal muscle weakness in lower limbs (HP:0008994) (observed)",2


In [13]:
atrophy_upper_limb_d = {'Marked': 'Upper limb amyotrophy',
 'Evident': 'Upper limb amyotrophy'}
atrophy_upper_limbMapper = OptionColumnMapper(column_name="ATROPHY UPPER LIMB", concept_recognizer=hpo_cr, option_d=atrophy_upper_limb_d)
column_mapper_list.append(atrophy_upper_limbMapper)
atrophy_upper_limbMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""Marked"" -> HP: Upper limb amyotrophy (HP:0009129) (observed)",2
1,"original value: ""Evident"" -> HP: Upper limb amyotrophy (HP:0009129) (observed)",2


In [14]:
atrophy_lower_limb_d = {'Marked': 'Lower limb amyotrophy',
 'Evident': 'Lower limb amyotrophy'}
atrophy_lower_limbMapper = OptionColumnMapper(column_name="ATROPHY LOWER LIMB", concept_recognizer=hpo_cr, option_d=atrophy_lower_limb_d)
column_mapper_list.append(atrophy_lower_limbMapper)
atrophy_lower_limbMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""Marked"" -> HP: Lower limb amyotrophy (HP:0007210) (observed)",2
1,"original value: ""Evident"" -> HP: Lower limb amyotrophy (HP:0007210) (observed)",1


In [16]:
deep_tendon_reflexes_d = {'Absent': 'Areflexia',
 'Diminished': 'Hyporeflexia'}
deep_tendon_reflexesMapper = OptionColumnMapper(column_name="DEEP TENDON REFLEXES", concept_recognizer=hpo_cr, option_d=deep_tendon_reflexes_d)
column_mapper_list.append(deep_tendon_reflexesMapper)
deep_tendon_reflexesMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""Absent"" -> HP: Areflexia (HP:0001284) (observed)",1
1,"original value: ""Diminished"" -> HP: Hyporeflexia (HP:0001265) (observed)",3


In [19]:
pain_on_exertionMapper = SimpleColumnMapper(column_name="PAIN ON EXERTION", 
                                            hpo_id="HP:0003738",
                                            hpo_label="Exercise-induced myalgia",
                                            observed="+",
                                            excluded="-")
OptionColumnMapper(column_name="PAIN ON EXERTION", concept_recognizer=hpo_cr, option_d=pain_on_exertion_d)
column_mapper_list.append(pain_on_exertionMapper)
pain_on_exertionMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""+"" -> HP: Exercise-induced myalgia (HP:0003738) (observed)",6


In [22]:
# dft["AMBULATORY"] "+" means can still ambulate, "-" is loss
# Loss of ambulation HP:0002505
ambulatoryMapper = SimpleColumnMapper(column_name="AMBULATORY", 
                                            hpo_id="HP:0002505",
                                            hpo_label="Loss of ambulation",
                                            observed="+",
                                            excluded="-")
column_mapper_list.append(ambulatoryMapper)
ambulatoryMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""-"" -> HP: Loss of ambulation (HP:0002505) (excluded)",3
1,"original value: ""+"" -> HP: Loss of ambulation (HP:0002505) (observed)",3


In [25]:
respiratory_d = {'ventilated through tracheostomy': 'Respiratory failure requiring assisted ventilation',
                             '+':"Respiratory insufficiency"}
excluded = {"-" :"Respiratory insufficiency"}
respiratoryMapper = OptionColumnMapper(column_name="RESPIRATORY DIFFICULTIES", concept_recognizer=hpo_cr, option_d=respiratory_d, excluded_d=excluded)
column_mapper_list.append(respiratoryMapper)
respiratoryMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""ventilated through tracheostomy"" -> HP: Respiratory failure requiring assisted ventilation (HP:0004887) (observed)",1
1,"original value: ""+"" -> HP: Respiratory insufficiency (HP:0002093) (observed)",2
2,"original value: ""-"" -> HP: Respiratory insufficiency (HP:0002093) (excluded)",3


In [26]:
dft["DYSPHAGIA"]

V:2     -
V:5     -
V:8     -
V:9     -
V:12    -
V:13    -
Name: DYSPHAGIA, dtype: object

In [27]:
dysphagia_d = {}
dysphagiaMapper = SimpleColumnMapper(column_name="DYSPHAGIA", 
                                            hpo_id="HP:0002015",
                                            hpo_label="Dysphagia",
                                            observed="+",
                                            excluded="-")
column_mapper_list.append(dysphagiaMapper)
dysphagiaMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""-"" -> HP: Dysphagia (HP:0002015) (excluded)",6


In [31]:
echocardiography_d = {
 'Mild diastolic dysfunction': 'Left ventricular diastolic dysfunction',
}
excluded = {'Normal': 'Left ventricular diastolic dysfunction'}
echocardiographyMapper = OptionColumnMapper(column_name="ECHOCARDIOGRAPHY", 
                                            concept_recognizer=hpo_cr, 
                                            option_d=echocardiography_d, 
                                            excluded_d=excluded)
column_mapper_list.append(echocardiographyMapper)
echocardiographyMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""Normal"" -> HP: Left ventricular diastolic dysfunction (HP:0025168) (excluded)",3
1,"original value: ""Mild diastolic dysfunction"" -> HP: Left ventricular diastolic dysfunction (HP:0025168) (observed)",1


In [34]:
#Elevated circulating creatine kinase concentration HP:0003236
cpk_d = { 
 '1501': 'Elevated circulating creatine kinase concentration',
 '9065': 'Elevated circulating creatine kinase concentration',
 '477': 'Elevated circulating creatine kinase concentration',
 '542': 'Elevated circulating creatine kinase concentration',
 '3797': 'Elevated circulating creatine kinase concentration'}
cpk_Mapper = ThresholdedColumnMapper(column_name="CPK (reference 20-180 U/L)", 
                                    hpo_id="HP:0003236",
                                    hpo_label="Elevated circulating creatine kinase concentration",
                                    threshold=180,
                                    call_if_above=True)
column_mapper_list.append(cpk_Mapper)
cpk_Mapper.preview_column(dft)

Unnamed: 0,mapping,count
0,Elevated circulating creatine kinase concentration (HP:0003236): excluded,1
1,Elevated circulating creatine kinase concentration (HP:0003236): observed,5


In [36]:
#Increased circulating troponin T concentration HP:0410174
maximal_troponin_t_d = {'32.06': 'PLACEHOLDER',
 '18.59': 'PLACEHOLDER',
 'nan': 'PLACEHOLDER',
 '64.82': 'PLACEHOLDER',
 '23.39': 'PLACEHOLDER'}
maximal_troponin_tMapper = ThresholdedColumnMapper(column_name="MAXIMAL TROPONIN T (0-14NG/L)", 
                                    hpo_id="HP:0410174",
                                    hpo_label="Elevated circulating troponin T concentration",
                                    threshold=14,
                                    call_if_above=True)
column_mapper_list.append(maximal_troponin_tMapper)
maximal_troponin_tMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,Elevated circulating troponin T concentration (HP:0410174): observed,4
1,Elevated circulating troponin T concentration (HP:0410174): excluded,2


In [39]:
# Elevated circulating aspartate aminotransferase concentration HP:0031956
astMapper = ThresholdedColumnMapper(column_name="AST (REFERENCE 0-35 U/L)", 
                                    hpo_id="HP:0031956",
                                    hpo_label="Elevated circulating aspartate aminotransferase concentration",
                                    threshold=35,
                                    call_if_above=True)

column_mapper_list.append(astMapper)
astMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,Elevated circulating aspartate aminotransferase concentration (HP:0031956): excluded,2
1,Elevated circulating aspartate aminotransferase concentration (HP:0031956): observed,4


In [41]:
#Elevated circulating alanine aminotransferase concentration HP:0031964
altMapper = ThresholdedColumnMapper(column_name="ALT (REFERENCE 0-45 U/L)", 
                                    hpo_id="HP:0031964",
                                    hpo_label="Elevated circulating alanine aminotransferase concentration",
                                    threshold=45,
                                    call_if_above=True)
column_mapper_list.append(altMapper)
altMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,Elevated circulating alanine aminotransferase concentration (HP:0031964): excluded,3
1,Elevated circulating alanine aminotransferase concentration (HP:0031964): observed,3


In [43]:
#Elevated circulating alkaline phosphatase concentration
alkaline_phosphataseMapper = ThresholdedColumnMapper(column_name="ALKALINE PHOSPHATASE (REFERENCE 30-120 U/L)", 
                                    hpo_id="HP:0031964",
                                    hpo_label="Elevated circulating alanine aminotransferase concentration",
                                    threshold=120,
                                    call_if_above=True)
column_mapper_list.append(alkaline_phosphataseMapper)
alkaline_phosphataseMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,Elevated circulating alanine aminotransferase concentration (HP:0031964): observed,1
1,Elevated circulating alanine aminotransferase concentration (HP:0031964): excluded,5


In [None]:
total_cholesterol_(recommended_<200_mg_dl)_d = {'146': 'PLACEHOLDER',
 '159': 'PLACEHOLDER',
 '128': 'PLACEHOLDER',
 '136': 'PLACEHOLDER',
 '171': 'PLACEHOLDER'}
total_cholesterol_(recommended_<200_mg_dl)Mapper = OptionColumnMapper(column_name="TOTAL CHOLESTEROL (RECOMMENDED <200 MG/DL)", concept_recognizer=hpo_cr, option_d=total_cholesterol_(recommended_<200_mg_dl)_d)
column_mapper_list.append(total_cholesterol_(recommended_<200_mg_dl)Mapper)
total_cholesterol_(recommended_<200_mg_dl)Mapper.preview_column(df)
