In [15]:
from datetime import datetime
import ply.lex as lex

In [16]:
# Names of every token type this lexer can emit. PLY reads this module-level
# `tokens` list to learn which token type names are valid.
tokens = [
    # Lone '>' that closes a start tag or declaration
    'StartTagClose',
    # Prolog delimiters: XML declaration and DOCTYPE
    'XmlDeclStartTag',
    'XmlDeclEndTag',
    'DocTypeDeclStartTag',
    # Element tags: '<name' (attributes may follow) and '</name>' pairs
    'HealthTopicsStartTagOpen',
    'HealthTopicsEndTag',
    'HealthTopicStartTagOpen',
    'HealthTopicEndTag',
    'AlsoCalledStartTagOpen',
    'AlsoCalledEndTag',
    'FullSummaryStartTagOpen',
    'FullSummaryEndTag',
    'GroupStartTagOpen',
    'GroupEndTag',
    'LanguageMappedTopicStartTagOpen',
    'LanguageMappedTopicEndTag',
    'MeshHeadingStartTagOpen',
    'MeshHeadingEndTag',
    'DescriptorStartTagOpen',
    'DescriptorEndTag',
    'OtherLanguageStartTagOpen',
    'OtherLanguageEndTag',
    'PrimaryInstituteStartTagOpen',
    'PrimaryInstituteEndTag',
    'SeeReferenceStartTagOpen',
    'SeeReferenceEndTag',
    'SiteStartTagOpen',
    'SiteEndTag',
    'InformationCategoryStartTagOpen',
    'InformationCategoryEndTag',
    'OrganizationStartTagOpen',
    'OrganizationEndTag',
    'StandardDescriptionStartTagOpen',
    'StandardDescriptionEndTag',
    'RelatedTopicStartTagOpen',
    'RelatedTopicEndTag',
    # DOCTYPE body: root element name and PUBLIC external identifier
    'DocTypeDeclName',
    'DocTypeDeclExternalId',
    # Attribute keys (each includes the trailing '=')
    'VersionKey',
    'EncodingKey',
    'DateGeneratedKey',
    'TotalKey',
    'IdKey',
    'DateCreatedKey',
    'LanguageKey',
    'TitleKey',
    'UrlKey',
    'MetaDescKey',
    'VernacularNameKey',
    'LanguageMappedUrlKey',
    # Typed attribute values (double-quoted in the input)
    'Timestamp',
    'Integer',
    'Date',
    'Language',
    'Uri',
    'String',
    # Character data between tags
    'Text'
]

# Element tag rules, defined as plain strings.
# Start-tag patterns stop before '>' so that attribute tokens can follow; the
# closing '>' is tokenized separately. End-tag patterns include the '>'.
# PLY sorts string-defined rules by decreasing regex length, so
# '<health-topics' is tried before its prefix '<health-topic' regardless of
# the order in which they are written here.
t_HealthTopicsStartTagOpen = r'<health-topics'
t_HealthTopicsEndTag = r'</health-topics>'
t_HealthTopicStartTagOpen = r'<health-topic'
t_HealthTopicEndTag = r'</health-topic>'
t_AlsoCalledStartTagOpen = r'<also-called'
t_AlsoCalledEndTag = r'</also-called>'
t_FullSummaryStartTagOpen = r'<full-summary'
t_FullSummaryEndTag = r'</full-summary>'
t_GroupStartTagOpen = r'<group'
t_GroupEndTag = r'</group>'
t_LanguageMappedTopicStartTagOpen = r'<language-mapped-topic'
t_LanguageMappedTopicEndTag = r'</language-mapped-topic>'
t_MeshHeadingStartTagOpen = r'<mesh-heading'
t_MeshHeadingEndTag = r'</mesh-heading>'
t_DescriptorStartTagOpen = r'<descriptor'
t_DescriptorEndTag = r'</descriptor>'
t_OtherLanguageStartTagOpen = r'<other-language'
t_OtherLanguageEndTag = r'</other-language>'
t_PrimaryInstituteStartTagOpen = r'<primary-institute'
t_PrimaryInstituteEndTag = r'</primary-institute>'
t_SeeReferenceStartTagOpen = r'<see-reference'
t_SeeReferenceEndTag = r'</see-reference>'
t_SiteStartTagOpen = r'<site'
t_SiteEndTag = r'</site>'
t_InformationCategoryStartTagOpen = r'<information-category'
t_InformationCategoryEndTag = r'</information-category>'
t_OrganizationStartTagOpen = r'<organization'
t_OrganizationEndTag = r'</organization>'
t_StandardDescriptionStartTagOpen = r'<standard-description'
t_StandardDescriptionEndTag = r'</standard-description>'
t_RelatedTopicStartTagOpen = r'<related-topic'
t_RelatedTopicEndTag = r'</related-topic>'

# Character data between tags: one or more word characters, spaces, tabs and
# the listed punctuation. '<', '>' and newline are not in the class, so a Text
# token always ends at markup or at the end of the line.
# Being a string rule, it is tried only after every function-defined rule
# (attribute keys, quoted values, ...) has failed to match.
# NOTE(review): characters outside this class (e.g. '+', '#', '[', ']', the
# Spanish inverted exclamation mark) are not matched here and will reach
# t_error unless another rule matches them — confirm this is acceptable for
# the data.
t_Text = r'[\w ¿?!:;,&=\.\-\'\"%\*\(\)/\t]+'

# Prolog delimiters and the generic tag close.
# In PLY the docstring of a rule function IS its regular expression, and
# function rules are tried in definition order before any string rule, so
# neither the docstrings nor the order of these functions should be changed
# casually.
def t_XmlDeclStartTag(t):
    r'<\?xml'
    # Opening of the XML declaration.
    return t

def t_XmlDeclEndTag(t):
    r'\?>'
    # End of the XML declaration.
    return t

def t_StartTagClose(t):
    r'>'
    # A lone '>'; the start-tag patterns deliberately leave it unconsumed.
    return t

def t_DocTypeDeclStartTag(t):
    r'<!DOCTYPE'
    # Opening of the document type declaration.
    return t

def t_DocTypeDeclName(t):
    r'health-topics'
    # Root element name inside the DOCTYPE declaration.
    # The pattern must not start with a space: ' ' is listed in t_ignore, so
    # PLY skips it before trying any rule and a pattern beginning with a
    # space could never match at the position where lexing resumes.
    # This is a function rule so that it takes precedence over the
    # string-defined Text rule, which would otherwise consume the name.
    return t

def t_DocTypeDeclExternalId(t):
    r'PUBLIC\s+"-//NLM//DTD\s+health-topics\s+//EN"\s+"https://medlineplus.gov/xml/mplus_topics.dtd"'
    # The PUBLIC external identifier of the DOCTYPE: the keyword, the quoted
    # public id and the quoted DTD URL, with flexible whitespace between the
    # parts. The whole identifier is returned as a single token.
    # Note: the dots in the URL are unescaped, so each matches any character.
    return t

# Attribute keys. Each pattern is the attribute name followed by '='; the
# quoted value is tokenized separately by the value rules.
# These are function rules rather than strings on purpose: function rules are
# tried before string rules, and the string-defined Text pattern would
# otherwise match text such as 'id=' itself.
def t_VersionKey(t):
    r'version='
    return t

def t_EncodingKey(t):
    r'encoding='
    return t

def t_DateGeneratedKey(t):
    r'date-generated='
    return t

def t_TotalKey(t):
    r'total='
    return t

def t_IdKey(t):
    r'id='
    return t

def t_DateCreatedKey(t):
    r'date-created='
    return t

def t_LanguageKey(t):
    r'language='
    return t

def t_TitleKey(t):
    r'title='
    return t

def t_UrlKey(t):
    r'url='
    return t

def t_MetaDescKey(t):
    r'meta-desc='
    return t

def t_VernacularNameKey(t):
    r'vernacular-name='
    return t

def t_LanguageMappedUrlKey(t):
    r'language-mapped-url='
    return t

# Value types of attributes
def t_Date(t):
    r'"\d{2}/\d{2}/\d{4}"'
    # Quoted MM/DD/YYYY attribute value; the token value becomes a
    # datetime.date. Defined before the Integer and String rules so that it
    # is tried first.
    unquoted = t.value.replace('"', '')
    parsed = datetime.strptime(unquoted, "%m/%d/%Y")
    t.value = parsed.date()
    return t

def t_Timestamp(t):
    r'"\d{2}/\d{2}/\d{4}\s\d{2}:\d{2}:\d{2}"'
    # Quoted "MM/DD/YYYY HH:MM:SS" attribute value; the token value becomes a
    # naive datetime.datetime.
    unquoted = t.value.replace('"', '')
    t.value = datetime.strptime(unquoted, "%m/%d/%Y %H:%M:%S")
    return t

def t_Integer(t):
    r'"\d+"'
    # Quoted run of digits; the token value becomes an int.
    # The pattern guarantees the only quotes are the two enclosing ones.
    digits = t.value.strip('"')
    t.value = int(digits)
    return t

def t_Language(t):
    r'"(English|Spanish)"'
    # Quoted language name; only the two listed languages are recognised.
    # The pattern fixes exactly one quote at each end, so slicing them off
    # yields the bare name.
    t.value = t.value[1:-1]
    return t

def t_Uri(t):
    r'"(?:https?):\/\/[^\s/$.?#].[^\s"<>]*[^"<>]*"'
    # Quoted http/https URL; the token value is the URL with every
    # double-quote character removed. Defined before the String rule so URLs
    # are not reported as generic strings.
    quoted = t.value
    t.value = quoted.replace('"', '')
    return t

def t_String(t):
    r'"[^"]*"'
    # Fallback for any double-quoted attribute value not claimed by the more
    # specific value rules defined earlier (Date, Timestamp, Integer,
    # Language, Uri). The token value is the text without the quotes.
    #
    # '[^"]*' also matches newlines, and the newline rule only sees newlines
    # that occur between tokens, so line breaks inside the value are counted
    # here to keep the line numbers of later tokens correct.
    t.lexer.lineno += t.value.count('\n')
    t.value = t.value.replace('"', '')
    return t

# Line tracking: consume runs of newlines and advance the lexer's line counter
def t_newline(t):
    r'\n+'
    # Every matched character is a newline, so the match length is the number
    # of lines to advance. Nothing is returned, so no token is emitted.
    newline_count = len(t.value)
    t.lexer.lineno += newline_count

# Characters PLY skips between tokens (spaces and tabs). They are skipped
# before any rule is tried, so no rule can match starting on one of them;
# they can still appear inside a match (e.g. inside Text or a quoted value).
t_ignore = ' \t'

# Fallback invoked by PLY when no rule matches at the current position
def t_error(t):
    # t.value holds the remaining input; report its first character and
    # advance past it so lexing can continue.
    offending_char = t.value[0]
    print(f"Illegal character '{offending_char}'")
    t.lexer.skip(1)

# NOTE(review): presumably a workaround so PLY can resolve a module file name
# when running inside a notebook — confirm it is still needed.
__file__ = "lexer.ipynb"
lexer = lex.lex()

with open('../data/mplus_topics.xml', 'r', encoding='utf-8') as xml_file:
    data = xml_file.read()

# Feed the whole document to the lexer
lexer.input(data)

# Preview only the first tokens of the document. To dump every token instead,
# loop on lexer.token() until it returns None.
PREVIEW_TOKEN_COUNT = 248
for _seen in range(PREVIEW_TOKEN_COUNT):
    tok = lexer.token()
    if tok is None:
        break  # End of input reached before the preview limit
    display(tok)

LexToken(XmlDeclStartTag,'<?xml',1,0)

LexToken(VersionKey,'version=',1,6)

LexToken(String,'1.0',1,14)

LexToken(EncodingKey,'encoding=',1,20)

LexToken(String,'UTF-8',1,29)

LexToken(XmlDeclEndTag,'?>',1,36)

LexToken(DocTypeDeclStartTag,'<!DOCTYPE',2,39)

LexToken(DocTypeDeclName,'health-topics',2,49)

LexToken(DocTypeDeclExternalId,'PUBLIC "-//NLM//DTD health-topics //EN" "https://medlineplus.gov/xml/mplus_topics.dtd"',2,63)

LexToken(StartTagClose,'>',2,149)

LexToken(HealthTopicsStartTagOpen,'<health-topics',3,151)

LexToken(TotalKey,'total=',3,166)

LexToken(Integer,2044,3,172)

LexToken(DateGeneratedKey,'date-generated=',3,179)

LexToken(Timestamp,datetime.datetime(2024, 8, 30, 2, 30, 25),3,194)

LexToken(StartTagClose,'>',3,215)

LexToken(HealthTopicStartTagOpen,'<health-topic',4,218)

LexToken(MetaDescKey,'meta-desc=',4,232)

LexToken(String,'If you are being tested for Type 2 diabetes, your doctor gives you an A1C test. The test is also used to monitor your A1C levels.',4,242)

LexToken(TitleKey,'title=',4,374)

LexToken(String,'A1C',4,380)

LexToken(UrlKey,'url=',4,386)

LexToken(Uri,'https://medlineplus.gov/a1c.html',4,390)

LexToken(IdKey,'id=',4,425)

LexToken(Integer,6308,4,428)

LexToken(LanguageKey,'language=',4,435)

LexToken(Language,'English',4,444)

LexToken(DateCreatedKey,'date-created=',4,454)

LexToken(Date,datetime.date(2015, 12, 22),4,467)

LexToken(StartTagClose,'>',4,479)

LexToken(AlsoCalledStartTagOpen,'<also-called',5,483)

LexToken(StartTagClose,'>',5,495)

LexToken(Text,'Glycohemoglobin',5,496)

LexToken(AlsoCalledEndTag,'</also-called>',5,511)

LexToken(AlsoCalledStartTagOpen,'<also-called',6,528)

LexToken(StartTagClose,'>',6,540)

LexToken(Text,'HbA1C',6,541)

LexToken(AlsoCalledEndTag,'</also-called>',6,546)

LexToken(AlsoCalledStartTagOpen,'<also-called',7,563)

LexToken(StartTagClose,'>',7,575)

LexToken(Text,'Hemoglobin A1C test',7,576)

LexToken(AlsoCalledEndTag,'</also-called>',7,595)

LexToken(FullSummaryStartTagOpen,'<full-summary',8,612)

LexToken(StartTagClose,'>',8,625)

LexToken(Text,'&lt;p&gt;A1C is a blood test for &lt;a href="https://medlineplus.gov/diabetestype2.html"&gt;type 2 diabetes&lt;/a&gt; and &lt;a href="https://medlineplus.gov/prediabetes.html"&gt;prediabetes&lt;/a&gt;. It measures your average blood glucose, or &lt;a href="https://medlineplus.gov/bloodglucose.html"&gt;blood sugar&lt;/a&gt;, level over the past 3 months. Doctors may use the A1C alone or in combination with other diabetes tests to make a diagnosis. They also use the A1C to see how well you are managing your diabetes. This test is different from the blood sugar checks that people with diabetes do every day.&lt;/p&gt;',8,626)

LexToken(Text,'&lt;p&gt;Your A1C test result is given in percentages. The higher the percentage, the higher your blood sugar levels have been:&lt;/p&gt;',10,1249)

LexToken(Text,'&lt;ul&gt;',11,1387)

LexToken(Text,'&lt;li&gt;A normal A1C level is below 5.7%&lt;/li&gt;',12,1398)

LexToken(Text,'&lt;li&gt;Prediabetes is between 5.7 to 6.4%. Having prediabetes is a risk factor for getting type 2 diabetes. People with prediabetes may need retests every year.&lt;/li&gt;',13,1452)

LexToken(Text,'&lt;li&gt;Type 2 diabetes is above 6.5%&lt;/li&gt;',14,1627)

LexToken(Text,'&lt;li&gt;If you have diabetes, you should have the A1C test at least twice a year. The A1C goal for many people with diabetes is below 7. It may be different for you. Ask what your goal should be. If your A1C result is too high, you may need to change your diabetes care plan.&lt;/li&gt;',15,1678)

LexToken(Text,'&lt;/ul&gt;',16,1967)

LexToken(Text,'&lt;p class=""&gt;NIH: National Institute of Diabetes and Digestive and Kidney Diseases&lt;/p&gt;',18,1980)

LexToken(FullSummaryEndTag,'</full-summary>',18,2077)

LexToken(GroupStartTagOpen,'<group',19,2095)

LexToken(UrlKey,'url=',19,2102)

LexToken(Uri,'https://medlineplus.gov/diagnostictests.html',19,2106)

LexToken(IdKey,'id=',19,2153)

LexToken(Integer,25,19,2156)

LexToken(StartTagClose,'>',19,2160)

LexToken(Text,'Diagnostic Tests',19,2161)

LexToken(GroupEndTag,'</group>',19,2177)

LexToken(GroupStartTagOpen,'<group',20,2188)

LexToken(UrlKey,'url=',20,2195)

LexToken(Uri,'https://medlineplus.gov/diabetesmellitus.html',20,2199)

LexToken(IdKey,'id=',20,2247)

LexToken(Integer,45,20,2250)

LexToken(StartTagClose,'>',20,2254)

LexToken(Text,'Diabetes Mellitus',20,2255)

LexToken(GroupEndTag,'</group>',20,2272)

LexToken(LanguageMappedTopicStartTagOpen,'<language-mapped-topic',21,2283)

LexToken(UrlKey,'url=',21,2306)

LexToken(Uri,'https://medlineplus.gov/spanish/a1c.html',21,2310)

LexToken(IdKey,'id=',21,2353)

LexToken(Integer,6309,21,2356)

LexToken(LanguageKey,'language=',21,2363)

LexToken(Language,'Spanish',21,2372)

LexToken(StartTagClose,'>',21,2381)

LexToken(Text,'Prueba de hemoglobina glicosilada (HbA1c)',21,2382)

LexToken(LanguageMappedTopicEndTag,'</language-mapped-topic>',21,2423)

LexToken(MeshHeadingStartTagOpen,'<mesh-heading',22,2450)

LexToken(StartTagClose,'>',22,2463)

LexToken(DescriptorStartTagOpen,'<descriptor',23,2468)

LexToken(IdKey,'id=',23,2480)

LexToken(String,'D006442',23,2483)

LexToken(StartTagClose,'>',23,2492)

LexToken(Text,'Glycated Hemoglobin',23,2493)

LexToken(DescriptorEndTag,'</descriptor>',23,2512)

LexToken(MeshHeadingEndTag,'</mesh-heading>',24,2528)

LexToken(OtherLanguageStartTagOpen,'<other-language',25,2546)

LexToken(VernacularNameKey,'vernacular-name=',25,2562)

LexToken(String,'español',25,2578)

LexToken(UrlKey,'url=',25,2588)

LexToken(Uri,'https://medlineplus.gov/spanish/a1c.html',25,2592)

LexToken(StartTagClose,'>',25,2634)

LexToken(Text,'Spanish',25,2635)

LexToken(OtherLanguageEndTag,'</other-language>',25,2642)

LexToken(PrimaryInstituteStartTagOpen,'<primary-institute',26,2662)

LexToken(UrlKey,'url=',26,2681)

LexToken(Uri,'https://www.niddk.nih.gov',26,2685)

LexToken(StartTagClose,'>',26,2712)

LexToken(Text,'National Institute of Diabetes and Digestive and Kidney Diseases',26,2713)

LexToken(PrimaryInstituteEndTag,'</primary-institute>',26,2777)

LexToken(SeeReferenceStartTagOpen,'<see-reference',27,2800)

LexToken(StartTagClose,'>',27,2814)

LexToken(Text,'Hemoglobin A1c',27,2815)

LexToken(SeeReferenceEndTag,'</see-reference>',27,2829)

LexToken(SiteStartTagOpen,'<site',28,2848)

LexToken(TitleKey,'title=',28,2854)

LexToken(String,'A1C and eAG',28,2860)

LexToken(UrlKey,'url=',28,2874)

LexToken(Uri,'https://diabetes.org/a1c-eag-conversion-calculator',28,2878)

LexToken(LanguageMappedUrlKey,'language-mapped-url=',28,2931)

LexToken(Uri,'http://www.diabetes.org/es/vivir-con-diabetes/tratamiento-y-cuidado/el-control-de-la-glucosa-en-la-sangre/a1c-y-eag.html?loc=lwd-es-slabnav',28,2951)

LexToken(StartTagClose,'>',28,3092)

LexToken(InformationCategoryStartTagOpen,'<information-category',29,3097)

LexToken(StartTagClose,'>',29,3118)

LexToken(Text,'Learn More',29,3119)

LexToken(InformationCategoryEndTag,'</information-category>',29,3129)

LexToken(OrganizationStartTagOpen,'<organization',30,3156)

LexToken(StartTagClose,'>',30,3169)

LexToken(Text,'American Diabetes Association',30,3170)

LexToken(OrganizationEndTag,'</organization>',30,3199)

LexToken(SiteEndTag,'</site>',31,3217)

LexToken(SiteStartTagOpen,'<site',32,3227)

LexToken(TitleKey,'title=',32,3233)

LexToken(String,'A1C test',32,3239)

LexToken(UrlKey,'url=',32,3250)

LexToken(Uri,'https://medlineplus.gov/ency/article/003640.htm',32,3254)

LexToken(LanguageMappedUrlKey,'language-mapped-url=',32,3304)

LexToken(Uri,'https://medlineplus.gov/spanish/ency/article/003640.htm',32,3324)

LexToken(StartTagClose,'>',32,3381)

LexToken(InformationCategoryStartTagOpen,'<information-category',33,3386)

LexToken(StartTagClose,'>',33,3407)

LexToken(Text,'Patient Handouts',33,3408)

LexToken(InformationCategoryEndTag,'</information-category>',33,3424)

LexToken(OrganizationStartTagOpen,'<organization',34,3451)

LexToken(StartTagClose,'>',34,3464)

LexToken(Text,'Medical Encyclopedia',34,3465)

LexToken(OrganizationEndTag,'</organization>',34,3485)

LexToken(SiteEndTag,'</site>',35,3503)

LexToken(SiteStartTagOpen,'<site',36,3513)

LexToken(TitleKey,'title=',36,3519)

LexToken(String,'A1C Test and Diabetes',36,3525)

LexToken(UrlKey,'url=',36,3549)

LexToken(Uri,'https://www.niddk.nih.gov/health-information/diagnostic-tests/a1c-test',36,3553)

LexToken(LanguageMappedUrlKey,'language-mapped-url=',36,3626)

LexToken(Uri,'https://www.niddk.nih.gov/health-information/informacion-de-la-salud/pruebas-diagnosticas/prueba-a1c-diabetes',36,3646)

LexToken(StartTagClose,'>',36,3757)

LexToken(InformationCategoryStartTagOpen,'<information-category',37,3762)

LexToken(StartTagClose,'>',37,3783)

LexToken(Text,'Learn More',37,3784)

LexToken(InformationCategoryEndTag,'</information-category>',37,3794)

LexToken(OrganizationStartTagOpen,'<organization',38,3821)

LexToken(StartTagClose,'>',38,3834)

LexToken(Text,'National Institute of Diabetes and Digestive and Kidney Diseases',38,3835)

LexToken(OrganizationEndTag,'</organization>',38,3899)

LexToken(StandardDescriptionStartTagOpen,'<standard-description',39,3918)

LexToken(StartTagClose,'>',39,3939)

LexToken(Text,'NIH',39,3940)

LexToken(StandardDescriptionEndTag,'</standard-description>',39,3943)

LexToken(SiteEndTag,'</site>',40,3969)

LexToken(SiteStartTagOpen,'<site',41,3979)

LexToken(TitleKey,'title=',41,3985)

LexToken(String,'A1C Test and Race/Ethnicity',41,3991)

LexToken(UrlKey,'url=',41,4021)

LexToken(Uri,'https://www.niddk.nih.gov/health-information/diagnostic-tests/a1c-test-race-ethnicity',41,4025)

LexToken(StartTagClose,'>',41,4112)

LexToken(InformationCategoryStartTagOpen,'<information-category',42,4117)

LexToken(StartTagClose,'>',42,4138)

LexToken(Text,'Learn More',42,4139)

LexToken(InformationCategoryEndTag,'</information-category>',42,4149)

LexToken(OrganizationStartTagOpen,'<organization',43,4176)

LexToken(StartTagClose,'>',43,4189)

LexToken(Text,'National Institute of Diabetes and Digestive and Kidney Diseases',43,4190)

LexToken(OrganizationEndTag,'</organization>',43,4254)

LexToken(StandardDescriptionStartTagOpen,'<standard-description',44,4273)

LexToken(StartTagClose,'>',44,4294)

LexToken(Text,'NIH',44,4295)

LexToken(StandardDescriptionEndTag,'</standard-description>',44,4298)

LexToken(SiteEndTag,'</site>',45,4324)

LexToken(SiteStartTagOpen,'<site',46,4334)

LexToken(TitleKey,'title=',46,4340)

LexToken(String,'ClinicalTrials.gov: Hemoglobin A1C',46,4346)

LexToken(UrlKey,'url=',46,4383)

LexToken(Uri,'https://clinicaltrials.gov/search?intr=%22Hemoglobin+A1C%22&amp;aggFilters=status:not%20rec',46,4387)

LexToken(StartTagClose,'>',46,4480)

LexToken(InformationCategoryStartTagOpen,'<information-category',47,4485)

LexToken(StartTagClose,'>',47,4506)

LexToken(Text,'Clinical Trials',47,4507)

LexToken(InformationCategoryEndTag,'</information-category>',47,4522)

LexToken(OrganizationStartTagOpen,'<organization',48,4549)

LexToken(StartTagClose,'>',48,4562)

LexToken(Text,'National Institutes of Health',48,4563)

LexToken(OrganizationEndTag,'</organization>',48,4592)

LexToken(StandardDescriptionStartTagOpen,'<standard-description',49,4611)

LexToken(StartTagClose,'>',49,4632)

LexToken(Text,'NIH',49,4633)

LexToken(StandardDescriptionEndTag,'</standard-description>',49,4636)

LexToken(SiteEndTag,'</site>',50,4662)

LexToken(SiteStartTagOpen,'<site',51,4672)

LexToken(TitleKey,'title=',51,4678)

LexToken(String,'Hemoglobin A1C (HbA1c) Test',51,4684)

LexToken(UrlKey,'url=',51,4714)

LexToken(Uri,'https://medlineplus.gov/lab-tests/hemoglobin-a1c-hba1c-test/',51,4718)

LexToken(LanguageMappedUrlKey,'language-mapped-url=',51,4781)

LexToken(Uri,'https://medlineplus.gov/spanish/pruebas-de-laboratorio/prueba-de-hemoglobina-a1c/',51,4801)

LexToken(StartTagClose,'>',51,4884)

LexToken(InformationCategoryStartTagOpen,'<information-category',52,4889)

LexToken(StartTagClose,'>',52,4910)

LexToken(Text,'Learn More',52,4911)

LexToken(InformationCategoryEndTag,'</information-category>',52,4921)

LexToken(OrganizationStartTagOpen,'<organization',53,4948)

LexToken(StartTagClose,'>',53,4961)

LexToken(Text,'National Library of Medicine',53,4962)

LexToken(OrganizationEndTag,'</organization>',53,4990)

LexToken(StandardDescriptionStartTagOpen,'<standard-description',54,5009)

LexToken(StartTagClose,'>',54,5030)

LexToken(Text,'NIH',54,5031)

LexToken(StandardDescriptionEndTag,'</standard-description>',54,5034)

LexToken(SiteEndTag,'</site>',55,5060)

LexToken(SiteStartTagOpen,'<site',56,5070)

LexToken(TitleKey,'title=',56,5076)

LexToken(String,'A1C',56,5082)

LexToken(UrlKey,'url=',56,5088)

LexToken(Uri,'https://pubmed.ncbi.nlm.nih.gov/?term=%22Glycated+Hemoglobin%22%5Bmajr%3Anoexp%5D+AND+humans%5Bmh%5D+AND+english%5Bla%5D+AND+%22last+1+Year%22+%5Bedat%5D+NOT+%28letter%5Bpt%5D+OR+case+reports%5Bpt%5D+OR+editorial%5Bpt%5D+OR+comment%5Bpt%5D%29+AND+free+full+text%5Bsb%5D+',56,5092)

LexToken(StartTagClose,'>',56,5364)

LexToken(InformationCategoryStartTagOpen,'<information-category',57,5369)

LexToken(StartTagClose,'>',57,5390)

LexToken(Text,'Journal Articles',57,5391)

LexToken(InformationCategoryEndTag,'</information-category>',57,5407)

LexToken(SiteEndTag,'</site>',58,5433)

LexToken(SiteStartTagOpen,'<site',59,5443)

LexToken(TitleKey,'title=',59,5449)

LexToken(String,'Understanding A1C',59,5455)

LexToken(UrlKey,'url=',59,5475)

LexToken(Uri,'https://diabetes.org/about-diabetes/a1c',59,5479)

LexToken(StartTagClose,'>',59,5520)

LexToken(InformationCategoryStartTagOpen,'<information-category',60,5525)

LexToken(StartTagClose,'>',60,5546)

LexToken(Text,'Learn More',60,5547)

LexToken(InformationCategoryEndTag,'</information-category>',60,5557)

LexToken(OrganizationStartTagOpen,'<organization',61,5584)

LexToken(StartTagClose,'>',61,5597)

LexToken(Text,'American Diabetes Association',61,5598)

LexToken(OrganizationEndTag,'</organization>',61,5627)

LexToken(SiteEndTag,'</site>',62,5645)

LexToken(HealthTopicEndTag,'</health-topic>',63,5654)