In [1]:
"""
WE'RE LEARNING PYTHON -- DAY 3!
"""

"\nWE'RE LEARNING PYTHON -- DAY 3!\n"

In [2]:
"""
ANNOTATING/SEARCHING TEXT WITH CONTROLLED VOCABULARIES
"""

'\nANNOTATING/SEARCHING TEXT WITH CONTROLLED VOCABULARIES\n'

In [3]:
# Yesterday, we talked about search problems that
# required a lemmatizer...

# We needed "community" to match "communities".
#
# But what about more complicated queries?
#
# For instance, should "gay" match "homosexual"?
#
# Or, should "transgender" match "transsexual"?
#
# In medicine, these often get convoluted:
# Should 'neoplasm' match 'cancer' and/or 'tumor'?

# So what do we do?
#
# The answer: controlled vocabularies.

In [4]:
# What is a controlled vocabulary?

# A list of words/phrases which is "controlled" by
# some organization or entity.

# Oftentimes, these vocabularies are used to index content.

# Think: Dewey Decimal Classification or
# Library of Congress Classification. Heck, even
# Linnaean taxonomy, the International Classification
# of Diseases (ICD), or the Diagnostic and Statistical 
# Manual of Mental Disorders (DSM).

In [5]:
# What is an ontology?

# An ontology is a form of controlled vocabulary which
# allows for non-linear connections and a theoretically
# limitless ability to add various definitions.

# Think of a food web. Various organisms have relationships
# to one another which we may define. Animals may enter
# or leave a specific food web, and individual organisms can
# be related to each other in a number of ways.

# Another great illustration of an ontology can be
# found here:
# https://www.researchgate.net/profile/Jake_Cobb/publication/236842047/figure/fig1/AS:299445456523267@1448404768571/Example-pizza-ontology-represented-as-a-graph-G-a-and-a-changed-version-of-the-pizza.png

In [6]:
# Import the sys package.
import sys

In [7]:
!{sys.executable} -m pip install owlready2



In [8]:
# Let's import the owlready2 package.
import owlready2



In [9]:
# We'll start with the Gender, Sex, and Sexual Orientation
# (GSSO) ontology.

# First, we need to download it.
#
# We can do that from GitHub here:
# https://github.com/Superraptor/GSSO
# 
# Or from the National Center for Biomedical Ontology (NCBO)
# BioPortal here:
# https://bioportal.bioontology.org/ontologies/GSSO
#
# But it's always fun to download programmatically!
import requests

url = 'https://raw.githubusercontent.com/Superraptor/GSSO/master/gsso_v2.0.0_rdf_xml.owl'
# The timeout keeps the cell from hanging forever if the server stops responding.
r = requests.get(url, allow_redirects=True, timeout=60)

# Stop right here if the download failed (e.g. a 404 or 500 response);
# otherwise we would silently save an error page as our ontology file.
r.raise_for_status()

# Let's write the downloaded content to a file.
# The `with` statement closes the file for us, even if writing fails.
file_name = 'gsso_v2.0.0_rdf_xml.owl'
with open(file_name, 'wb') as owl_file:
    bytes_written = owl_file.write(r.content)

# Show how many bytes we wrote.
bytes_written

13531570

In [10]:
# Time to look inside the ontology!

# Step one: build an ontology object from the file we saved and
# load its contents (`load()` hands back the ontology itself).
onto = owlready2.get_ontology(file_name).load()

# Loading straight from the URL also works:
# `onto = owlready2.get_ontology(url).load()`

# Display the loaded ontology.
onto

get_ontology("http://purl.bioontology.org/ontology/GSSO/")

In [11]:
# Time to look up an entity in the ontology.

# We haven't met any of the entities yet, so a wildcard
# IRI pattern will simply hand back every one of them.
onto.search(iri="*")

[owlready_ontology.python_module, owlready_ontology.python_name, owl.inverseOf, rdf-schema.comment, rdf-schema.label, XMLSchema.integer, XMLSchema.decimal, XMLSchema.anyURI, 22-rdf-syntax-ns.PlainLiteral, XMLSchema.dateTime, owl.equivalentProperty, rdf-schema.isDefinedBy, rdf-schema.seeAlso, owlready_ontology.class_property_type, owlready_ontology.defined_class, ontology.GSSO, _downloads.owlready_ontology.owl, 2010.MasculineGender, gold.GenderProperty, 2010.ThirdPerson, gold.PersonProperty, obo.IAO_0000115, 1.1.contributor, 1.1.date, 1.1.description, 1.1.format, 1.1.identifier, 1.1.language, 1.1.publisher, 1.1.relation, 1.1.rights, 1.1.subject, 1.1.title, 1.1.type, terms.abstract, terms.description, terms.accessRights, terms.rights, terms.accrualMethod, terms.alternative, terms.title, terms.available, terms.date, terms.conformsTo, terms.relation, terms.contributor, terms.created, terms.creator, terms.dateAccepted, terms.dateCopyrighted, terms.dateSubmitted, terms.format, terms.hasForma

In [12]:
# Whoa... far too much to read at once! `search_one` takes the
# same wildcard pattern but hands back only a single match:
first_entity = onto.search_one(iri="*")

In [13]:
# Ontologies contain several kinds of "entities"; the
# most prominent are classes, individuals, and properties.

# Which kind is the entity we just fetched?
first_entity_type = type(first_entity)
print(first_entity_type)

<class 'owlready2.annotation.AnnotationPropertyClass'>


In [14]:
# Looks like an annotation property! A specific type of
# property.

# But properties tend to be utilized primarily to express
# relationships between and data about classes and individuals.
# In many cases, they aren't being searched for directly.

# That's our case. We want a class or an individual.

In [15]:
# First, collect every entity in the ontology:
all_entities = onto.search(iri="*")

# Each Python type we care about, paired with a human-friendly name.
# The order matters: an entity is reported under the first type it matches.
entity_kinds = (
    (owlready2.entity.Thing, "individual"),
    (owlready2.entity.ThingClass, "class"),
    (owlready2.annotation.AnnotationPropertyClass, "annotation property"),
)

# Print each entity's kind together with its IRI
# (Internationalized Resource Identifier).
# Entities matching none of the types above are skipped.
for entity in all_entities:
    for entity_type, kind in entity_kinds:
        if isinstance(entity, entity_type):
            print(" - ".join((kind, entity.iri)))
            break

annotation property - http://www.lesfleursdunormal.fr/static/_downloads/owlready_ontology.owl#python_module
annotation property - http://www.lesfleursdunormal.fr/static/_downloads/owlready_ontology.owl#python_name
annotation property - http://www.w3.org/2002/07/owl#inverseOf
annotation property - http://www.w3.org/2000/01/rdf-schema#comment
annotation property - http://www.w3.org/2000/01/rdf-schema#label
individual - http://www.w3.org/2001/XMLSchema#integer
individual - http://www.w3.org/2001/XMLSchema#decimal
individual - http://www.w3.org/2001/XMLSchema#anyURI
individual - http://www.w3.org/1999/02/22-rdf-syntax-ns#PlainLiteral
individual - http://www.w3.org/2001/XMLSchema#dateTime
annotation property - http://www.w3.org/2002/07/owl#equivalentProperty
annotation property - http://www.w3.org/2000/01/rdf-schema#isDefinedBy
annotation property - http://www.w3.org/2000/01/rdf-schema#seeAlso
annotation property - http://www.lesfleursdunormal.fr/static/_downloads/owlready_ontology.owl#clas

individual - https://doi.org/10.1089/trgh.2016.0020
individual - https://doi.org/10.1089/trgh.2016.0021
individual - https://doi.org/10.1089/trgh.2016.0037
individual - https://doi.org/10.1089/trgh.2016.0047
individual - https://doi.org/10.1093/cdn/nzz047.P04-001-19
individual - https://doi.org/10.1093/cdn/nzz051.P04-001-19
individual - https://doi.org/10.1530/EC-19-0272
individual - http://www.isni.org/0000000057668002
individual - http://purl.bioontology.org/ontology/GSSO/000345
class - http://purl.bioontology.org/ontology/GSSO/000395
class - http://purl.bioontology.org/ontology/GSSO/002328
individual - http://purl.bioontology.org/ontology/GSSO/005788
class - http://purl.bioontology.org/ontology/GSSO/000133
class - http://purl.bioontology.org/ontology/GSSO/000134
class - http://purl.bioontology.org/ontology/GSSO/000257
class - http://purl.bioontology.org/ontology/GSSO/000135
class - http://purl.bioontology.org/ontology/GSSO/000136
class - http://purl.bioontology.org/ontology/GSSO/000

class - http://purl.bioontology.org/ontology/GSSO/000766
class - http://purl.bioontology.org/ontology/GSSO/000787
class - http://purl.bioontology.org/ontology/GSSO/000767
class - http://purl.bioontology.org/ontology/GSSO/000768
class - http://purl.bioontology.org/ontology/GSSO/000769
class - http://purl.bioontology.org/ontology/GSSO/000777
class - http://purl.bioontology.org/ontology/GSSO/000816
class - http://purl.bioontology.org/ontology/GSSO/000770
class - http://purl.bioontology.org/ontology/GSSO/001170
class - http://purl.bioontology.org/ontology/GSSO/000771
class - http://purl.bioontology.org/ontology/GSSO/000814
class - http://purl.org/sig/ont/fma/fma49184
class - http://purl.bioontology.org/ontology/GSSO/000772
class - http://purl.bioontology.org/ontology/GSSO/000773
class - http://purl.bioontology.org/ontology/GSSO/000774
class - http://purl.bioontology.org/ontology/GSSO/001324
class - http://purl.bioontology.org/ontology/GSSO/000775
class - http://purl.bioontology.org/ontolog

class - http://purl.obolibrary.org/obo/GAZ_00002640
class - http://purl.bioontology.org/ontology/GSSO/001095
class - http://purl.obolibrary.org/obo/GAZ_00002638
class - http://purl.bioontology.org/ontology/GSSO/001096
class - http://purl.obolibrary.org/obo/GAZ_00002943
class - http://purl.bioontology.org/ontology/GSSO/001097
class - http://purl.bioontology.org/ontology/GSSO/001098
class - http://purl.bioontology.org/ontology/GSSO/001099
class - http://purl.bioontology.org/ontology/GSSO/001100
class - http://purl.bioontology.org/ontology/GSSO/001101
class - http://purl.bioontology.org/ontology/GSSO/001102
class - http://purl.bioontology.org/ontology/GSSO/001103
class - http://purl.bioontology.org/ontology/GSSO/001118
class - http://purl.bioontology.org/ontology/GSSO/001104
class - http://purl.bioontology.org/ontology/GSSO/001105
class - http://purl.bioontology.org/ontology/GSSO/001106
class - http://purl.bioontology.org/ontology/GSSO/001107
class - http://purl.bioontology.org/ontology/G

class - http://purl.bioontology.org/ontology/GSSO/001961
class - http://purl.bioontology.org/ontology/GSSO/007411
individual - http://purl.bioontology.org/ontology/GSSO/001956
individual - http://purl.bioontology.org/ontology/GSSO/001957
class - http://purl.bioontology.org/ontology/GSSO/001962
class - http://purl.bioontology.org/ontology/GSSO/001963
class - http://purl.bioontology.org/ontology/GSSO/001964
class - http://purl.bioontology.org/ontology/GSSO/001965
class - http://purl.bioontology.org/ontology/GSSO/001966
class - http://purl.bioontology.org/ontology/GSSO/001967
class - http://purl.bioontology.org/ontology/GSSO/001968
class - http://purl.bioontology.org/ontology/GSSO/001969
class - http://purl.bioontology.org/ontology/GSSO/001970
class - http://purl.bioontology.org/ontology/GSSO/001971
class - http://purl.bioontology.org/ontology/GSSO/001972
class - http://purl.bioontology.org/ontology/GSSO/001973
class - http://purl.bioontology.org/ontology/GSSO/004059
class - http://purl.b

class - http://purl.bioontology.org/ontology/GSSO/003364
class - http://purl.bioontology.org/ontology/GSSO/003365
class - http://purl.bioontology.org/ontology/GSSO/003366
class - http://purl.bioontology.org/ontology/GSSO/007925
class - http://purl.bioontology.org/ontology/GSSO/003367
class - http://purl.bioontology.org/ontology/GSSO/003368
class - http://purl.bioontology.org/ontology/GSSO/003369
class - http://purl.bioontology.org/ontology/GSSO/003371
class - http://purl.bioontology.org/ontology/GSSO/003372
class - http://purl.bioontology.org/ontology/GSSO/003373
class - http://purl.bioontology.org/ontology/GSSO/003374
class - http://purl.bioontology.org/ontology/GSSO/003375
class - http://purl.bioontology.org/ontology/GSSO/003376
class - http://purl.bioontology.org/ontology/GSSO/003377
class - http://purl.bioontology.org/ontology/GSSO/003384
class - http://purl.bioontology.org/ontology/GSSO/003380
class - http://purl.bioontology.org/ontology/GSSO/003381
class - http://purl.bioontology

class - http://purl.obolibrary.org/obo/LifO_0000014
individual - http://www.isni.org/isni/0000000072678671
class - http://purl.bioontology.org/ontology/GSSO/003871
class - http://purl.bioontology.org/ontology/GSSO/003873
class - http://purl.bioontology.org/ontology/GSSO/003874
class - http://purl.bioontology.org/ontology/GSSO/003878
class - http://purl.bioontology.org/ontology/GSSO/003879
class - http://purl.bioontology.org/ontology/GSSO/003881
class - http://purl.bioontology.org/ontology/GSSO/003882
class - http://purl.bioontology.org/ontology/GSSO/003883
class - http://purl.bioontology.org/ontology/GSSO/003884
class - http://purl.bioontology.org/ontology/GSSO/003887
class - http://purl.bioontology.org/ontology/GSSO/003891
class - http://purl.bioontology.org/ontology/GSSO/003892
class - http://purl.bioontology.org/ontology/GSSO/003893
class - http://purl.bioontology.org/ontology/GSSO/003894
class - http://purl.bioontology.org/ontology/GSSO/003895
class - http://purl.bioontology.org/on

class - http://purl.bioontology.org/ontology/GSSO/004982
class - http://purl.bioontology.org/ontology/GSSO/004983
class - http://purl.bioontology.org/ontology/GSSO/004984
class - http://purl.bioontology.org/ontology/GSSO/004985
class - http://purl.bioontology.org/ontology/GSSO/004986
class - http://purl.bioontology.org/ontology/GSSO/004987
class - http://purl.bioontology.org/ontology/GSSO/004988
class - http://purl.org/sig/ont/fma/fma268896
class - http://purl.bioontology.org/ontology/GSSO/004993
class - http://purl.bioontology.org/ontology/GSSO/004994
class - http://purl.bioontology.org/ontology/GSSO/004995
class - http://purl.bioontology.org/ontology/GSSO/004996
class - http://purl.bioontology.org/ontology/GSSO/004997
class - http://purl.org/sig/ont/fma/fma62955
class - http://purl.bioontology.org/ontology/GSSO/004998
class - http://purl.bioontology.org/ontology/GSSO/004999
class - http://purl.bioontology.org/ontology/GSSO/005000
class - http://purl.bioontology.org/ontology/GSSO/0050

class - http://purl.org/sig/ont/fma/fma19656
class - http://purl.bioontology.org/ontology/GSSO/005528
class - http://purl.bioontology.org/ontology/GSSO/005529
class - http://purl.bioontology.org/ontology/GSSO/005530
class - http://purl.bioontology.org/ontology/GSSO/005531
class - http://purl.bioontology.org/ontology/GSSO/005532
class - http://purl.bioontology.org/ontology/GSSO/005533
class - http://purl.bioontology.org/ontology/GSSO/005534
class - http://purl.bioontology.org/ontology/GSSO/005535
class - http://purl.bioontology.org/ontology/GSSO/005536
class - http://purl.bioontology.org/ontology/GSSO/005537
class - http://purl.bioontology.org/ontology/GSSO/005538
class - http://purl.bioontology.org/ontology/GSSO/005539
class - http://purl.bioontology.org/ontology/GSSO/005540
class - http://purl.bioontology.org/ontology/GSSO/005541
class - http://semanticscience.org/resource/SIO_000510
class - http://purl.bioontology.org/ontology/GSSO/005542
class - http://purl.bioontology.org/ontology/

class - http://purl.bioontology.org/ontology/GSSO/006639
individual - http://purl.bioontology.org/ontology/GSSO/006638
class - http://purl.bioontology.org/ontology/GSSO/006640
class - http://purl.bioontology.org/ontology/GSSO/006641
class - http://purl.bioontology.org/ontology/GSSO/006649
class - http://purl.bioontology.org/ontology/GSSO/006650
individual - http://purl.bioontology.org/ontology/GSSO/007161
class - http://purl.bioontology.org/ontology/GSSO/006652
class - http://purl.bioontology.org/ontology/GSSO/006655
class - http://purl.bioontology.org/ontology/GSSO/006656
class - http://purl.bioontology.org/ontology/GSSO/006657
class - http://purl.bioontology.org/ontology/GSSO/006658
class - http://purl.bioontology.org/ontology/GSSO/006659
class - http://purl.bioontology.org/ontology/GSSO/006660
class - http://purl.bioontology.org/ontology/GSSO/006661
class - http://purl.bioontology.org/ontology/GSSO/006662
class - http://purl.bioontology.org/ontology/GSSO/006664
class - http://purl.b

class - http://purl.bioontology.org/ontology/GSSO/007089
class - http://purl.bioontology.org/ontology/GSSO/007090
class - http://purl.bioontology.org/ontology/GSSO/007091
class - http://purl.bioontology.org/ontology/GSSO/007092
class - http://purl.bioontology.org/ontology/GSSO/007099
class - http://purl.bioontology.org/ontology/GSSO/007093
class - http://purl.bioontology.org/ontology/GSSO/007094
class - http://purl.bioontology.org/ontology/GSSO/007098
class - http://purl.bioontology.org/ontology/GSSO/007095
class - http://purl.bioontology.org/ontology/GSSO/007096
class - http://purl.bioontology.org/ontology/GSSO/007097
class - http://semanticscience.org/resource/SIO_000014
class - http://purl.bioontology.org/ontology/GSSO/007102
class - http://purl.bioontology.org/ontology/GSSO/007103
class - http://purl.bioontology.org/ontology/GSSO/007104
class - http://purl.bioontology.org/ontology/GSSO/007105
class - http://purl.bioontology.org/ontology/MESH/D000079102
class - http://purl.bioontolo

class - http://purl.bioontology.org/ontology/GSSO/007980
class - http://purl.bioontology.org/ontology/GSSO/007991
class - http://purl.bioontology.org/ontology/GSSO/007981
class - http://purl.bioontology.org/ontology/GSSO/007992
class - http://purl.bioontology.org/ontology/GSSO/007982
class - http://purl.bioontology.org/ontology/GSSO/007983
class - http://purl.bioontology.org/ontology/GSSO/007984
class - http://purl.bioontology.org/ontology/GSSO/007985
class - http://purl.bioontology.org/ontology/GSSO/008002
class - http://purl.bioontology.org/ontology/GSSO/007986
class - http://purl.bioontology.org/ontology/GSSO/007987
class - http://purl.bioontology.org/ontology/GSSO/007988
class - http://purl.bioontology.org/ontology/GSSO/007989
class - http://purl.bioontology.org/ontology/GSSO/007990
class - http://purl.bioontology.org/ontology/GSSO/007993
class - http://purl.bioontology.org/ontology/GSSO/007994
class - http://purl.bioontology.org/ontology/GSSO/007995
class - http://purl.bioontology

class - http://purl.bioontology.org/ontology/MESH/D052286
class - http://purl.bioontology.org/ontology/MESH/D052287
class - http://purl.bioontology.org/ontology/MESH/D055512
class - http://purl.bioontology.org/ontology/MESH/D055747
class - http://purl.bioontology.org/ontology/MESH/D057214
class - http://purl.bioontology.org/ontology/MESH/D059785
class - http://purl.bioontology.org/ontology/MESH/D060728
class - http://purl.bioontology.org/ontology/MESH/D060805
class - http://purl.bioontology.org/ontology/NCBITAXON/548681
class - http://purl.bioontology.org/ontology/NCBITAXON/10293
class - http://purl.bioontology.org/ontology/NCBITAXON/10298
class - http://purl.bioontology.org/ontology/NCBITAXON/10310
class - http://purl.bioontology.org/ontology/NCBITAXON/1113537
class - http://purl.bioontology.org/ontology/NCBITAXON/809
class - http://purl.bioontology.org/ontology/NCBITAXON/2169561
class - http://purl.bioontology.org/ontology/NCBITAXON/327045
class - http://purl.bioontology.org/ontology

class - http://purl.obolibrary.org/obo/CHEBI_59793
class - http://purl.obolibrary.org/obo/CHEBI_59999
class - http://purl.obolibrary.org/obo/CHEBI_60004
class - http://purl.obolibrary.org/obo/CHEBI_60027
class - http://purl.obolibrary.org/obo/CHEBI_60783
class - http://purl.obolibrary.org/obo/CHEBI_61313
class - http://purl.obolibrary.org/obo/CHEBI_61458
class - http://purl.obolibrary.org/obo/CHEBI_63299
class - http://purl.obolibrary.org/obo/CHEBI_78616
class - http://purl.obolibrary.org/obo/CHEBI_63533
class - http://purl.obolibrary.org/obo/CHEBI_63577
class - http://purl.obolibrary.org/obo/CHEBI_63633
class - http://purl.obolibrary.org/obo/CHEBI_6427
class - http://purl.obolibrary.org/obo/CHEBI_6716
class - http://purl.obolibrary.org/obo/CHEBI_68534
class - http://purl.obolibrary.org/obo/CHEBI_83575
class - http://purl.obolibrary.org/obo/CHEBI_73474
class - http://purl.obolibrary.org/obo/CHEBI_7445
class - http://purl.obolibrary.org/obo/CHEBI_77006
class - http://purl.obolibrary.org

class - http://purl.org/sig/ont/fma/fma17739
class - http://purl.org/sig/ont/fma/fma17744
class - http://purl.org/sig/ont/fma/fma45637
class - http://purl.org/sig/ont/fma/fma17745
class - http://purl.org/sig/ont/fma/fma17752
class - http://purl.org/sig/ont/fma/fma323918
class - http://purl.org/sig/ont/fma/fma17759
class - http://purl.org/sig/ont/fma/fma24208
class - http://purl.org/sig/ont/fma/fma17769
class - http://purl.org/sig/ont/fma/fma37342
class - http://purl.org/sig/ont/fma/fma17770
class - http://purl.org/sig/ont/fma/fma30320
class - http://purl.org/sig/ont/fma/fma18302
class - http://purl.org/sig/ont/fma/fma18305
class - http://purl.org/sig/ont/fma/fma18306
class - http://purl.org/sig/ont/fma/fma18307
class - http://purl.org/sig/ont/fma/fma18308
class - http://purl.org/sig/ont/fma/fma18309
class - http://purl.org/sig/ont/fma/fma18316
class - http://purl.org/sig/ont/fma/fma18317
class - http://purl.org/sig/ont/fma/fma85412
class - http://purl.org/sig/ont/fma/fma18318
class - h

individual - http://purl.bioontology.org/ontology/GSSO/003761
individual - http://purl.bioontology.org/ontology/GSSO/003770
individual - http://purl.bioontology.org/ontology/GSSO/003771
individual - http://purl.bioontology.org/ontology/GSSO/003781
individual - http://purl.bioontology.org/ontology/GSSO/003782
individual - http://purl.bioontology.org/ontology/GSSO/003783
individual - http://purl.bioontology.org/ontology/GSSO/003784
individual - http://purl.bioontology.org/ontology/GSSO/003813
individual - http://purl.bioontology.org/ontology/GSSO/003964
individual - http://purl.bioontology.org/ontology/GSSO/003791
individual - http://purl.bioontology.org/ontology/GSSO/003793
individual - http://purl.bioontology.org/ontology/GSSO/003804
individual - http://purl.bioontology.org/ontology/GSSO/003805
individual - http://purl.bioontology.org/ontology/GSSO/003807
individual - http://purl.bioontology.org/ontology/GSSO/003821
individual - http://purl.bioontology.org/ontology/GSSO/003822
individu

individual - http://purl.bioontology.org/ontology/GSSO/004708
individual - http://purl.bioontology.org/ontology/GSSO/004711
individual - http://purl.bioontology.org/ontology/GSSO/004713
individual - http://purl.bioontology.org/ontology/GSSO/004720
individual - http://purl.bioontology.org/ontology/GSSO/004721
individual - http://purl.bioontology.org/ontology/GSSO/004722
individual - http://purl.bioontology.org/ontology/GSSO/004727
individual - http://purl.bioontology.org/ontology/GSSO/004729
individual - http://purl.bioontology.org/ontology/GSSO/004732
individual - http://purl.bioontology.org/ontology/GSSO/005796
individual - http://purl.bioontology.org/ontology/GSSO/004733
individual - http://purl.bioontology.org/ontology/GSSO/004817
individual - http://purl.bioontology.org/ontology/GSSO/004818
individual - http://purl.bioontology.org/ontology/GSSO/004921
individual - http://purl.bioontology.org/ontology/GSSO/004922
individual - http://purl.bioontology.org/ontology/GSSO/004923
individu

individual - https://portal.issn.org/resource/issn/0890-7064
individual - https://portal.issn.org/resource/issn/0892-1997
individual - https://portal.issn.org/resource/issn/0929-8738
individual - https://portal.issn.org/resource/issn/0964-6639
individual - https://portal.issn.org/resource/issn/1053-8720
individual - https://portal.issn.org/resource/issn/1054-139X
individual - https://portal.issn.org/resource/issn/1067-5027
individual - https://portal.issn.org/resource/issn/1069-7438
individual - https://portal.issn.org/resource/issn/1077-792X
individual - https://portal.issn.org/resource/issn/1086-4873
individual - https://portal.issn.org/resource/issn/1089-4160
individual - https://portal.issn.org/resource/issn/1178-7074
individual - https://portal.issn.org/resource/issn/1360-0443
individual - https://portal.issn.org/resource/issn/1362-4962
individual - https://portal.issn.org/resource/issn/1369-1627
individual - https://portal.issn.org/resource/issn/1381-3439
individual - https://por

In [16]:
# To recap, we saw three Python types:
# (1) owlready2.entity.Thing [individual]
# (2) owlready2.entity.ThingClass [class]
# (3) owlready2.annotation.AnnotationPropertyClass
#     [annotation property]

# To find out which one an entity is, we check the
# entity's type with `isinstance`.

# For example:
is_annotation_property = isinstance(
    first_entity, owlready2.annotation.AnnotationPropertyClass
)
if is_annotation_property:
    print("This entity is an annotation property!")

This entity is an annotation property!


In [17]:
# Now let's pick an entity that is NOT an annotation property,
# using one of the GSSO IRIs.
test_iri = 'http://purl.bioontology.org/ontology/GSSO/000096'

# Passing the exact IRI to `search_one` fetches that entity:
test_entity = onto.search_one(iri = test_iri)

In [18]:
# What name (label) does this entity go by?
entity_labels = test_entity.label
print(entity_labels)

['transgender']


In [19]:
# Does the entity have synonyms?
# Check each kind of synonym property in turn.
for synonym_property in (
    "hasExactSynonym",
    "hasNarrowSynonym",
    "hasSynonym",
    "hasRelatedSynonym",
):
    print(getattr(test_entity, synonym_property))

[]
[]
['trans folk', 'trans-folk', 'transfolk']
[gsso_v2.0.0_rdf_xml.003677]


In [20]:
# The related synonym is itself an entity, so we can ask
# for its label too!
related_synonym = test_entity.hasRelatedSynonym[0]
print(related_synonym.label)

['transgender umbrella']


In [21]:
# What else is recorded about this entity?
# Judging by the output, the `IAO_0000115` property holds
# the entity's textual definitions.
entity_definitions = test_entity.IAO_0000115
print(entity_definitions)

['Having a gender (identity) which is different from the sex one was assigned at birth: being assigned male at birth but having a female gender or vice versa; or, pertaining to such people.', 'Not identifying with culturally conventional gender roles and categories of male or female; having changed gender identity from male to female or female to male, or identifying with elements of both, or having some other gender identity; or, pertaining to such people.', 'Transgender people have a gender identity or gender expression that differs from their assigned sex. In some select cases, transgender people are sometimes called transsexual if they desire medical assistance to transition from one sex to another. "Transgender" is also an umbrella term: in addition to including people whose gender identity is the opposite of their assigned sex (trans men and trans women), it may include people who are not exclusively masculine or feminine (people who are genderqueer or non-binary, including bigen

In [22]:
# Are there terms derived from this one?
# We can search for them!
derived_entities = onto.search(derived_from = test_entity)

# Entities without a label are skipped; the rest are printed
# as "IRI - first label".
for x in derived_entities:
    if not x.label:
        continue
    print(x.iri + " - " + x.label[0])

http://purl.bioontology.org/ontology/GSSO/000127 - transgender and gender nonconforming
http://purl.bioontology.org/ontology/GSSO/000130 - transgender person
http://purl.bioontology.org/ontology/GSSO/003677 - transgender umbrella
http://purl.bioontology.org/ontology/GSSO/000134 - transgender culture
http://purl.bioontology.org/ontology/GSSO/000144 - transgender slang
http://purl.bioontology.org/ontology/GSSO/000223 - transgender-related website
http://purl.bioontology.org/ontology/GSSO/004281 - transgender-related media
http://purl.bioontology.org/ontology/GSSO/000237 - transgender community
http://purl.bioontology.org/ontology/GSSO/000297 - transgender symbol
http://purl.bioontology.org/ontology/GSSO/000314 - transgender flag
http://purl.bioontology.org/ontology/GSSO/000329 - transgender linguistics
http://purl.bioontology.org/ontology/GSSO/000372 - transgender man
http://purl.bioontology.org/ontology/GSSO/000377 - transgender gay man
http://purl.bioontology.org/ontology/GSSO/000384 -

In [23]:
# One of these terms is "transgender slang"
# The IRI is "http://purl.bioontology.org/ontology/GSSO/000144"
transgender_slang_iri = 'http://purl.bioontology.org/ontology/GSSO/000144'

# Now we can get a list of transgender-slang terms:
transgender_slang_entity = onto.search_one(iri=transgender_slang_iri)

# `instances()` gives us the individuals that belong to this class.
transgender_slang_instances = transgender_slang_entity.instances()

for transgender_slang_instance in transgender_slang_instances:
    # `label` is a list and it may be empty, so check before taking
    # the first item (just like we did for the derived terms);
    # indexing an empty list would raise an IndexError.
    if transgender_slang_instance.label:
        print(transgender_slang_instance.iri + " - " + transgender_slang_instance.label[0])

http://purl.bioontology.org/ontology/GSSO/003301 - clock
http://purl.bioontology.org/ontology/GSSO/003302 - read
http://purl.bioontology.org/ontology/GSSO/007564 - second puberty
http://purl.bioontology.org/ontology/GSSO/003790 - pharmasexual
http://purl.bioontology.org/ontology/GSSO/000362 - truscum
http://purl.bioontology.org/ontology/GSSO/005862 - homovestite
http://purl.bioontology.org/ontology/GSSO/004174 - t4t
http://purl.bioontology.org/ontology/GSSO/000160 - stealth
http://purl.bioontology.org/ontology/GSSO/000164 - woodworking
http://purl.bioontology.org/ontology/GSSO/000163 - deep stealth
http://purl.bioontology.org/ontology/GSSO/000165 - peak trans
http://purl.bioontology.org/ontology/GSSO/000357 - baby trans
http://purl.bioontology.org/ontology/GSSO/000358 - boymode
http://purl.bioontology.org/ontology/GSSO/000359 - egg
http://purl.bioontology.org/ontology/GSSO/000360 - gender nazi
http://purl.bioontology.org/ontology/GSSO/000361 - girlmode
http://purl.bioontology.org/ontol

In [24]:
# Now we can search for transgender-slang in a selection of documents.

# Let's import re
import re

# We'll use a selection of the AIDS History Project documents
# to explore LGBT representation.

# First we need to load the TSV.

In [25]:
# But before that, we need to install pandas.
!{sys.executable} -m pip install pandas

import pandas as pd



In [26]:
# You can do this from your desktop as well if you've
# downloaded the file. Just make sure the file is in the
# same folder as your program!
# `df = pd.read_csv('NoMoreSilence_ProjectData.tsv', sep='\t')`

url = 'https://raw.githubusercontent.com/charliemacquarie/no-more-silence-workshop-sept2020/master/NoMoreSilence_ProjectData.tsv'
r = requests.get(url, allow_redirects=True)

# Stop right here with a clear error if the download failed, so we
# never save an error page to disk and then try to parse it as data.
r.raise_for_status()

# Let's write the downloaded content to a file. The `with` block
# closes the file for us before pandas reads it back in.
file_name = 'NoMoreSilence_ProjectData.tsv'
with open(file_name, 'wb') as tsv_file:
    tsv_file.write(r.content)

# The file is tab-separated, hence sep='\t'.
df = pd.read_csv(file_name, sep='\t')

In [27]:
# Let's look at the data columns.
# (`df.columns` holds the column labels; because it is the last
# expression in the cell, the notebook displays it for us.)
# Heads up: in the output below, a few names end with a trailing
# space (e.g. 'Date ' and 'Language '), so they have to be typed
# exactly that way when selecting those columns.
df.columns

Index(['Unnamed: 0', 'Collection Title', 'Title', 'Local Identifier ', 'Type',
       'Date ', 'Date Type', 'Publication/Origination Info', 'Creator 1 Name',
       'Creator 1 NameType', 'Creator 1 Source', 'Creator 2 Name',
       'Creator 2 NameType', 'Creator 2 Source', 'Creator 3 Name',
       'Creator 3 NameType', 'Creator 3 Source', 'Creator 4 Name',
       'Creator 4 NameType', 'Creator 4 Source', 'Format/Physical Description',
       'Language ', 'Language Code', 'Copyright Status', 'Copyright Statement',
       'Source', 'Subject (Name) 1 Name', 'Subject (Name) 1 Name Type',
       'Subject (Name) 1 Source', 'Subject (Name) 2 Name',
       'Subject (Name) 2 Name Type', 'Subject (Name) 2 Source',
       'Subject (Name) 3 Name', 'Subject (Name) 3 Name Type',
       'Subject (Name) 3 Source', 'Subject (Topic) 1 Heading',
       'Subject (Topic) 1 Heading Type', 'Subject (Topic) 1 Source',
       'Subject (Topic) 2 Heading', 'Subject (Topic) 2 Heading Type',
       'Subject (Topic

In [28]:
# Next let's print the corrected text for the first article,
# i.e. the 'Corrected Text' value in the row labelled 0:
df.loc[0, 'Corrected Text']

' proposition 64 the aids initiative in california prepared by: senate office of research elisabeth ke sten, director september 1986 207 s proposition 64: the aids initiative in california prepared by the senate office of research kathryn duke, j. d., m. p. h. september 1986 about things on which the public thinks long it commonly attains to think right. samuel johnson, lives of the poets,1778 k je are not used to thinking of illness as political. even when we recognize the political dimension to health care and research for example the fact that prevention of lead poisoning or curing sickle cell anaemia is less glamorous and less well financed than heart trans plants it is still difficult to conceive of disease itself as a political construct. dennis al man, aids in the mind of america,1986 table of contents page i. executive summary 1 ii. introduction 3 iii. text of the aids initiative 4 iv. basic information about aids 5 a. aids and the aids virus 5 b. seropositivity, arc, and aids 

In [29]:
# If we want to select corrected text for all documents:
# (This selects the whole 'Corrected Text' column. The notebook only
# shows the first and last few rows, with "..." standing in for the rest.)
df['Corrected Text']

0       proposition 64 the aids initiative in califor...
1       making your will california state aids legal ...
2       january 11,1997 community liaison committee c...
3       great republic iimsuraimce company i 470 sout...
4       san francisco aids foundation p. o. box 42618...
                             ...                        
644     i gu f lesbian gay aids coalition to un leash...
645     inside: if youvefiadpcp, you may need help wa...
646     sunday,11 november 1990 quality hotel capitol...
647     background th aids coalition to un la ash pow...
648                                                     
Name: Corrected Text, Length: 649, dtype: object

In [30]:
# Build each slang term's search pattern once, up front, instead of
# rebuilding it (and re-reading the label) for every single document.
# Each pattern matches the label as a whole word, ignoring case.
# Instances without a label are skipped: there is nothing to search for.
slang_patterns = []
for transgender_slang_instance in transgender_slang_instances:
    if transgender_slang_instance.label:
        slang_label = str(transgender_slang_instance.label[0])
        slang_pattern = re.compile(r"\b" + re.escape(slang_label) + r"\b", re.IGNORECASE)
        slang_patterns.append((slang_label, slang_pattern))

# Despite its name this is a dictionary: row number -> corrected text,
# for every document that mentions at least one slang term.
list_of_documents = {}
for row_num, corrected_text in enumerate(df['Corrected Text']):
    # A missing value is not a string and cannot be searched, so skip it.
    if not isinstance(corrected_text, str):
        continue
    for slang_label, slang_pattern in slang_patterns:
        if slang_pattern.search(corrected_text):
            list_of_documents[row_num] = corrected_text
            print(str(df['Title'][row_num]) + " - " + "Found " + slang_label + ".")

"Prop 64: The AIDS Initiative in California" - Found read.
Board Meetings - Found read.
Board Meetings - Found egg.
Correspondence - Found read.
Fundraising - Found read.
Fundraising - Found Roger.
Guardianship for Parents with HIV - Found read.
Historical Documents - Found fish.
AIDS Medical Training for Non-Physicians - Found clock.
AIDS Medical Training for Non-Physicians - Found read.
Board of Directors - Found read.
Board of Directors - Found egg.
United Way - Found read.
United Way - Found Roger.
Social Security Appeals for HIV Disability: An Advocates Manual - Found clock.
Social Security Appeals for HIV Disability: An Advocates Manual - Found read.
Social Security Appeals for HIV Disability: An Advocates Manual - Found fish.
Medical Privacy for the HIV-Infected Individual - Found read.
Annual Reports - Found clock.
Annual Reports - Found read.
Annual Reports - Found egg.
Comments/Testimony SSA HIV Disability System - Found read.
Comments/Testimony SSA HIV Disability System - Fo

Articles and interviews - Found stealth.
Articles and interviews - Found fish.
Articles and interviews - Found Roger.
Journalism, proposals, and related writings - Found read.
Controversy—news and correspondence - Found read.
Controversy—news and correspondence - Found Roger.
Articles and interviews - Found read.
Articles and interviews - Found fish.
Articles and interviews - Found stealth.
Journalism, proposals, and related writings - Found read.
Journalism, proposals, and related writings - Found read.
Journals - Found read.
Journals - Found brick.
Drafts, outline and notes - Found clock.
Drafts, outline and notes - Found read.
Manuscript edits, citations, and biography - Found read.
Articles and interviews - Found read.
Articles and interviews - Found brick.
Journalism, proposals, and related writings - Found read.
College journalism classwork, Oregon Daily Emerald - Found read.
Journalism, proposals, and related writings - Found read.
Obituaries, memorial, and related articles - Fo

Development and Fundraising Events: Fundraising Events: April Harvest, Leola Jones,&others - Found read.
Programs: HIV/AIDS Unity March: Planning Notes and Materials - Found read.
Development and Fundraising Events: Fundraising Events: Indecent Materials: Script (copyright 1990) - Found read.
Publications: AIDSwatch - Found clock.
Programs: Worlds AIDS Day Protest/Arrests - Found read.
Development and Fundraising Events: Fundraising Events: Death Valley Rose Challenge - Found read.
Development and Fundraising Events: Fundraising Events: Death Valley Rose Challenge - Found brick.
Programs: Burroughs Wellcome - Boycott - Advisory Board - Found brick.
Goals and Programs - Found read.
Goals and Programs - Found fish.
Development and Fundraising Events: Fundraising Events: Dance-a-thon: American Indian AIDS Institute [1994] - Found read.
Development and Fundraising Events: Fundraising Events: Dance-a-thon: American Indian AIDS Institute [1994] - Found Roger.
Programs: Candlelight Memorials/

Membership Meeting Minutes: April 1987 - Found read.
Membership Meeting Minutes: April 1988 - Found read.
Membership Meeting Minutes: August 1989 - Found brick.
Membership Meeting Minutes: October 1988 - Found read.
Membership Meeting Minutes: September 1988 - Found read.
Articles/Information Files: AIDS and Lesbians - Found read.
Articles/Information Files: AIDS and Lesbians - Found fish.
Membership Meeting Minutes: June 1988 - Found read.
Membership Meeting Minutes: June 1988 - Found fish.
Articles/Information Files: Impact of AIDS on Women - Found read.
Membership Meeting Minutes: March 1989 - Found read.
Membership Meeting Minutes: March 1989 - Found brick.
Articles/Information Files: Women and AIDS Books - Found read.
Membership Meeting Minutes: May 1989 - Found Roger.
Membership Meeting Minutes: January 1989 - Found read.
Articles/Information Files: Health Care Workers and Their Patients - Found read.
Membership Meeting Minutes: April 1989 - Found read.
Membership Meeting Minutes

In [31]:
# There are quite a few! But of course... Slang won't appear everywhere.
# Many of these are false positives.
#
# Let's try searching for trans identities...

# We already have the transgender entity and its derived terms.
# Let's try those! And add the derived terms and their synonyms.
# The four synonym properties we read from each entity, in the
# order their values are added to the list.
synonym_properties = ("hasExactSynonym", "hasNarrowSynonym", "hasSynonym", "hasRelatedSynonym")

# Start with the synonyms recorded on the transgender entity itself.
list_of_trans_words = []
for property_name in synonym_properties:
    list_of_trans_words += getattr(test_entity, property_name)

# Then, for every derived entity, add its first label (if it has one)
# followed by whatever synonyms it has.
for x in derived_entities:
    if x.label:
        list_of_trans_words.append(x.label[0])
    for property_name in synonym_properties:
        synonyms = getattr(x, property_name)
        if synonyms:
            list_of_trans_words.extend(synonyms)

print(list_of_trans_words)

['trans folk', 'trans-folk', 'transfolk', gsso_v2.0.0_rdf_xml.003677, 'transgender and gender nonconforming', 'transgender person', 'trans person', 'transgendered person', gsso_v2.0.0_rdf_xml.007932, 'transgender individual', 'transsexual person', 'transgender umbrella', 'trans*', 'transgender culture', 'transgender and transsexual culture', 'transgender slang', 'transgender-related website', 'transgender-related media', 'transgender media', 'transgender community', 'gender community', 'transgender symbol', 'transgender flag', 'transgender linguistics', 'transgender man', 'transgender boy', gsso_v2.0.0_rdf_xml.007931, 'man of trans experience', 'transsexual man', 'transgender gay man', 'transgender woman', 'transgender girl', gsso_v2.0.0_rdf_xml.002323, gsso_v2.0.0_rdf_xml.007930, 'transsexual woman', 'woman of trans experience', 'transgender lesbian', 'gay transgender woman', 'transbian', 'transgender-related film', 'transgender in film', 'transgender-related television program', 'tra

In [32]:
# Let's search!
# (Note: This might take a bit!)

# Build one whole-word, case-insensitive pattern per term up front.
# Some entries in list_of_trans_words are ontology entities rather
# than plain strings, so every term is converted to text first --
# that same text is what we print when a match is found.
trans_patterns = []
for trans_word in list_of_trans_words:
    trans_text = str(trans_word)
    trans_pattern = re.compile(r"\b" + re.escape(trans_text) + r"\b", re.IGNORECASE)
    trans_patterns.append((trans_text, trans_pattern))

# Row number -> corrected text, for the slang-matching documents
# that also mention at least one of the trans words.
final_list_of_documents = {}
for row_num, corrected_text in list_of_documents.items():
    for trans_text, trans_pattern in trans_patterns:
        if trans_pattern.search(corrected_text):
            # Store this document's own text (not a variable left over
            # from an earlier loop).
            final_list_of_documents[row_num] = corrected_text
            print(str(df['Title'][row_num]) + " - " + "Found " + trans_text + ".")

Morrison and Foerster Pro Bono AIDS Panel Forms and Procedures Manual - Found gender community.
Other Organizations/Cooperation: Asian AIDS Project - Found gender community.
Publications: Action Alert - Found gender community.


In [33]:
# So... Seems we didn't have many solid hits.
# But we can expand this list if we want.
# We can add descendants of these terms or historical terms,
# etc., etc.
#
# Maybe we'd get more then!

In [34]:
# But let's try something else...
# Let's try what we did above to look at LGBT identities
# in our dataset.
# Look up the three entities by IRI, in the order lesbian, gay, bisexual.
lesbian_entity, gay_entity, bisexual_entity = (
    onto.search_one(iri=identity_iri)
    for identity_iri in (
        'http://purl.bioontology.org/ontology/GSSO/000381',
        'http://purl.bioontology.org/ontology/GSSO/001591',
        'http://purl.bioontology.org/ontology/GSSO/001590',
    )
)

# Can we foresee any issues with this?

In [35]:
# We'll turn our previous code into a function!

def get_list_of_terms(entity_to_use):
    """Collect search terms for an ontology entity and the entities derived from it.

    The terms are gathered in this order:

    1. the exact, narrow, plain and related synonyms of `entity_to_use`
       (note that its own label is not added);
    2. for each entity returned by `onto.search(derived_from=entity_to_use)`:
       its first label, if it has one, followed by its exact, narrow,
       plain and related synonyms.

    Synonym values are not always plain strings -- they can be other
    ontology entities -- so every term is converted with `str()`. That way
    callers can safely join the terms into messages or regular expressions.

    Parameters:
        entity_to_use: the ontology entity whose terms we want.

    Returns:
        A list of strings. Duplicates are kept.
    """
    synonym_properties = ('hasExactSynonym', 'hasNarrowSynonym', 'hasSynonym', 'hasRelatedSynonym')

    def synonyms_of(entity):
        # All synonym values of one entity, as text, property by property.
        found = []
        for property_name in synonym_properties:
            found.extend(str(term) for term in (getattr(entity, property_name) or ()))
        return found

    list_of_words = synonyms_of(entity_to_use)

    for derived_entity in onto.search(derived_from = entity_to_use):
        if derived_entity.label:
            list_of_words.append(str(derived_entity.label[0]))
        list_of_words.extend(synonyms_of(derived_entity))

    return list_of_words

# And use that function to build one word list per identity
# (lesbian, gay, bisexual -- in that order):
list_of_lesbian_words, list_of_gay_words, list_of_bisexual_words = (
    get_list_of_terms(identity_entity)
    for identity_entity in (lesbian_entity, gay_entity, bisexual_entity)
)

In [None]:
# Let's look at all the documents so we have a more direct
# comparison.
list_of_lesbian_documents = {}
list_of_gay_documents = {}
list_of_bisexual_documents = {}
list_of_transgender_documents = {}


def compile_term_patterns(terms):
    """Return (term text, compiled pattern) pairs for a list of terms.

    Each pattern matches its term as a whole word, ignoring case. Terms
    are converted with str() first because the word lists can contain
    ontology entities as well as plain strings.
    """
    term_patterns = []
    for term in terms:
        term_text = str(term)
        pattern = re.compile(r"\b" + re.escape(term_text) + r"\b", re.IGNORECASE)
        term_patterns.append((term_text, pattern))
    return term_patterns


# One entry per identity: the name used in the printed message, the
# search patterns (built once, not once per document), and the
# dictionary (row number -> corrected text) that collects the hits.
identity_searches = [
    ("lesbian", compile_term_patterns(list_of_lesbian_words), list_of_lesbian_documents),
    ("gay", compile_term_patterns(list_of_gay_words), list_of_gay_documents),
    ("bisexual", compile_term_patterns(list_of_bisexual_words), list_of_bisexual_documents),
    ("transgender", compile_term_patterns(list_of_trans_words), list_of_transgender_documents),
]

# (Be patient! This might take a second.)
# For each identity we stop at the first matching word (the break
# statement), which makes it a bit faster: one hit is enough to
# count the document.
for row_num, corrected_text in enumerate(df['Corrected Text']):
    # A missing value is not a string and cannot be searched, so skip it.
    if not isinstance(corrected_text, str):
        continue
    for identity_name, term_patterns, matching_documents in identity_searches:
        for term_text, pattern in term_patterns:
            if pattern.search(corrected_text):
                matching_documents[row_num] = corrected_text
                print(str(df['Title'][row_num]) + " - " + "Found " + identity_name + " word - " + term_text + ".")
                break

Board Meetings - Found lesbian word - Lesbian.
Correspondence - Found lesbian word - Lesbian.
Correspondence - Found gay word - gay man.
Fundraising - Found lesbian word - Lesbian.
Fundraising - Found gay word - gay man.
Historical Documents - Found lesbian word - Lesbian.
AIDS Medical Training for Non-Physicians - Found gay word - gay male.
Board of Directors - Found lesbian word - Lesbian.
"Just Say No to Creditors..." - Found lesbian word - Lesbian.
United Way - Found lesbian word - Lesbian.
Referral panel lists - Found lesbian word - Lesbian.
Annual Reports - Found lesbian word - Lesbian.
Annual Reports - Found gay word - gay man.
Comments/Testimony SSA HIV Disability System - Found lesbian word - Lesbian.
Board of Directors nominees - Found lesbian word - Lesbian.
The Benefits Game - Found lesbian word - Lesbian.
Board of Directors Retreat - Found lesbian word - Lesbian.
Morrison and Foerster Pro Bono AIDS Panel Forms and Procedures Manual - Found lesbian word - Lesbian.
Morrison 

In [None]:
# Let's visualize this a bit.

# First we need to install and import matplotlib.
!{sys.executable} -m pip install matplotlib

import matplotlib.pyplot as plt

In [None]:
# We'll need a count for each of the document dictionaries first.
# (The length of a dictionary is its number of keys, i.e. the number
# of matching documents.)
lesbian_doc_num, gay_doc_num, bisexual_doc_num, transgender_doc_num = map(
    len,
    (
        list_of_lesbian_documents,
        list_of_gay_documents,
        list_of_bisexual_documents,
        list_of_transgender_documents,
    ),
)

In [None]:
# Then we'll make a dataframe with one row per identity:
# the identity's letter and how many documents matched it.
column_names = ['Identity', 'No. Documents']
identity_letters = ['L', 'G', 'B', 'T']
document_counts = [lesbian_doc_num, gay_doc_num, bisexual_doc_num, transgender_doc_num]

data = dict(zip(column_names, (identity_letters, document_counts)))

identity_df = pd.DataFrame(data, columns = column_names)
print(identity_df)

In [None]:
# Then we can begin constructing a bar plot: one blue bar per
# identity, as tall as its number of documents.
bar_axes = plt.gca()
bar_axes.bar(identity_df["Identity"], identity_df["No. Documents"], color = "blue")
bar_axes.set_xlabel("Identity")
bar_axes.set_ylabel("No. Documents")
bar_axes.set_title("Number of Documents for LGBT Identities")
plt.show()

In [None]:
# We could also do a pie chart!
# Each slice is one identity, labelled with its letter and its
# share of the total shown as a percentage with one decimal place.
slice_sizes = identity_df["No. Documents"]
slice_labels = identity_df["Identity"]

fig, ax = plt.subplots()
ax.set_title("Number of Documents for LGBT Identities")
ax.pie(slice_sizes, labels=slice_labels, autopct='%1.1f%%')
ax.axis('equal')  # Equal aspect ratio ensures the pie chart is circular.

plt.show()

In [None]:
# And you did it! Congrats!