# fuzzy string matching
## 📚 resources
- `seatgeek` / [`thefuzz`](https://github.com/seatgeek/thefuzz) > README > [Usage](https://github.com/seatgeek/thefuzz/blob/83bea3d4a109a5d0c2e07334b504953cda4959c5/README.rst#Usage)
## details
- **to select, from one or more possible labels, the label that best matches the IRI slug**  
- 🚩 *This may be possible because ONS IRI slugs are created from labels with spaces, punctuation, etc. stripped*


In [80]:
from ons_some import ons_some
from ons_all import ons_all
# see below

In [81]:
from thefuzz import fuzz, process
from rdflib import Graph, RDFS
import re

In [82]:
# Write a Markdown report ("fuzzy_matching.md") that, for each entry in
# `ons_all`, compares the IRI slug (last path segment of entry[0]) against
# every candidate label in entry[1:] using three thefuzz scorers.
#
# Each entry is indexed as entry[0] = IRI and entry[1:] = candidate labels;
# entries whose first candidate (entry[1]) is falsy are skipped.

max_entries = 100  # output was too long, so only the first 100 usable entries are reported
# Greedy on purpose (same pattern as before): removes everything from the
# first "(" through the last ")" in a label.
paren_pattern = re.compile(r"[\(].*[\)]")

# Explicit UTF-8 so labels with non-ASCII characters are written the same way
# on every platform; the file is only written, never read back.
with open("fuzzy_matching.md", "w", encoding="utf-8") as mdfile:
    mdfile.write("# NOTE\nFor matching, parenthesized text is stripped from `:creator_display` labels\n")
    counter = 0
    for entry in ons_all:  # imported in the first cell
        if counter >= max_entries:
            break  # limit reached; no need to walk the rest of ons_all
        if not entry[1]:
            continue  # nothing to match against

        iri_slug = entry[0].split('/')[-1]
        mdfile.write(f"# {entry[0]}\n")
        mdfile.write("## Details\n")
        mdfile.write(f"- **IRI slug**: {iri_slug}\n")

        # Reset per entry so a label from a previous entry is never reported
        # for a resource that has no rdfs:label of its own.
        label = None
        try:
            data = Graph().parse(f"{entry[0]}.nt")
        except Exception:
            # Best effort: any fetch/parse failure is reported in the output
            # instead of aborting the whole run (Ctrl-C still interrupts).
            mdfile.write(f"- [!] Resource with IRI {entry[0]} does not seem to exist in ONS\n")
        else:
            # If several rdfs:label triples exist, the last one iterated wins.
            for s, p, o in data.triples((None, RDFS.label, None)):
                label = o
            if label is None:
                mdfile.write(f"- [!] No rdfs:label found for {entry[0]}\n")
            else:
                mdfile.write(f"- **rdfs:label**: {label}\n")

        mdfile.write("## Fuzzy matching results\n")
        for item in entry[1:]:
            # Strip parenthesized text and surrounding whitespace before scoring.
            updated = paren_pattern.sub("", item).strip()
            mdfile.write(f"### {iri_slug} <---> {updated}\n")
            mdfile.write(f"- fuzz.ratio = {fuzz.ratio(iri_slug, updated)}\n")
            mdfile.write(f"- fuzz.partial_ratio = {fuzz.partial_ratio(iri_slug, updated)}\n")
            mdfile.write(f"- fuzz.token_sort_ratio = {fuzz.token_sort_ratio(iri_slug, updated)}\n")
        counter += 1
