Export a de-duplicated set of records synthesized from the cached Thingiverse, Ravelry, and GitHub search results.

In [4]:
    # `import urllib` alone does not guarantee the `parse` submodule is loaded;
    # a later cell calls urllib.parse.urlparse, so import the submodule explicitly.
    import urllib.parse

    import pandas, platformdirs, requests_cache
    # NOTE(review): the star import presumably supplies the `operator` and `get`
    # names used by later cells; kept as-is so those cells keep working.
    from toolz.curried import *
    from pandas import Series, DataFrame, Index

    # thingiverse responses live in their own folder of the a11yhood user cache
    cache = platformdirs.user_cache_path("a11yhood") / "thingiverse"
    # one cached session for search responses, one for per-thing detail responses
    search_session = requests_cache.CachedSession(cache / "search_responses.sqlite")
    thing_session = requests_cache.CachedSession(cache / "thing_responses.sqlite")

In [5]:
    def get_thing_details_tidy(df, index):
        """Flatten the nested columns of the thing-detail frame.

        Parameters
        ----------
        df : DataFrame
            One row per thing-detail response. Must contain the columns
            ``zip_data``, ``default_image``, ``edu_details_parts``, ``education``,
            ``description_html``, ``details_parts`` and ``ancestors``
            (a missing one raises ``KeyError``).
        index : DataFrame
            Frame whose columns are already available elsewhere; every column of
            ``df`` that also appears in ``index.columns`` is left out of the result.

        Returns
        -------
        DataFrame
            A new frame (the input is not modified) in which

            * ``files`` holds, per row, the list of every ``url`` found under any
              key of ``zip_data``;
            * ``details_parts`` holds one string per row: each section rendered as
              ``<h2>name</h2>`` followed by its data values, joined by newlines;
            * ``ancestors`` holds, per row, the list of ancestor ``public_url`` values;
            * ``zip_data``, ``default_image``, ``edu_details_parts``, ``education``
              and ``description_html`` are removed.
        """
        df = df[df.columns.difference(index.columns)]

        # Collect every url in zip_data into one list per row.
        # dropna() skips the NaN that explode() emits for an empty list; without it
        # itemgetter("url") is applied to a float and raises TypeError.
        files = (
            df["zip_data"]
            .apply(Series)
            .stack()
            .explode()
            .dropna()
            .apply(operator.itemgetter("url"))
            .groupby(level=0)
            .agg(list)
            .rename("files")
        )
        # the default image is in the files
        df = df.drop(
            columns=["zip_data", "default_image", "edu_details_parts", "education", "description_html"]
        ).join(files)

        # TODO there is information in details_parts than can be extracted for search.
        # one row per (thing, section type)
        sections = df["details_parts"].explode().apply(Series).set_index("type", append=True)
        # detail_parts is segmented string info we cram into a single block
        section_text = (
            sections["data"]
            .explode()
            .dropna()
            .apply(Series)
            .fillna("")
            .astype(str)
            .apply("\n".join, axis=1)
            .rename("parts")
        )
        sections = sections.join(section_text).reset_index(1, drop=True)
        details_block = (
            (("<h2>" + sections["name"] + "</h2>") + "\n" + sections["parts"])
            .dropna()  # sections without any data contribute nothing
            .groupby(level=0)
            .agg("\n".join)
            .rename("details_parts")
        )
        df = df.drop(columns="details_parts").join(details_block)

        # rows without ancestors end up as NaN after index alignment
        df["ancestors"] = (
            df["ancestors"].explode().dropna().apply(Series)["public_url"].groupby(level=0).agg(list)
        )
        return df

In [6]:
    # search hits: one row per hit across all cached search responses, keyed by id
    df = (
        Series(search_session.cache.responses)
        .apply(operator.methodcaller("json"))
        .apply(operator.itemgetter("hits"))
        .explode()
        .apply(Series)
        .set_index("id")
    )
    # add the tidied per-thing detail columns that the hits do not already carry
    things = df.join(
        Series(thing_session.cache.responses)
        .apply(operator.methodcaller("json"))
        .apply(Series)
        .pipe(get_thing_details_tidy, df)
        .set_index("id")
    )
    # reduce each row's tag records to a plain list of tag names
    things["tags"] = (
        things["tags"]
        .explode()
        .dropna()
        .apply(operator.itemgetter("name"))
        .groupby(level=0)
        .agg(list)
    )

## ravelry

In [7]:
    import requests_cache, platformdirs
    # ravelry responses live in their own folder of the a11yhood user cache
    cache = platformdirs.user_cache_path("a11yhood") / "ravelry"
    # one cached session for the search responses, one for the pattern responses
    search_cache, patterns_cache = (
        requests_cache.CachedSession(cache / filename)
        for filename in ("search_responses.json", "patterns_responses.json")
    )

In [8]:
    # search results: one row per entry of each response's "patterns" list, keyed by id
    df = (
        Series(search_cache.cache.responses)
        .apply(operator.methodcaller("json"))
        .apply(operator.itemgetter("patterns"))
        .explode()
        .apply(Series)
        .set_index("id")
    )
    # pattern responses: here "patterns" is a dict, so its values are the records
    g = (
        Series(patterns_cache.cache.responses)
        .apply(operator.methodcaller("json"))
        .apply(operator.itemgetter("patterns"))
        .apply(dict.values)
        .explode()
        .apply(Series)
        .set_index("id")
    )
    # only bring over the columns the search results do not already have
    patterns = df.join(g[g.columns.difference(df.columns)])

In [9]:
    # Build one "tags" list per pattern from its category names and its attribute
    # permalinks (dashes turned into spaces).
    patterns = patterns.join(
        pandas.concat(
            [
                patterns["pattern_categories"].explode().apply(Series)["name"],
                patterns["pattern_attributes"].explode().apply(Series)["permalink"].str.replace("-", " "),
            ],
            axis=0,
        )
        # an empty category/attribute list explodes to NaN; drop those so the
        # tag lists contain only real tag strings
        .dropna()
        .groupby(level=0)
        .agg(list)
        .rename("tags")
    )

## github

In [10]:
    # github responses live in their own folder of the a11yhood user cache
    cache = platformdirs.user_cache_path("a11yhood").joinpath("github")
    # NOTE: from here on `search_cache` is a file path, not a cached session
    search_cache = cache.joinpath("search_responses.pkl")

In [11]:
    import shelve

    # SECURITY: shelve is backed by pickle; only open cache files this project wrote.
    # str() keeps this working on Python < 3.11, where shelve.open rejects Path objects.
    with shelve.open(str(search_cache)) as db:
        # one row per repository node across every stored search response, keyed by id
        repos = (
            Series(db)
            .apply(operator.itemgetter("data"))
            .apply(operator.itemgetter("search"))
            .apply(operator.itemgetter("edges"))
            .explode()
            .apply(operator.itemgetter("node"))
            .apply(Series)
            .set_index("id")
        )

    # topic names per repository, dashes turned into spaces
    repos["tags"] = (
        repos["repositoryTopics"]
        .apply(operator.itemgetter("edges"))
        .explode()
        # a repository without topics explodes to NaN, which itemgetter cannot index;
        # such repositories simply end up with a missing "tags" value
        .dropna()
        .apply(operator.itemgetter("node"))
        .apply(operator.itemgetter("topic"))
        .apply(operator.itemgetter("name"))
        .str.replace("-", " ")
        .groupby(level=0)
        .agg(list)
    )

    # keep only the counts from the nested issue / pull-request objects
    repos["issues"] = repos["issues"].apply(get("totalCount"))
    repos["pullRequests"] = repos["pullRequests"].apply(get("totalCount"))
    # replace the nested license object by its name; repos without a license get NaN
    # NOTE(review): this is an index-on-index join; if ids repeat it will repeat rows —
    # confirm ids are unique here
    repos = repos.drop(columns="licenseInfo").join(repos["licenseInfo"].dropna().apply(get("name")))

## text analysis

`all` is a synthesis of the records from every service (GitHub, Thingiverse, and Ravelry): roughly 1,400 entries in total (1,390 in the run shown below).

In [12]:
    # NOTE: `all` shadows the builtin of the same name; later cells use this frame.
    # Every source is re-indexed by its public URL and trimmed to the shared columns.
    r = repos.set_index("url")
    all = pandas.concat(
        [
            # github: the repository name is the last path segment of the url
            r["description stargazerCount forkCount licenseInfo tags".split()]
            .join(r.index.to_series().str.rpartition("/")[2].rename("name"))
            .rename(columns={"licenseInfo": "license"}),
            # thingiverse
            things.set_index("public_url")["name tags license description".split()],
            # ravelry: the url is built from the permalink, notes act as the description
            patterns.set_index(
                "https://www.ravelry.com/patterns/library/" + patterns.permalink
            )["name notes tags".split()].rename(columns={"notes": "description"}),
        ],
        axis=0,
    )
    all

Unnamed: 0,description,stargazerCount,forkCount,license,tags,name
https://github.com/ai-collection/ai-collection,The Generative AI Landscape - A Collection of ...,8017.0,797.0,MIT License,"[artificial intelligence, collections, ai, ass...",ai-collection
https://github.com/OptiKey/OptiKey,OptiKey - Full computer control and speech wit...,4323.0,505.0,GNU General Public License v3.0,"[eye tracking, eyetracking, eye tracker, eyes,...",OptiKey
https://github.com/brunopulis/awesome-a11y,A curate list about A11Y,1859.0,145.0,Creative Commons Zero v1.0 Universal,"[accessibility, wai aria, wcag, a11y, awesome ...",awesome-a11y
https://github.com/Stypox/dicio-android,Dicio assistant app for Android,931.0,87.0,GNU General Public License v3.0,"[assistant, assistive technology, personal ass...",dicio-android
https://github.com/cboard-org/cboard,Augmentative and Alternative Communication (AA...,673.0,187.0,GNU General Public License v3.0,"[aac, autism, cerebral palsy, progressive web ...",cboard
...,...,...,...,...,...,...
https://www.ravelry.com/patterns/library/home-made-ace-bandages,,,,,"[Medical, one piece, seamless, written pattern]","Home Made ""ACE"" Bandages"
https://www.ravelry.com/patterns/library/laryngectomy-covers-stoma-covers,,,,,"[Medical, Medical, written pattern, medical de...",Laryngectomy Covers (Stoma Covers)
https://www.ravelry.com/patterns/library/easy-knitted-stoma-bib,,,,,"[Medical, Medical, written pattern, medical de...",Easy knitted stoma bib
https://www.ravelry.com/patterns/library/herbal-neck-wrap,Other Notions Needed:\r\n\r\n3 to 4 inch sewab...,,,,"[Medical, rectangle, worked flat, in the round]",Herbal Neck Wrap


In [13]:
    # number of entries per host, taken from the network location of each index url
    all.index.to_series().apply(
        lambda url: urllib.parse.urlparse(url).netloc
    ).value_counts().to_frame()

Unnamed: 0,count
www.ravelry.com,643
www.thingiverse.com,491
github.com,256


In [14]:
    # case-insensitive frequency of every tag across all services
    counts = all.tags.explode().str.lower().value_counts()
    # keep the 100 most frequent tags and show them as one wide, captioned row
    tags = counts.head(100)
    tags.to_frame("tags").T.style.set_caption("aggregated tags from all of the services")

tags,written pattern,assistive technology,medical,in the round,one piece,accessibility,assistivetech,screen reader,medical device accessory,worked flat,other,adaptive,therapy aid,adult,seamless,unisex,disability,seamed,phototutorial,medical device access,assistive device,ribbed,occupational therapy,wai aria,bottom up,wcag,3 dimensional,female,teen,stripes colorwork,textured,handicap,chart,disabled,prosthetic,rectangle,assistive,rehabilitation,ergotherapie,hand,python3,child,chrome extension,top cuff down,firefox extension,opera extension,video tutorial,mature,a11y,scala,haskell,scalameta,buttonholes,neovim plugin,wheelchair,muff,other accessibility,braille,mid-calf,animal,limited mobility,top down,toe up,prosthetic hand,buttoned,aac,icord,prosthesis,bobble or popcorn,aid,amigurumi,reversible,cables,assistive tech,discapacidad,toddler,therapy,kitchener,occupational,male,technology,arduino,heel flap,blind,nvda,"beanie, toque",communication,ceapat,imserso,joystick,preemie,switch access,e-nable,spinal cord injury,sideways,beads,hand tools,adaptive technology,accesibilidad,ipad
tags,534,448,393,254,236,229,227,224,217,192,188,182,167,154,149,116,113,103,100,90,89,85,78,77,77,76,72,58,55,49,46,45,45,44,44,44,42,40,38,37,36,35,34,33,33,32,32,32,32,32,32,32,32,32,31,31,30,30,30,28,28,26,26,26,25,24,23,23,21,21,20,20,19,19,19,19,19,19,18,18,18,17,17,17,17,17,17,17,17,17,16,16,16,15,15,15,15,15,15,15


In [15]:
# keep the first row for each url, then write the result as gzip-compressed JSON
all.loc[~all.index.duplicated(keep="first")].to_json("at.json.gz")