export a unique, synthesized set of data

# aggregating items across providers

The data combined here is reduced from the cached original requests.

## thingiverse

In [1]:

import os, pandas, urllib, itertools
from toolz.curried import *
from pandas import DataFrame, Series, Index
with __import__("importnb").Notebook():
    from app import App
    import github, thingiverse, ravelry


In [2]:
def get_things(app):
    """Load the thingiverse records and flatten their tags to plain names.

    Registers the ``thingiverse`` module on ``app.manager``, takes the frame
    returned by ``app.compact()`` and indexes it by ``id``. The ``tags``
    column is then replaced by one list of tag names per row.

    Assumes each ``tags`` cell is a list of mappings that carry a ``"name"``
    key -- TODO confirm against the thingiverse payload.

    Returns the frame indexed by ``id``.
    """
    app.manager.register(thingiverse)
    things = app.compact().set_index("id")

    # One row per individual tag; rows that had nothing to explode are dropped.
    tag_records = things["tags"].explode().dropna()
    tag_names = tag_records.apply(operator.itemgetter("name"))

    # Collect the names back into one list per original row (index level 0).
    # Assignment aligns on the index, so rows without any tag end up missing.
    things["tags"] = tag_names.groupby(pandas.Grouper(level=0)).agg(list)
    return things

In [3]:
def get_repos(app):
    """Load the github records and expose their topics under ``tags``.

    Registers the ``github`` module on ``app.manager`` and returns the frame
    from ``app.compact()`` with a ``tags`` column copied from ``topics``, so
    the column name matches the other providers.
    """
    app.manager.register(github)
    frame = app.compact()
    frame["tags"] = frame["topics"]
    return frame

In [4]:
def get_patterns(app):
    """Load the ravelry records and derive a ``tags`` column for them.

    Registers the ``ravelry`` module on ``app.manager`` and takes the frame
    from ``app.compact()``. Tags are built from two sources per pattern:

    * the ``name`` of every entry in ``pattern_categories``
    * the ``permalink`` of every entry in ``pattern_attributes``, with
      hyphens turned into spaces

    Assumes both columns hold lists of mappings -- TODO confirm against the
    ravelry payload.

    Returns the original frame joined with the new ``tags`` column.
    """
    app.manager.register(ravelry)
    patterns = app.compact()

    # Explode to one row per entry, then expand each entry into columns.
    category_names = patterns.pattern_categories.explode().apply(Series).name
    attribute_names = (
        patterns.pattern_attributes.explode()
        .apply(Series)
        .permalink.str.replace("-", " ")
    )

    # Stack both sources and gather them into one list per original row.
    # NOTE(review): unlike get_things, missing values are not dropped before
    # aggregating -- confirm whether that is intended.
    tags = (
        pandas.concat([category_names, attribute_names], axis=0)
        .groupby(level=0)
        .agg(list)
        .rename("tags")
    )
    return patterns.join(tags)

In [5]:
@App.impl
def get_frames(app):
    """Stack the github, thingiverse and ravelry records into one frame.

    Each provider is loaded through its own fresh ``App()`` and re-indexed by
    a URL. Columns are selected and renamed so the providers share names
    (``name``, ``description``, ``tags``, ``license`` where available).

    NOTE(review): the ``app`` argument is not used in this body -- confirm
    that loading through separate ``App()`` instances is intended.

    Returns the row-wise concatenation, in the order repos, things, patterns.
    """
    # github: indexed by repository url; the name is the last url segment.
    repos = get_repos(App()).set_index("url")
    repo_names = repos.index.to_series().str.rpartition("/")[2].rename("name")
    repo_rows = (
        repos["description stargazerCount forkCount licenseInfo tags".split()]
        .join(repo_names)
        .rename(columns=dict(licenseInfo="license"))
    )

    # thingiverse: indexed by the public url of the thing.
    thing_rows = get_things(App()).set_index("public_url")[
        "name tags license description".split()
    ]

    # ravelry: the url is rebuilt from the permalink; notes act as description.
    patterns = get_patterns(App())
    pattern_urls = "https://www.ravelry.com/patterns/library/" + patterns.permalink
    pattern_rows = patterns.set_index(pattern_urls)["name notes tags".split()].rename(
        columns=dict(notes="description")
    )

    return pandas.concat([repo_rows, thing_rows, pattern_rows], axis=0)

## text analysis

`all` is a synthesis of all of the services, with more than 1,400 entries in total.

In [6]:
@App.impl
def finalize(df):
    """Summarise the combined frame and write it to ``at.json.gz``.

    Displays the number of entries per host (taken from the index urls) and
    the 100 most frequent lower-cased tags, then writes the frame, without
    rows whose index value is duplicated, to ``at.json.gz``.

    NOTE(review): ``df`` is not used in this body; the frame is read from a
    module-level ``app`` instead, which in this file is only bound inside the
    ``__main__`` cell -- confirm this is intended.
    """
    # The notebook text refers to this synthesized frame as `all`.
    combined = app.compact()

    # Entries per provider host, from the url index.
    hosts = (
        combined.index.to_series()
        .dropna()
        .apply(urllib.parse.urlparse)
        .apply(operator.attrgetter("netloc"))
    )
    hosts.value_counts().to_frame().pipe(display)

    # Most common tags across every service, compared case-insensitively.
    tag_counts = combined.tags.explode().str.lower().value_counts()
    top_tags = tag_counts.head(100)
    top_tags.to_frame("tags").T.style.set_caption(
        "aggregated tags from all of the services"
    ).pipe(display)

    # Keep only the first row for each index value when exporting.
    is_repeat = combined.index.duplicated()
    combined[~is_repeat].to_json("at.json.gz")

In [7]:
    # Entry point: only runs when this module is executed as `__main__`.
    if __name__ == "__main__":
        # Module-level `app`; `finalize` above reads this name.
        app = App()
        # Hand this module object itself to the app. `run` is True only when
        # `__file__` is defined in this namespace.
        # NOTE(review): presumably this separates script execution from an
        # interactive notebook session -- confirm.
        app.main(__import__(__name__), run=locals().get("__file__") is not None)

Unnamed: 0,count
www.ravelry.com,690
www.thingiverse.com,552
github.com,208


tags,assistivetech,assistive technology,written pattern,assistive device,medical,disability,accessibility,occupational therapy,in the round,one piece,other,medical device accessory,adaptive,handicap,worked flat,disabled,assistive,therapy aid,rehabilitation,ergotherapie,prosthetic,adult,hand,seamless,wheelchair,assistive-technology,screen-reader,unisex,braille,assistive tech,seamed,limited mobility,prosthetic hand,phototutorial,aid,medical device access,discapacidad,prosthesis,ipad,bottom up,ribbed,ceapat,imserso,accesibilidad,3 dimensional,aac,hand tools,technology,e-nable,therapy,female,adaptive technology,occupational,switch access,customizer,speech therapy,teen,at switch,holder,pen,enable,communication,switch,key,rectangle,blind,spinal cord injury,stripes colorwork,arduino,keyboard,chart,disabilities,textured,speech,joystick,printable prosthetics,accesibility,assistive device for elderly or disabled,handle,access,terapia ocupacional,tech,arm,iphone,a11y,keyguard,button,3d prosthetic,exoskeleton,aide technique,pen holder,augmentative,mobility aid accessory,pencil holder,assitive technology,arthritis,finger,visually impaired,child,ios
tags,1213,1197,579,518,482,476,473,331,277,248,236,224,222,206,202,202,195,192,182,178,170,160,156,155,145,137,128,120,120,120,109,108,108,106,105,94,93,93,89,88,88,85,85,77,73,73,72,72,68,67,65,64,63,61,61,60,58,57,56,56,56,55,55,52,51,50,50,49,49,48,47,47,47,46,46,45,45,45,44,44,44,44,41,41,41,41,41,40,40,40,40,40,40,40,40,40,40,40,39,38
