In [1]:
# Two-letter language codes, as used by the first dataset.
dataset_1 = [
    "da", "bg", "cs", "el", "de", "en", "fr", "es", "fi", "lt",
    "hu", "it", "nl", "pl", "pt", "ro", "sk", "sl", "sv",
]

# Three-letter codes used by the second dataset. Not every entry is already in
# its ISO 639-3 form: "ger" and "fre" come out as "deu" and "fra" in the
# conversion output further down.
dataset_2 = [
    "bul", "dan", "ger", "ces", "spa", "fin", "ell",
    "hun", "ita", "fre", "lit", "nld", "ron", "slk",
]

# Hand-written link between the two code styles; only two pairs are filled in.
mapping = {"da": "dan", "es": "spa"}

In [2]:
from qq import Database, IdType

# Load the QQ database once; every lookup and conversion in the cells below
# goes through this `db` object.
db = Database.load()

In [3]:
# Convert every code from both datasets to ISO 639-3 and print one
# "input -> converted" pair per line (input left-aligned in 4 columns).
all_codes = [*dataset_1, *dataset_2]
for lang in all_codes:
    iso_639_3 = db.convert(lang, IdType.ISO_639_3)
    print(f"{lang:4} -> {iso_639_3}")

da   -> dan
bg   -> bul
cs   -> ces
el   -> ell
de   -> deu
en   -> eng
fr   -> fra
es   -> spa
fi   -> fin
lt   -> lit
hu   -> hun
it   -> ita
nl   -> nld
pl   -> pol
pt   -> por
ro   -> ron
sk   -> slk
sl   -> slv
sv   -> swe
bul  -> bul
dan  -> dan
ger  -> deu
ces  -> ces
spa  -> spa
fin  -> fin
ell  -> ell
hun  -> hun
ita  -> ita
fre  -> fra
lit  -> lit
nld  -> nld
ron  -> ron
slk  -> slk


In [4]:
# A deliberately mixed bag of identifier styles; the grouping output further
# down shows "nld", "dut", "Q7411" and "dutc1256" all landing on 'dutc1256'.
dataset_3 = ["nld", "dut", "Q7411", "dutc1256", "fr", "fre", "ell"]

In [5]:
from collections import defaultdict
from pprint import pprint

# Bucket every input code under the Glottocode it converts to, so that all
# spellings of the same language end up in one set.
mapping = defaultdict(set)
for lang in [*dataset_1, *dataset_2, *dataset_3]:
    mapping[db.convert(lang, IdType.GLOTTOCODE)].add(lang)

pprint(mapping)

defaultdict(<class 'set'>,
            {'bulg1262': {'bul', 'bg'},
             'czec1258': {'cs', 'ces'},
             'dani1285': {'dan', 'da'},
             'dutc1256': {'dutc1256', 'nl', 'Q7411', 'dut', 'nld'},
             'finn1318': {'fin', 'fi'},
             'hung1274': {'hu', 'hun'},
             'ital1282': {'ita', 'it'},
             'lith1251': {'lt', 'lit'},
             'mode1248': {'el', 'ell'},
             'poli1260': {'pl'},
             'port1283': {'pt'},
             'roma1327': {'ro', 'ron'},
             'slov1268': {'sl'},
             'slov1269': {'slk', 'sk'},
             'stan1288': {'spa', 'es'},
             'stan1290': {'fre', 'fr'},
             'stan1293': {'en'},
             'stan1295': {'de', 'ger'},
             'swed1254': {'sv'}})


In [6]:
# Fetch the same languoid two ways: `get` with the two-letter code and
# `guess` with the three-letter code.
# NOTE(review): `guess` presumably works out the identifier type on its own —
# confirm against the qq documentation.
am = db.get("am")
am2 = db.guess("amh")

# Both lookups compare equal (the recorded output of this cell is True).
am == am2

True

In [7]:
# Ancestor languoids, listed from the immediate parent (Amharic-Argobba)
# outwards to the broadest grouping shown (Afro-Asiatic).
am.family_tree

[Languoid(name="Amharic-Argobba", glottocode="amha1244", ...),
 Languoid(name="South Ethiopic", glottocode="sout3078", ...),
 Languoid(name="Ethiosemitic", glottocode="ethi1244", ...),
 Languoid(name="West Semitic", glottocode="west2786", ...),
 Languoid(name="Semitic", glottocode="semi1276", ...),
 Languoid(name="Afro-Asiatic", glottocode="afro1255", ...)]

In [8]:
# Sibling languoids — presumably those sharing `am.parent`; here the list
# holds a single entry, Argobba.
am.siblings

[Languoid(name="Argobba", bcp_47="agj", iso_639_3="agj", iso_639_3="agj", glottocode="argo1244", wikidata_id="Q29292", ...)]

In [9]:
# Immediate parent node; it matches the first entry of `am.family_tree`.
am.parent

Languoid(name="Amharic-Argobba", glottocode="amha1244", ...)

In [10]:
# `parent` can be chained to walk further up: three steps up lands on the
# third entry of `am.family_tree` (Ethiosemitic).
am.parent.parent.parent

Languoid(name="Ethiosemitic", glottocode="ethi1244", ...)

In [11]:
# Script objects recorded as canonical for this languoid (a one-element list
# here: Ethiopic, ISO 15924 code "Ethi").
am.canonical_scripts

[Script(name="Ethiopic", iso_15924="Ethi", ...)"]

In [12]:
# Languoids the database places in the same region as this one; sliced to the
# first five to keep the output short.
# NOTE(review): what counts as a "region" is defined inside qq and is not
# visible from this notebook.
am.languoids_in_same_region[:5]

[Languoid(name="Northern Gumuz", bcp_47="guk", iso_639_3="guk", iso_639_3="guk", glottocode="gumu1244", wikidata_id="Q2396970", ...),
 Languoid(name="Konso", bcp_47="kxc", iso_639_3="kxc", iso_639_3="kxc", glottocode="kons1243", wikidata_id="Q56624", ...),
 Languoid(name="Mesqan", bcp_47="mvz", iso_639_3="mvz", iso_639_3="mvz", glottocode="mesq1240", wikidata_id="Q6821677", ...),
 Languoid(name="Ale-Gawwada", bcp_47="gwd", iso_639_3="gwd", iso_639_3="gwd", glottocode="gaww1239", wikidata_id="Q3032135", ...),
 Languoid(name="Baiso", bcp_47="bsw", iso_639_3="bsw", iso_639_3="bsw", glottocode="bais1246", wikidata_id="Q56615", ...)]

In [13]:
# Languoids reported as sharing a script with this one; sliced to the first
# five to keep the output short.
# NOTE(review): the entries returned (e.g. Lakota, Nande, Tasmanian) are not
# obviously written in Ethiopic, the script shown by `am.canonical_scripts` —
# verify which script set this property actually compares against.
am.languoids_with_same_script[:5]

[Languoid(name="Ngwo", bcp_47="ngn", iso_639_3="ngn", iso_639_3="ngn", glottocode="ngwo1241", wikidata_id="Q36051", ...),
 Languoid(name="Nande", bcp_47="nnb", iso_639_3="nnb", iso_639_3="nnb", glottocode="nand1264", wikidata_id="Q3196953", ...),
 Languoid(name="Nnam", bcp_47="nbp", iso_639_3="nbp", iso_639_3="nbp", glottocode="nnam1238", wikidata_id="Q36138", ...),
 Languoid(name="Lakota", bcp_47="lkt", iso_639_3="lkt", iso_639_3="lkt", glottocode="lako1247", wikidata_id="Q33537", ...),
 Languoid(name="Tasmanian", iso_639_3="xtz", iso_639_3="xtz", glottocode="tasm1247", ...)]

In [None]:
# QQ warns about deprecated codes and resolves them to the
# recommended alternative / replacement (if available)
# Here "in" comes back as the languoid whose bcp_47 / iso_639_1 code is "id"
# (Standard Indonesian), as the recorded output shows.
db.guess("in")

  return self.get(code, id_type)


Languoid(name="Standard Indonesian", bcp_47="id", iso_639_1="id", iso_639_3="ind", iso_639_3="ind", glottocode="indo1316", wikidata_id="Q9240", ...)