In [1]:
import altair as alt
import pandas as pd

#Endangered/unsafe (2698)
#Definitely endangered (2362)
#Not in use (1181)
#Potentially vulnerable (1163)
#Severely endangered (463)
#Critically endangered (383)
#Safe (65) 

## Datos tomados de aquí: https://en.wal.unesco.org/en/discover/languages
source = pd.DataFrame({
    'Categoria': ['Estable', 'Potencialmente vulnerable', 'En peligro', 'Definitivamente en peligro', 'Severamene en peligro', 'Criticamente en peligro', 'Sin usarse'],
    'Cantidad': [65, 1163,2698, 2362, 463, 383, 1181]
})

print("Total de lenguages categorizados:",sum(source['Cantidad']))

base = alt.Chart(source)

bars = base.mark_bar().encode(
    y=alt.Y('Categoria',sort=None),
    x='Cantidad',
    color=alt.Color('Categoria', sort=None,scale=alt.Scale(range=['#077c09','#ae23d7','#fe9595','#fc5a5a','#fc4343','#ff0000','#00b2ff']), legend=None)
)

text = base.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    x='Cantidad',
    y=alt.Y('Categoria', sort=None),
    text='Cantidad'
)

bars+text

Total de lenguages categorizados: 8315


In [2]:
# Institutional 485
# Stable 3481
# Endangered 3193
# Extinct 454

## Datos tomados de aquí: https://www.ethnologue.com/
source = pd.DataFrame({
    'Categoria': ['Institucional', 'Estable', 'En peligro', 'Extintos'],
    'Cantidad': [485, 3481, 3293, 454]
})

print("Total de lenguages categorizados:",sum(source['Cantidad']))

base = alt.Chart(source)

bars = base.mark_bar().encode(
    y=alt.Y('Categoria',sort=None),
    x='Cantidad',
    color=alt.Color('Categoria', sort=None,scale=alt.Scale(range=['#00ff00','#077c09','#fc4343','#ff0000']), legend=None)
)

text = base.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    x='Cantidad',
    y=alt.Y('Categoria', sort=None),
    text='Cantidad'
)

bars+text

Total de lenguages categorizados: 7713


In [3]:
#no en peligro 	2643 	34,13%
#amenazado 	1595 	20,59%
#movedizo 	1805 	23,31%
#moribundo 	422 	5,45%
#casi extinto 	299 	3,86%
#extinguido 	981 	12,67%

## Datos tomados de aquí: https://glottolog.org/langdoc/status
source = pd.DataFrame({
    'Categoria': ['No en peligro', 'Amenazado', 'En sustitucion', 'Moribundo','Casi extinto', 'Extinto'],
    'Cantidad': [2643, 1595, 1805, 442, 299, 281]
})

print("Total de lenguages categorizados:",sum(source['Cantidad']))

base = alt.Chart(source)

bars = base.mark_bar().encode(
    y=alt.Y('Categoria',sort=None),
    x='Cantidad',
    color=alt.Color('Categoria', sort=None,scale=alt.Scale(range=['#077c09','#ae23d7','#f684ee','#fe9595','#fc5a5a','#ff0000']), legend=None)
)

text = base.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    x='Cantidad',
    y=alt.Y('Categoria', sort=None),
    text='Cantidad'
)

bars+text


                     

Total de lenguages categorizados: 7065


## Glottlog

In [4]:
from pyglottolog import Glottolog, config
glottolog = Glottolog('glottolog-5.2/')
print(glottolog)

  import pkg_resources


<Glottolog repos glottolog-5.2 at /home/ivanvladimir/projects/lom_charts/notebooks/glottolog-5.2>


In [5]:
languoids = {l.id: l for l in glottolog.languoids()}

In [6]:
print(config.LanguageType)

<class 'pyglottolog.config.LanguageType'>


In [11]:
from collections import Counter

languages_mx=[]
extinct=[]
for i,l in enumerate(languoids.values()):
    if l.category == "Spoken L1 Language":
        countries_=set(c.id for c in l.countries)
        if 'MX' in countries_:
            try:
                if str(l.endangerment.status.name)=='extinct':
                    extinct.append(l)
                languages_mx.append(l)

            except AttributeError:
                print("Sin status:",l.name)
           
aes_mx=Counter(l.endangerment.status.name for l in languages_mx)
egids_mx=Counter(l.endangerment.status.egids for l in languages_mx)
unesco_mx=Counter(l.endangerment.status.unesco for l in languages_mx)

print(f"Total lenguajes en glottlog con México en countries {len(languages_mx)}")
print(aes_mx)
print(f'Lenguajes extintion en México: {",".join([l.name for l in extinct])}')

Sin status: Chichicapan Zapotec
Sin status: Amatlán Zapotec
Sin status: Zaniza Zapotec
Sin status: Texmelucan Zapotec
Sin status: Kwatl
Total lenguajes en glottlog con México en countries 303
Counter({'threatened': 127, 'shifting': 83, 'not endangered': 53, 'extinct': 25, 'moribund': 8, 'nearly extinct': 7})
Lenguajes extintion en México: Guaicurian,Coahuilteco,Comecrudan,Cuitlatec,Cotoname,Maratino,Southern Pame,Chiapanec,Teojomulco Chatino,Soltec,Chicomuceltec,Epigraphic Mayan,Robinia,Cochimi,Tequistlateco Chontal,Tubar,Teguima,Eudeve,Pochutec,Classical Nahuatl,Tepecano,Jova,Coca,Northern Tarahumara,Tapachultec


In [21]:
labels2spa={
    'extinct':'Extinto',
    'shifting':'En transición',
    'threatened':'Amenazado',
    'not endangered':'No en peligro',
    'moribund':'Moribundo',
    'nearly extinct':'Casi extinto'
}

order=['not endangered','threatened','shifting','moribund','nearly extinct','extinct']

source = pd.DataFrame({'Categoria':[labels2spa[ix] for ix in order],'Cantidad':[aes_mx[ix] for ix in order], 'Order': range(len(order))})

base=alt.Chart(source).encode(
    alt.Theta("Cantidad:Q").stack(True),
    color=alt.Color('Categoria', sort=alt.SortField('Order'),scale=alt.Scale(range=['#077c09','#ae23d7','#f684ee','#fe9595','#fc5a5a','#ff0000'])),
    order=alt.Order('Order:O')
)

pie = base.mark_arc(outerRadius=120)
text = base.mark_text(radius=140, size=12).encode(text="Cantidad")

pie + text

In [22]:
source

Unnamed: 0,Categoria,Cantidad,Order
0,No en peligro,53,0
1,Amenazado,127,1
2,En transición,83,2
3,Moribundo,8,3
4,Casi extinto,7,4
5,Extinto,25,5


In [23]:
print(egids_mx.keys())

order=['<=6a','6b','7', '8a', '8b','>=9']

source = pd.DataFrame({'Categoria':[ix for ix in order],'Cantidad':[egids_mx[ix] for ix in order], 'Order': range(len(order))})

base=alt.Chart(source).encode(
    alt.Theta("Cantidad:Q").stack(True),
    color=alt.Color('Categoria', sort=alt.SortField('Order'),scale=alt.Scale(range=['#077c09','#ae23d7','#f684ee','#fe9595','#fc5a5a','#ff0000'])),
    order=alt.Order('Order:O')
)

pie = base.mark_arc(outerRadius=120)
text = base.mark_text(radius=140, size=12).encode(text="Cantidad")

pie + text

dict_keys(['>=9', '7', '6b', '<=6a', '8a', '8b'])


In [24]:
source

Unnamed: 0,Categoria,Cantidad,Order
0,<=6a,53,0
1,6b,127,1
2,7,83,2
3,8a,8,3
4,8b,7,4
5,>=9,25,5


In [25]:
print(unesco_mx.keys())

labels2spa={
    'safe':'Estable', 'vulnerable':'Vulnerable', 'definitely endangered':'Definitivamente en peligro', 'severely endangered':'Severamene en peligro', 'critically endangered':'Criticamente en peligro','extinct':'Sin usarse'
}

order=['safe', 'vulnerable', 'definitely endangered', 'severely endangered', 'critically endangered','extinct']

source = pd.DataFrame({'Categoria':[labels2spa[ix] for ix in order],'Cantidad':[unesco_mx[ix] for ix in order], 'Order': range(len(order))})

base=alt.Chart(source).encode(
    alt.Theta("Cantidad:Q").stack(True),
    color=alt.Color('Categoria', sort=alt.SortField('Order'),scale=alt.Scale(range=['#077c09','#ae23d7','#f684ee','#fe9595','#fc5a5a','#ff0000'])),
    order=alt.Order('Order:O')
)

pie = base.mark_arc(outerRadius=120)
text = base.mark_text(radius=140, size=12).encode(text="Cantidad")

pie + text

dict_keys(['extinct', 'definitely endangered', 'vulnerable', 'safe', 'severely endangered', 'critically endangered'])


In [26]:
source

Unnamed: 0,Categoria,Cantidad,Order
0,Estable,53,0
1,Vulnerable,127,1
2,Definitivamente en peligro,83,2
3,Severamene en peligro,8,3
4,Criticamente en peligro,7,4
5,Sin usarse,25,5


In [28]:
isos_mx=set(l.iso for l in languages_mx)
print("Different ISOs for the languages",len(isos_mx))

Different ISOs for the languages 292


In [29]:
# Fuente de datos: https://www.inegi.org.mx/temas/lengua/#informacion_general
from io import StringIO

csv_data="""Decada,Porcentaje
1930,	16
1940,	14.8
1950,	11.2
1960,	10.4
1970,	7.8
1980*,   9.0
1990,	7.5
2000,	7.1
2010,	6.7
2015,	6.6
2020,	6.2"""

csv_file = StringIO(csv_data)

df = pd.read_csv(csv_file)
df['Decada'] = df['Decada'].astype('str')
df['Porcentaje'] = pd.to_numeric(df['Porcentaje'])

base = alt.Chart(df)

bars = base.mark_bar().encode(
    y=alt.Y('Porcentaje',sort=None),
    x=alt.X('Decada:N', scale=alt.Scale(paddingInner=0.8 ))
).properties(
    width=600  # Set specific width in pixels
)

text = base.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    x='Decada:N',
    y=alt.Y('Porcentaje', sort=None),
    text='Porcentaje'
)

bars+text

In [30]:
# Fuente de datos: https://www.inegi.org.mx/temas/lengua/#informacion_general
from io import StringIO

csv_data="""Decada,Población
1930,	2.6
1940,	2.9
1950,	2.8
1960,	3.6
1970,	3.7
1980,   5.1
1990,	6.0
2000,	6.9
2010,	7.5
2020,	7.8"""

csv_file = StringIO(csv_data)

df = pd.read_csv(csv_file)
df['Decada'] = df['Decada'].astype('str')
df['Población'] = pd.to_numeric(df['Población'])

base = alt.Chart(df)

bars = base.mark_bar().encode(
    y=alt.Y('Población',sort=None, title="Población en millones"),
    x=alt.X('Decada:N', scale=alt.Scale(paddingInner=0.8 ))
).properties(
    width=600  # Set specific width in pixels
)

text = base.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    x='Decada:N',
    y=alt.Y('Población', sort=None),
    text='Población'
)

bars+text

In [31]:
csv_data="""Año,Población
1518, 25.2
1548, 16.8
1568, 2.65
1585, 1.9
1595, 1.37
1605, 1.37"""

csv_file = StringIO(csv_data)

df = pd.read_csv(csv_file)
df['Año'] = df['Año'].astype('str')
df['Población'] = pd.to_numeric(df['Población'])

base = alt.Chart(df)

bars = base.mark_bar().encode(
    y=alt.Y('Población',sort=None, title="Población en millones"),
    x=alt.X('Año:N', scale=alt.Scale(paddingInner=0.8 ))
).properties(
    width=600  # Set specific width in pixels
)

text = base.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    x='Año:N',
    y=alt.Y('Población', sort=None),
    text='Población'
)

bars+text

In [131]:
from io import StringIO
import numpy as np

csv_data="""Lengua,Mujeres,Hombres,"Total"
Náhuatl,"853625.00","798333.00","1651958.00"
Maya,"376631.00","398124.00","774755.00"
Tseltal,"298320.00","290824.00","589144.00"
Tsotsil,"282024.00","268250.00","550274.00"
Mixteco,"280869.00","245724.00","526593.00"
Zapoteco,"256681.00","234164.00","490845.00"
Otomí,"155386.00","143475.00","298861.00"
Totonaco,"132506.00","123838.00","256344.00"
Ch'ol,"128343.00","126372.00","254715.00"
Mazateco,"124598.00","112614.00","237212.00"
Huasteco,"85206.00","83523.00","168729.00"
Mazahua,"83138.00","70659.00","153797.00"
Tlapaneco,"77327.00","70105.00","147432.00"
Chinanteco,"76634.00","67760.00","144394.00"
Tarasco,"73205.00","69254.00","142459.00"
Mixe,"73896.00","65864.00","139760.00"
Tarahumara,"47070.00","44484.00","91554.00"
Zoque,"37186.00","36832.00","74018.00"
Tojolabal,"34082.00","32871.00","66953.00"
Chontal de Tabasco,"30157.00","30406.00","60563.00"
Huichol,"30891.00","29372.00","60263.00"
Amuzgo,"31488.00","28396.00","59884.00"
Chatino,"28280.00","23796.00","52076.00"
Tepehuano del sur,"22581.00","21805.00","44386.00"
Mayo,"17407.00","21100.00","38507.00"
Popoluca de la Sierra,"18747.00","17366.00","36113.00"
Cora,"16891.00","16335.00","33226.00"
Triqui,"15678.00","13867.00","29545.00"
No especificado,"10856.00","11921.00","22777.00"
Yaqui,"8952.00","10424.00","19376.00"
Huave,"9352.00","9475.00","18827.00"
Popoloca,"9337.00","7937.00","17274.00"
Cuicateco,"6788.00","6173.00","12961.00"
Pame,"6087.00","5837.00","11924.00"
Mam,"5204.00","6165.00","11369.00"
Q'anjob'al,"5604.00","5247.00","10851.00"
Tepehuano del norte,"5083.00","4772.00","9855.00"
Tepehua,"4684.00","4200.00","8884.00"
Popoluca,"4208.00","4219.00","8427.00"
Chontal de Oaxaca,"2756.00","2857.00","5613.00"
Sayulteco,"2459.00","2306.00","4765.00"
Chuj,"1778.00","1738.00","3516.00"
Akateko,"1564.00","1330.00","2894.00"
Otras lenguas,"1016.00","1437.00","2453.00"
Chichimeco Jonaz,"1162.00","1202.00","2364.00"
Tlahuica,"1146.00","1092.00","2238.00"
Guarijío,"1079.00","1060.00","2139.00"
Chontal,648.00,"1056.00","1704.00"
Q'eqchi',744.00,855.00,"1599.00"
Matlatzinca,649.00,596.00,"1245.00"
Pima,522.00,515.00,1037.00
Chocholteco,449.00,398.00,847.00
Lacandón,375.00,396.00,771.00
Seri,336.00,387.00,723.00
K'iche',275.00,314.00,589.00
Kumiai,241.00,254.00,495.00
Jakalteko,252.00,229.00,481.00
Texistepequeño,176.00,192.00,368.00
Tepehuano,130.00,187.003,17.00
Paipai,118.00,113.00,231.00
Pápago,69.00,134.00,203.00
Ixcateco,95.00,100.00,195.00
Cucapá,77.00,99.00,176.00
Kaqchikel,74.00,95.00,169.00
Qato'k,52.00,74.00,126.00
Ixil,61.00,56.00,117.00
Teko,22.00,56.00,78.00
Oluteco,29.00,48.00,77.00
Kiliwa,28.00,48.00,76.00
Ayapaneco,32.00,39.00,71.00
Kickapoo,21.00,42.00,63.00
Awakateko,10.00,10.00,20.00
"""

csv_file = StringIO(csv_data)

df = pd.read_csv(csv_file)
df['Lengua'] = df['Lengua'].astype('str')
df['Mujeres'] = pd.to_numeric(df['Mujeres'])
df['Hombres'] = pd.to_numeric(df['Hombres'])
df['Total'] = pd.to_numeric(df['Total'])
df['LogTotal'] = df['Total'].apply(np.log10)
df['LogHombres'] = df['Hombres'].apply(np.log10)
df['LogMujeres'] = df['Mujeres'].apply(np.log10)
df = df.sort_values('Total', ascending=False)

chart1 = alt.Chart(df).mark_bar(color='blue', opacity=1.0).encode(
    x=alt.X('Lengua:N',sort=None, scale=alt.Scale(paddingInner=0.2 )),
    y=alt.Y('LogTotal:Q',sort=None,axis=alt.Axis(
                labelExpr="format(pow(10,datum.value),',')",
                labels=True,
            ),title="Total hablantes (log10)")  # Log scale on y-axis
).properties(
    width=900
)

chart1

In [130]:
selection = alt.selection_point(fields=['Lengua'])
df = df.sort_values('Mujeres', ascending=False)

df_long = df.melt(id_vars=['Lengua'], 
                  value_vars=['LogMujeres', 'LogHombres'],
                  var_name='Sexo', 
                  value_name='Población')

chart3 = alt.Chart(df_long).mark_bar(opacity=1).encode(
    x=alt.X('Lengua:N',sort=None),
    y=alt.Y('Población:Q',sort=None,axis=alt.Axis(
                labelExpr="format(pow(10,datum.value),',')",
                labels=True,
            ),title="Total hablantes por sexo(log10)"),
    color=alt.Color('Sexo:N',
                    scale=alt.Scale(domain=['LogMujeres', 'LogHombres'],range=['orange', 'green']),
                    legend=alt.Legend(labelExpr="datum.value == 'LogMujeres' ? 'Mujeres' : 'Hombres'")),
    xOffset='Sexo:N'
).properties(
    width=900
)

chart3