In [2]:
import altair as alt
import pandas as pd
import eco_style
alt.themes.enable('light')
import json
import numpy as np

gh_root = "https://raw.githubusercontent.com/jhellingsdata/RADataHub/main/Chart%20Packs/Universities/"

# Students Since 1990

In [3]:
df = pd.read_csv('ons_1992_onwards_counts.csv')
df.columns = ['date', 'value']
df.iloc[105:, :]

Unnamed: 0,date,value
105,Dec-Feb 2001,1296
106,Jan-Mar 2001,1293
107,Feb-Apr 2001,1279
108,Mar-May 2001,1297
109,Apr-Jun 2001,1294
...,...,...
286,Jan-Mar 2016,1880
287,Feb-Apr 2016,1925
288,Mar-May 2016,1899
289,Apr-Jun 2016,1897


In [101]:
df = pd.read_csv("18_24_fte.csv")
df['month'] = df.date.astype(str).str.slice(0, 3)
df['year'] = df.date.astype(str).str[-4:]
df['date'] = (df.month + ' ' + df.year)
df['date'] = pd.to_datetime(df['date'], format='%b %Y', errors='coerce').dt.strftime('%Y-%m-%d')
df = df.dropna()
df = df[['date', 'value']]

df.to_csv('18_24_fte_clean.csv', index=False)

df_url = gh_root + "18_24_fte_clean.csv"

chart = alt.Chart(alt.Data(url=df_url)).mark_line(interpolate='basis', color='#E6224B').encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title=''),
    tooltip=[
        {'type': 'temporal', 'field': 'date', 'title': 'Date'},
        {'type': 'quantitative', 'field': 'value', 'title': 'Value'}
    ]
)

chart = chart.properties(
    width=500,
    height=300,
)

chart.save("18_24_students_headless.json")
chart.save("18_24_students_headless.png", scale_factor=3.0)

chart = chart.properties(
    title={
            "text": ["Two Million Students"],
            "dx": 55,
            "anchor": "start",
            "subtitle": ["Young people aged 18 to 24 in full-time education", "Source: ONS", ""],
            "subtitleColor": "#676A86",
        }
)

chart

# HESA Data

In [6]:
df = pd.read_csv("HESA_2001_onwards_counts.csv", skiprows=13)
df['date'] = df['Academic Year'].str.split('/').str[0] + '-09-01'
df['date'] = pd.to_datetime(df['date'])
df = df[['date','Level of study', 'Number']]
df.columns = ['date', 'series', 'value']
df['label'] = np.where(df['date'] == '2021-09-01', df['series'], '')

df.to_csv('he_students_hesa_clean.csv', index=False)

df_url = gh_root + "he_students_hesa_clean.csv"

base = alt.Chart(
    alt.Data(url=df_url)
).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title=''),
    color=alt.Color('series:N', scale=alt.Scale(), legend=None),
    tooltip=[
        {'type': 'temporal', 'field': 'date', 'title': 'Date'},
        {'type': 'nominal', 'field': 'series', 'title': 'Series'},
        {'type': 'quantitative', 'field': 'value', 'title': 'Value'}
    ]
)

lines = base.mark_line(point=True)

end_labels = base.mark_text(align='left', dx=5).encode(
    text='label:N'
)

chart = (lines + end_labels).properties(
    width=500,
    height=300,
)

chart.save("he_students_hesa_headless.json")
chart.save("he_students_hesa_headless.png", scale_factor=3.0)

chart = chart.properties(
            title={
        "text": ["Higher Education Students"],
        "dx": 55,
        "anchor": "start",
        "subtitle": ["by level of study", "Source: HESA", ""],
        "subtitleColor": "#676A86",
    }
)

chart.save("he_students_hesa.json")
chart.save("he_students_hesa.png", scale_factor=3.0)

chart

In [104]:
df_url

'https://raw.githubusercontent.com/jhellingsdata/RADataHub/main/Chart%20Packs/Universities/he_students_hesa_clean.csv'

# Foreign Students

In [56]:
df = pd.read_csv("non_dom_students.csv", skiprows=10)
df = df.melt(id_vars=['Academic Year'], var_name='country', value_name='value')
df['date'] = df['Academic Year'].str.slice(0, 4) + '-09-01'
df.query("date == '2021-09-01'").sort_values('value', ascending=False)

countries_of_interest = [
    "China", "India",  "Other Asia",  "Nigeria", "Total EU"
]

df = df[['date', 'country', 'value']]

df['label'] = np.where(df.date == '2021-09-01', df.country, '')
df['value'] = df.value.astype(str).str.replace(',', '').astype(float)

full_df = df.copy()

### Filtered Chart

In [71]:
base = alt.Chart(df.query("country in @countries_of_interest")).mark_line().encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title=''),
    color=alt.Color('country:N', title='Country', legend=None),
    tooltip=[
        {'type': 'temporal', 'field': 'date', 'title': 'Date'},
        {'type': 'nominal', 'field': 'country', 'title': 'Country'},
        {'type': 'quantitative', 'field': 'value', 'title': 'Value'}
    ]
)

lines = base.mark_line()

text = base.mark_text(align='left', 
                      dy=alt.expr("datum.country == 'Total EU' ? -10 : datum.country == 'Nigeria' ? 10 : 0"),
                      dx=5).encode(
    text=alt.Text('label:N'),
)

chart = (lines + text).properties(
    width=500,
    height=300,
    title={
        "text": ["International students in UK universities"],
        "dx": 40,
        "anchor": "start",
        "subtitle": ["First year non-UK student by domicile", "Source: HESA", ""],
        "subtitleColor": "#676A86",
    }
)
    

chart.save("intl_students_selected_countries.json")
chart.save("intl_students_selected_countries.png", scale_factor=3.0)
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Full Chart

In [60]:
str(countries_of_interest)

"['China', 'India', 'Other Asia', 'Nigeria', 'Total EU']"

In [105]:
df = full_df.copy()

colours = ["#36B7B4","#E6224B","#F4C245","#0063AF","#00A767"]
df['color'] = np.where(df.country.isin(countries_of_interest), df.country.map(dict(zip(countries_of_interest, colours))), 'rgba(0,0,0,0.2)')


nearest = alt.selection(type='single', nearest=True, on='mouseover',
                            fields=['year'])

df.to_csv('non_dom_students_clean.csv', index=False)

base = alt.Chart(df).mark_line().encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title=''),
    color=alt.Color('color', scale=None),
    detail='country:N',
    tooltip=[
        {'type': 'temporal', 'field': 'date', 'title': 'Date'},
        {'type': 'nominal', 'field': 'country', 'title': 'Country'},
        {'type': 'quantitative', 'field': 'value', 'title': 'Value'}
    ]
)

lines = base.mark_line()

text = base.mark_text(align='left', 
                      dy=alt.expr("datum.country == 'Total EU' ? -10 : datum.country == 'Nigeria' ? 10 : 0"),
                      dx=5).encode(
    text=alt.Text('label:N'),
)

chart = (lines + text).properties(
    width=500,
    height=300,
    title={
        "text": ["International students in UK universities"],
        "dx": 40,
        "anchor": "start",
        "subtitle": ["First year non-UK student by domicile", "Source: HESA", ""],
        "subtitleColor": "#676A86",
    }
)
    

chart.save("international_students_all.png", scale_factor=3)
chart.save("international_students_all.json")

chart

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# A Stacked Area

In [78]:
df = full_df.copy().pivot(index='date', columns='country', values='value').reset_index()
df['other'] = df[[c for c in df.columns if c not in countries_of_interest + ['date']]].sum(axis=1)
df = df[['date'] + countries_of_interest + ['other']]
df = df.melt(id_vars='date', var_name='country', value_name='value')
df

colors = ["#36B7B4","#E6224B","#F4C245","#0063AF","#00A767", "rgba(0,0,0,0.2)"]
df['color'] = np.where(df.country.isin(countries_of_interest), df.country.map(dict(zip(countries_of_interest, colors))), 'rgba(0,0,0,0.2)')

base = alt.Chart(df).mark_area().encode(
    

Unnamed: 0,date,country,value
0,2006-09-01,China,25135.0
1,2007-09-01,China,24670.0
2,2008-09-01,China,28905.0
3,2009-09-01,China,36950.0
4,2010-09-01,China,44805.0
...,...,...,...
91,2017-09-01,other,55795.0
92,2018-09-01,other,57215.0
93,2019-09-01,other,57550.0
94,2020-09-01,other,54690.0


In [28]:
1.914908e+06-9.835977e+05

931310.3

In [11]:
df.date.value_counts()

date
1992-03-01    1
2013-02-01    1
2014-11-01    1
2013-10-01    1
2013-09-01    1
             ..
2002-08-01    1
2002-07-01    1
2002-06-01    1
2002-05-01    1
2024-01-01    1
Name: count, Length: 382, dtype: int64

In [30]:
pd.to_datetime('Mar 1992', format='%b %Y')

Timestamp('1992-03-01 00:00:00')