In [15]:
import pandas as pd
import numpy as np
import altair as alt
import re

import eco_style
alt.themes.enable('light')

ThemeRegistry.enable('light')

In [39]:
# Nomis dataset NM_17_1 request, written out one query parameter per line so
# each part (geography range, date offsets, cell ids, measures) can be read and
# edited on its own. The pieces concatenate to a single URL string.
NOMIS_NM_17_1_URL = (
    "https://www.nomisweb.co.uk/api/v01/dataset/NM_17_1.data.csv"
    "?geography=2092957697...2092957703"
    "&date=latestMINUS76,latestMINUS72,latestMINUS68,latestMINUS64,latestMINUS60,"
    "latestMINUS56,latestMINUS52,latestMINUS48,latestMINUS44,latestMINUS40,"
    "latestMINUS36,latestMINUS32,latestMINUS28,latestMINUS24,latestMINUS20,"
    "latestMINUS16,latestMINUS12,latestMINUS8,latestMINUS4,latest"
    "&cell=402720769,402722305,402722561,402724097,402724353,402725889,402726145,"
    "402727681,402727937,402729473,402729729,402731265,402731521,402733057"
    "&measures=20100,20701"
)

# Fetch the extract and keep a local copy so later cells can reload it from disk.
df = pd.read_csv(NOMIS_NM_17_1_URL)
df.to_csv("lfs_data.csv", index=False)

In [40]:
# Reload the cached Nomis extract and keep only the fields used downstream.
# NOTE(review): the download request asks for two measures (20100 and 20701) and
# the measure column is not retained here, so OBS_VALUE rows for both measures
# stay in the frame and are summed together later — confirm whether one of them
# should be filtered out first.
df = pd.read_csv("lfs_data.csv")
df = df[['GEOGRAPHY_NAME', 'DATE', 'CELL_NAME', 'OBS_VALUE']]

# Keep the UK and its four constituent nations. `.copy()` gives an independent
# frame so that columns added later are not written onto a filtered view.
df = df[df.GEOGRAPHY_NAME.isin(['United Kingdom', 'England', 'Wales', 'Scotland',
       'Northern Ireland'])].copy()

# Rename by explicit mapping rather than by position, so the pairing of source
# column to short name is visible and does not depend on column order.
df = df.rename(columns={
    'GEOGRAPHY_NAME': 'country',
    'DATE': 'date',
    'CELL_NAME': 'measure',
    'OBS_VALUE': 'value',
})
def extract_info(measure):
    """Pull the age band and activity status out of a measure label.

    Parameters
    ----------
    measure : str
        Label text; an age band is recognised when it appears in the form
        ``(Aged <band> -``.

    Returns
    -------
    tuple
        ``(age_range, status)``. ``age_range`` is the text captured between
        ``(Aged `` and the following `` -``, or ``None`` when the label does
        not contain that pattern. ``status`` is ``'Inactive'`` when that word
        occurs anywhere in the label and ``'All'`` otherwise.
    """
    match = re.search(r'\(Aged (.+?) -', measure)

    if 'Inactive' in measure:
        status = 'Inactive'
    else:
        status = 'All'

    if match:
        return match.group(1), status
    return None, status

# Parse every measure label into two new columns: its age band and its status
parsed = df['measure'].apply(lambda label: pd.Series(extract_info(label)))
df[['age_range', 'status']] = parsed

# Labels with no recognisable age band cannot be assigned to any group
df = df.dropna(subset=['age_range'])

# Reporting age groups, each listed with the source age bands it pools
age_groups_mapping = {
    '16-34': ['16-19', '20-24', '25-34'],
    '35-49': ['35-49'],
    '50-64': ['50-64'],
    '16-64': ['16-19', '20-24', '25-34', '35-49', '50-64']
}

# One record per (country, date, age group)
results = []

for (country, date), group_data in df.groupby(['country', 'date']):
    # Row selectors shared by every age group within this country/date slice
    counts_everyone = group_data['status'] == 'All'
    counts_inactive = group_data['status'] == 'Inactive'

    for group_name, subgroups in age_groups_mapping.items():
        in_group = group_data['age_range'].isin(subgroups)
        total_population = group_data.loc[in_group & counts_everyone, 'value'].sum()
        inactive_population = group_data.loc[in_group & counts_inactive, 'value'].sum()

        # Leave the rate empty when there is no positive total to divide by
        if total_population > 0:
            inactivity_rate = inactive_population / total_population
        else:
            inactivity_rate = None

        # Note: despite the "(%)" in the key, the stored value is the plain
        # ratio inactive / total, not multiplied by 100.
        results.append(dict(zip(
            ('country', 'date', 'age_group', 'inactivity_rate (%)'),
            (country, date, group_name, inactivity_rate),
        )))

# Collect the records into a single table
full_df = pd.DataFrame(results)

# Fig 1: Inactivity by Nation


In [55]:
# Nomis dataset NM_17_5 request, written out one query parameter per line. The
# pieces concatenate to a single URL string.
NOMIS_NM_17_5_URL = (
    "https://www.nomisweb.co.uk/api/v01/dataset/NM_17_5.data.csv"
    "?geography=2092957697...2092957703"
    "&date=latestMINUS73,latestMINUS69,latestMINUS65,latestMINUS61,latestMINUS57,"
    "latestMINUS53,latestMINUS49,latestMINUS45,latestMINUS41,latestMINUS37,"
    "latestMINUS33,latestMINUS29,latestMINUS25,latestMINUS21,latestMINUS17,"
    "latestMINUS13,latestMINUS9,latestMINUS5,latestMINUS1"
    "&variable=111"
    "&measures=20599,21001,21002,21003"
)

# Fetch the extract and keep a local copy so later cells can reload it from disk.
df = pd.read_csv(NOMIS_NM_17_5_URL)
df.to_csv("lfs_data_headline_pct.csv", index=False)

In [56]:
# Display the frame held in `df` to inspect its columns and a sample of rows.
df

Unnamed: 0,DATE,DATE_NAME,DATE_CODE,DATE_TYPE,DATE_TYPECODE,DATE_SORTORDER,GEOGRAPHY,GEOGRAPHY_NAME,GEOGRAPHY_CODE,GEOGRAPHY_TYPE,...,MEASURES,MEASURES_NAME,OBS_VALUE,OBS_STATUS,OBS_STATUS_NAME,OBS_CONF,OBS_CONF_NAME,URN,RECORD_OFFSET,RECORD_COUNT
0,2005-12,Jan 2005-Dec 2005,2005-12,date,0,0,2092957697,United Kingdom,K02000001,countries,...,20599,Variable,23.7,A,Normal Value,F,Free (free for publication),Nm-17d5d32092e0d2092957697d40b22d20599,0,532
1,2005-12,Jan 2005-Dec 2005,2005-12,date,0,0,2092957697,United Kingdom,K02000001,countries,...,21001,Numerator,9182700.0,A,Normal Value,F,Free (free for publication),Nm-17d5d32092e0d2092957697d40b22d21001,1,532
2,2005-12,Jan 2005-Dec 2005,2005-12,date,0,0,2092957697,United Kingdom,K02000001,countries,...,21002,Denominator,38753300.0,A,Normal Value,F,Free (free for publication),Nm-17d5d32092e0d2092957697d40b22d21002,2,532
3,2005-12,Jan 2005-Dec 2005,2005-12,date,0,0,2092957697,United Kingdom,K02000001,countries,...,21003,Confidence,0.2,A,Normal Value,F,Free (free for publication),Nm-17d5d32092e0d2092957697d40b22d21003,3,532
4,2005-12,Jan 2005-Dec 2005,2005-12,date,0,0,2092957698,Great Britain,K03000001,countries,...,20599,Variable,23.5,A,Normal Value,F,Free (free for publication),Nm-17d5d32092e0d2092957698d40b22d20599,4,532
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,2023-12,Jan 2023-Dec 2023,2023-12,date,0,18,2092957702,Northern Ireland,N92000002,countries,...,21003,Confidence,1.1,A,Normal Value,F,Free (free for publication),Nm-17d5d32380e0d2092957702d40b22d21003,527,532
528,2023-12,Jan 2023-Dec 2023,2023-12,date,0,18,2092957703,England and Wales,K04000001,countries,...,20599,Variable,21.1,A,Normal Value,F,Free (free for publication),Nm-17d5d32380e0d2092957703d40b22d20599,528,532
529,2023-12,Jan 2023-Dec 2023,2023-12,date,0,18,2092957703,England and Wales,K04000001,countries,...,21001,Numerator,7820300.0,A,Normal Value,F,Free (free for publication),Nm-17d5d32380e0d2092957703d40b22d21001,529,532
530,2023-12,Jan 2023-Dec 2023,2023-12,date,0,18,2092957703,England and Wales,K04000001,countries,...,21002,Denominator,37052200.0,A,Normal Value,F,Free (free for publication),Nm-17d5d32380e0d2092957703d40b22d21002,530,532


In [59]:
# Headline rates from the cached NM_17_5 extract: keep the UK and its four
# nations, retain only the rows whose measure is 'Variable', and divide the
# values by 100 (the raw values are percentages such as 23.7).
source_columns = ['DATE', 'GEOGRAPHY_NAME', 'OBS_VALUE', 'MEASURES_NAME']
nations = ['United Kingdom', 'England', 'Wales', 'Scotland', 'Northern Ireland']

df = pd.read_csv("lfs_data_headline_pct.csv")[source_columns]
df = df[df['GEOGRAPHY_NAME'].isin(nations)]

# Short names, in the same order as `source_columns`
df.columns = ['date', 'country', 'value', 'measure']

df = df[df['measure'] == 'Variable']
df['value'] = df['value'] / 100

df.tail()

Unnamed: 0,date,country,value,measure
504,2023-12,United Kingdom,0.213,Variable
512,2023-12,England,0.21,Variable
516,2023-12,Wales,0.23,Variable
520,2023-12,Scotland,0.225,Variable
524,2023-12,Northern Ireland,0.247,Variable


In [42]:
# Line chart of the 16-64 inactivity rate, one line per country, using the
# aggregated table in `full_df`.
df = full_df.query('age_group == "16-64"').copy()

# Rename by explicit mapping instead of overwriting all column names by position.
df = df.rename(columns={'inactivity_rate (%)': 'inactivity_rate'})

# Put each country's name on its most recent observation only. The date is
# taken from the data rather than hard-coded, because the source request uses
# `latest` and the final period moves whenever new data is published.
latest_date = df['date'].max()
df['label'] = np.where(df['date'] == latest_date, df['country'], '')

# Encodings shared by the line layer and the text-label layer
base = alt.Chart(df).encode(
    x=alt.X('date:T', title='Date'),
    y=alt.Y('inactivity_rate:Q', title='Inactivity Rate (%)', axis=alt.Axis(format='%')),
    color=alt.Color('country:N', title='Country', legend=None),
)

lines = base.mark_line().encode()

# Labels sit just to the right of the final point; rows with an empty label draw nothing
labels = base.mark_text(align='left', dx=5).encode(
    text='label:N'
)

chart = (lines + labels).properties(
    width=450,
    height=300
)

chart




  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [43]:
# Spot-check: the last ten rows for Scotland in the current `df`.
df.query('country == "Scotland"').tail(10)

Unnamed: 0,country,date,age_group,inactivity_rate,label
203,Scotland,2015-03,16-64,0.228611,
207,Scotland,2016-03,16-64,0.233317,
211,Scotland,2017-03,16-64,0.23795,
215,Scotland,2018-03,16-64,0.231224,
219,Scotland,2019-03,16-64,0.229175,
223,Scotland,2020-03,16-64,0.236612,
227,Scotland,2021-03,16-64,0.243466,
231,Scotland,2022-03,16-64,0.243876,
235,Scotland,2023-03,16-64,0.234803,
239,Scotland,2024-03,16-64,0.238879,Scotland


In [28]:
# Spot-check: every row of the current `df` dated 2024-03.
# NOTE(review): the saved output's row indices (311 ... 1559) differ from the
# Scotland check above (203 ... 239), so it looks like this output came from a
# different run of the data — confirm before comparing the two.
df.query("date == '2024-03'")

Unnamed: 0,country,date,age_group,inactivity_rate,label
311,England,2024-03,16-64,0.215653,England
623,Northern Ireland,2024-03,16-64,0.254509,Northern Ireland
935,Scotland,2024-03,16-64,0.238879,Scotland
1247,United Kingdom,2024-03,16-64,0.218506,United Kingdom
1559,Wales,2024-03,16-64,0.247788,Wales


In [None]:
# Load the data from the CSV file
file_path = 'path_to_your_file.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)


# Convert the results into a DataFrame
# (`pd.dfFrame` does not exist and raised AttributeError; the constructor is
# `pd.DataFrame`. `results` is the list of records built by the aggregation cell.)
results_df = pd.DataFrame(results)



# Fig 1

In [74]:
# Load the merged chart series from disk.
# NOTE: this rebinds `full_df`, which earlier in the notebook holds the
# inactivity-rate table computed from the Nomis extract; any cell that needs
# that earlier table must run before this one.
full_df = pd.read_csv("merged_series_data.csv")

In [156]:
# Figure 1: the series stored under chart1.xml, one line per nation.
df = full_df.query('`Source File` == "chart1.xml"').copy()
df.columns = ['series', 'year', 'value', 'file']
df = df.drop(columns=['file'])

# Years are parsed with the pattern '%Y.0' (the reshaped data shows them as 2005.0, 2006.0, ...)
df['year'] = pd.to_datetime(df['year'], format='%Y.0')

# Shorten the raw series names to the labels shown on the chart. Raw names:
# ['Inactivity UK (%)', 'Inactivity England (%)',
#        'Inactivity Scotland (%)', 'Inactivity Wales (%)',
#        'Inactivity Northern Ireland (%)']
short_names = {
    'Inactivity UK (%)': 'UK',
    'Inactivity England (%)': 'England',
    'Inactivity Scotland (%)': 'Scotland',
    'Inactivity Wales (%)': 'Wales',
    'Inactivity Northern Ireland (%)': 'Northern Ireland'
}
df['series'] = df['series'].map(short_names)

# Only the 2023 observation of each series carries its name as a text label
is_label_year = df['year'].dt.year == 2023
df['label'] = np.where(is_label_year, df.series, '')

# Encodings shared by both layers
x_axis = alt.X('year:T', title='')
y_axis = alt.Y('value:Q', title='Inactivity Rate (%)', axis=alt.Axis(format='%'))
series_colour = alt.Color('series:N', title='Country', legend=None)
base = alt.Chart(df).encode(x=x_axis, y=y_axis, color=series_colour)

# Northern Ireland is drawn fully opaque; every other series is faded to 0.4
highlight = alt.condition(alt.datum.series == 'Northern Ireland', alt.value(1), alt.value(0.4))
lines = base.mark_line(point=True).encode(opacity=highlight)

# Nudge the Scotland label up and the England label down by 5px
label_dy = alt.expr("datum.series == 'Scotland' ? -5 : datum.series == 'England' ? 5 : 0")
labels = base.mark_text(align='left', dy=label_dy, dx=5).encode(text='label:N')

chart = (lines + labels).properties(width=450, height=300)

chart.save('charts/fig1.png', scale_factor=2.0)
chart.save('charts/fig1.json')
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Fig 2

In [158]:
# Figure 2: the six age-band series (UK and NI) from fig2.csv.
age_band_series = ['UK 16-34', 'UK 35-49', 'UK 50-64', 'NI 16-34', 'NI 35-49',
       'NI 50-64']
df = pd.read_csv("fig2.csv")
df = df[df.series.isin(age_band_series)]

# Label the 2023 observations. This comparison is against the number 2023, so
# it has to happen before `date` is converted to a datetime on the next line.
df['label'] = np.where(df['date'] == 2023, df.series, '')
df['date'] = pd.to_datetime(df['date'], format='%Y')

# Six colours and no explicit domain: which series gets which colour follows
# the scale's default ordering — presumably alphabetical by series; verify.
palette = alt.Scale(range=["#f0627b", "#e6224b", "#9f162f", "#6ecbc8", "#36b7b4", "#27908d"])

# Encodings shared by both layers
x_axis = alt.X('date:T', title='')
y_axis = alt.Y('value:Q', title='Inactivity Rate (%)', axis=alt.Axis(format='%'))
series_colour = alt.Color('series:N', title='Country', legend=None, scale=palette)
base = alt.Chart(df).encode(x=x_axis, y=y_axis, color=series_colour)

lines = base.mark_line(point=True)

# Nudge the 'NI 16-34' label up and the 'UK 16-34' label down by 5px
label_dy = alt.expr("datum.series == 'NI 16-34' ? -5 : datum.series == 'UK 16-34' ? 5 : 0")
labels = base.mark_text(align='left', dy=label_dy, dx=5).encode(text='label:N')

chart = (lines + labels).properties(width=450, height=300)

chart.save('charts/fig2.png', scale_factor=2.0)
chart.save('charts/fig2.json')
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Fig 3

In [159]:
# Figure 3: the four male/female series (UK and NI).
# NOTE(review): this reads "fig2.csv" although the cell sits under the Fig 3
# heading — confirm that file really contains the Male/Female series.
sex_series = ['UK Male', 'UK Female', 'NI Male', 'NI Female']
df = pd.read_csv("fig2.csv")
df = df[df.series.isin(sex_series)]

# Label the 2023 observations. This comparison is against the number 2023, so
# it has to happen before `date` is converted to a datetime on the next line.
df['label'] = np.where(df['date'] == 2023, df.series, '')
df['date'] = pd.to_datetime(df['date'], format='%Y')

# Four colours and no explicit domain: which series gets which colour follows
# the scale's default ordering — presumably alphabetical by series; verify.
palette = alt.Scale(range=["#f0627b", "#e6224b", "#6ecbc8", "#36b7b4"])

# Encodings shared by both layers
x_axis = alt.X('date:T', title='')
y_axis = alt.Y('value:Q', title='Inactivity Rate (%)', axis=alt.Axis(format='%'))
series_colour = alt.Color('series:N', title='Country', legend=None, scale=palette)
base = alt.Chart(df).encode(x=x_axis, y=y_axis, color=series_colour)

lines = base.mark_line(point=True)

labels = base.mark_text(align='left', dx=5).encode(text='label:N')

chart = (lines + labels).properties(width=450, height=300)

chart.save('charts/fig3.png', scale_factor=2.0)
chart.save('charts/fig3.json')

chart
                                    

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Fig 4

In [161]:
# Figure 4: bars of `value` by year, coloured by series, one panel per region.
# Columns used from fig4.csv: year, region, series, value
df = pd.read_csv("fig4.csv")

# Legend sits above the panels; the very large labelLimit keeps long series
# names from being cut short.
series_legend = alt.Legend(orient='top', title='', labelLimit=1000000000)

bars = alt.Chart(df).mark_bar().encode(
    y=alt.Y('value:Q', title='Inactivity Rate (%)', axis=alt.Axis(format='%')),
    x=alt.X('year:N', title=''),
    color=alt.Color('series:N', title='', legend=series_legend),
    column=alt.Column('region:N', title='', header=alt.Header(orient='bottom'))
)

chart = bars.properties(width=225, height=300)

chart.save('charts/fig4.json')
chart.save('charts/fig4.png', scale_factor=2.0)
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [136]:
# Display whatever frame `df` currently holds.
# NOTE(review): the saved output below has columns (component, year - country,
# value, year, country) that no visible cell produces, and its execution count
# (136) predates the Fig 4 cell (161) — looks like leftover state from an
# earlier session; confirm or remove.
df

Unnamed: 0,component,year - country,value,year,country
183,Home or Family,2020 - NI,0.0501,2020,NI
184,Home or Family,2021 - UK,0.0424,2021,UK
185,Long-Term Illness/Disability,2020 - NI,0.0979,2020,NI
186,Long-Term Illness/Disability,2021 - UK,0.1009,2021,UK
187,Retired,2020 - NI,0.0309,2020,NI
188,Retired,2021 - UK,0.0309,2021,UK
189,Student,2020 - NI,0.0671,2020,NI
190,Student,2021 - UK,0.0777,2021,UK
191,Other,2020 - NI,0.0311,2020,NI
192,Other,2021 - UK,0.0352,2021,UK


In [None]:
# Open the chart2.xml file and extract the data
import xml.etree.ElementTree as ET

In [89]:
# Render the text-label layer held in `labels` on its own.
# NOTE(review): `labels` is reassigned by several chart cells, so what this
# shows depends on which of them ran last — looks like leftover debugging; confirm.
labels

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [73]:
# Load the reshaped chart series (columns: Chart File, Series Name, X, Y).
df = pd.read_csv("reshaped_all_charts_series_data.csv")
# Lists the distinct chart files; the result is not shown because it is not the
# last expression in the cell.
df['Chart File'].unique()
# Display the frame.
df

Unnamed: 0,Chart File,Series Name,X,Y
0,chart1.xml,Inactivity UK (%),2005.0,0.2333
1,chart1.xml,Inactivity UK (%),2006.0,0.2295
2,chart1.xml,Inactivity UK (%),2007.0,0.2316
3,chart1.xml,Inactivity UK (%),2008.0,0.2294
4,chart1.xml,Inactivity UK (%),2009.0,0.2314
...,...,...,...,...
90,chart1.xml,Inactivity Northern Ireland (%),2019.0,0.2597
91,chart1.xml,Inactivity Northern Ireland (%),2020.0,0.2771
92,chart1.xml,Inactivity Northern Ireland (%),2021.0,0.2871
93,chart1.xml,Inactivity Northern Ireland (%),2022.0,0.2779
