# Labour Market Statistics Cleaning & Pre Processing
## 1. Importing required modules & libraries

In [1]:
import pandas as pd

## 2. Loading the data

In [2]:
# Read the raw ONS export. The first rows of the file hold metadata
# (CDID, units, release dates) rather than observations, so every column
# arrives as text; low_memory=False parses the file in a single pass.
labour_market_statistics_df = pd.read_csv(
    '1_raw_databases/Labour Market Statistics.csv',
    low_memory=False,
)

## 3. Inspecting the data
### As you can see there are 1527 rows and 1831 columns!

In [3]:
labour_market_statistics_df.shape

(1527, 1831)

In [4]:
labour_market_statistics_df[:10]

Unnamed: 0,Title,AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Regular Pay,AWE: Whole Economy Real Terms Year on Year three Month Growth (%): Seasonally Adjusted Regular Pay,AWE: Whole Economy Real Terms Level (£): Seasonally Adjusted Regular Pay,AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Total Pay,AWE: Whole Economy Real Terms Year on Year Three Month Growth (%): Seasonally Adjusted Total Pay,AWE: Whole Economy Real Terms Level (£): Seasonally Adjusted Total Pay,Employment rate Canada (OECD) seasonally adjusted,Employment Rate Japan (OECD) seasonally adjusted,Employment Rate United States (OECD) seasonally adjusted,...,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED Luxembourg - Eurostat,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED Netherlands - Eurostat,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED Austria - Eurostat,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED Portugal - Eurostat,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED Finland - Eurostat,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED Sweden - Eurostat,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED United Kingdom Eurostat,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED United States,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED Japan- Eurostat,STANDARDISED ILO UNEMPLOYMENT RATES SEASONALLY ADJUSTED Canada-OECD
0,CDID,A2F9,A2FA,A2FC,A3WV,A3WW,A3WX,A48O,A48P,A48Q,...,ZXDQ,ZXDR,ZXDS,ZXDT,ZXDU,ZXDV,ZXDW,ZXDX,ZXDY,ZXDZ
1,PreUnit,,,£,,,£,,,,...,,,,,,,,,,
2,Unit,,,,,,,,,,...,,,,,,,,,,
3,Release Date,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,...,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024,12-11-2024
4,Next release,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,...,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024,17 December 2024
5,Important Notes,,,,,,,,,,...,,,,,,,,,,
6,1891,,,,,,,,,,...,,,,,,,,,,
7,1892,,,,,,,,,,...,,,,,,,,,,
8,1893,,,,,,,,,,...,,,,,,,,,,
9,1894,,,,,,,,,,...,,,,,,,,,,


### I would like to first cut the data down to years greater than 2017. Before doing so, I drop the first 6 rows (CDID, PreUnit, Unit, Release Date, Next release and Important Notes), which hold metadata rather than observations.

In [5]:
# Rows 0-5 (CDID, PreUnit, Unit, Release Date, Next release, Important Notes)
# are metadata; observations start at row 6.
# Take an explicit copy: a 'year' column is assigned to this frame in a later
# cell, and writing to a bare slice triggers pandas' SettingWithCopyWarning.
labour_market_statistics_df = labour_market_statistics_df.iloc[6:].copy()

### I then define a function which takes a date label and returns its year, and apply it to the dataframe. Finally I filter the dataframe by year. I also use this step to get a list of the date periods which remain. I would like to restructure the data with the statistic as the row and the date periods as the columns.

In [6]:
def return_year(date):
    """Return the year encoded in the first four characters of a date label.

    Args:
        date: A period label such as '2018', '2018 Q1' or '2018 JAN'.

    Returns:
        The leading four characters converted to an int.

    Raises:
        ValueError: If the leading four characters are not a valid integer.
    """
    return int(date[:4])

In [7]:
# Derive a numeric helper column from the period label held in 'Title'.
labour_market_statistics_df['year'] = labour_market_statistics_df['Title'].map(return_year)

In [8]:
# Keep only the periods from 2018 onwards.
is_after_2017 = labour_market_statistics_df['year'].gt(2017)
labour_market_statistics_df = labour_market_statistics_df.loc[is_after_2017]

In [9]:
# Distinct period labels in order of first appearance; these become the
# columns of the reshaped tables built below.
list_dates = labour_market_statistics_df['Title'].drop_duplicates().tolist()

In [10]:
# labour_market_statistics_df

### Obtaining a list of all the metrics available
#### This allows me to pull them out into categories. As you can see below, there are 1832 columns (including `Title` and the helper `year` column), which is quite a challenge to comprehend. I used this guide to the labour market statistics, https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/methodologies/aguidetolabourmarketstatistics, to get a feeling for the statistics available and to add them to lists of groups. This will help structure my analysis and report.

In [11]:
# Every column name of the filtered frame. Note this also contains 'Title'
# and the helper 'year' column, which are not metrics themselves.
list_of_metrics = list(labour_market_statistics_df.columns)

##### Print all metrics for review (long output):

In [49]:
for item in list_of_metrics:
    print(item)

Title
AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Regular Pay
AWE: Whole Economy Real Terms Year on Year three Month Growth (%): Seasonally Adjusted Regular Pay
AWE: Whole Economy Real Terms Level (£): Seasonally Adjusted Regular Pay
AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Total Pay
AWE: Whole Economy Real Terms Year on Year Three Month Growth (%): Seasonally Adjusted Total Pay
AWE: Whole Economy Real Terms Level (£): Seasonally Adjusted Total Pay
Employment rate Canada (OECD) seasonally adjusted
Employment Rate Japan (OECD) seasonally adjusted
Employment Rate United States (OECD) seasonally adjusted
Standardised ILO unemployment rates, seasonally adjusted, Romania - Eurostat
Standardised ILO unemployment rates, seasonally adjusted, Bulgaria - Eurostat
Standardised ILO Unemployment rates - Total EU
International Comparison Employment rates - Romania
International Comparison employment rates - Bu

In [13]:
# pd.DataFrame(list_of_metrics)

In [14]:
print(len(list_of_metrics))

1832


### 1. Earnings Statistics. 

#### The metrics relating to earnings are under the AWE (Average Weekly Earnings) heading. Each metric name in the database is made up of parts separated by a colon, which can be used to extract rich data for different demographic groups.

In [15]:
# Average Weekly Earnings series are the columns whose name starts with 'AWE'.
average_weekly_earnings_metrics = [
    metric for metric in list_of_metrics if metric.startswith('AWE')
]

In [16]:
len(average_weekly_earnings_metrics)

87

In [17]:
for item in average_weekly_earnings_metrics:
    print(item)

AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Regular Pay
AWE: Whole Economy Real Terms Year on Year three Month Growth (%): Seasonally Adjusted Regular Pay
AWE: Whole Economy Real Terms Level (£): Seasonally Adjusted Regular Pay
AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Total Pay
AWE: Whole Economy Real Terms Year on Year Three Month Growth (%): Seasonally Adjusted Total Pay
AWE: Whole Economy Real Terms Level (£): Seasonally Adjusted Total Pay
AWE: Services Level (£): Seasonally Adjusted Total Pay Excluding Arrears
AWE: Services Year on Year Single Month Growth (%): Seasonally Adjusted Total Pay Excluding Arrears
AWE: Services Year on Year Three Month Average Growth (%): Seasonally Adjusted Total Pay Excluding Arrears
AWE: Finance and Business Services Level (£): Seasonally Adjusted Total Pay Excluding Arrears
AWE: Finance and Business Services Year on Year Single Month Growth (%): Seasonally Adju

In [18]:
# Identifying columns first, followed by one (initially empty) column per period.
metrics_list = ['whole_metric', 'sector', 'category']
metric_list = metrics_list + list_dates

# Collect one record per AWE metric and build the frame once; concatenating
# inside the loop re-copies the growing frame on every iteration.
awe_rows = []
for metric in average_weekly_earnings_metrics:
    # Names have the form "AWE: <sector and measure>: <pay category>".
    parts = metric.split(': ')
    if len(parts) < 3:
        raise ValueError(
            f"Unexpected AWE metric name (expected 'AWE: sector: category'): {metric!r}"
        )

    row_data = {
        'whole_metric': metric,
        'sector': parts[1],
        'category': parts[2],
    }
    # Placeholders for every period; the values are filled in by a later cell.
    row_data.update(dict.fromkeys(list_dates))
    awe_rows.append(row_data)

average_weekly_earnings_df = pd.DataFrame(awe_rows, columns=metric_list)

In [19]:
average_weekly_earnings_df

Unnamed: 0,whole_metric,sector,category,2018,2019,2020,2021,2022,2023,2024,...,2023 DEC,2024 JAN,2024 FEB,2024 MAR,2024 APR,2024 MAY,2024 JUN,2024 JUL,2024 AUG,2024 SEP
0,AWE: Whole Economy Real Terms Year on Year Sin...,Whole Economy Real Terms Year on Year Single M...,Seasonally Adjusted Regular Pay,,,,,,,,...,,,,,,,,,,
1,AWE: Whole Economy Real Terms Year on Year thr...,Whole Economy Real Terms Year on Year three Mo...,Seasonally Adjusted Regular Pay,,,,,,,,...,,,,,,,,,,
2,AWE: Whole Economy Real Terms Level (£): Seaso...,Whole Economy Real Terms Level (£),Seasonally Adjusted Regular Pay,,,,,,,,...,,,,,,,,,,
3,AWE: Whole Economy Real Terms Year on Year Sin...,Whole Economy Real Terms Year on Year Single M...,Seasonally Adjusted Total Pay,,,,,,,,...,,,,,,,,,,
4,AWE: Whole Economy Real Terms Year on Year Thr...,Whole Economy Real Terms Year on Year Three Mo...,Seasonally Adjusted Total Pay,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,AWE: Public Sector Year on Year Single Month G...,Public Sector Year on Year Single Month Growth...,Seasonally Adjusted Regular Pay Excluding Arrears,,,,,,,,...,,,,,,,,,,
83,AWE: Public Sector Year on Year Three Month Av...,Public Sector Year on Year Three Month Average...,Seasonally Adjusted Regular Pay Excluding Arrears,,,,,,,,...,,,,,,,,,,
84,AWE: Public Sector Excluding Financial Service...,Public Sector Excluding Financial Services Lev...,Seasonally Adjusted Regular Pay Excluding Arrears,,,,,,,,...,,,,,,,,,,
85,AWE: Public Sector Excluding Financial Service...,Public Sector Excluding Financial Services Yea...,Seasonally Adjusted Regular Pay Exc Arrears,,,,,,,,...,,,,,,,,,,


In [20]:
# Index rows by the full metric name so values can be written by label.
average_weekly_earnings_df = average_weekly_earnings_df.set_index('whole_metric')

In [21]:
# Example AWE metric name.
# NOTE(review): the `for metric in ...` loop in the next cell rebinds this
# name, so this value appears to be unused afterwards - confirm before removing.
metric = 'AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Regular Pay'

In [22]:
# Transpose the source data into the AWE table: for every metric, write the
# value observed in each period into the cell (metric row, period column).
for metric in average_weekly_earnings_metrics:
    # Select only the two columns needed; copying the whole wide frame once
    # per metric is unnecessary work.
    filter_lms_dataframe = labour_market_statistics_df[['Title', metric]]
    periods = filter_lms_dataframe['Title']
    values = filter_lms_dataframe[metric]
    for date_period, value in zip(periods, values):
        average_weekly_earnings_df.at[metric, f'{date_period}'] = value

In [23]:
average_weekly_earnings_df

Unnamed: 0_level_0,sector,category,2018,2019,2020,2021,2022,2023,2024,2018 Q1,...,2023 DEC,2024 JAN,2024 FEB,2024 MAR,2024 APR,2024 MAY,2024 JUN,2024 JUL,2024 AUG,2024 SEP
whole_metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Regular Pay,Whole Economy Real Terms Year on Year Single M...,Seasonally Adjusted Regular Pay,,,,,,,,,...,1.9,1.7,2.0,2.5,2.7,2.4,2.3,1.9,1.5,2.2
AWE: Whole Economy Real Terms Year on Year three Month Growth (%): Seasonally Adjusted Regular Pay,Whole Economy Real Terms Year on Year three Mo...,Seasonally Adjusted Regular Pay,,,,,,,,,...,1.8,1.9,1.9,2.1,2.4,2.5,2.4,2.2,1.9,1.9
AWE: Whole Economy Real Terms Level (£): Seasonally Adjusted Regular Pay,Whole Economy Real Terms Level (£),Seasonally Adjusted Regular Pay,462,470,475,487,476,477,,459,...,481,481,482,484,486,487,488,488,488,490
AWE: Whole Economy Real Terms Year on Year Single Month Growth (%): Seasonally Adjusted Total Pay,Whole Economy Real Terms Year on Year Single M...,Seasonally Adjusted Total Pay,,,,,,,,,...,1.6,1.4,2.1,2.2,2.4,2.2,0.3,1.0,1.5,1.6
AWE: Whole Economy Real Terms Year on Year Three Month Growth (%): Seasonally Adjusted Total Pay,Whole Economy Real Terms Year on Year Three Mo...,Seasonally Adjusted Total Pay,,,,,,,,,...,1.4,1.5,1.7,1.9,2.2,2.3,1.6,1.2,0.9,1.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AWE: Public Sector Year on Year Single Month Growth (%): Seasonally Adjusted Regular Pay Excluding Arrears,Public Sector Year on Year Single Month Growth...,Seasonally Adjusted Regular Pay Excluding Arrears,,,,,,,,,...,6.0,6.1,6.2,6.8,6.0,6.4,5.6,5.2,4.8,4.2
AWE: Public Sector Year on Year Three Month Average Growth (%): Seasonally Adjusted Regular Pay Excluding Arrears,Public Sector Year on Year Three Month Average...,Seasonally Adjusted Regular Pay Excluding Arrears,,,,,,,,,...,5.9,6.0,6.1,6.4,6.3,6.4,6.0,5.7,5.2,4.7
AWE: Public Sector Excluding Financial Services Level (£): Seasonally Adjusted Regular Pay Excluding Arrears,Public Sector Excluding Financial Services Lev...,Seasonally Adjusted Regular Pay Excluding Arrears,520,537,558,577,592,629,,514,...,647,653,653,659,657,662,662,663,663,664
AWE: Public Sector Excluding Financial Services Year on Year Single Month Growth (%): Seasonally Adjusted Regular Pay Exc Arrears,Public Sector Excluding Financial Services Yea...,Seasonally Adjusted Regular Pay Exc Arrears,,,,,,,,,...,6.0,6.0,6.2,6.8,6.0,6.5,5.6,5.0,4.7,3.9


In [24]:
# Keep a period column only if at least half of the metrics have a value in it.
threshold = len(average_weekly_earnings_df) / 2

average_weekly_earnings_df = average_weekly_earnings_df.dropna(
    axis='columns',
    thresh=threshold,
)

In [25]:
def remove_seasonally_adjusted(string):
    """Return the text following the last 'Seasonally Adjusted ' marker.

    Args:
        string: A category label, e.g. 'Seasonally Adjusted Regular Pay'.

    Returns:
        The part of the label after the marker, or the label unchanged when
        the marker does not occur.
    """
    _before, _marker, remainder = string.rpartition("Seasonally Adjusted ")
    return remainder

In [26]:
# Strip the repeated 'Seasonally Adjusted ' prefix from every category label.
average_weekly_earnings_df['category'] = average_weekly_earnings_df['category'].map(remove_seasonally_adjusted)

In [27]:
def unit(string):
    """Infer the unit of a metric from a trailing '(%)' or '(£)' style suffix.

    Only the second-to-last character is inspected, so labels ending in
    '%)' or '£)' are recognised.

    Args:
        string: A sector label, e.g. 'Whole Economy Real Terms Level (£)'.

    Returns:
        'percentage %' or 'pounds £' when the corresponding symbol is found,
        otherwise None (including for labels shorter than two characters).
    """
    # Slicing rather than indexing so short or empty labels yield '' instead
    # of raising IndexError.
    symbol = string[-2:-1]
    if symbol == '%':
        return "percentage %"
    if symbol == '£':
        return "pounds £"
    return None

In [28]:
# The unit symbol is embedded at the end of the sector label, e.g. '... Level (£)'.
average_weekly_earnings_df['unit'] = average_weekly_earnings_df['sector'].map(unit)

In [29]:
# Move the most recently added column ('unit') so that it sits directly after
# 'sector' and 'category', ahead of the period columns.
columns = list(average_weekly_earnings_df.columns)
last_column = columns.pop()
columns.insert(2, last_column)
average_weekly_earnings_df = average_weekly_earnings_df[columns]

In [30]:
# Discard the 'whole_metric' index (the full metric name) and renumber the rows;
# the sector / category / unit columns remain to identify each series.
df_reset = average_weekly_earnings_df.reset_index(drop=True)

In [31]:
df_reset

Unnamed: 0,sector,category,unit,2018 JAN,2018 FEB,2018 MAR,2018 APR,2018 MAY,2018 JUN,2018 JUL,...,2023 DEC,2024 JAN,2024 FEB,2024 MAR,2024 APR,2024 MAY,2024 JUN,2024 JUL,2024 AUG,2024 SEP
0,Whole Economy Real Terms Year on Year Single M...,Regular Pay,percentage %,0.0,0.4,0.7,0.4,0.3,0.4,0.8,...,1.9,1.7,2.0,2.5,2.7,2.4,2.3,1.9,1.5,2.2
1,Whole Economy Real Terms Year on Year three Mo...,Regular Pay,percentage %,-0.2,0.1,0.3,0.5,0.5,0.4,0.5,...,1.8,1.9,1.9,2.1,2.4,2.5,2.4,2.2,1.9,1.9
2,Whole Economy Real Terms Level (£),Regular Pay,pounds £,458,459,461,460,460,461,462,...,481,481,482,484,486,487,488,488,488,490
3,Whole Economy Real Terms Year on Year Single M...,Total Pay,percentage %,-0.2,0.0,0.4,0.6,0.4,-0.1,1.0,...,1.6,1.4,2.1,2.2,2.4,2.2,0.3,1.0,1.5,1.6
4,Whole Economy Real Terms Year on Year Three Mo...,Total Pay,percentage %,-0.2,0.0,0.1,0.3,0.4,0.3,0.4,...,1.4,1.5,1.7,1.9,2.2,2.3,1.6,1.2,0.9,1.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,Public Sector Year on Year Single Month Growth...,Regular Pay Excluding Arrears,percentage %,2.3,2.5,2.5,2.4,1.8,2.5,2.9,...,6.0,6.1,6.2,6.8,6.0,6.4,5.6,5.2,4.8,4.2
83,Public Sector Year on Year Three Month Average...,Regular Pay Excluding Arrears,percentage %,2.0,2.3,2.4,2.5,2.2,2.2,2.4,...,5.9,6.0,6.1,6.4,6.3,6.4,6.0,5.7,5.2,4.7
84,Public Sector Excluding Financial Services Lev...,Regular Pay Excluding Arrears,pounds £,513,515,515,515,517,519,522,...,647,653,653,659,657,662,662,663,663,664
85,Public Sector Excluding Financial Services Yea...,Regular Pay Exc Arrears,percentage %,2.3,2.5,2.5,2.4,1.9,2.6,2.9,...,6.0,6.0,6.2,6.8,6.0,6.5,5.6,5.0,4.7,3.9


In [32]:
# Write the reshaped AWE table without the row index.
# NOTE(review): assumes the output directory already exists - confirm.
df_reset.to_csv('2_processed_databases/labour_market_statistics_data/average_weekly_earnings.csv', index=False)

### 2. Unemployment Statistics

#### From my reading of the documentation I understand that the metrics relating to unemployment are under the Labour Force Survey (LFS), and the particular ones of interest are those whose names mention both "ILO" and "unemployment", excluding the "LFS experimental" series.

In [33]:
# LFS series whose name mentions both 'ILO' and 'unemployment'
# (case-insensitive), leaving out the 'LFS experimental' ones.
unemployment_metrics = []

for metric in list_of_metrics:
    lowered = metric.lower()
    is_ilo_unemployment = 'unemployment' in lowered and 'ilo' in lowered
    if metric.startswith('LFS') and is_ilo_unemployment and 'lfs experimental' not in lowered:
        unemployment_metrics.append(metric)

### I have a look at all the parts of each metric name to see which columns I should make for my dataframe:

In [34]:
unemployment_metrics

['LFS: ILO Unemployment rate annual change: UK: All: Aged 16 and over (pp): SA',
 'LFS: ILO Unemployment Level Annual Change: UK: All: Aged 16 and over (thousands): SA',
 'LFS: ILO Unemployment quarterly change: UK: All: Aged 16 and over (thousands): SA',
 'LFS: ILO Unemployment rate quarterly change: UK: All: Aged 16 and over (pp): SA',
 'LFS: ILO Unemployment: UK: All: Aged 65+: Thousands: SA',
 'LFS: ILO Unemployment: Females: Aged 65+: Thousands: SA',
 'LFS: ILO unemployment rate: UK: All: Aged 65+: %: SA',
 'LFS: ILO unemployment rate: UK: Female: Aged 65+: %: SA',
 'LFS: ILO unemployment rate: UK: All: Aged 50-64: %: SA',
 'LFS: ILO unemployment rate: UK: Female: Aged 50-64: %: SA',
 'LFS: ILO Unemployment rate: UK: All: Aged 16-64: %: SA',
 'LFS: ILO Unemployment rate: UK: Female: Aged 16-64: %: SA',
 'LFS: ILO unemployment rate: UK: All: Aged 16 and over: %: NSA',
 'LFS: ILO unemployment rate: UK: Male: Aged 16 and over: %: NSA',
 'LFS: ILO unemployment rate: UK: Female: Aged 1

In [35]:
# Every distinct ': '-separated part that occurs in any unemployment metric name.
set_words = {
    word
    for metric in unemployment_metrics
    for word in metric.split(": ")
}
set_words

{'%',
 'Aged 16 and over',
 'Aged 16 and over (pp)',
 'Aged 16 and over (thousands)',
 'Aged 16-24',
 'Aged 16-64',
 'Aged 25-34',
 'Aged 35-49',
 'Aged 50-64',
 'Aged 65+',
 'All',
 'East',
 'East Midlands',
 'England',
 'Female',
 'Females',
 'Great Britain',
 'ILO Unemployment',
 'ILO Unemployment Level Annual Change',
 'ILO Unemployment quarterly change',
 'ILO Unemployment rate',
 'ILO Unemployment rate annual change',
 'ILO Unemployment rate quarterly change',
 'ILO unemployment rate',
 'LFS',
 'London',
 'Male',
 'NSA',
 'North East',
 'North West (GOR)',
 'Northern Ireland',
 'SA',
 'Scotland',
 'South East (GOR)',
 'South West',
 'Thousands',
 'UK',
 'Wales',
 'West Midlands',
 'Yorks & the Humber'}

### From the above I grouped the metric name parts into categories. This will be important to allow us to understand the data and plot visualisations.

In [36]:
# Vocabulary of name parts, grouped by the field they describe. The values are
# copied verbatim from the metric names, so spelling variants that occur in the
# data ('Female' / 'Females', 'ILO Unemployment rate' / 'ILO unemployment rate')
# are all listed. Key order and list order are significant to later cells.
metrics_list_dict = {
    'sector': ['LFS'],
    'category': [
        'ILO Unemployment',
        'ILO Unemployment Level Annual Change',
        'ILO Unemployment quarterly change',
        'ILO Unemployment rate',
        'ILO Unemployment rate annual change',
        'ILO Unemployment rate quarterly change',
        'ILO unemployment rate',
    ],
    'region': [
        'East',
        'East Midlands',
        'England',
        'Great Britain',
        'Scotland',
        'South East (GOR)',
        'South West',
        'London',
        'UK',
        'Wales',
        'West Midlands',
        'Yorks & the Humber',
        'North East',
        'North West (GOR)',
        'Northern Ireland',
    ],
    'age': [
        'Aged 16 and over',
        'Aged 16 and over (pp)',
        'Aged 16 and over (thousands)',
        'Aged 16-24',
        'Aged 16-64',
        'Aged 25-34',
        'Aged 35-49',
        'Aged 50-64',
        'Aged 65+',
    ],
    'gender': ['Male', 'All', 'Female', 'Females'],
    'unit': ['Thousands', '%'],
    'seasonal_adjustment': ['SA', 'NSA'],
}

# Individual names for each group; each refers to the same list object that is
# stored in the dictionary above.
sectors = metrics_list_dict['sector']
categories = metrics_list_dict['category']
regions = metrics_list_dict['region']
ages = metrics_list_dict['age']
genders = metrics_list_dict['gender']
units = metrics_list_dict['unit']
seasonal_adjustments = metrics_list_dict['seasonal_adjustment']

In [37]:
unemployment_metrics

['LFS: ILO Unemployment rate annual change: UK: All: Aged 16 and over (pp): SA',
 'LFS: ILO Unemployment Level Annual Change: UK: All: Aged 16 and over (thousands): SA',
 'LFS: ILO Unemployment quarterly change: UK: All: Aged 16 and over (thousands): SA',
 'LFS: ILO Unemployment rate quarterly change: UK: All: Aged 16 and over (pp): SA',
 'LFS: ILO Unemployment: UK: All: Aged 65+: Thousands: SA',
 'LFS: ILO Unemployment: Females: Aged 65+: Thousands: SA',
 'LFS: ILO unemployment rate: UK: All: Aged 65+: %: SA',
 'LFS: ILO unemployment rate: UK: Female: Aged 65+: %: SA',
 'LFS: ILO unemployment rate: UK: All: Aged 50-64: %: SA',
 'LFS: ILO unemployment rate: UK: Female: Aged 50-64: %: SA',
 'LFS: ILO Unemployment rate: UK: All: Aged 16-64: %: SA',
 'LFS: ILO Unemployment rate: UK: Female: Aged 16-64: %: SA',
 'LFS: ILO unemployment rate: UK: All: Aged 16 and over: %: NSA',
 'LFS: ILO unemployment rate: UK: Male: Aged 16 and over: %: NSA',
 'LFS: ILO unemployment rate: UK: Female: Aged 1

In [38]:
metrics_list_dict

{'sector': ['LFS'],
 'category': ['ILO Unemployment',
  'ILO Unemployment Level Annual Change',
  'ILO Unemployment quarterly change',
  'ILO Unemployment rate',
  'ILO Unemployment rate annual change',
  'ILO Unemployment rate quarterly change',
  'ILO unemployment rate'],
 'region': ['East',
  'East Midlands',
  'England',
  'Great Britain',
  'Scotland',
  'South East (GOR)',
  'South West',
  'London',
  'UK',
  'Wales',
  'West Midlands',
  'Yorks & the Humber',
  'North East',
  'North West (GOR)',
  'Northern Ireland'],
 'age': ['Aged 16 and over',
  'Aged 16 and over (pp)',
  'Aged 16 and over (thousands)',
  'Aged 16-24',
  'Aged 16-64',
  'Aged 25-34',
  'Aged 35-49',
  'Aged 50-64',
  'Aged 65+'],
 'gender': ['Male', 'All', 'Female', 'Females'],
 'unit': ['Thousands', '%'],
 'seasonal_adjustment': ['SA', 'NSA']}

In [39]:
# Identifying columns of the unemployment table: the full metric name followed
# by the fields parsed out of it.
metrics_list = ['whole_metric', 'sector', 'category', 'age', 'gender', 'region', 'seasonal_adjustment', 'unit']

In [40]:
# Full column list: identifying columns followed by one column per period.
metric_list = [*metrics_list, *list_dates]

In [41]:
# Parse every unemployment metric name into its fields
# (sector, category, region, age, gender, unit, seasonal_adjustment).
list_of_unemployment_metric_dictionaries = []

for unemployment_metric in unemployment_metrics:
    individual_metric_dict = {'whole_metric': unemployment_metric}
    name_parts = unemployment_metric.split(': ')

    # A field is set when one of its known values equals a whole name part;
    # if several values match, the one listed last wins.
    # NOTE(review): this loop rebinds the notebook-level names `metric` and
    # `metric_list`. The cell that builds the unemployment dataframe reads
    # `metric_list` afterwards, and a later cell drops the resulting 'SA' /
    # 'NSA' columns, so the names are deliberately left unchanged here.
    for metric, metric_list in metrics_list_dict.items():
        for m in metric_list:
            if m in name_parts:
                individual_metric_dict[metric] = m

    # Change series carry the unit inside the age part, e.g. 'Aged 16 and over (pp)'.
    if '(pp)' in unemployment_metric:
        individual_metric_dict['unit'] = '%'
    elif '(thousands)' in unemployment_metric:
        individual_metric_dict['unit'] = 'Thousands'

    # Make sure every field is present, with None where the name has no
    # matching part. (The previous check iterated the keys already in the
    # dictionary, so its body could never run.)
    for field in metrics_list_dict:
        individual_metric_dict.setdefault(field, None)

    list_of_unemployment_metric_dictionaries.append(individual_metric_dict)

In [42]:
# Look the parsed fields up by metric name. A missing name now raises KeyError
# instead of silently reusing the previous row.
metric_dicts_by_name = {
    metric_dict['whole_metric']: metric_dict
    for metric_dict in list_of_unemployment_metric_dictionaries
}

# One record per metric: its parsed fields plus an empty placeholder for every
# period. The parsed dictionaries are copied so they are not modified here.
unemployment_rows = []
for unemployment_metric in unemployment_metrics:
    row_data = dict(metric_dicts_by_name[unemployment_metric])
    row_data.update(dict.fromkeys(list_dates))
    unemployment_rows.append(row_data)

# Build the frame once rather than concatenating row by row. Columns named in
# `metric_list` come first, followed by any record keys not listed there - the
# same column union that concatenating onto an empty frame with those columns gives.
# NOTE(review): the field-parsing cell uses `metric_list` as a loop variable,
# so whatever it holds at this point still becomes the leading columns.
rows_frame = pd.DataFrame(unemployment_rows)
ordered_columns = list(dict.fromkeys([*metric_list, *rows_frame.columns]))
ilo_unemployment_statistics_dataframe = rows_frame.reindex(columns=ordered_columns)


In [43]:
ilo_unemployment_statistics_dataframe

Unnamed: 0,SA,NSA,whole_metric,sector,category,region,age,gender,seasonal_adjustment,unit,...,2023 DEC,2024 JAN,2024 FEB,2024 MAR,2024 APR,2024 MAY,2024 JUN,2024 JUL,2024 AUG,2024 SEP
0,,,LFS: ILO Unemployment rate annual change: UK: ...,LFS,ILO Unemployment rate annual change,UK,Aged 16 and over (pp),All,SA,%,...,,,,,,,,,,
1,,,LFS: ILO Unemployment Level Annual Change: UK:...,LFS,ILO Unemployment Level Annual Change,UK,Aged 16 and over (thousands),All,SA,Thousands,...,,,,,,,,,,
2,,,LFS: ILO Unemployment quarterly change: UK: Al...,LFS,ILO Unemployment quarterly change,UK,Aged 16 and over (thousands),All,SA,Thousands,...,,,,,,,,,,
3,,,LFS: ILO Unemployment rate quarterly change: U...,LFS,ILO Unemployment rate quarterly change,UK,Aged 16 and over (pp),All,SA,%,...,,,,,,,,,,
4,,,LFS: ILO Unemployment: UK: All: Aged 65+: Thou...,LFS,ILO Unemployment,UK,Aged 65+,All,SA,Thousands,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,,,LFS: ILO unemployment rate: Aged 16-24: UK: Ma...,LFS,ILO unemployment rate,UK,Aged 16-24,Male,NSA,%,...,,,,,,,,,,
64,,,LFS: ILO unemployment rate: Aged 16-24: UK: Fe...,LFS,ILO unemployment rate,UK,Aged 16-24,Female,NSA,%,...,,,,,,,,,,
65,,,LFS: ILO unemployment rate: Northern Ireland: ...,LFS,ILO unemployment rate,Northern Ireland,,All,SA,%,...,,,,,,,,,,
66,,,LFS: ILO unemployment rate: Northern Ireland: ...,LFS,ILO unemployment rate,Northern Ireland,,Male,SA,%,...,,,,,,,,,,


In [44]:
# Index rows by the full metric name so values can be written by label.
ilo_unemployment_statistics_dataframe = ilo_unemployment_statistics_dataframe.set_index('whole_metric')

In [45]:
# Transpose the source data into the unemployment table: for every metric,
# write the value observed in each period into the cell (metric row, period column).
for metric in unemployment_metrics:
    # Select only the two columns needed; copying the whole wide frame once
    # per metric is unnecessary work.
    filter_lms_dataframe = labour_market_statistics_df[['Title', metric]]
    periods = filter_lms_dataframe['Title']
    values = filter_lms_dataframe[metric]
    for date_period, value in zip(periods, values):
        ilo_unemployment_statistics_dataframe.at[metric, f'{date_period}'] = value

In [46]:
# Turn the 'whole_metric' index back into an ordinary column and renumber the rows.
ilo_unemployment_statistics_dataframe = ilo_unemployment_statistics_dataframe.reset_index()

In [47]:
# Remove the helper columns before export.
# 'whole_metric' is redundant once its parts have their own columns.
# 'SA' and 'NSA' are empty columns that show up in the displayed frame but are
# not fields of the table; errors='ignore' keeps this cell working whether or
# not they are present.
ilo_unemployment_statistics_dataframe = (
    ilo_unemployment_statistics_dataframe
    .drop(columns='whole_metric')
    .drop(columns=['SA', 'NSA'], errors='ignore')
)

In [48]:
# Write the reshaped unemployment table without the row index.
# NOTE(review): assumes the output directory already exists - confirm.
ilo_unemployment_statistics_dataframe.to_csv('2_processed_databases/labour_market_statistics_data/ilo_unemployment_statistics.csv', index=False)