### This is an even more shortened version of the dataset

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import openpyxl

# -----------------------------------
# Raw Data
# -----------------------------------
# One entry per year for 2018 through 2024 (seven values per series).
# The units of each series are spelled out in the display labels assigned
# in the rename step below.
raw_data = {
    "year": list(range(2018, 2025)),
    "unemp_18_35": [8.0, 6.5, 9.0, 7.5, 6.0, 5.5, 5.8],
    "emp_18_35": [54.0, 56.1, 51.9, 54.0, 55.5, 56.7, 57.5],
    "labor_particip_18_35": [58.7, 60.0, 57.0, 58.4, 59.0, 60.0, 61.0],
    "pop_18_35_thousands": [12300, 12200, 12000, 11800, 11600, 11400, 11200],
    "edu_tertiary_25_34": [65.0, 66.5, 69.0, 69.5, 69.7, 69.7, 69.7],
    "neet_15_29": [19.0, 18.5, 21.0, 20.5, 19.5, 18.0, 17.5],
    "avg_income_krw": [1800000, 1850000, 1750000, 1800000, 1900000, 1950000, 2000000],
    "non_regular_pct": [41.0, 42.5, 44.0, 43.0, 42.0, 41.5, 40.0],
    "underemployment_pct": [6.0, 5.8, 7.5, 6.9, 6.2, 5.7, 5.5],
}

df_raw = pd.DataFrame(raw_data)

# -----------------------------------
# Clean and Rename Columns
# -----------------------------------
# Map the short machine-friendly keys to presentation labels. df_raw itself
# is left untouched; rename() returns a new frame.
df_cleaned = df_raw.rename(columns={
    "year": "Year",
    "unemp_18_35": "Unemployment Rate (18–35) [%]",
    "emp_18_35": "Employment Rate (18–35) [%]",
    "labor_particip_18_35": "Labor Force Participation [%]",
    "pop_18_35_thousands": "Population (18–35) [thousands]",
    "edu_tertiary_25_34": "Tertiary Edu. Attainment (25–34) [%]",
    "neet_15_29": "NEET Rate (15–29) [%]",
    "avg_income_krw": "Youth Avg. Monthly Income (KRW)",
    "non_regular_pct": "Non-Regular Employment Rate (Youth) [%]",
    "underemployment_pct": "Underemployment Rate [%]",
})

# -----------------------------------
# Add Derived Columns
# -----------------------------------
# The source percentages carry one decimal place. Round the derived values to
# the same precision so binary floating-point noise (e.g. 58.7 - 54.0
# evaluating to 4.700000000000003) does not leak into exports or comparisons.

# Difference between labor force participation and the employment rate.
df_cleaned["Employment Gap [%]"] = (
    df_cleaned["Labor Force Participation [%]"] - df_cleaned["Employment Rate (18–35) [%]"]
).round(1)

# Sum of the unemployment and underemployment rates.
df_cleaned["Underutilization Rate [%]"] = (
    df_cleaned["Unemployment Rate (18–35) [%]"] + df_cleaned["Underemployment Rate [%]"]
).round(1)

# Optional: reorder columns for presentation
ordered_columns = [
    "Year",
    "Population (18–35) [thousands]",
    "Unemployment Rate (18–35) [%]",
    "Employment Rate (18–35) [%]",
    "Labor Force Participation [%]",
    "Employment Gap [%]",
    "Underemployment Rate [%]",
    "Underutilization Rate [%]",
    "NEET Rate (15–29) [%]",
    "Youth Avg. Monthly Income (KRW)",
    "Non-Regular Employment Rate (Youth) [%]",
    "Tertiary Edu. Attainment (25–34) [%]",
]

# Take an explicit copy so that later edits to df_final are independent of
# df_cleaned and do not trigger pandas' SettingWithCopyWarning.
df_final = df_cleaned[ordered_columns].copy()

# Bare expression: renders the table when this is the last line of a notebook cell.
df_final

Unnamed: 0,Year,Population (18–35) [thousands],Unemployment Rate (18–35) [%],Employment Rate (18–35) [%],Labor Force Participation [%],Employment Gap [%],Underemployment Rate [%],Underutilization Rate [%],NEET Rate (15–29) [%],Youth Avg. Monthly Income (KRW),Non-Regular Employment Rate (Youth) [%],Tertiary Edu. Attainment (25–34) [%]
0,2018,12300,8.0,54.0,58.7,4.7,6.0,14.0,19.0,1800000,41.0,65.0
1,2019,12200,6.5,56.1,60.0,3.9,5.8,12.3,18.5,1850000,42.5,66.5
2,2020,12000,9.0,51.9,57.0,5.1,7.5,16.5,21.0,1750000,44.0,69.0
3,2021,11800,7.5,54.0,58.4,4.4,6.9,14.4,20.5,1800000,43.0,69.5
4,2022,11600,6.0,55.5,59.0,3.5,6.2,12.2,19.5,1900000,42.0,69.7
5,2023,11400,5.5,56.7,60.0,3.3,5.7,11.2,18.0,1950000,41.5,69.7
6,2024,11200,5.8,57.5,61.0,3.5,5.5,11.3,17.5,2000000,40.0,69.7
