In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import openpyxl
import seaborn as sns


# Location of the source workbook.
# NOTE(review): this is an absolute, machine-specific path — consider making it
# relative to a configurable data directory so the notebook runs elsewhere.
DATA_PATH = "/home/azizbek/Humblebee/Problem solving/Youth_Unemployment_Full_Expanded_SK_2018_2024 (1).xlsx"

df = pd.read_excel(DATA_PATH)

# Forward-fill gaps: every missing cell takes the last non-missing value above
# it in the same column. `DataFrame.ffill()` is the supported spelling; the
# original `fillna(method='ffill', inplace=True)` relies on the deprecated
# `method=` keyword and mutates the frame in place.
df = df.ffill()

# Derived column: Employment Gap (LF Participation - Employment Rate)
df["Employment Gap [%]"] = df["Labor Force Participation [%]"] - df["Employment Rate (18–35) [%]"]

# Derived column: Underutilization Rate (Unemployment + Underemployment)
df["Underutilization Rate [%]"] = df["Unemployment Rate (18–35) [%]"] + df["Underemployment Rate [%]"]

# ----------------------------------
# Summary Statistics
# ----------------------------------

print("Summary of Youth Unemployment Dataset:")
print(df.describe())

# Persist the filled frame, including the two derived columns, for reuse.
df.to_csv("youth_unemployment_transformed.csv", index=False)

### Youth Labor Market Trends in South Korea (2018-2024) (Viz 1)

In [None]:
# Draw the three headline labor-market rates against year on one set of axes.
fig, ax = plt.subplots(figsize=(10, 6))

# Maps each dataset column to the legend label it is plotted under.
trend_series = {
    "Unemployment Rate (18–35) [%]": "Unemployment Rate",
    "Employment Rate (18–35) [%]": "Employment Rate",
    "Labor Force Participation [%]": "Labor Force Participation",
}
for column, legend_label in trend_series.items():
    ax.plot(df["Year"], df[column], marker='o', label=legend_label)

ax.set_title("Youth Labor Market Trends in South Korea (2018–2024)")
ax.set_xlabel("Year")
ax.set_ylabel("Percentage")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig("fig_labor_trends.png")
plt.show()


### Youth NEET Rate (Not in Employment, Education or Training)

In [None]:
# NEET rate per year as a bar chart, saved to disk and then displayed.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(df["Year"], df["NEET Rate (15–29) [%]"], color='salmon')
ax.set_title("Youth NEET Rate (Not in Employment, Education or Training)")
ax.set_xlabel("Year")
ax.set_ylabel("NEET Rate (%)")
# Horizontal grid lines only, so bar heights are easy to read off.
ax.grid(axis='y')
fig.tight_layout()
fig.savefig("fig_neet_rate.png")
plt.show()

### Youth NEET Rate Over Time - Area Chart

In [None]:
# Convert both series to plain float arrays before handing them to matplotlib.
x = df["Year"].astype(float).to_numpy()
y = df["NEET Rate (15–29) [%]"].astype(float).to_numpy()

fig, ax = plt.subplots()
# Shaded area from the x-axis up to the NEET curve, with the curve drawn on top.
ax.fill_between(x, y, color="orange", alpha=0.5)
ax.plot(x, y, color="darkorange", marker="o")
ax.set_title("Youth NEET Rate Over Time - Area Chart")
ax.set_xlabel("Year")
ax.set_ylabel("NEET Rate (%)")
fig.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
# plt.savefig("chart3_neet_area.png")

### Youth Labor Underutilization vs Unemployment

In [None]:
# Underutilization (unemployment + underemployment) against unemployment alone.
fig, ax = plt.subplots(figsize=(10, 5))
year_axis = df["Year"]

ax.plot(year_axis, df["Underutilization Rate [%]"], marker='o', label="Underutilization")
# Dashed line with 'x' markers sets the narrower measure apart visually.
ax.plot(
    year_axis,
    df["Unemployment Rate (18–35) [%]"],
    marker='x',
    linestyle='--',
    label="Unemployment Only",
)

ax.set_title("Youth Labor Underutilization vs Unemployment")
ax.set_xlabel("Year")
ax.set_ylabel("Percentage")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig("fig_underutilization.png")
plt.show()

### Youth Unemployment Rate (18-35) - Bar Chart

In [None]:
fig, ax = plt.subplots()

# One colormap position per row: starts at 0.3 and steps by 0.1 per row,
# so later rows get darker reds.
shade_levels = [0.3 + i*0.1 for i in range(len(df))]
bar_colors = plt.cm.Reds(shade_levels)

ax.bar(df["Year"], df["Unemployment Rate (18–35) [%]"], color=bar_colors)
ax.set_title("Youth Unemployment Rate (18–35) - Bar Chart")
ax.set_xlabel("Year")
ax.set_ylabel("Unemployment Rate (%)")
fig.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
# plt.savefig("chart1_youth_unemployment_rate.png")

### Youth Unemployment Rate (ages 15-24), 2018-2024

In [None]:
# Hand-entered comparison table: one row per year, one column per country.
# Numbers are taken from the summaries of other raw datasets and inserted here.
years = [2018, 2019, 2020, 2021, 2022, 2023, 2024]
df_comp = pd.DataFrame({
    "Year": years,
    "South Korea": [8.0, 6.5, 9.0, 7.5, 6.0, 5.5, 5.8],
    "China": [10.5, 10.2, 12.0, 13.1, 14.2, 15.0, 14.5],
    "Japan": [4.5, 4.3, 5.0, 5.2, 4.8, 4.6, 4.7],
    "United States": [9.0, 8.8, 14.5, 12.0, 10.0, 9.5, 9.0]
})

fig, ax = plt.subplots()
# Every column except "Year" is a country series; column order sets legend order.
for country in df_comp.columns.drop("Year"):
    ax.plot(df_comp["Year"], df_comp[country], marker="o", label=country)
ax.set_title("Youth Unemployment Rate (ages ~15–24), 2018–2024")
ax.set_xlabel("Year")
ax.set_ylabel("Youth Unemployment Rate (%)")
ax.legend()
ax.grid(True)
fig.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
# plt.savefig("chart2_international_comparison.png")

### Share of Youth Not in Employment, Education or Training

In [None]:
# Hand-entered NEET shares (percent) for three countries, one colored bar each.
countries = ["S. Korea", "United States", "Japan"]
neet_vals = [21, 13, 4]
colors = ["orange", "magenta", "green"]

fig, ax = plt.subplots()
ax.bar(countries, neet_vals, color=colors)

# Value label centered half a unit above the top of each bar.
for bar_pos, rate in enumerate(neet_vals):
    ax.text(bar_pos, rate + 0.5, f"{rate}%", ha='center')

ax.set_title("Share of Youth Not in Employment, Education or Training")
ax.set_ylabel("NEET Rate (%)")
fig.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
# plt.savefig("chart4_neet_comparison.png")

### Share of 15-29 Employees in Non-Regular Jobs (Korea, 2021)

In [None]:
# Hand-entered shares (percent) of young employees in non-regular jobs, by gender.
genders = ["Male", "Female"]
non_regular_shares = [39, 45]

fig, ax = plt.subplots()
ax.bar(genders, non_regular_shares, color=["blue", "red"])
ax.set_title("Share of 15–29 Employees in Non-Regular Jobs (Korea, 2021)")
ax.set_ylabel("Employment (% of young employees)")

# The labels are white, so they must sit inside the colored bars. The original
# drew them one unit above each bar top, where white text is invisible against
# the white background. Anchoring the top of the text just below the bar top
# keeps it readable, and deriving the text from the data keeps labels in sync.
for bar_pos, share in enumerate(non_regular_shares):
    ax.text(
        bar_pos,
        share - 1,
        f"{share}%",
        ha='center',
        va='top',
        color="white",
        weight="bold",
    )

fig.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
# plt.savefig("chart5_non_regular_gender.png")

### Population aged 18-35 in South Korea

In [None]:
# Size of the 18–35 population by year, as a single gold line.
fig, ax = plt.subplots()
population = df["Population (18–35) [thousands]"]
ax.plot(df["Year"], population, marker="o", color="gold")
ax.set_title("Population Aged 18–35 in South Korea")
ax.set_xlabel("Year")
ax.set_ylabel("Population (thousands)")
ax.grid(True)
fig.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
# plt.savefig("chart6_population_trend.png")

### Youth Employment by Sector

In [None]:
# Stacked bars per year: services share at the bottom, manufacturing on top.
fig, ax = plt.subplots()
services_share = df["Youth in Services Sector [%]"]
manufacturing_share = df["Youth in Manufacturing Sector [%]"]

ax.bar(df["Year"], services_share, label="Services", color="skyblue")
# `bottom=` starts each manufacturing bar where the services bar ends.
ax.bar(
    df["Year"],
    manufacturing_share,
    bottom=services_share,
    label="Manufacturing",
    color="gray",
)

ax.set_title("Youth Employment by Sector - Stacked Bar Chart")
ax.set_xlabel("Year")
ax.set_ylabel("Share of Employment (%)")
ax.legend()
fig.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
# plt.savefig("chart7_sector_stacked.png")

### Non-Regular Employment Rate 

In [None]:
# Horizontal bars: one bar per year, bar length is the non-regular employment rate.
fig, ax = plt.subplots()

# NOTE(review): newer seaborn releases appear to warn when `palette` is passed
# without `hue` — confirm against the installed seaborn version.
sns.barplot(
    data=df,
    x="Non-Regular Employment Rate (Youth) [%]",
    y="Year",
    orient="h",
    palette="Purples",
    ax=ax,
)

ax.set_title("Non-Regular Employment Rate - Horizontal Bar Chart")
ax.set_xlabel("Rate (%)")
ax.set_ylabel("Year")
fig.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
# plt.savefig("chart8_non_regular_horizontal.png")