
# UK Unemployment Analysis

This project cleans and prepares UK unemployment data for analysis, then visualizes how the rate changes over time at monthly, quarterly, and annual frequency.  
The goal is to uncover patterns such as the lowest (best) and highest (worst) unemployment rates, long-term trends, and average unemployment by decade.


In [None]:

# Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
# Silence every warning for the rest of the session so the notebook output
# stays uncluttered. NOTE: this also hides deprecation notices from libraries.
warnings.filterwarnings(action="ignore")

# Visual confirmation that the import cell ran to completion.
print("✅ Libraries loaded")


In [None]:

# Load data
# Every column is read as text (dtype=str), so nothing is type-inferred at
# load time; numeric conversion is done explicitly in a later cell.
filePath = "data/series-160925.csv"
raw = pd.read_csv(filepath_or_buffer=filePath, dtype=str)

# Preview the first 12 rows of the raw file.
raw.head(n=12)


In [None]:

# Clean the data
# Skip the first seven rows of the raw frame (presumably file metadata rather
# than observations -- confirm against the preview of `raw`).
data = raw.iloc[7:]

# The second column holds the values; give both columns analysis-friendly names.
column_value = data.columns[1]
data = data.rename(columns={"Title": "Period", column_value: "Unemployment Rate (%)"})

# Clean unemployment values
# Work on the text form first: trim whitespace, then drop every character that
# is not a digit, a dot or a minus sign.
rate_text = data["Unemployment Rate (%)"].astype(str).str.strip()
rate_text = rate_text.str.replace(r"[^\d\.\-]", "", regex=True)

# Anything that still is not a number becomes NaN instead of raising.
data["Unemployment Rate (%)"] = pd.to_numeric(rate_text, errors="coerce")

data.head()


In [None]:

# Masks
# Each "Period" label is classified by its exact shape:
#   annual    -> "YYYY"
#   quarterly -> "YYYY Qn"   (n in 1..4)
#   monthly   -> "YYYY MON"  (upper-case three-letter month)
# na=False makes a missing Period count as "no match". Without it the mask
# would contain NaN for such rows and boolean indexing with .loc would raise.
period_labels = data["Period"]
annual_mask    = period_labels.str.fullmatch(r"\d{4}", na=False)
quarterly_mask = period_labels.str.fullmatch(r"\d{4}\sQ[1-4]", na=False)
monthly_mask   = period_labels.str.fullmatch(r"\d{4}\s(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)", na=False)

# Subsets
# Independent copies, so columns added to one frequency later on do not touch
# `data` or the other subsets.
value_columns = ["Period", "Unemployment Rate (%)"]
annual    = data.loc[annual_mask,    value_columns].copy()
quarterly = data.loc[quarterly_mask, value_columns].copy()
monthly   = data.loc[monthly_mask,   value_columns].copy()


In [None]:

# Monthly conversion
# English three-letter month abbreviations mapped to month numbers. A fixed
# table keeps parsing independent of the process locale (strptime's %b is
# locale-dependent).
_MONTH_NUMBERS = {
    "JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6,
    "JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12,
}


def parse_month(period: str):
    """Convert a monthly period label such as "1971 FEB" to a timestamp.

    Args:
        period: Label made of a four-digit year and a three-letter English
            month abbreviation separated by whitespace. The month is matched
            case-insensitively.

    Returns:
        The first day of that month as a ``pandas.Timestamp``, or
        ``pandas.NaT`` when the label cannot be interpreted (wrong number of
        tokens, unknown month, unparseable year, or a non-string value).
    """
    try:
        year, mon = period.split()
        month_number = _MONTH_NUMBERS[mon.upper()]
    except (AttributeError, KeyError, ValueError):
        # Malformed label: coerce to NaT, matching errors="coerce" below.
        return pd.NaT
    return pd.to_datetime(f"{year}-{month_number:02d}-01", format="%Y-%m-%d", errors="coerce")

# Attach a real datetime to every monthly row, then put the rows in
# chronological order with a fresh 0..n-1 index.
monthly = (
    monthly.assign(Date=monthly["Period"].apply(parse_month))
    .sort_values(by="Date")
    .reset_index(drop=True)
)

# Quarterly conversion
# Month-day of the last calendar day of each quarter. Built once at import
# time instead of on every call.
_QUARTER_END = {"Q1": "03-31", "Q2": "06-30", "Q3": "09-30", "Q4": "12-31"}


def parse_quarter(period: str):
    """Convert a quarterly period label such as "1971 Q1" to a timestamp.

    Args:
        period: Label made of a four-digit year and a quarter code
            ("Q1".."Q4") separated by whitespace.

    Returns:
        The last day of that quarter as a ``pandas.Timestamp``, or
        ``pandas.NaT`` when the label cannot be interpreted (wrong number of
        tokens, unknown quarter code, unparseable year, or a non-string
        value).
    """
    try:
        year, q = period.split()
        month_day = _QUARTER_END[q]
    except (AttributeError, KeyError, ValueError):
        # Malformed label: coerce to NaT, matching errors="coerce" below.
        return pd.NaT
    return pd.to_datetime(f"{year}-{month_day}", format="%Y-%m-%d", errors="coerce")

# Attach a real datetime to every quarterly row, then put the rows in
# chronological order with a fresh 0..n-1 index.
quarterly = (
    quarterly.assign(Date=quarterly["Period"].apply(parse_quarter))
    .sort_values(by="Date")
    .reset_index(drop=True)
)

# Annual conversion
# Each year is dated at its final day (31 December); labels that do not parse
# become NaT rather than raising.
year_end_labels = annual["Period"] + "-12-31"
annual["Date"] = pd.to_datetime(year_end_labels, format="%Y-%m-%d", errors="coerce")
annual = annual.sort_values(by="Date").reset_index(drop=True)


In [None]:

# "Best" is the month with the lowest rate, "worst" the month with the highest.
# Each print shows the full row (period, rate and date) of that month.
monthly_rates = monthly["Unemployment Rate (%)"]
best_row = monthly.loc[monthly_rates.idxmin()]
worst_row = monthly.loc[monthly_rates.idxmax()]

print("Best unemployment:", best_row)
print("Worst unemployment:", worst_row)


In [None]:

# Monthly trend
# Single line chart of the monthly series on a 12x6 inch figure.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    monthly["Date"],
    monthly["Unemployment Rate (%)"],
    label="Monthly",
    color="blue",
)
ax.set_title("UK Unemployment Rate (Monthly)")
ax.set_xlabel("Date")
ax.set_ylabel("Unemployment Rate (%)")
ax.legend()
plt.show()


In [None]:

# Quarterly trend
# Single line chart of the quarterly series on a 12x6 inch figure.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    quarterly["Date"],
    quarterly["Unemployment Rate (%)"],
    label="Quarterly",
    color="green",
)
ax.set_title("UK Unemployment Rate (Quarterly)")
ax.set_xlabel("Date")
ax.set_ylabel("Unemployment Rate (%)")
ax.legend()
plt.show()


In [None]:

# Annual trend
# Single line chart of the annual series on a 12x6 inch figure.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    annual["Date"],
    annual["Unemployment Rate (%)"],
    label="Annual",
    color="red",
)
ax.set_title("UK Unemployment Rate (Annual)")
ax.set_xlabel("Date")
ax.set_ylabel("Unemployment Rate (%)")
ax.legend()
plt.show()


In [None]:

# Decade averages
# Bucket every month by the decade its year falls in (1971 -> 1970, 2024 -> 2020)
# and average the monthly rates inside each bucket. A decade is averaged over
# whatever months are present, so a partially covered decade uses fewer months.
monthly["Year"] = monthly["Date"].dt.year
by_decade = monthly.groupby((monthly["Year"] // 10) * 10)["Unemployment Rate (%)"].mean()

# The grouping key inherits the name "Year"; rename it so the x-axis of the
# chart is labelled with what the bars actually represent.
by_decade.index.name = "Decade"

ax = by_decade.plot(kind="bar", figsize=(10, 5), title="Average Unemployment by Decade")
ax.set_ylabel("Unemployment Rate (%)")
plt.show()



### Conclusion

- Unemployment was highest in the **1980s (~10%)**.  
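- The lowest decade averages were in the **1970s and 2020s (~4-5%)**; note that a decade is averaged only over the months present in the data, so a partially covered decade such as the 2020s is not directly comparable to a full one.  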
- Long-term trend shows spikes during recessions and gradual declines afterward.  
- Recent decades have seen lower and more stable unemployment rates.
