# In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# Load data
# Both inputs are read from the current working directory. The file names are
# reproduced exactly as written (including the space before ".csv").
POPULATION_CSV = "total uk population .csv"
MIGRATION_CSV = "emigration imigration total population.csv"

df_pop = pd.read_csv(POPULATION_CSV)
df_mig = pd.read_csv(MIGRATION_CSV)

# Clean and prepare population data
# The columns are renamed positionally, so the file must contain exactly two
# columns (pandas raises ValueError otherwise) and the original header text,
# including any stray whitespace, is irrelevant.
df_pop.columns = ['Year', 'Population']

# Take the first run of four digits from each year label. Casting to str first
# keeps this working when read_csv has already parsed the column as numbers,
# in which case the .str accessor would otherwise raise AttributeError.
df_pop['Year'] = pd.to_numeric(
    df_pop['Year'].astype(str).str.extract(r'(\d{4})', expand=False),
    errors='coerce',
)

# Rows without a recognisable year cannot take part in the later merge on
# 'Year'; drop them before the integer cast, which would raise on NaN.
df_pop = df_pop.dropna(subset=['Year']).copy()
df_pop['Year'] = df_pop['Year'].astype(int)

# Non-numeric population values become NaN rather than raising.
df_pop['Population'] = pd.to_numeric(df_pop['Population'], errors='coerce')

# Clean and prepare emigration data
df_mig.columns = df_mig.columns.str.strip()

# Keep only rows whose 'Year ending' value consists purely of digits; anything
# else (blank cells, text labels, notes) is discarded.
is_year_row = df_mig['Year ending'].apply(lambda x: str(x).isdigit())

# .copy() makes the filtered frame independent of the one read from disk, so
# the column assignments below do not raise SettingWithCopyWarning.
df_mig = df_mig[is_year_row].copy()
df_mig['Year'] = df_mig['Year ending'].astype(int)

# Non-numeric emigration values become NaN rather than raising.
df_mig['Emigration'] = pd.to_numeric(df_mig['Emigration'], errors='coerce')

# Collapse to one row per year. When a year occurs on several rows their
# emigration figures are averaged (NaN values are skipped by mean()).
# NOTE(review): confirm that a mean, rather than a sum or a single chosen row,
# is the intended aggregate for this dataset.
df_yearly = df_mig.groupby('Year')['Emigration'].mean().reset_index()

# Merge and calculate percentage
# Inner join on 'Year': only years present in both tables are kept.
df_merged = df_yearly.merge(df_pop, how='inner', on='Year')

# NOTE(review): the factor of 1000 presumably converts emigration figures
# reported in thousands into persons -- confirm against the source data.
emigrants = df_merged['Emigration'] * 1000
df_merged['Emigration_pct'] = (emigrants / df_merged['Population']) * 100

# Plot
# Draw the yearly emigration share as a single line on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df_merged['Year'],
    df_merged['Emigration_pct'],
    marker='o',
    color='crimson',
    linewidth=2,
)

# Years to flag with a dashed vertical line, mapped to the label shown.
events = {
    2004: "EU Expansion",
    2008: "Global Financial Crisis",
    2016: "Brexit Referendum",
    2020: "COVID-19 onset",
    2021: "Post-Brexit Rules",
}

# Anchor the tops of all labels slightly below the highest plotted value.
y_text = 0.95 * df_merged['Emigration_pct'].max()
for year, label in events.items():
    ax.axvline(x=year, color='gray', linestyle='--', linewidth=1)
    ax.text(year, y_text, label, rotation=90, va='top', fontsize=9)

ax.set_title(
    "Emigration as Percentage of Total UK Population (1991–2024)",
    fontsize=14,
    weight='bold',
)
ax.set_xlabel("Year")
ax.set_ylabel("Emigration (% of Population)")
ax.grid(True, linestyle='--', alpha=0.6)
fig.tight_layout()
plt.show()
