In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# Load and preprocess data
#
# The first data row of the CSV is used as the real header: it is promoted
# to the column labels and then removed from the data.
df = pd.read_csv("Emigration over years .csv")
df.columns = df.iloc[0]
df = df[1:]

# Normalise the column labels to short group names.
# NOTE(review): the 'Unnamed: N' keys presumably match placeholder labels
# carried in the promoted header row -- confirm against the CSV.
df = df.rename(columns={
    'Year ending': 'Year',
    'Emigration': 'British',
    'Unnamed: 3': 'EU',
    'Unnamed: 4': 'Non-EU'
})
# Drop whichever column currently sits at position 1 (removed by its label).
df = df.drop(columns=[df.columns[1]])
df = df.rename(columns={'Non-EU': 'Non_EU'})

# Keep only rows whose 'Year' is present and is a purely numeric string.
# The explicit .copy() detaches the filtered frame from its parent so the
# column assignments below write to an independent object instead of
# triggering pandas' SettingWithCopyWarning.
df = df[df['Year'].notna()]
df = df[df['Year'].str.isnumeric()].copy()
df['Year'] = df['Year'].astype(int)

# Convert the group columns to numbers; unparseable cells become NaN.
for column in ('British', 'EU', 'Non_EU'):
    df[column] = pd.to_numeric(df[column], errors='coerce')

# One row per year holding the mean of each group column.
df_avg = df.groupby('Year')[['British', 'EU', 'Non_EU']].mean().reset_index()

# Plot: one line per group, drawn from the per-year averages in df_avg.
plt.figure(figsize=(12, 6))
for column, legend_name, marker_shape in (
    ('British', 'British', 'o'),
    ('EU', 'EU', 's'),
    ('Non_EU', 'Non-EU', '^'),
):
    plt.plot(
        df_avg['Year'],
        df_avg[column],
        label=legend_name,
        marker=marker_shape,
    )

# Years to annotate with a dashed vertical line and a rotated caption.
events = {
    2004: "EU Expansion",
    2008: "Global Financial Crisis",
    2016: "Brexit Referendum",
    2020: "COVID-19 onset",
    2021: "Post-Brexit Rules"
}

# Captions are anchored at 95% of the largest averaged value across the
# three group columns; with top alignment the text hangs down from there.
column_peaks = df_avg[['British', 'EU', 'Non_EU']].max()
y_text = column_peaks.max() * 0.95

event_line_style = dict(color='gray', linestyle='--', linewidth=1)
event_text_style = dict(rotation=90, verticalalignment='top', fontsize=9)
for year, label in events.items():
    plt.axvline(x=year, **event_line_style)
    plt.text(year, y_text, label, **event_text_style)

# Axis labels, title, legend and grid.
plt.xlabel("Year")
plt.ylabel("Emigrants (Thousands)")
plt.title(
    "UK Emigration Trends by Group (1991–2024, Averaged by Year)",
    fontsize=16,
    weight='bold',
)
plt.legend(title="Group")
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
