In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# Load base data
# Read the raw CSV and strip stray whitespace from the header names so the
# column lookups below match.
df = pd.read_csv("emigration imigration total population.csv")
df.columns = df.columns.str.strip()

# Keep only rows whose 'Year ending' cell is made up purely of digits; every
# other row is dropped. Cell values are whitespace-stripped before the test,
# mirroring the header clean-up above, so a padded year is not discarded.
year_text = df['Year ending'].map(lambda value: str(value).strip())
is_year_row = year_text.map(str.isdigit).astype(bool)

# .copy() detaches the filtered rows from the original frame; without it the
# column assignments below write into a slice and trigger pandas'
# SettingWithCopyWarning.
df = df.loc[is_year_row].copy()
df['Year'] = year_text.loc[is_year_row].astype(int)

# Non-numeric entries become NaN (errors='coerce'), which mean() skips.
df['Emigration'] = pd.to_numeric(df['Emigration'], errors='coerce')
df['Immigration'] = pd.to_numeric(df['Immigration'], errors='coerce')

# Collapse multiple rows per year into a single averaged row.
df_yearly = df.groupby('Year')[['Emigration', 'Immigration']].mean().reset_index()

# Add missing immigration values
# Hand-entered quarterly immigration figures for 2023 (Q1-Q4) and 2024 (Q1-Q2),
# stored as (year, quarter, immigration) records.
extra = pd.DataFrame(
    [
        (2023, 'Q1', 1312),
        (2023, 'Q2', 1320),
        (2023, 'Q3', 1304),
        (2023, 'Q4', 1316),
        (2024, 'Q1', 1293),
        (2024, 'Q2', 1207),
    ],
    columns=['Year', 'Quarter', 'Immigration'],
)

# One averaged immigration value per supplementary year.
extra_avg = extra.groupby('Year', as_index=False)['Immigration'].mean()

# Outer-join on Year so years present in only one of the two tables survive;
# the supplementary column arrives as 'Immigration_new'.
df_yearly = df_yearly.merge(extra_avg, on='Year', how='outer', suffixes=('', '_new'))

# Existing yearly values take precedence; the supplementary averages only
# fill gaps. pop() removes the helper column once it has been consumed.
df_yearly['Immigration'] = df_yearly['Immigration'].fillna(df_yearly.pop('Immigration_new'))
df_yearly = df_yearly.sort_values('Year')

# Plot
# Draw both yearly series on a single set of axes and mark selected events
# with dashed vertical lines.
fig, ax = plt.subplots(figsize=(12, 6))

# (column / legend label, line colour, point marker) for each series.
series_styles = (
    ('Immigration', 'gold', 'o'),
    ('Emigration', 'blue', 's'),
)
for column, line_color, point_marker in series_styles:
    ax.plot(
        df_yearly['Year'],
        df_yearly[column],
        label=column,
        color=line_color,
        marker=point_marker,
    )

# Year -> annotation text for the vertical event markers.
events = {
    2004: "EU Expansion",
    2008: "Global Financial Crisis",
    2016: "Brexit Referendum",
    2020: "COVID-19 onset",
    2021: "Post-Brexit Rules"
}

# Anchor the labels at 95% of the largest plotted value; with
# verticalalignment='top' the rotated text extends downward from that height.
y_max = 0.95 * df_yearly[['Emigration', 'Immigration']].max().max()
for year, label in events.items():
    ax.axvline(x=year, color='gray', linestyle='--', linewidth=1)
    ax.text(year, y_max, label, rotation=90, verticalalignment='top', fontsize=9)

ax.set_title("UK Immigration vs Emigration (1991–2024)", fontsize=14, weight='bold')
ax.set_xlabel("Year")
ax.set_ylabel("Thousands of People")
ax.legend()
ax.grid(True, linestyle='--', alpha=0.6)
fig.tight_layout()
plt.show()
