In [41]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from io import StringIO
from datetime import datetime, timedelta
# Add this at the top of your notebook
import warnings
warnings.filterwarnings('ignore')

## Scrape one day

In [42]:
# Report date to fetch; it is interpolated into the EDDATE query parameter as day/month/year.
day = 7
month = 11
year = 2025
url = f'https://uec.hse.ie/uec/TGAR.php?EDDATE={day}%2F{month}%2F{year}'

# A timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors here rather than as a parsing failure below.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Only the first <table> element on the page is parsed.
table = soup.find('table')
if table is None:
    raise ValueError(f'No <table> found at {url}')

# Wrap the markup in StringIO (as scrape_date does): passing a literal HTML
# string to read_html is deprecated in recent pandas releases.
# skiprows=[0] discards the first row of the table before the header is built.
df = pd.read_html(StringIO(str(table)), flavor='html5lib', skiprows=[0])[0]

In [43]:
# Region names to blank out of the scraped table.
regions = [
    'HSE Dublin & North East',
    'HSE Dublin & Midlands',
    'HSE Dublin & South East',
    'HSE South West',
    'HSE Mid West',
    'HSE West & North West',
]
df_cp = df.copy()  # work on a copy so the raw scrape in `df` stays untouched

# Replace region names with NaN, then drop every column/row left entirely empty.
# infer_objects(copy=False) mirrors scrape_date and makes explicit the dtype
# inference that replace() used to perform implicitly (deprecated in pandas 2.2).
df_cp = df_cp.replace(regions, np.nan).infer_objects(copy=False)
df_cp = df_cp.dropna(axis=1, how='all')
df_cp = df_cp.dropna(axis=0, how='all')

# Cut the first 8 columns (repeats, per the original author's note), then drop
# the column whose top-level header is 'Unnamed: 9_level_0'.
col_to_cut = 8
df_cp = df_cp.iloc[:, col_to_cut:]
df_cp = df_cp.drop('Unnamed: 9_level_0', axis=1)

# Give the hospital column a readable name.
df_cp = df_cp.rename(columns={'Unnamed: 8_level_0': 'Hospital'})

# Flatten the two-level header: remember the second level, drop it from the
# columns, and store it as a data row labelled 0.
second_level = df_cp.columns.get_level_values(1).tolist()
df_cp.columns = df_cp.columns.droplevel(1)
# NOTE(review): `.loc[0] = ...` assigns by label. It overwrites an existing row
# labelled 0, or appends at the end if that label was dropped above; it never
# inserts a new first row. Behaviour left unchanged - confirm this is intended.
df_cp.loc[0] = second_level

# Stamp every row with the report date as DD/MM/YYYY (insert works in place).
df_cp.insert(0, 'Date', f'{day:02d}/{month:02d}/{year}')

## Scrape 3 years into 6 .csv files

In [48]:
# set date
#day=7;month=11;year=2025
# scrape the html
def scrape_date(day, month, year, timeout=30):
  """Scrape the HSE UEC TGAR table for one date and return it as a DataFrame.

  Args:
    day: Day of month (int); interpolated into the ``EDDATE`` query parameter.
    month: Month number (int).
    year: Four-digit year (int).
    timeout: Seconds to wait for the HTTP request before giving up. Without a
      timeout a single stalled connection would hang a multi-month scrape.

  Returns:
    pandas.DataFrame with a leading ``Date`` column holding the requested date
    as a ``DD/MM/YYYY`` string, followed by the cleaned table columns with a
    single-level header. The table's second header level is stored as a data
    row labelled ``0``.

  Raises:
    requests.HTTPError: if the server responds with a 4xx/5xx status.
    ValueError: if the page contains no ``<table>`` element.
    KeyError: if the expected ``'Unnamed: 9_level_0'`` column is absent.
  """
  print(f'Scraping date: {day:02d}/{month:02d}/{year}')
  url = f'https://uec.hse.ie/uec/TGAR.php?EDDATE={day}%2F{month}%2F{year}'
  response = requests.get(url, timeout=timeout)
  # Fail here on an HTTP error instead of trying to parse an error page.
  response.raise_for_status()
  soup = BeautifulSoup(response.content, 'html.parser')

  # Only the first <table> element on the page is parsed.
  table = soup.find('table')
  if table is None:
    # Previously str(None) was handed to read_html, which failed with an
    # opaque "No tables found"; name the offending date instead.
    raise ValueError(f'No <table> found for {day:02d}/{month:02d}/{year} ({url})')

  # skiprows=[0] discards the first row of the table before the header is built.
  df = pd.read_html(StringIO(str(table)), flavor='html5lib', skiprows=[0])[0]
  df_cp = df.copy()  # work on a copy of the parsed table

  regions = [
    'HSE Dublin & North East',
    'HSE Dublin & Midlands',
    'HSE Dublin & South East',
    'HSE South West',
    'HSE Mid West',
    'HSE West & North West',
  ]

  # Replace region names with NaN, then drop every column/row left entirely empty.
  df_cp = df_cp.replace(regions, np.nan).infer_objects(copy=False)
  df_cp = df_cp.dropna(axis=1, how='all')
  df_cp = df_cp.dropna(axis=0, how='all')

  # Cut the first 8 columns (repeats, per the original author's note), then
  # drop the column whose top-level header is 'Unnamed: 9_level_0'.
  col_to_cut = 8
  df_cp = df_cp.iloc[:, col_to_cut:]
  df_cp = df_cp.drop('Unnamed: 9_level_0', axis=1)

  # Give the hospital column a readable name.
  df_cp = df_cp.rename(columns={'Unnamed: 8_level_0': 'Hospital'})

  # Flatten the two-level header: remember the second level, drop it from the
  # columns, and store it as a data row labelled 0.
  second_level = df_cp.columns.get_level_values(1).tolist()
  df_cp.columns = df_cp.columns.droplevel(1)
  # NOTE(review): `.loc[0] = ...` assigns by label. It overwrites an existing
  # row labelled 0, or appends at the end if that label was dropped above; it
  # never inserts a new first row. Behaviour left unchanged so previously
  # written CSVs stay comparable - confirm this is intended.
  df_cp.loc[0] = second_level

  # Stamp every row with the report date as DD/MM/YYYY (insert works in place).
  df_cp.insert(0, 'Date', f'{day:02d}/{month:02d}/{year}')
  return df_cp

In [45]:
# Whole scrape window: every calendar day from 1 Jan 2023 to 15 Nov 2025, inclusive.
# The chunked cells further down reassign these three names for each half-year.
start_date = datetime(2023, 1, 1)
end_date = datetime(2025, 11, 15)
dates = pd.date_range(start=start_date, end=end_date, freq='D')

### 6-month chunks

### start with 2023

In [46]:
# 2023, part 1: scrape each day from 1 Jan 2023 to 1 Jun 2023 (inclusive),
# stack the daily frames and save them to uec_data_2023p1.csv.
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 6, 1)
dates = pd.date_range(start=start_date, end=end_date, freq='D')
pd.concat(
    [scrape_date(ts.day, ts.month, ts.year) for ts in dates],
    ignore_index=True,
).to_csv('uec_data_2023p1.csv', index=False)

In [49]:
# 2023, part 2: scrape each day from 2 Jun 2023 to 31 Dec 2023 (inclusive),
# stack the daily frames and save them to uec_data_2023p2.csv.
start_date = datetime(2023, 6, 2)
end_date = datetime(2023, 12, 31)
dates = pd.date_range(start=start_date, end=end_date, freq='D')
pd.concat(
    [scrape_date(ts.day, ts.month, ts.year) for ts in dates],
    ignore_index=True,
).to_csv('uec_data_2023p2.csv', index=False)

Scraping date: 02/06/2023
Scraping date: 03/06/2023
Scraping date: 04/06/2023
Scraping date: 05/06/2023
Scraping date: 06/06/2023
Scraping date: 07/06/2023
Scraping date: 08/06/2023
Scraping date: 09/06/2023
Scraping date: 10/06/2023
Scraping date: 11/06/2023
Scraping date: 12/06/2023
Scraping date: 13/06/2023
Scraping date: 14/06/2023
Scraping date: 15/06/2023
Scraping date: 16/06/2023
Scraping date: 17/06/2023
Scraping date: 18/06/2023
Scraping date: 19/06/2023
Scraping date: 20/06/2023
Scraping date: 21/06/2023
Scraping date: 22/06/2023
Scraping date: 23/06/2023
Scraping date: 24/06/2023
Scraping date: 25/06/2023
Scraping date: 26/06/2023
Scraping date: 27/06/2023
Scraping date: 28/06/2023
Scraping date: 29/06/2023
Scraping date: 30/06/2023
Scraping date: 01/07/2023
Scraping date: 02/07/2023
Scraping date: 03/07/2023
Scraping date: 04/07/2023
Scraping date: 05/07/2023
Scraping date: 06/07/2023
Scraping date: 07/07/2023
Scraping date: 08/07/2023
Scraping date: 09/07/2023
Scraping dat

### 2024

In [51]:
# 2024, part 1: scrape each day from 1 Jan 2024 to 1 Jun 2024 (inclusive),
# stack the daily frames and save them to uec_data_2024p1.csv.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 6, 1)
dates = pd.date_range(start=start_date, end=end_date, freq='D')
pd.concat(
    [scrape_date(ts.day, ts.month, ts.year) for ts in dates],
    ignore_index=True,
).to_csv('uec_data_2024p1.csv', index=False)

Scraping date: 01/01/2024
Scraping date: 02/01/2024
Scraping date: 03/01/2024
Scraping date: 04/01/2024
Scraping date: 05/01/2024
Scraping date: 06/01/2024
Scraping date: 07/01/2024
Scraping date: 08/01/2024
Scraping date: 09/01/2024
Scraping date: 10/01/2024
Scraping date: 11/01/2024
Scraping date: 12/01/2024
Scraping date: 13/01/2024
Scraping date: 14/01/2024
Scraping date: 15/01/2024
Scraping date: 16/01/2024
Scraping date: 17/01/2024
Scraping date: 18/01/2024
Scraping date: 19/01/2024
Scraping date: 20/01/2024
Scraping date: 21/01/2024
Scraping date: 22/01/2024
Scraping date: 23/01/2024
Scraping date: 24/01/2024
Scraping date: 25/01/2024
Scraping date: 26/01/2024
Scraping date: 27/01/2024
Scraping date: 28/01/2024
Scraping date: 29/01/2024
Scraping date: 30/01/2024
Scraping date: 31/01/2024
Scraping date: 01/02/2024
Scraping date: 02/02/2024
Scraping date: 03/02/2024
Scraping date: 04/02/2024
Scraping date: 05/02/2024
Scraping date: 06/02/2024
Scraping date: 07/02/2024
Scraping dat

In [52]:
# 2024, part 2: scrape each day from 2 Jun 2024 to 31 Dec 2024 (inclusive),
# stack the daily frames and save them to uec_data_2024p2.csv.
start_date = datetime(2024, 6, 2)
end_date = datetime(2024, 12, 31)
dates = pd.date_range(start=start_date, end=end_date, freq='D')
pd.concat(
    [scrape_date(ts.day, ts.month, ts.year) for ts in dates],
    ignore_index=True,
).to_csv('uec_data_2024p2.csv', index=False)

Scraping date: 02/06/2024
Scraping date: 03/06/2024
Scraping date: 04/06/2024
Scraping date: 05/06/2024
Scraping date: 06/06/2024
Scraping date: 07/06/2024
Scraping date: 08/06/2024
Scraping date: 09/06/2024
Scraping date: 10/06/2024
Scraping date: 11/06/2024
Scraping date: 12/06/2024
Scraping date: 13/06/2024
Scraping date: 14/06/2024
Scraping date: 15/06/2024
Scraping date: 16/06/2024
Scraping date: 17/06/2024
Scraping date: 18/06/2024
Scraping date: 19/06/2024
Scraping date: 20/06/2024
Scraping date: 21/06/2024
Scraping date: 22/06/2024
Scraping date: 23/06/2024
Scraping date: 24/06/2024
Scraping date: 25/06/2024
Scraping date: 26/06/2024
Scraping date: 27/06/2024
Scraping date: 28/06/2024
Scraping date: 29/06/2024
Scraping date: 30/06/2024
Scraping date: 01/07/2024
Scraping date: 02/07/2024
Scraping date: 03/07/2024
Scraping date: 04/07/2024
Scraping date: 05/07/2024
Scraping date: 06/07/2024
Scraping date: 07/07/2024
Scraping date: 08/07/2024
Scraping date: 09/07/2024
Scraping dat

### 2025

In [53]:
# 2025, part 1: scrape each day from 1 Jan 2025 to 1 Jun 2025 (inclusive),
# stack the daily frames and save them to uec_data_2025p1.csv.
start_date = datetime(2025, 1, 1)
end_date = datetime(2025, 6, 1)
dates = pd.date_range(start=start_date, end=end_date, freq='D')
pd.concat(
    [scrape_date(ts.day, ts.month, ts.year) for ts in dates],
    ignore_index=True,
).to_csv('uec_data_2025p1.csv', index=False)

Scraping date: 01/01/2025
Scraping date: 02/01/2025
Scraping date: 03/01/2025
Scraping date: 04/01/2025
Scraping date: 05/01/2025
Scraping date: 06/01/2025
Scraping date: 07/01/2025
Scraping date: 08/01/2025
Scraping date: 09/01/2025
Scraping date: 10/01/2025
Scraping date: 11/01/2025
Scraping date: 12/01/2025
Scraping date: 13/01/2025
Scraping date: 14/01/2025
Scraping date: 15/01/2025
Scraping date: 16/01/2025
Scraping date: 17/01/2025
Scraping date: 18/01/2025
Scraping date: 19/01/2025
Scraping date: 20/01/2025
Scraping date: 21/01/2025
Scraping date: 22/01/2025
Scraping date: 23/01/2025
Scraping date: 24/01/2025
Scraping date: 25/01/2025
Scraping date: 26/01/2025
Scraping date: 27/01/2025
Scraping date: 28/01/2025
Scraping date: 29/01/2025
Scraping date: 30/01/2025
Scraping date: 31/01/2025
Scraping date: 01/02/2025
Scraping date: 02/02/2025
Scraping date: 03/02/2025
Scraping date: 04/02/2025
Scraping date: 05/02/2025
Scraping date: 06/02/2025
Scraping date: 07/02/2025
Scraping dat

In [54]:
# 2025, part 2: scrape each day from 2 Jun 2025 to 15 Nov 2025 (inclusive),
# stack the daily frames and save them to uec_data_2025p2.csv.
start_date = datetime(2025, 6, 2)
end_date = datetime(2025, 11, 15)
dates = pd.date_range(start=start_date, end=end_date, freq='D')
pd.concat(
    [scrape_date(ts.day, ts.month, ts.year) for ts in dates],
    ignore_index=True,
).to_csv('uec_data_2025p2.csv', index=False)

Scraping date: 02/06/2025
Scraping date: 03/06/2025
Scraping date: 04/06/2025
Scraping date: 05/06/2025
Scraping date: 06/06/2025
Scraping date: 07/06/2025
Scraping date: 08/06/2025
Scraping date: 09/06/2025
Scraping date: 10/06/2025
Scraping date: 11/06/2025
Scraping date: 12/06/2025
Scraping date: 13/06/2025
Scraping date: 14/06/2025
Scraping date: 15/06/2025
Scraping date: 16/06/2025
Scraping date: 17/06/2025
Scraping date: 18/06/2025
Scraping date: 19/06/2025
Scraping date: 20/06/2025
Scraping date: 21/06/2025
Scraping date: 22/06/2025
Scraping date: 23/06/2025
Scraping date: 24/06/2025
Scraping date: 25/06/2025
Scraping date: 26/06/2025
Scraping date: 27/06/2025
Scraping date: 28/06/2025
Scraping date: 29/06/2025
Scraping date: 30/06/2025
Scraping date: 01/07/2025
Scraping date: 02/07/2025
Scraping date: 03/07/2025
Scraping date: 04/07/2025
Scraping date: 05/07/2025
Scraping date: 06/07/2025
Scraping date: 07/07/2025
Scraping date: 08/07/2025
Scraping date: 09/07/2025
Scraping dat

## Concatenate the 3 years (6 files) into one

In [None]:
# Read the six half-year CSVs in chronological order, stack them with a fresh
# 0..n-1 index, and write the combined table to the shared data directory.
pd.concat(
    [
        pd.read_csv(csv_name)
        for csv_name in (
            'uec_data_2023p1.csv',
            'uec_data_2023p2.csv',
            'uec_data_2024p1.csv',
            'uec_data_2024p2.csv',
            'uec_data_2025p1.csv',
            'uec_data_2025p2.csv',
        )
    ],
    ignore_index=True,
).to_csv('../data/uec_data_2023_2025_full.csv', index=False)