# Scrape links

In [361]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Yearly index page listing the ECB press conferences; `{}` is filled with the year.
url = "https://www.ecb.europa.eu/press/pressconf/{}/html/index_include.en.html"

# One record (Date, Name, Link) per entry found on the yearly index pages.
urls = []

for year in range(1990, 2025):
    # A timeout keeps a stalled connection from hanging the cell indefinitely.
    response = requests.get(url.format(year), timeout=30)
    if not response.ok:
        # No usable index page for this year (error response): nothing to parse.
        continue
    soup = BeautifulSoup(response.content, "html.parser")
    # Walk the entries in reverse page order.
    for a in soup.select(".title a")[::-1]:
        # The date is taken from the closest preceding element with class "date";
        # guard against entries that have none instead of raising AttributeError.
        date_tag = a.find_previous(class_="date")
        urls.append({
            'Date': date_tag.text if date_tag is not None else None,
            'Name': a.text,
            'Link': "https://www.ecb.europa.eu" + a["href"],
        })

# Naming the columns keeps the filters below working even if nothing was scraped.
df = pd.DataFrame(urls, columns=['Date', 'Name', 'Link'])

# Keep only monetary policy statements / introductory statements, and drop the
# one introductory statement that is about the architectural design competition.
is_statement = df['Name'].str.contains(
    'Monetary policy statement|Introductory statement', na=False
)
is_design_competition = df['Name'].str.contains(
    'Introductory statement on the winning design chosen in the international urban planning and architectural design competition',
    regex=False,
    na=False,
)
# .copy() makes the result an independent frame, so adding columns to it later
# does not trigger pandas' SettingWithCopyWarning.
df = df[is_statement & ~is_design_competition].copy()


In [362]:
# Display the filtered frame of press-conference links built in the previous cell.
df

Unnamed: 0,Date,Name,Link
0,9 June 1998,Willem F. Duisenberg: ECB Press conference: In...,https://www.ecb.europa.eu/press/pressconf/1998...
1,8 July 1998,Willem F. Duisenberg: ECB Press conference: In...,https://www.ecb.europa.eu/press/pressconf/1998...
2,11 September 1998,Willem F. Duisenberg: ECB Press conference: In...,https://www.ecb.europa.eu/press/pressconf/1998...
3,13 October 1998,Willem F. Duisenberg: Introductory statement w...,https://www.ecb.europa.eu/press/pressconf/1998...
4,3 November 1998,Willem F. Duisenberg: Introductory statement w...,https://www.ecb.europa.eu/press/pressconf/1998...
...,...,...,...
271,15 June 2023,"Christine Lagarde, Luis de Guindos: Monetary p...",https://www.ecb.europa.eu/press/pressconf/2023...
272,27 July 2023,"Christine Lagarde, Luis de Guindos: Monetary p...",https://www.ecb.europa.eu/press/pressconf/2023...
273,14 September 2023,"Christine Lagarde, Luis de Guindos: Monetary p...",https://www.ecb.europa.eu/press/pressconf/2023...
274,26 October 2023,"Christine Lagarde, Luis de Guindos: Monetary p...",https://www.ecb.europa.eu/press/pressconf/2023...


# Scrape conference text

In [363]:
# Markers that introduce the Q&A part of a page. The statement text is cut at
# the first occurrence of each marker, i.e. at the earliest marker found.
QA_MARKERS = (
    '***',
    '* * *',
    'Transcript of the questions asked',
    'Question:',
    'QUESTION:',
    'My first question would be',
    'Question (translation):',
    'Q:',
)

# Boilerplate phrases removed from the statement text, in this order.
BOILERPLATE = (
    "Click here for the transcript of questions and answers. ",
    "With the transcript of the questions and answers ",
    "With a transcript of the questions and answers ",
    "Reproduction is permitted provided that the source is acknowledged.",
    "Disclaimer Please note that related topic tags are currently available for selected content only.",
)


def _speaker_before(items, role_items):
    """Return the stripped item preceding the single role item, or None.

    `items` are the comma-separated pieces of the subtitle and `role_items`
    the pieces that matched a role (e.g. "President of the ECB"). A name is
    only returned when exactly one piece matched and it is not the first one.
    """
    if len(role_items) != 1:
        return None
    idx = items.index(role_items[0])
    # idx == 0 would silently wrap around to the last item through index -1.
    return items[idx - 1].strip() if idx > 0 else None


alocation = []
apresident = []
avice_president = []
atext = []

for pressconf in df['Link']:
    # Fetch each page once, with a timeout, and fail loudly on HTTP errors
    # rather than parsing an error page.
    response = requests.get(pressconf, timeout=30)
    response.raise_for_status()

    # Name the parser explicitly instead of passing an undocumented `parser=`
    # keyword together with the ambiguous features='html'.
    # NOTE(review): 'lxml' is presumably what features='html' resolved to when
    # this notebook ran; it needs the lxml package. Use 'html.parser' if lxml
    # is unavailable.
    soup = BeautifulSoup(response.content, 'lxml')
    textbox = soup.find('main')
    if textbox is None:
        raise ValueError(f"No <main> element found at {pressconf}")

    # "<location>, <date>" line of the page, when the page has one.
    publication_tag = textbox.find(class_='ecb-publicationDate')
    location_text = publication_tag.get_text() if publication_tag is not None else None

    subtitle_tag = textbox.find('h2', class_='ecb-pressContentSubtitle')
    title_text = subtitle_tag.get_text() if subtitle_tag is not None else ''

    # Normalise the subtitle so it can be split into comma-separated pieces.
    # Only the hyphen of "Vice-President" is removed, so hyphenated names
    # (e.g. "Jean-Claude") are kept intact.
    title_text = title_text.replace('Vice-President', 'Vice President')
    title_text = title_text.replace('European Central Bank', 'ECB')
    title_text = title_text.replace(' and ', ', and ')
    items = title_text.split(',')

    # The name of each speaker is the piece right before their role.
    Pres = [x for x in items if 'President' in x and 'ECB' in x and 'Vice' not in x]
    VP = [x for x in items if 'Vice President' in x and 'ECB' in x]
    president = _speaker_before(items, Pres)
    vice_president = _speaker_before(items, VP)

    location = None
    date = None
    if location_text is not None:
        parts = location_text.split(', ')
        location = parts[0].strip()
        if len(parts) > 1:
            date = parts[1].strip()
    elif len(items) == 6:
        # Case 1: the subtitle itself ends with ", <location>, <date>".
        location = items[4].strip()
    elif len(VP) > 0:
        # Case 2: there was no comma separating the VP role and the location.
        candidate = VP[0].replace('Vice President of the ECB', '').strip()
        if candidate != '':
            location = candidate

    # Join all paragraphs into a single line of text.
    text = '\n'.join(p.get_text() for p in textbox.find_all('p'))
    text = text.replace("\n", " ").replace("\t", " ")

    # Keep only the statement: drop everything from the first Q&A marker on.
    for marker in QA_MARKERS:
        text = text.split(marker, 1)[0]
    for phrase in BOILERPLATE:
        text = text.replace(phrase, "")

    # If the text still opens with the "<location>, <date>" line, drop
    # everything up to and including the year of that date.
    if location is not None and date and text.startswith(location):
        _, year_found, remainder = text.partition(date[-4:])
        if year_found:
            text = remainder

    alocation.append(location)
    apresident.append(president)
    avice_president.append(vice_president)
    atext.append(text)

# assign() builds a new frame, so this is safe even if `df` is a filtered slice.
df = df.assign(**{
    'Location': alocation,
    'President': apresident,
    'Vice President': avice_president,
    'Text': atext,
})

In [366]:
# Display the frame after the Location, President, Vice President and Text columns were added.
df

Unnamed: 0,Date,Name,Link,Location,President,Vice President,Text
0,9 June 1998,Willem F. Duisenberg: ECB Press conference: In...,https://www.ecb.europa.eu/press/pressconf/1998...,,Willem F. Duisenberg,,"Ladies and gentlemen, I am here today to repor..."
1,8 July 1998,Willem F. Duisenberg: ECB Press conference: In...,https://www.ecb.europa.eu/press/pressconf/1998...,,Willem F. Duisenberg,,"Ladies and gentlemen, the Vice-President and I..."
2,11 September 1998,Willem F. Duisenberg: ECB Press conference: In...,https://www.ecb.europa.eu/press/pressconf/1998...,,Willem F. Duisenberg,,"Ladies and gentlemen, the Vice-President and I..."
3,13 October 1998,Willem F. Duisenberg: Introductory statement w...,https://www.ecb.europa.eu/press/pressconf/1998...,,Willem F. Duisenberg,,"Ladies and gentlemen, in line with our stated..."
4,3 November 1998,Willem F. Duisenberg: Introductory statement w...,https://www.ecb.europa.eu/press/pressconf/1998...,,Willem F. Duisenberg,,"Ladies and gentlemen, as in previous months,..."
...,...,...,...,...,...,...,...
271,15 June 2023,"Christine Lagarde, Luis de Guindos: Monetary p...",https://www.ecb.europa.eu/press/pressconf/2023...,Frankfurt am Main,Christine Lagarde,Luis de Guindos,"Good afternoon, the Vice-President and I welc..."
272,27 July 2023,"Christine Lagarde, Luis de Guindos: Monetary p...",https://www.ecb.europa.eu/press/pressconf/2023...,Frankfurt am Main,Christine Lagarde,Luis de Guindos,"Good afternoon, the Vice-President and I ..."
273,14 September 2023,"Christine Lagarde, Luis de Guindos: Monetary p...",https://www.ecb.europa.eu/press/pressconf/2023...,Frankfurt am Main,Christine Lagarde,Luis de Guindos,"Good afternoon, the Vice-President and I ..."
274,26 October 2023,"Christine Lagarde, Luis de Guindos: Monetary p...",https://www.ecb.europa.eu/press/pressconf/2023...,Athens,Christine Lagarde,Luis de Guindos,"Good afternoon, the Vice-President and I ..."
