**The Following codes were used to extract the Euro 2024 final Squad List from this url** ("https://www.skysports.com/football/news/11095/13137394/euro-2024-squad-lists-hosts-germany-france-netherlands-england-scotland-italy-and-more") using BeautifulSoup

In [None]:
#1. Import the relevant libraries

from bs4 import BeautifulSoup
import requests
from csv import writer
import pandas as pd
from itertools import zip_longest


In [None]:
#2. Create soup

page = requests.get("https://www.skysports.com/football/news/11095/13137394/euro-2024-squad-lists-hosts-germany-france-netherlands-england-scotland-italy-and-more")
soup = BeautifulSoup(page.content, "html.parser")
soup.prettify().split("\n") #view soup with prettify()

['<!DOCTYPE html>',
 '<html class="no-js" lang="en">',
 ' <head>',
 '  <meta charset="utf-8"/>',
 '  <title>',
 '   Euro 2024 squad lists: Hosts Germany, France, Netherlands, England, Scotland, Italy and more | Football News | Sky Sports',
 '  </title>',
 '  <meta content="index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1" name="robots"/>',
 '  <meta content="Euro 2024 squad lists: Hosts Germany, France, Netherlands, England, Scotland, Italy and more | Football News | Sky Sports" name="title"/>',
 '  <meta content="Final Euro 2024 squads announced; see who has been selected for the European Championships in Germany; tournament starts on June 14" name="description"/>',
 '  <meta content="" name="keywords"/>',
 '  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>',
 '  <link crossorigin="use-credentials" href="/assets/manifest.json" rel="manifest"/>',
 '  <link href="https://www.skysports.com/football/news/11095/13137394/euro-2024-squad-lists-host

In [None]:
#3. Find the portion of soup that holds the squad list.
#   It held with this div tag "sdc-site-layout__col sdc-site-layout__col1"

soup2=soup.find("div", class_="sdc-site-layout__col sdc-site-layout__col1")


In [76]:
#4. Extract country and squads into a list
country=[]
squad=[]

for i in soup2.find_all("h3"):
    country.append(i.text)
country.remove("Also See:")

for i in soup2.find_all("p"):
    squad.append(i.text)


In [None]:
# View Country

country

['Germany',
 'Scotland',
 'Hungary',
 'Switzerland',
 'Spain',
 'Croatia',
 'Italy',
 'Albania',
 'Slovenia',
 'Denmark',
 'Serbia',
 'England',
 'Netherlands',
 'France',
 'Poland',
 'Austria',
 'Ukraine',
 'Slovakia',
 'Belgium',
 'Romania',
 'Portugal',
 'Czech Republic',
 'Georgia',
 'Turkey']

In [None]:
# View Squad
squad

['Who is going to Germany? Countries have named their final squads for Euro 2024 and you can see them all here.',
 'Aleksandar Pavlovic will take no part in Euro 2024 because of tonsillitis. Germany have called up Borussia Dortmund midfielder Emre Can as his replacement.',
 'Pavlovic told Sky in Germany last week he was considering having his tonsils removed after a previous bout of the illness meant he missed a possible national team debut against France and the Netherlands in March.',
 'Netherlands, meanwhile, have called up Bologna striker Joshua Kirkzee, who is being monitored by Manchester United.',
 'The tournament gets underway in Munich on June 14 when hosts Germany face Scotland.',
 "Teams have selected 26-player squads for the tournament. So, who's in and who's out? See all the squad lists below.",
 'Goalkeepers: Oliver Baumann (Hoffenheim), Manuel Neuer (Bayern Munich), Marc-Andre ter Stegen (Barcelona).',
 '\n              Euro 2024 fixtures, schedule, groups, venues: All y

In [None]:
#5. Extract Each Position from squad (Goalkeepers, Defenders, Midfielders, Forwards)

Goalkeepers = [i for i in squad if i.startswith("Goalkeepers")]
Defenders = [i for i in squad if i.startswith("Defenders")]
Midfielders = [i for i in squad if i.startswith("Midfielders")]
Forwards = [i for i in squad if i.startswith("Forwards")|i.startswith("Attackers")]


In [None]:
#7. Remove the prefixes ("Goalkeepers:","Defenders:","Midfielders:","Forwards:"and "Attackers:") from each column

squad_df["Goalkeepers"] = squad_df["Goalkeepers"].replace(r"^Goalkeepers:", "", regex=True)
squad_df["Defenders"] = squad_df["Defenders"].replace(r"^Defenders:", "", regex=True)
squad_df["Midfielders"] = squad_df["Midfielders"].replace(r"^Midfielders:", "", regex=True)
squad_df["Forwards"] = squad_df["Forwards"].replace(r"Forwards:", "", regex=True)
squad_df["Forwards"] = squad_df["Forwards"].replace(r"Attackers:", "", regex=True)
squad_df

Unnamed: 0,Country,Goalkeepers,Defenders,Midfielders,Forwards
0,Germany,"Oliver Baumann (Hoffenheim), Manuel Neuer (Ba...","Waldemar Anton (Stuttgart), Benjamin Henrichs...","Robert Andrich (Bayer Leverkusen), Chris Fuhr...","Maximilian Beier (Hoffenheim), Niclas Fullkru..."
1,Scotland,"Zander Clark (Hearts), Angus Gunn (Norwich), ...","Liam Cooper (Leeds), Grant Hanley (Norwich), ...","Stuart Armstrong (Southampton), Ryan Christie...","Che Adams (Southampton), Tommy Conway (Bristo..."
2,Hungary,"Denes Dibusz (Ferencvaros), Peter Gulacsi (RB...","Botond Balogh (Parma), Endre Botka (Ferencvar...","Bendeguz Bolla (Servette), Mihaly Kata (MTK),...","Martin Adam (Ulsan Hyundai), Kevin Csoboth (U..."
3,Switzerland,"Yann Sommer (Inter Milan), Yvon Mvogo (Lorien...","Ricardo Rodriguez (Torino), Fabian Schar (New...","Granit Xhaka (Bayer Leverkusen), Xherdan Shaq...","Breel Embolo (Monaco), Steven Zuber (AEK Athe..."
4,Spain,"Unai Simon (Athletic Bilbao), Alex Remiro (Re...","Dani Carvajal (Real Madrid), Jesus Navas (Sev...","Rodri (Manchester City), Martin Zubimendi (Re...","Alvaro Morata (Atletico Madrid), Joselu (Real..."
5,Croatia,"Dominik Livakovic (Fenerbahce), Nediljko Labr...","Josip Stanisic (Bayer Leverkusen), Marin Pong...","Lovro Majer (Wolfsburg), Mateo Kovacic (Manch...","Ivan Perisic (Hajduk Split), Andrej Kramaric ..."
6,Italy,"Gianluigi Donnarumma (Paris Saint-Germain), A...","Alessandro Bastoni (Inter Milan), Raoul Bella...","Nicolo Barella (Inter Milan), Bryan Cristante...","Federico Chiesa (Juventus), Stephan El Shaara..."
7,Albania,"Etrit Berisha (Empoli), Thomas Strakosha (Bre...","Berat Djimsiti (Atalanta), Elseid Hysaj (Lazi...","Amir Abrashi, Kristjan Asllani (Inter Milan),...","Jasir Asani (Gwangju FC), Armando Broja (Fulh..."
8,Slovenia,"Jan Oblak (Atletico Madrid), Vid Belec (APOEL...","Petar Stojanovic (Sampdoria), Jaka Bijol (Udi...","Timi Max Elsnik (Olimpija Ljubljana), Jasmin ...","Josip Ilicic (Maribor), Andraz Sporar (Panath..."
9,Denmark,"Kasper Schmeichel (Anderlecht), Frederik Ronn...","Andreas Christensen (Barcelona), Simon Kjaer ...","Christian Eriksen (Manchester United), Thomas...","Jacob Bruun Larsen (Burnley), Andreas Skov Ol..."


In [None]:
#8. Each row in the dataframe now contains only data for a particular country
#   Split each column by comma (,) to separate each player in each column
#   Then recreate a new dataframe using the function below, this time containing only single players per row.
#   Finally download on a country by country basis. This should result in 24 downloads for the 24 teams.

Goalkeepers2 = squad_df["Goalkeepers"].str.split(",").explode()
Defenders2 = squad_df["Defenders"].str.split(",").explode()
Midfielders2 = squad_df["Midfielders"].str.split(",").explode()
Forwards2 = squad_df["Forwards"].str.split(",").explode()

#4. Create a function to download the players for each country,
#   replace the value of i with numbers 0 - 23 to download all 24 teams into 24 excel sheets.
i=0
def make_squadlist():
  squadlist_df = pd.DataFrame(list(zip_longest(Goalkeepers2[i], Defenders2[i], Midfielders2[i], Forwards2[i], fillvalue="")), columns= ["Goalkeepers", "Defenders", "Midfielders", "Forwards"])
  from google.colab import files
  squadlist_df.to_csv('squadlist.csv')
  files.download('squadlist.csv')
  return squadlist_df
make_squadlist()



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,Goalkeepers,Defenders,Midfielders,Forwards
0,Oliver Baumann (Hoffenheim),Waldemar Anton (Stuttgart),Robert Andrich (Bayer Leverkusen),Maximilian Beier (Hoffenheim)
1,Manuel Neuer (Bayern Munich),Benjamin Henrichs (RB Leipzig),Chris Fuhrich (Stuttgart),Niclas Fullkrug (Borussia Dortmund)
2,Marc-Andre ter Stegen (Barcelona).,Joshua Kimmich (Bayern Munich),Pascal Gross (Brighton),Kai Havertz (Arsenal)
3,,Robin Koch (Eintracht Frankfurt),Ilkay Gundogan (Barcelona),Thomas Muller (Bayern Munich)
4,,Maximilian Mittelstadt (Stuttgart),Toni Kroos (Real Madrid),Deniz Undav (Stuttgart).
5,,David Raum (RB Leipzig),Jamal Musiala (Bayern Munich),
6,,Antonio Rudiger (Real Madrid),Emre Can (Borussia Dortmund),
7,,Nico Schlotterbeck (Borussia Dortmund),Leroy Sane (Bayern Munich),
8,,Jonathan Tah (Bayer Leverkusen).,Florian Wirtz (Bayer Leverkusen).,


In [None]:
#9. After downloading all 24 files you may choose to keep as 24 different files
# or union all files together to form one excel.
# This was completed using Excel's power query