In [8]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings("ignore")

# FBref Premier League stats landing page (this URL carries no season in its path).
PL_table_2023 = "https://fbref.com/en/comps/9/Premier-League-Stats"


# match_url = "https://fbref.com/en/matches/1234/lineups/" # Replace with the URL for the specific match you want to retrieve lineups for

# Download the landing page and parse it.
# NOTE(review): neither request in this cell checks the HTTP status, so an
# error page would be parsed as if it were the stats page.
response = requests.get(PL_table_2023)
htmltext = BeautifulSoup(response.text, "html.parser")
# Season-specific player stats page for 2019-2020.
# NOTE(review): at this point PL_table_2019 is a URL string; a later cell
# rebinds the same name to the DataFrame returned by getseasondata(2019).
PL_table_2019 = "https://fbref.com/en/comps/9/2019-2020/stats/2019-2020-Premier-League-Stats"
response2019 = requests.get(PL_table_2019)
htmltext2019 = BeautifulSoup(response2019.text, "html.parser")
# Serialise the parsed document back to one string.
strhtmltext2019 = str(htmltext2019)

With the recent loan move of Weston McKennie from Juventus, Leeds United now have three American midfielders, all playing under the illustrious Jesse Marsch. This is uncharted territory for USMNT players, at least since the "Fulhamerica" days of the late 2000s. While this may be rare for Americans, how rare is it in the context of world football?

To test this, I will look for instances of three countrymen all playing in the same line (three attackers, three midfielders, or three defenders) of a Premier League club. By scraping data from the website FBref, we will look at non-UEFA countrymen to see how rare "Leeds United State" truly is.

In [9]:
def getseasondata(year):
    """Scrape the player list of one Premier League season from FBref.

    Downloads the season's stats page and extracts one row per player with
    regular expressions applied to the page text.

    Parameters
    ----------
    year : int
        Calendar year in which the season starts (``2019`` -> ``2019-2020``).

    Returns
    -------
    pandas.DataFrame
        One row per matched player, with columns:

        * ``0`` -- the raw HTML fragment the other columns are extracted from
        * ``"Player"`` -- first quoted attribute value after the player cell
          marker (surname-first in the sample output, e.g. "Aanholt Patrick")
        * ``"Team"`` -- link text at the end of the fragment
        * ``"Nat"`` -- three-character code taken from the ``/en/country/`` link
        * ``"Pos"`` -- two- or five-character position text
          (e.g. ``"DF"`` or ``"FW,MF"``)

        A field whose pattern does not match is left as NaN.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. when rate-limited).
    ValueError
        If the page downloaded fine but no player rows were found in it.
    """
    season = f"{year}-{year + 1}"
    url = f"https://fbref.com/en/comps/9/{season}/stats/{season}-Premier-League-Stats"

    response = requests.get(url, timeout=30)
    # Workaround to avoid triggering the crawler blocker: pause after every
    # request, including failed ones, before doing anything else.
    time.sleep(3)
    # Fail loudly here instead of with an opaque KeyError further down.
    response.raise_for_status()

    # The row pattern is applied to the BeautifulSoup-serialised page text.
    strhtmltext = str(BeautifulSoup(response.text, "html.parser"))

    # One match per player row: from the row start up to the squad link text.
    row_pattern = r'<tr >.*?data-stat=\"player\".*?\/en\/squads\/.{8}\/.*?>.*?<'
    rows = re.findall(row_pattern, strhtmltext)
    if not rows:
        raise ValueError(
            f"No player rows found for season {season} at {url}; "
            "the page layout may have changed or the request was blocked."
        )
    df = pd.DataFrame(rows)

    # Column name -> regex with a single capture group, applied to the raw row.
    field_patterns = {
        "Player": r'player".*?"(.*?)"',
        "Team": r'Stats">(.*?)<$',
        "Nat": r'\/en\/country\/(.{3})/',
        "Pos": r'position.*?>(.{2}|.{5})<',
    }
    for column, pattern in field_patterns.items():
        # expand=False returns a Series, so the assignment is an explicit
        # column-to-column assignment rather than a one-column DataFrame.
        df[column] = df[0].str.extract(pattern, expand=False)

    return df
# Scrape the 2019-2020 season as a sample table (getseasondata performs a
# network request and then sleeps for 3 seconds).
PL_table_2019 = getseasondata(2019)

In [10]:
# Display the scraped 2019-2020 player table.
PL_table_2019

Unnamed: 0,0,Player,Team,Nat,Pos
0,"<tr ><th scope=""row"" class=""right "" data-stat=...",Aanholt Patrick,Crystal Palace,NED,DF
1,"<tr ><th scope=""row"" class=""right "" data-stat=...",Aarons Max,Norwich City,ENG,DF
2,"<tr ><th scope=""row"" class=""right "" data-stat=...",Abraham Tammy,Chelsea,ENG,FW
3,"<tr ><th scope=""row"" class=""right "" data-stat=...",Adams Che,Southampton,SCO,FW
4,"<tr ><th scope=""row"" class=""right "" data-stat=...",Adrián,Liverpool,ESP,GK
...,...,...,...,...,...
517,"<tr ><th scope=""row"" class=""right "" data-stat=...",Zaha Wilfried,Crystal Palace,CIV,"FW,MF"
518,"<tr ><th scope=""row"" class=""right "" data-stat=...",Zimmermann Christoph,Norwich City,GER,DF
519,"<tr ><th scope=""row"" class=""right "" data-stat=...",Zinchenko Oleksandr,Manchester City,UKR,DF
520,"<tr ><th scope=""row"" class=""right "" data-stat=...",Živković Richairo,Sheffield Utd,CUW,FW


In [11]:
def getpossibleteams(input_table, min_players=3, excluded_nats=("ENG",)):
    """Return team/nationality pairs that could field several compatriots in one line.

    Players are grouped by ``Team`` and ``Nat``. A group is reported once for
    every line (``DF``, ``MF``, ``FW``) in which at least ``min_players`` of
    its players are listed. A player listed in two lines (e.g. ``"FW,MF"``)
    counts towards both. Players without a position (NaN ``Pos``) count
    towards the squad size but towards no line.

    Parameters
    ----------
    input_table : pandas.DataFrame
        Player table with at least the columns ``"Team"``, ``"Nat"`` and
        ``"Pos"``.
    min_players : int, default 3
        Minimum number of same-nationality players required in a line.
    excluded_nats : str or iterable of str, default ("ENG",)
        Nationality codes to leave out of the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Team"``, ``"Nat"``, ``"Positions of Players"`` (list of the
        group's position strings), ``"Number of Players"`` (group size) and
        ``"Position to Check"`` (the line that qualified). Rows are ordered
        DF, then MF, then FW. The index holds each row's position before the
        nationality filter was applied, so it may have gaps.
    """
    lines = ("DF", "MF", "FW")  # this order fixes the row order of the result

    if isinstance(excluded_nats, str):
        excluded_nats = (excluded_nats,)
    excluded_nats = list(excluded_nats)

    grouped = input_table.groupby(["Team", "Nat"])
    # Build both columns from the same groupby so they align on the
    # (Team, Nat) key instead of relying on positional order.
    squads = pd.DataFrame(
        {
            "Positions of Players": grouped["Pos"].agg(list),
            "Number of Players": grouped.size(),
        }
    ).reset_index()

    def count_in_line(positions, line):
        # Non-string entries (NaN from a failed position match) belong to no line.
        return sum(1 for pos in positions if isinstance(pos, str) and line in pos)

    per_line = []
    for line in lines:
        qualifies = pd.Series(
            [
                count_in_line(positions, line) >= min_players
                for positions in squads["Positions of Players"]
            ],
            index=squads.index,
            dtype=bool,
        )
        # .assign returns a new frame, so no filtered slice is written to.
        per_line.append(squads[qualifies].assign(**{"Position to Check": line}))

    candidates = pd.concat(per_line, ignore_index=True)

    # Keep squads that are large enough and not of an excluded nationality.
    keep = (candidates["Number of Players"] >= min_players) & ~candidates["Nat"].isin(
        excluded_nats
    )
    return candidates[keep]

# Candidate (team, nationality, line) rows for the 2019-2020 table scraped above.
temp = getpossibleteams(PL_table_2019)
temp

Unnamed: 0,Team,Nat,Positions of Players,Number of Players,Position to Check
1,Arsenal,ESP,"[DF,MF, MF, DF, DF]",4,DF
10,Manchester City,ESP,"[DF, DF, DF, MF, MF]",5,DF
19,Wolves,ESP,"[MF,DF, FW,DF, DF]",3,DF
31,Norwich City,GER,"[GK, MF, FW,MF, MF,FW, MF, MF, DF]",7,MF
36,Wolves,POR,"[FW, FW, MF, FW,MF, MF, GK, FW,MF, MF,DF]",8,MF
46,Wolves,POR,"[FW, FW, MF, FW,MF, MF, GK, FW,MF, MF,DF]",8,FW


In [12]:
def find_all_possible_teams(first_year=1992, last_year=2022):
    """Collect the candidate team/nationality/line rows for a range of seasons.

    For every season starting in ``first_year`` .. ``last_year`` (inclusive)
    the season is scraped with ``getseasondata`` and reduced with
    ``getpossibleteams``. The results are stacked into one table.

    Parameters
    ----------
    first_year : int, default 1992
        Starting year of the earliest season to scrape.
    last_year : int, default 2022
        Starting year of the latest season to scrape.

    Returns
    -------
    pandas.DataFrame
        The columns returned by ``getpossibleteams`` plus ``"Year"`` (the
        season's starting year), with a fresh 0..n-1 index. Rows of
        ``last_year`` come first, followed by the remaining seasons in
        ascending order.

    Raises
    ------
    ValueError
        If ``first_year`` is greater than ``last_year``.
    """
    if first_year > last_year:
        raise ValueError(
            f"first_year ({first_year}) must not exceed last_year ({last_year})"
        )

    # Latest season first, then the earlier ones chronologically. This keeps
    # the row order (and therefore the index labels) of the original output.
    seasons = [last_year] + list(range(first_year, last_year))

    # Collect per-season frames and concatenate once rather than growing a
    # frame inside the loop. .assign avoids writing to a filtered slice.
    yearly_tables = [
        getpossibleteams(getseasondata(season)).assign(Year=season)
        for season in seasons
    ]
    return pd.concat(yearly_tables, ignore_index=True)
    
# Build the combined table across all seasons. Every season costs one request
# plus a 3-second pause inside getseasondata, so this cell is slow.
tek = find_all_possible_teams()
tek

Unnamed: 0,Team,Nat,Positions of Players,Number of Players,Position to Check,Year
0,Brentford,DEN,"[DF,MF, MF,FW, MF, DF, MF, DF]",6,DF,2022
1,Brentford,DEN,"[DF,MF, MF,FW, MF, DF, MF, DF]",6,MF,2022
2,Leeds United,USA,"[MF,FW, MF, MF]",3,MF,2022
3,Wolves,POR,"[DF, FW,MF, MF, FW,MF, MF, MF, FW,MF, GK, DF]",9,MF,2022
4,Crystal Palace,FRA,"[MF,FW, FW, FW, FW,MF]",4,FW,2022
...,...,...,...,...,...,...
250,Wolves,POR,"[DF,FW, DF, MF, FW,MF, MF, FW,MF, GK, MF,DF, F...",10,DF,2021
251,Brentford,DEN,"[DF, MF, MF, MF, DF, GK, MF, DF]",8,MF,2021
252,Wolves,POR,"[DF,FW, DF, MF, FW,MF, MF, FW,MF, GK, MF,DF, F...",10,MF,2021
253,Crystal Palace,FRA,"[FW, FW, FW,MF]",3,FW,2021


In [13]:
# Country codes from outside Europe to keep; presumably chosen by inspecting
# tek["Nat"].unique(). USA is included so the Leeds United row is shown too.
NON_UEFA_NATS = ["TRI", "JAM", "USA", "SEN", "BRA", "ARG", "AUS"]
tek[tek["Nat"].isin(NON_UEFA_NATS)]

Unnamed: 0,Team,Nat,Positions of Players,Number of Players,Position to Check,Year
2,Leeds United,USA,"[MF,FW, MF, MF]",3,MF,2022
19,Wimbledon,JAM,"[MF, FW,MF, FW,MF]",3,MF,1995
22,Wimbledon,JAM,"[MF, FW,MF, FW,MF]",3,MF,1996
30,Wimbledon,JAM,"[MF, FW,MF, MF, FW,MF]",4,MF,1997
41,Wimbledon,JAM,"[MF, FW,MF, FW,MF]",3,MF,1998
52,Wimbledon,JAM,"[MF, FW,MF, MF, FW,MF]",4,MF,1999
75,Leeds United,AUS,"[MF, FW,MF, MF, FW]",4,MF,2002
79,Charlton Ath,JAM,"[FW,MF, FW,MF, FW]",3,FW,2002
89,Charlton Ath,JAM,"[FW,MF, FW,MF, FW]",3,FW,2003
107,Bolton,SEN,"[MF, DF,MF, FW,MF]",3,MF,2005


Apart from the United States, the only non-UEFA countries that were even candidates to have three of their countrymen in the same line of play on a Premier League field were Jamaica, Senegal, Brazil, Trinidad & Tobago, Australia, and Argentina.

Of course, Brazil and Argentina are both giants of the sport, so this tracks. Trinidad and Tobago, however, had four different players at Sunderland, three of them forwards, but the forwards never saw the field together.

During "Fulhamerica", Fulham FC had four Americans in their side (GK Kasey Keller, DF Carlos Bocanegra, FW/MF Clint Dempsey, and FW Brian McBride), with FW Eddie Johnson joining towards the end of the January 2008 transfer window. This gave the USA three forwards, but although each of them came on as a substitute for another, all three were never on the field at the same time. There was one game during the season which saw all five Americans on the field, though. Crazy!

This makes the American midfield at Leeds a true outlier among non-European, non-powerhouse footballing nations, and the first such case in nearly 15 years.

Outside of Brazil and Argentina, the last time three countrymen played in the same line for a Premier League club was the Senegalese contingent at Stoke City in the 2008-2009 Premier League season. Truly, "Major Leeds Soccer" is an outlier and an incredible development in American soccer.