In [5]:
# %pip install requests
# %pip install bs4
import requests
from bs4 import BeautifulSoup
import re

In [486]:
# initial webscraping solution

In [6]:
def get_player_info(player_name):
    """Scrape the first ``wikitable`` on a player's English Wikipedia page.

    The page URL is built from ``player_name`` with spaces replaced by
    underscores. For each table row the first three ``<td>`` cells are kept,
    cleaned with ``process_text`` and then reordered by
    ``clean_up_teams_for_multiple_years``.

    Args:
        player_name: Page title as written on Wikipedia, e.g. ``"Cristiano Ronaldo"``.

    Returns:
        The list of three-item rows returned by
        ``clean_up_teams_for_multiple_years``, or ``None`` when the page
        cannot be fetched, contains no ``wikitable``, or parsing fails.
        A message explaining the failure is printed in each ``None`` case.
    """
    player_url = f"https://en.wikipedia.org/wiki/{player_name.replace(' ', '_')}"

    try:
        # A timeout stops the notebook from hanging forever on a stalled connection.
        response = requests.get(player_url, timeout=10)

        if response.status_code != 200:
            print("couldn't find player as written.")
            return None

        soup = BeautifulSoup(response.content, 'html.parser')
        player_career = soup.find('table', {'class': 'wikitable'})
        if player_career is None:
            # Previously the whole soup object was returned here, which callers
            # cannot iterate as career rows; None matches the other failure paths.
            print("No career. dry him, he's washed")
            return None

        # Fall back to the table itself if the markup carries no explicit <tbody>.
        table_body = player_career.tbody or player_career

        player_info = []
        for row in table_body.find_all("tr"):
            # team, season, league columns
            info = [process_text(cell.text) for cell in row.find_all("td")[:3]]
            # Rows with fewer than three data cells cannot be reordered by the
            # cleanup step (it indexes positions 0-2), so they are skipped.
            if len(info) == 3:
                player_info.append(info)

        return clean_up_teams_for_multiple_years(player_info)
    except Exception as e:
        # Deliberately broad: any network or parsing problem is reported, not raised.
        print(f'Something went wrong...: {e}')
        return None
    
def process_text(input_text):
    """Return ``input_text`` without ``[...]`` markers and without newlines.

    Bracketed spans are removed non-greedily first (a span that contains a
    newline is not matched), then every newline character is dropped.
    """
    without_brackets = re.sub(r'\[.*?\]', '', input_text)
    return ''.join(without_brackets.split('\n'))

def clean_up_teams_for_multiple_years(player_info):
    """Reorder each three-item row in place and return the same list.

    A row whose first item contains an en dash and whose second item does not
    start with a digit keeps its first item in front and receives the most
    recently remembered team as its second item. Every other row has its
    first two items swapped, and its original first item becomes the
    remembered team for the rows that follow.
    """
    en_dash = '\u2013'
    current_team = ''

    for index, row in enumerate(player_info):
        if en_dash in row[0] and not row[1][0].isdigit():
            # Continuation row: reuse the team seen on an earlier row.
            player_info[index] = [row[0], current_team, row[2]]
            continue

        player_info[index] = [row[1], row[0], row[2]]
        current_team = row[0]

    return player_info

def get_player_info_dict(player_info):
    """Group ``[season, team, league]`` rows into a dict keyed by season.

    En dashes in the season are replaced by hyphens. Each value is a list of
    ``[team, league]`` pairs in the order the rows were seen.

    Loan handling: when a ``(loan)`` team arrives for a season whose first
    recorded team is not a loan, the existing entries for that season are
    replaced by the loan entry; otherwise the new pair is appended.

    Args:
        player_info: Iterable of three-item rows, or ``None``/empty when the
            scrape produced nothing.

    Returns:
        ``{season: [[team, league], ...]}``; an empty dict for ``None`` or
        empty input, so callers can test the result for truthiness instead
        of hitting a ``TypeError``.
    """
    if not player_info:
        return {}

    player_info_dict = {}
    for season, team, league in player_info:
        season = season.replace('\u2013', '-')
        existing = player_info_dict.get(season)

        replaces_parent_club = (
            existing is not None
            and '(loan)' in team
            and '(loan)' not in existing[0][0]
        )
        if existing is None or replaces_parent_club:
            player_info_dict[season] = [[team, league]]
        else:
            existing.append([team, league])

    return player_info_dict


def clean_loans(player_info_dict):
    """Trim the last seven characters from any first field containing ``(loan)``.

    The dict and its nested lists are modified in place, and the same dict
    object is returned.
    """
    loan_marker = '(loan)'

    for entries in player_info_dict.values():
        for entry in entries:
            if loan_marker in entry[0]:
                # Seven characters corresponds to a trailing " (loan)".
                entry[0] = entry[0][:-7]

    return player_info_dict

In [488]:
# Smoke test of the v1 pipeline: scrape one player, key the rows by season,
# then print the first four characters of the first season key as an int.
a = get_player_info_dict(get_player_info('Cristiano Ronaldo'))
# Bare expression: not displayed, because it is not the last statement of the cell.
a
print(int(list(a)[0][:4]))

2002


In [7]:
def did_they_play_together(player1_name, player2_name):
    """Print whether two players shared a club, or at least a league, in a season.

    Both careers are scraped with ``get_player_info``, keyed by season and
    stripped of loan suffixes. Seasons are then generated year by year from
    the later of the two first-season years; the number of years scanned is
    the number of seasons recorded for the later-starting player (player 2
    when both start in the same year).

    Returns:
        ``True`` after printing the first season with a shared team,
        ``False`` after printing the first season with only a shared league,
        and ``None`` otherwise (either career is empty, or nothing matched).
    """
    career_one = clean_loans(get_player_info_dict(get_player_info(player1_name)))
    career_two = clean_loans(get_player_info_dict(get_player_info(player2_name)))

    if not (career_one and career_two):
        return None

    # The first key of each dict is taken as that player's first season.
    start_one = int(next(iter(career_one))[:4])
    start_two = int(next(iter(career_two))[:4])

    first_year = max(start_one, start_two)
    seasons_to_scan = len(career_one) if start_one > start_two else len(career_two)

    earliest_shared_league = None

    for year in range(first_year, first_year + seasons_to_scan):
        season = convert_year_to_season(year)
        if season not in career_one or season not in career_two:
            continue

        teams_one = [entry[0] for entry in career_one[season]]
        teams_two = [entry[0] for entry in career_two[season]]
        leagues_one = [entry[1] for entry in career_one[season]]
        leagues_two = [entry[1] for entry in career_two[season]]

        if set(teams_one) & set(teams_two):
            shared_team = find_common_element(teams_one, teams_two)
            print(f"Yes. {player1_name} and {player2_name} first played together at club level in {season} for {shared_team}.")
            return True

        # Only the earliest shared-league season is ever reported.
        if set(leagues_one) & set(leagues_two) and earliest_shared_league is None:
            earliest_shared_league = (season, find_common_element(leagues_one, leagues_two))

    if earliest_shared_league is not None:
        league_season, league_name = earliest_shared_league
        print(f"No. But {player1_name} and {player2_name} first played in the same league ({league_name}) in {league_season}.")
        return False

    print(f"No. {player1_name} and {player2_name} have never played together at club level.")

def convert_year_to_season(year):
    """Format a start year as a season label, e.g. ``2002`` -> ``"2002-03"``.

    The suffix is the decimal text of ``year + 1`` with its first two
    characters removed.
    """
    following_year = str(year + 1)
    return f"{year}-{following_year[2:]}"

def find_common_element(list1, list2):
    """Return the first item of ``list1`` that also occurs in ``list2``, else ``None``."""
    return next((candidate for candidate in list1 if candidate in list2), None)

In [490]:
# Interactive entry point for the v1 scraper: prompt for two player names and
# pass them to did_they_play_together, which prints its verdict.
if __name__ == "__main__":
    player1_name = input("Enter the name of the first player: ")
    player2_name = input("Enter the name of the second player: ")

    did_they_play_together(player1_name, player2_name)

In [None]:
# handling neymar-type pages
# handling the Gundo-Reus effect
# handling iniesta type pages

# fixed all by directly accessing tables in the webpage addressed in new solution
# can now handle disambiguation pages as well

# will now tell you if players have played in the same squad at the same time
# if not have they played in the same league at the same time
# will add whether they've played together internationally (called up in the same year)

In [248]:
"""V2"""

'V2'

In [8]:
# %pip install pandas
# %pip install lxml
import pandas as pd
import io
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [72]:
def _cell_to_text(value):
    """Render one table cell as text: NaN becomes '' and 1994.0 becomes '1994'."""
    if pd.isna(value):
        return ''
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _extract_table_rows(df, column_order):
    """Collect cleaned rows from ``df``, taking columns by position in ``column_order``.

    Rows whose first or second selected field is empty, and rows that contain
    'total'/'Total' in any selected field, are skipped.
    """
    rows = []
    for values in df.itertuples(index=False, name=None):
        raw = [_cell_to_text(values[position]) for position in column_order]
        if not raw[0] or not raw[1]:
            continue
        if any('total' in cell or 'Total' in cell for cell in raw):
            continue
        rows.append([process_text(cell) for cell in raw])
    return rows


def get_player_info_new(player_name):
    """Fetch a player's career tables from English Wikipedia.

    The MediaWiki API is queried first to see whether ``player_name`` is a
    disambiguation page; if so, the linked article titles are listed and the
    user is prompted to pick one. The chosen page is then downloaded and
    every ``wikitable`` on it is parsed with pandas.

    Args:
        player_name: Page title as written on Wikipedia.

    Returns:
        A list of tables in page order. A table with a ``Club`` column
        contributes a list of ``[season, team, league]`` rows; a table with a
        ``Team`` or ``National team`` column contributes a list of
        ``[year, nation]`` rows. Other tables are ignored. ``None`` is
        returned (after printing a message) when the page cannot be fetched,
        the disambiguation choice is invalid, or anything raises.
    """
    api_url = "https://en.wikipedia.org/w/api.php"
    request_timeout = 10  # seconds; avoids hanging the notebook on a stalled request

    params = {
        "action": "query",
        "titles": player_name,
        "prop": "categories",
        # Ask for every category so the disambiguation marker is not cut off
        # by the API's default page size.
        "cllimit": "max",
        "format": "json",
    }

    try:
        response = requests.get(api_url, params=params, timeout=request_timeout)
        data = response.json()

        pages = data["query"]["pages"]
        page_id, page_info = next(iter(pages.items()))

        is_disambiguation_page = any(
            "disambiguation" in cat["title"].lower()
            for cat in page_info.get("categories", [])
        )

        if is_disambiguation_page:
            links_params = {
                "action": "query",
                "titles": player_name,
                "prop": "links",
                "pllimit": "max",
                # Namespace 0 = articles only; drops Talk:/Help: links from the menu.
                "plnamespace": 0,
                "format": "json",
            }

            response = requests.get(api_url, params=links_params, timeout=request_timeout)
            data = response.json()
            links = data["query"]["pages"][page_id]["links"]

            for idx, link in enumerate(links):
                # Single quotes inside the f-string keep this valid before Python 3.12.
                print(f"{idx}. {link['title']}")
            page_num = int(input("Which one do you mean? Enter the corresponding number:"))
            # Valid indices stop at len(links) - 1.
            if not 0 <= page_num < len(links):
                print('invalid. please enter a valid choice next time.')
                return None
            player_name = links[page_num]["title"]

        page_title = player_name.replace(' ', '_')
        player_url = f"https://en.wikipedia.org/wiki/{page_title}"

        response = requests.get(player_url, timeout=request_timeout)

        if response.status_code != 200:
            print("Couldn't find player as written.")
            return None

        soup = BeautifulSoup(response.content, 'html.parser')
        tables = soup.find_all('table', {'class': 'wikitable'})
        player_info = []
        for tbl in tables:
            df = pd.read_html(io.StringIO(str(tbl)))[0]

            if 'Club' in df.columns:
                # Columns 0, 1, 2 hold team, season, league; emit season first.
                column_order = (1, 0, 2)
            elif 'Team' in df.columns or 'National team' in df.columns:
                # might need a new way to deal with international careers (slaven-bilic type pages)
                # Columns 0, 1 hold nation, year; emit year first.
                column_order = (1, 0)
            else:
                continue

            if df.shape[1] <= max(column_order):
                # Too few columns to hold the expected fields.
                continue

            player_info.append(_extract_table_rows(df, column_order))

        return player_info

    except Exception as e:
        # Deliberately broad: network, JSON and parsing failures are reported, not raised.
        print(f"Couldn't get url...: {e}")
        return None

In [67]:
def get_intl_player_info_dict(player_info):
    """Group ``[year, nation]`` rows into ``{year: [nation, ...]}``.

    A year maps to a list rather than a single nation because players can
    switch nationalities before a certain point.
    """
    nations_by_year = {}
    for year, nation in player_info:
        nations_by_year.setdefault(year, []).append(nation)
    return nations_by_year

def did_they_play_internationally(player1_intl_career, player2_intl_career):
    """Find the earliest year in which both players appear for the same nation.

    Args:
        player1_intl_career: ``{year: [nation, ...]}`` for the first player.
        player2_intl_career: Same mapping for the second player.

    Returns:
        ``(True, "were both called up for <nation> in <year>.")`` for the
        earliest shared year with a common nation, otherwise
        ``(False, "Did not play for the same nation.")``. Empty careers give
        the ``False`` result instead of raising.
    """
    not_found = (False, "Did not play for the same nation.")

    if not player1_intl_career or not player2_intl_career:
        return not_found

    # International careers are keyed by plain years, so the shared keys can be
    # compared directly and sorted numerically; this does not rely on either
    # dict being stored in chronological order.
    shared_years = sorted(
        (
            year for year in player1_intl_career
            if year in player2_intl_career and str(year).isdigit()
        ),
        key=lambda year: int(year),
    )

    for year in shared_years:
        nations1 = player1_intl_career[year]
        nations2 = player2_intl_career[year]

        common_nation = next((nation for nation in nations1 if nation in nations2), None)
        if common_nation is not None:
            return (True, f"were both called up for {common_nation} in {year}.")

    return not_found

In [168]:
def _first_table_with_width(player_info, width):
    """Return the first non-empty table whose rows have ``width`` fields, else ``[]``."""
    for table in player_info or []:
        if table and len(table[0]) == width:
            return table
    return []


def _season_start_years(career):
    """Return the start year of every season key that begins with four digits."""
    return [int(season[:4]) for season in career if season[:4].isdigit()]


def did_they_play_together_new(player1_name, player2_name):
    """Print whether two players overlapped internationally, at a club, or in a league.

    Career tables come from ``get_player_info_new``. Three-field tables are
    treated as club careers and two-field tables as international careers,
    so the result does not depend on which table appears first on the page.

    Club seasons are scanned year by year across the span where both careers
    overlap, checking both the ``"YYYY-YY"`` label and the plain ``"YYYY"``
    label used by calendar-year leagues.

    Returns:
        ``True`` after printing the first season with a shared team;
        ``False`` after printing either the first shared-league season or
        that the players never played together; ``None`` when career data
        for either player is unavailable.
    """
    # Get career information for both players
    player1_info = get_player_info_new(player1_name)
    player2_info = get_player_info_new(player2_name)

    if player1_info is None or player2_info is None:
        # get_player_info_new has already printed why the lookup failed.
        return None

    player1_career = clean_loans(get_player_info_dict(_first_table_with_width(player1_info, 3)))
    player2_career = clean_loans(get_player_info_dict(_first_table_with_width(player2_info, 3)))

    intl1_rows = _first_table_with_width(player1_info, 2)
    intl2_rows = _first_table_with_width(player2_info, 2)
    if intl1_rows and intl2_rows:
        intl_res = did_they_play_internationally(
            get_intl_player_info_dict(intl1_rows),
            get_intl_player_info_dict(intl2_rows),
        )
        if intl_res[0]:
            print(f"{player1_name} and {player2_name} {intl_res[1]}")

    if not (player1_career and player2_career):
        return None

    p1_years = _season_start_years(player1_career)
    p2_years = _season_start_years(player2_career)
    if not p1_years or not p2_years:
        return None

    # Overlap window: from the later career start to the earlier career end.
    start_check = max(min(p1_years), min(p2_years))
    end_check = min(max(p1_years), max(p2_years))

    if end_check < start_check:
        print(f"{player1_name} and {player2_name} have never played together at club level.")
        return False

    first_shared_league = None

    # Scanning the year span (not the number of recorded seasons) means gaps
    # in either career cannot cut the search short.
    for year in range(start_check, end_check + 1):
        for season in (convert_year_to_season(year), str(year)):
            if season not in player1_career or season not in player2_career:
                continue

            teams_player_1 = [entry[0] for entry in player1_career[season]]
            teams_player_2 = [entry[0] for entry in player2_career[season]]

            leagues_player_1 = [entry[1] for entry in player1_career[season]]
            leagues_player_2 = [entry[1] for entry in player2_career[season]]

            common_team = find_common_element(teams_player_1, teams_player_2)
            if common_team is not None:
                print(f"{player1_name} and {player2_name} first played together at club level in {season} for {common_team}.")
                return True

            common_league = find_common_element(leagues_player_1, leagues_player_2)
            if common_league is not None and first_shared_league is None:
                first_shared_league = (season, common_league, teams_player_1, teams_player_2)

    if first_shared_league is not None:
        season, common_league, p1_team, p2_team = first_shared_league
        print(f"{player1_name} ({', '.join(p1_team)}) and {player2_name} ({', '.join(p2_team)}) first played in the same league ({common_league}) in {season}.")
        return False

    print(f"{player1_name} and {player2_name} have never played together at club level.")
    return False

In [74]:
# International call-ups by year for one player. clean_loans is not applied here:
# it rewrites the first field of [team, league] pairs, whereas these entries are
# plain nation strings, so it had no effect on this dict.
a = get_intl_player_info_dict(get_player_info_new('Zinedine Zidane')[1])
a

{'1994': ['France'],
 '1995': ['France'],
 '1996': ['France'],
 '1997': ['France'],
 '1998': ['France'],
 '1999': ['France'],
 '2000': ['France'],
 '2001': ['France'],
 '2002': ['France'],
 '2003': ['France'],
 '2004': ['France'],
 '2005': ['France'],
 '2006': ['France']}

In [34]:
# if __name__ == "__main__":
#     player1_name = input("Enter the name of the first player: ")
#     player2_name = input("Enter the name of the second player: ")

#     did_they_play_together_new(player1_name, player2_name)

yee
0. Cristian Romero (Argentine footballer)
1. Cristian Romero (Paraguayan footballer)
2. Cristian Romero (writer)
3. Cristián Romero (Chilean footballer)
4. Talk:Cristian Romero
5. Help:Disambiguation
No. Cristian Romero and Zinedine Zidane have never played together at club level.


In [None]:
# maybe allow to set the range, did these players play together between (start, end)
# did these players ever play together in this specific league
# database to speed up lookup w/ scheduled scraping to keep up to date (October is good, transfer markets closed, and again in February)

In [247]:
# Interactive entry point for the v2 scraper: prompt for two player names and
# pass them to did_they_play_together_new, which prints its findings.
if __name__ == "__main__":
    player1_name = input("Enter the name of the first player: ")
    player2_name = input("Enter the name of the second player: ")

    did_they_play_together_new(player1_name, player2_name)

Raheem Sterling (Manchester City) and Cristiano Ronaldo (Juventus, Manchester United) first played in the same league (Premier League) in 2021-22.
