# Jugadores

In [1]:
import time
import pandas as pd
import re

def players(url, id, league, season, team):

    df = pd.read_html(url,
                    attrs={'id':id})[0]

    df.columns = df.columns.get_level_values(1)

    df['Edad'] = df['Edad'].apply(lambda x: x[:2]+'.'+x[-3:])

    df['Edad'] = pd.to_numeric(df['Edad'], errors='coerce')
    df_team = df.dropna(subset=['Edad'])


    # Usar una expresión regular para extraer el nombre
    
    df_team['País'] = df_team['País'].apply(lambda x: x[-3:])
    df_team['Equipo'] = team


    df.rename(columns={'Jugador': 'player'}, inplace=True)
    df['league'] = league
    df['season'] = season
    df['team'] = team

    df_team.to_csv('jugadores.csv', index=False)

    return df_team

In [2]:
url = 'https://fbref.com/es/equipos/e2f19203/2024/all_comps/Estadisticas-de-Instituto-Todas-las-competencias'
id = 'stats_standard_combined'

league = 'Liga Profesional Argentina '
season = '2024'
team = 'Instituto'


players(url, id, league, season, team)

Unnamed: 0,Jugador,País,Posc,Edad,PJ,Titular,Mín,90 s,Gls.,Ass,...,G+A,G-TP,G+A-TP,xG,xAG,xG+xAG,npxG,npxG+xAG,Partidos,Equipo
0,Manuel Roffo,ARG,PO,24.133,24,24,2160.0,24.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,Partidos,Instituto
1,Gastón Lodico,ARG,CC,26.079,24,24,2069.0,23.0,1.0,7.0,...,0.35,0.04,0.35,0.04,0.21,0.25,0.04,0.25,Partidos,Instituto
2,Fernando Alarcón,ARG,DF,30.06,21,21,1865.0,20.7,1.0,0.0,...,0.05,0.05,0.05,0.03,0.0,0.04,0.03,0.04,Partidos,Instituto
3,Damián Puebla,ARG,CC,21.335,23,23,1754.0,19.5,6.0,2.0,...,0.41,0.15,0.26,0.25,0.08,0.33,0.13,0.21,Partidos,Instituto
4,Juan Franco Arrellaga,PAR,DF,32.187,17,17,1443.0,16.0,2.0,0.0,...,0.12,0.12,0.12,0.09,0.04,0.13,0.09,0.13,Partidos,Instituto
5,Jonathan Bay,ARG,DF,31.196,18,15,1383.0,15.4,1.0,2.0,...,0.2,0.07,0.2,0.05,0.12,0.17,0.05,0.17,Partidos,Instituto
6,Jonás Acevedo,ARG,CC,27.191,18,15,1272.0,14.1,0.0,3.0,...,0.21,0.0,0.21,0.08,0.2,0.28,0.08,0.28,Partidos,Instituto
7,Facundo Suárez,ARG,DL,30.045,22,14,1184.0,13.2,5.0,1.0,...,0.46,0.38,0.46,0.46,0.1,0.55,0.46,0.55,Partidos,Instituto
8,Ignacio Russo,ARG,DL,23.246,22,13,1138.0,12.6,4.0,0.0,...,0.32,0.32,0.32,0.36,0.12,0.47,0.29,0.41,Partidos,Instituto
9,Roberto Bochi,ARG,CC,34.243,14,13,1114.0,12.4,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Partidos,Instituto


# Equipos

In [16]:
def teams(url, id, league, season):

    try:
        # Leer la tabla de la URL especificada con el atributo id
        df = pd.read_html(url, attrs={'id': id})[0]
        
        df['league'] = league
        df['season'] = season
        df.rename(columns={'Equipo': 'team'}, inplace=True)
    
        # Guardar el DataFrame en un archivo CSV
        df.to_csv('equipos.csv', index=False)

        return
                
    except Exception as e:
        print(f"Error al procesar id {id} en la liga {league}: {e}")
        return
    


In [17]:
url = 'https://fbref.com/es/comps/21/Estadisticas-de-Liga-Profesional-Argentina'
id = 'results2024211_overall'

league = 'Liga Profesional Argentina '
season = '2024'

teams(url, id, league, season)

# Stats

In [28]:
import pandas as pd
import time
import re

def stats(url, ids, league, season, delay=10):
    dfs = []

    for id in ids:
        # Esperar entre solicitudes
        time.sleep(delay)
        
        try:
            # Leer la tabla de la URL especificada con el atributo id
            df = pd.read_html(url, attrs={'id': id})[0]

            # Verificar si las columnas tienen múltiples niveles
            if isinstance(df.columns, pd.MultiIndex):
                # Separar los dos niveles de columnas en 'Nivel1' y 'Nivel2'
                df.columns = ['_'.join(col).strip() if 'Unnamed' not in col[0] else col[1] for col in df.columns]

            # Pivotear el DataFrame
            df_pivot = df.melt(id_vars=['Equipo'], var_name='Stat', value_name='Valor')

            # Separar los niveles si la columna 'Stat' tiene '_'
            if df_pivot['Stat'].str.contains('_').any():
                df_pivot[['Clase', 'Stat']] = df_pivot['Stat'].str.split('_', expand=True, n=1)
            else:
                df_pivot['Stat'] = df_pivot['Stat']
                df_pivot['Clase'] = ''

            # Extraer el id antes de '_for' o '_against'
            tabla_match = re.match(r'(.+)(_for|_against)', id)
            if tabla_match:
                df_pivot['Objeto'] = 'for' if '_for' in id else 'against'
                df_pivot['Tabla'] = tabla_match.group(1)
            else:
                df_pivot['Tabla'] = id
                df_pivot['Objeto'] = None

            # Agregar el DataFrame pivoteado a la lista de DataFrames
            
            df['league'] = league
            df['season'] = season
            df.rename(columns={'Equipo': 'team'}, inplace=True)
            
            dfs.append(df_pivot)
                    
        except Exception as e:
            print(f"Error al procesar id {id}: {e}")
    
    # Concatenar todos los DataFrames
    stats = pd.concat(dfs, ignore_index=True)
    stats.to_csv('stats.csv', index=False)
    
    return


In [6]:
url = 'https://fbref.com/es/comps/21/Estadisticas-de-Liga-Profesional-Argentina'
id = ('stats_squads_standard_for','stats_squads_standard_against',
      'stats_squads_keeper_for','stats_squads_keeper_against',
      'stats_squads_keeper_adv_for','stats_squads_keeper_adv_against',
      'stats_squads_shooting_for','stats_squads_shooting_against',
      'stats_squads_passing_for','stats_squads_passing_against',
      'stats_squads_passing_types_for','stats_squads_passing_types_against',
      'stats_squads_gca_for','stats_squads_gca_against',
      'stats_squads_defense_for', 'stats_squads_defense_against',
      'stats_squads_possession_for','stats_squads_possession_against',
      'stats_squads_playing_time_for','stats_squads_playing_time_against',
      'stats_squads_misc_for','stats_squads_misc_against'
)

stats(url, ids, league, season, delay=10)

# Percentiles

In [29]:
import LanusStats as ls  
fbref = ls.Fbref()
fotmob = ls.FotMob()
sofascore = ls.SofaScore()

def percentiles(player_urls, league, season, team, delay=10):
    dfs = []

    for player_url in player_urls:

        df = fbref.get_player_percentiles(player_url)
        player_name = player_url.split('/')[-1].replace('-', ' ')

        if df.loc[0, 'Estadísticas'] == 'PSxG-GA':
            bins = [0, 6, 11, 15]
            labels = ['Remates', 'Colectiva', 'Defensiva']
        else:
            bins = [0, 7, 15, 21]
            labels = ['Ofensiva', 'Colectiva', 'Defensiva']

        df = df.dropna(how='all')
        df['player'] = player_name
        df['league'] = league
        df['season'] = season
        df['team'] = team

        # Crear la columna con los valores basados en los intervalos
        df['clase'] = pd.cut(df.index, bins=bins, labels=labels, right=False)

        dfs.append(df)
        # df.to_csv(f'percentil_{player_name}.csv', index=False)
        time.sleep(delay)


    percentiles = pd.concat(dfs, ignore_index=True)
    percentiles.to_csv('percentiles.csv', index=False)

    return

In [8]:
league = 'Liga Profesional Argentina '
season = '2024'
team = 'Instituto'

player_urls = [
'https://fbref.com/es/jugadores/0f978661/Gonzalo-Requena',
'https://fbref.com/es/jugadores/0373879c/Gaston-Lodico',
'https://fbref.com/es/jugadores/bfaeb90e/Damian-Puebla',
'https://fbref.com/es/jugadores/bed3eed1/Nicolas-Dubersarsky',
'https://fbref.com/es/jugadores/0b419c37/Jonas-Acevedo',
'https://fbref.com/es/jugadores/1ebb7c72/Santiago-Rodriguez',
'https://fbref.com/es/jugadores/b4c8e4c5/Fernando-Alarcon',
'https://fbref.com/es/jugadores/2ca3d7f2/Ignacio-Russo',
'https://fbref.com/es/jugadores/f88ec6d6/Giuliano-Cerato',
'https://fbref.com/es/jugadores/b50e5848/Manuel-Roffo'
]

percentiles(player_urls, league, season, team, delay=10)

# Heatmap

In [9]:
def heatmap_handmade(players, league, season, team, delay=20):
    dfs = []

    for player in players:
        try:
            # Extraer el número del jugador
            player_number = re.search(r'\d+$', player).group()
            
            # Obtener el heatmap del jugador para la temporada especificada
            df = sofascore.get_player_season_heatmap(league, season, player_number)
            
            # Extraer el nombre del jugador y formatearlo
            player_name = re.search(r'\/([^\/]+)\/\d+$', player).group(1).replace("-", " ").title()
            
            df['player'] = player_name
            df['league'] = league
            df['season'] = season
            df['team'] = team

            dfs.append(df)

            # Guardar el DataFrame en un archivo CSV
            # df.to_csv(f'heatmap_{player_name}.csv', index=False)
            
            # Esperar entre solicitudes para evitar problemas con el servidor
            time.sleep(delay)
        
        except Exception as e:
            print(f"Error al procesar el jugador {player}: {e}")
    
    heatmap = pd.concat(dfs, ignore_index=True)
    heatmap.to_csv('heatmap.csv', index=False)

    return

In [10]:
league = 'Liga Profesional Argentina '
season = '2024'
team = 'Instituto'
players = (
    'https://www.sofascore.com/es/jugador/silvio-romero/174937',
    'https://www.sofascore.com/es/jugador/santiago-rodriguez/897000',
    'https://www.sofascore.com/es/jugador/facundo-suarez/992068',
    'https://www.sofascore.com/es/jugador/ignacio-russo/1087005',
    'https://www.sofascore.com/es/jugador/damian-batallini/830885',
    'https://www.sofascore.com/es/jugador/rodriguez-gregorio/1018414',
    'https://www.sofascore.com/es/jugador/gaston-lodico/922460',
    'https://www.sofascore.com/es/jugador/damian-puebla/1466522',
    'https://www.sofascore.com/es/jugador/franco-diaz/1018402',
    'https://www.sofascore.com/es/jugador/perez-rodrigo/1177546',
    'https://www.sofascore.com/es/jugador/jonas-acevedo/875895',
    'https://www.sofascore.com/es/jugador/stefano-moreyra/1086973',
    'https://www.sofascore.com/es/jugador/braian-cuello/896722',
    'https://www.sofascore.com/es/jugador/matias-romero/941096',
    'https://www.sofascore.com/es/jugador/roberto-bochi/897138',
    'https://www.sofascore.com/es/jugador/juan-franco/926812',
    'https://www.sofascore.com/es/jugador/miguel-brizuela/990283',
    'https://www.sofascore.com/es/jugador/fernando-alarcon/797020',
    'https://www.sofascore.com/es/jugador/victor-cabrera/537036',
    'https://www.sofascore.com/es/jugador/jonathan-bay/789358',
    'https://www.sofascore.com/es/jugador/lucas-rodriguez/146982',
    'https://www.sofascore.com/es/jugador/cerato-giuliano/1183795',
    'https://www.sofascore.com/es/jugador/manuel-roffo/881166',
    'https://www.sofascore.com/es/jugador/joaquin-matias-papaleo/789396',
    'https://www.sofascore.com/es/jugador/emanuel-sittaro/1018390'
)

heatmap_handmade(players, league, season, team)

Error al procesar el jugador https://www.sofascore.com/es/jugador/matias-romero/941096: Player in path 941096 doesn't have enough information for this functions, try with another one.
El jugador en el path 941096 no tiene la información para estas funciones, pruebe con otro.
Error al procesar el jugador https://www.sofascore.com/es/jugador/roberto-bochi/897138: Player in path 897138 doesn't have enough information for this functions, try with another one.
El jugador en el path 897138 no tiene la información para estas funciones, pruebe con otro.
Error al procesar el jugador https://www.sofascore.com/es/jugador/joaquin-matias-papaleo/789396: Player in path 789396 doesn't have enough information for this functions, try with another one.
El jugador en el path 789396 no tiene la información para estas funciones, pruebe con otro.
Error al procesar el jugador https://www.sofascore.com/es/jugador/emanuel-sittaro/1018390: Player in path 1018390 doesn't have enough information for this function

In [75]:
def heatmap(sofascorePlayers, delay=5):
    dfs = []

    for i in range(0,len(sofascorePlayers)):

        url = sofascorePlayers[i]['link']
        print(url)

        try:
            # Extraer el número del jugador
            player_id = sofascorePlayers[i]['id']
            season = str(sofascorePlayers[i]['season'])
            league = sofascorePlayers[i]['league']
            country = sofascorePlayers[i].get('country', 'Argentina')

            # Obtener el heatmap del jugador para la temporada especificada
            df = sofascore.get_player_season_heatmap(country+' '+league, season, player_id)
                      
            df['player'] = sofascorePlayers[i]['name']
            df['league'] = league
            df['season'] = season
            df['team'] = sofascorePlayers[i]['team']

            dfs.append(df)
           
            # Esperar entre solicitudes para evitar problemas con el servidor
            time.sleep(delay)
        
        except Exception as e:
            print(f"Error al procesar el jugador {url}: {e}")
    
    heatmap = pd.concat(dfs, ignore_index=True)
    heatmap.to_csv('heatmap.csv', index=False)

    return

In [83]:
heatmap(sofascorePlayer)

https://www.sofascore.com/es/jugador/juan-quintero/221162
https://www.sofascore.com/es/jugador/roger-martinez/344243
https://www.sofascore.com/es/jugador/martinez-adrian/906811
https://www.sofascore.com/es/jugador/carbonero-johan/925125
https://www.sofascore.com/es/jugador/maximiliano-salas/883933
https://www.sofascore.com/es/jugador/agustin-urzi/965775
https://www.sofascore.com/es/jugador/nicolas-reniero/992523
Error al procesar el jugador https://www.sofascore.com/es/jugador/nicolas-reniero/992523: Player in path 992523 doesn't have enough information for this functions, try with another one.
El jugador en el path 992523 no tiene la información para estas funciones, pruebe con otro.
https://www.sofascore.com/es/jugador/matias-bergara/1514635
Error al procesar el jugador https://www.sofascore.com/es/jugador/matias-bergara/1514635: Player in path 1514635 doesn't have enough information for this functions, try with another one.
El jugador en el path 1514635 no tiene la información para 

# Shotmap

In [13]:
def shotmap(players, league, season, team, delay=10):
    dfs = []

    for player in players:
        try:
            # Extraer el número del jugador
            player_number = re.search(r'/players/(\d+)/', player).group(1)
            
            # Obtener el shotmap del jugador para la temporada especificada
            df = fotmob.get_player_shotmap("0", "0", player_number)
            
            # Extraer el nombre del jugador y formatearlo
            player_name = re.search(r'/players/\d+/([^/]+)', player).group(1).replace("-", " ").title()
            
            df.rename(columns={'player_name': 'player'}, inplace=True)
            df['league'] = league
            df['season'] = season
            df['team'] = team

            dfs.append(df)

            # Guardar el DataFrame en un archivo CSV
            # df.to_csv(f'shotmap_{player_name}.csv', index=False)
            
            # Esperar entre solicitudes para evitar problemas con el servidor
            time.sleep(delay)
        
        except Exception as e:
            print(f"Error al procesar el jugador {player}: {e}")
    
    shotmap = pd.concat(dfs, ignore_index=True)
    shotmap.to_csv('shotmap.csv', index=False)

    return

In [14]:
players = ('https://www.fotmob.com/es/players/209304/lucas-rodriguez',
           'https://www.fotmob.com/es/players/416311/juan-franco',
           'https://www.fotmob.com/es/players/523418/victor-cabrera',
           'https://www.fotmob.com/es/players/647928/fernando-alarcon',
           'https://www.fotmob.com/es/players/760273/jonathan-bay',
           'https://www.fotmob.com/es/players/1075045/miguel-brizuela',
           'https://www.fotmob.com/es/players/1098253/giuliano-cerato',
           'https://www.fotmob.com/es/players/1453094/gonzalo-requena',
           'https://www.fotmob.com/es/players/1616638/lautaro-carrera',
           'https://www.fotmob.com/es/players/571621/roberto-agustin-bochi',
           'https://www.fotmob.com/es/players/660079/matias-romero',
           'https://www.fotmob.com/es/players/811111/nicolas-barrientos',
           'https://www.fotmob.com/es/players/882460/jonas-acevedo',
           'https://www.fotmob.com/es/players/889715/brahian-cuello',
           'https://www.fotmob.com/es/players/917619/gaston-lodico',
           'https://www.fotmob.com/es/players/1201767/stefano-moreyra',
           'https://www.fotmob.com/es/players/1265160/franco-diaz',
           'https://www.fotmob.com/es/players/1436129/rodrigo-perez',
           'https://www.fotmob.com/es/players/1437366/damian-puebla',
           'https://www.fotmob.com/es/players/1585863/nicolas-dubersarsky',
           'https://www.fotmob.com/es/players/1609545/jeremias-lazaro',
           'https://www.fotmob.com/es/players/213106/silvio-romero',
           'https://www.fotmob.com/es/players/690398/santiago-rodriguez',
           'https://www.fotmob.com/es/players/971269/facundo-suarez',
           'https://www.fotmob.com/es/players/1200023/ignacio-russo',
           'https://www.fotmob.com/es/players/1366607/gregorio-rodriguez'
)

shotmap(players, league, season, team, delay=10)

Error al procesar el jugador https://www.fotmob.com/es/players/1616638/lautaro-carrera: Match in path 1616638 doesn't have enough information for this functions, try with another one.
El partido en el path 1616638 no tiene la información para estas funciones, pruebe con otro.


# Average Position

In [62]:
import LanusStats as ls  
sofascore = ls.SofaScore()
import re


def average(urls, league, season, delay=10):

    dfs = []

    for url in urls: 
        
        time.sleep(delay)
        
        avg_position = sofascore.get_players_average_positions(url)
    
        # Usando expresión regular para extraer el ID
        match = re.search(r'id:(\d+)', url)
        match_id = match.group(1)

        home_avg_position = avg_position[0]
        away_avg_position = avg_position[1]

        df = pd.concat([home_avg_position, away_avg_position], ignore_index=True)
        
        df['match'] = match_id
        df['league'] = league
        df['season'] = season

        dfs.append(df)

    # Guardar el DataFrame en un archivo CSV
    df = pd.concat(dfs, ignore_index=True)
    df.to_csv('average_position.csv', index=False)
    
    return

In [17]:
urls = ['https://www.sofascore.com/es/football/match/platense-instituto-de-cordoba/MYbsMLo#id:11937238']
league = 'Liga Profesional Argentina '
season = '2024'

average(urls, league, season, delay=10)

# Lineups

In [18]:
import re
import time
import pandas as pd
import LanusStats as ls  

sofascore = ls.SofaScore()

def lineups_handmade(urls, season, league, delay=10):
    dfs = []

    for url in urls:
        time.sleep(delay)

        lineups = sofascore.get_lineups(url)

        match = re.search(r'id:(\d+)', url)
        match_id = match.group(1) if match else 'unknown'

        home_formation = lineups['home']['formation']
        away_formation = lineups['away']['formation']

        # Lista vacía para almacenar los datos
        home = []
        away = []

        # Iterar sobre los jugadores y recopilar los datos
        for i in range(0, len(lineups['home']['players'])):
            name = lineups['home']['players'][i]['player']['name']
            jersey = lineups['home']['players'][i]['shirtNumber']
            position = lineups['home']['players'][i]['position']
            substitute = lineups['home']['players'][i]['substitute']
            minutes = lineups['home']['players'][i]['statistics'].get('minutesPlayed', 0)
            
            # Agregar los datos a la lista
            home.append([name, jersey, position, substitute, minutes])

        for i in range(0, len(lineups['away']['players'])):
            name = lineups['away']['players'][i]['player']['name']
            jersey = lineups['away']['players'][i]['shirtNumber']
            position = lineups['away']['players'][i]['position']
            substitute = lineups['away']['players'][i]['substitute']
            minutes = lineups['away']['players'][i]['statistics'].get('minutesPlayed', 0)
            
            # Agregar los datos a la lista
            away.append([name, jersey, position, substitute, minutes])

        # Crear un DataFrame a partir de las listas de datos

        match_data = sofascore.get_match_data(url)
        homeTeam_name = match_data['event']['homeTeam']['shortName']
        awayTeam_name = match_data['event']['awayTeam']['shortName']

        home_df = pd.DataFrame(home, columns=['player', 'jersey', 'position', 'substitute', 'minutes'])
        home_df['local'] = 'Home'
        home_df['team'] = homeTeam_name
        home_df['formation'] = home_formation

        away_df = pd.DataFrame(away, columns=['player', 'jersey', 'position', 'substitute', 'minutes'])
        away_df['local'] = 'Away'
        away_df['team'] = awayTeam_name
        away_df['formation'] = away_formation

        # Posiciones promedio
        avg_position = sofascore.get_players_average_positions(url)
        home_avg_position = avg_position[0]
        away_avg_position = avg_position[1]
        df_avg_position = pd.concat([home_avg_position, away_avg_position], ignore_index=True)
        
        df_avg_position = df_avg_position[['name', 'averageX', 'averageY']]
        df_avg_position.rename(columns={'name': 'player', 'averageX':'x', 'averageY':'y'}, inplace=True)

        # Crear un DataFrame a partir de la lista de datos
        df = pd.concat([home_df, away_df], ignore_index=True)
        df['match'] = match_id
        df['league'] = league
        df['season'] = season

        df_merged = pd.merge(df, df_avg_position, on='player', how='left')

        dfs.append(df_merged)

    # Concatenar todos los DataFrames en uno solo y guardar en un archivo CSV
    final_df = pd.concat(dfs, ignore_index=True)
    final_df.to_csv('lineup.csv', index=False)

    return final_df


In [19]:
urls = ['https://www.sofascore.com/es/football/match/platense-instituto-de-cordoba/MYbsMLo#id:11937238']

lineups_handmade(urls, season, league, delay=10)

Unnamed: 0,player,jersey,position,substitute,minutes,local,team,formation,match,league,season,x,y
0,Manuel Roffo,28,G,False,90,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,12.365217,47.206522
1,Juan Franco,2,D,False,90,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,47.026761,15.415493
2,Gonzalo Requena,31,D,False,90,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,32.114286,29.215476
3,Fernando Alarcón,6,D,False,90,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,34.76,64.8825
4,Lucas Rodríguez,18,D,False,72,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,43.289796,86.393878
5,Jonas Acevedo,8,M,False,86,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,65.693333,22.006667
6,Stefano Moreyra,34,M,False,72,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,42.769091,46.443636
7,Gastón Lódico,19,M,False,90,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,52.369072,51.162887
8,Damián Puebla,22,M,False,90,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,61.082222,67.504444
9,Santiago Rodríguez,11,F,False,86,Home,Instituto,4-4-2,11937238,Argentina Liga Profesional,2024,73.093333,35.976667


# Match

In [20]:
import pandas as pd
import time
import re  # Si necesitas usar expresiones regulares

def match(urls, season, league, delay=10):
    # Lista para almacenar los DataFrames
    dfs = []

    for url in urls:
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        match = re.search(r'id:(\d+)', url)
        match_id = match.group(1) if match else 'unknown'

        # Obtener match_data usando la URL
        match_data = sofascore.get_match_data(url)

        # Datos extraídos del match_data (ejemplo)
        homeTeam_name = match_data['event']['homeTeam']['shortName']
        homeTeam_id = match_data['event']['homeTeam']['id']
        homeScore = match_data['event']['homeScore']['display']
        homeScore_p1 = match_data['event']['homeScore']['period1']
        homeScore_p2 = match_data['event']['homeScore']['period2']

        awayTeam_name = match_data['event']['awayTeam']['shortName']
        awayTeam_id = match_data['event']['awayTeam']['id']
        awayScore = match_data['event']['awayScore']['display']
        awayScore_p1 = match_data['event']['awayScore']['period1']
        awayScore_p2 = match_data['event']['awayScore']['period2']

        round_info = match_data['event']['roundInfo']
        tournament = match_data['event']['tournament']['name']
        year = match_data['event']['season']['year']
        stadium = match_data['event']['venue']['stadium']['name']
        country = match_data['event']['venue']['country']['name']
        city = match_data['event']['venue']['city']['name']
        date_time = match_data['event']['startTimestamp']

        # Crear un diccionario con los datos
        data = {
            'match' : [match_id],
            'homeTeam_name': [homeTeam_name],
            'homeTeam_id': [homeTeam_id],
            'homeScore': [homeScore],
            'homeScore_p1': [homeScore_p1],
            'homeScore_p2': [homeScore_p2],
            'awayTeam_name': [awayTeam_name],
            'awayTeam_id': [awayTeam_id],
            'awayScore': [awayScore],
            'awayScore_p1': [awayScore_p1],
            'awayScore_p2': [awayScore_p2],
            'round': [round_info],
            'tournament': [tournament],
            'year': [year],
            'stadium': [stadium],
            'country': [country],
            'city': [city],
            'date_time': [date_time],
            'season': [season],
            'league': [league]
        }

        # Convertir el diccionario en un DataFrame
        df = pd.DataFrame(data)

        # Añadir el DataFrame a la lista
        dfs.append(df)

    # Concatenar todos los DataFrames en uno solo
    final_df = pd.concat(dfs, ignore_index=True)

    # Guardar el DataFrame en un archivo CSV si es necesario
    final_df.to_csv('match.csv', index=False)

    return final_df


# Result

In [21]:
import pandas as pd
import time
import re  # Si necesitas usar expresiones regulares

def result(urls, season, league, delay=10):
    # Lista para almacenar los DataFrames
    dfs = []

    for url in urls:
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        match = re.search(r'id:(\d+)', url)
        match_id = match.group(1) if match else 'unknown'

        # Obtener match_data usando la URL
        match_data = sofascore.get_match_data(url)

        # Datos extraídos del match_data (ejemplo)
        homeTeam_name = match_data['event']['homeTeam']['shortName']
        homeTeam_id = match_data['event']['homeTeam']['id']
        homeScore = match_data['event']['homeScore']['display']

        awayTeam_name = match_data['event']['awayTeam']['shortName']
        awayTeam_id = match_data['event']['awayTeam']['id']
        awayScore = match_data['event']['awayScore']['display']

        # Crear un diccionario con los datos
        data_home = {
            'match' : [match_id],
            'team': [homeTeam_name],
            'team_id': [homeTeam_id],
            'score_pos': [homeScore],
            'score_neg': [awayScore],
            'win': [homeScore] > [awayScore],
            'draw': [homeScore] == [awayScore],
            'loose': [homeScore] < [awayScore],
            'local': 'Home',
            'season': [season],
            'league': [league]
        }

        data_away = {
            'match' : [match_id],
            'team': [awayTeam_name],
            'team_id': [awayTeam_id],
            'score_pos': [awayScore],
            'score_neg': [homeScore],
            'win': [homeScore] < [awayScore],
            'draw': [homeScore] == [awayScore],
            'loose': [homeScore] > [awayScore],
            'local': 'Away',
            'season': [season],
            'league': [league]
        }

        # Convertir el diccionario en un DataFrame
        df_home = pd.DataFrame(data_home)
        df_away = pd.DataFrame(data_away)
        df = pd.concat([df_home, df_away], ignore_index=True)

        # Añadir el DataFrame a la lista
        dfs.append(df)

    # Concatenar todos los DataFrames en uno solo
    final_df = pd.concat(dfs, ignore_index=True)

    # Guardar el DataFrame en un archivo CSV si es necesario
    final_df.to_csv('result.csv', index=False)

    return final_df

In [22]:
urls = ['https://www.sofascore.com/es/football/match/platense-instituto-de-cordoba/MYbsMLo#id:11937238']

result(urls, season, league, delay=10)

Unnamed: 0,match,team,team_id,score_pos,score_neg,win,draw,loose,local,season,league
0,11937238,Instituto,4937,2,0,True,False,False,Home,2024,Argentina Liga Profesional
1,11937238,Platense,36837,0,2,False,False,True,Away,2024,Argentina Liga Profesional


# URLs

## Sofascore

### In: Liga - Out: Partidos

In [139]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def sofascoreResults(url):
    
    if not url.endswith(',tab:matches'):
        url += ',tab:matches'
    
    
    # Configura el driver
    driver = webdriver.Chrome()  # O usa `webdriver.Firefox()` si estás usando Firefox
    results = {}
    
    try:
        # Ir a la URL
        driver.get(url)

        # Esperar a que la pestaña "Por Rondas" esté disponible y hacer clic en ella
        wait = WebDriverWait(driver, 30)
        rounds_tab = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div[data-tabid="2"]')))
        rounds_tab.click()

        # Esperar a que el contenido de la pestaña "Por Rondas" se cargue
        wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '[data-testid="event_cell"]')))

        # Obtener el texto de la temporada
        season_element = driver.find_element(By.CSS_SELECTOR, 'div.Box.Flex.eJCdjm.bnpRyo .Text.nZQAT')
        season_text = season_element.text
        
        # Extraer el número de temporada del texto
        season = season_text.split(' ')[-1]  # Obtiene el último elemento después del espacio

        # Función para extraer enlaces de la ronda actual
        def extract_links_from_current_round():
            event_cells = driver.find_elements(By.CSS_SELECTOR, '[data-testid="event_cell"]')
            links = []
            for cell in event_cells:
                href = cell.get_attribute('href')
                if href:
                    links.append(href)
            return links

        # Inicializar variables
        round_number = None
        
        while True:
            # Obtener el texto de la ronda
            round_container = driver.find_element(By.CSS_SELECTOR, 'div.Box.gRmPLj')
            round_items = round_container.find_elements(By.CSS_SELECTOR, 'div.Text.nZQAT')
            
            selected_round = None
            for item in round_items:
                round_text = item.text
                if 'Ronda' in round_text:
                    selected_round = round_text
                    break

            # Extraer el número de ronda del texto
            current_round_number = selected_round.split(' ')[-1] if selected_round else 'No encontrada'
            
            if current_round_number == 'No encontrada':
                break
            
            round_number = int(current_round_number)
            
            # Guardar los enlaces de la ronda actual
            results[round_number] = {
                'season': season,
                'links': extract_links_from_current_round()
            }

            # Verificar si hemos llegado a la ronda 1 y salir del bucle si es así
            if round_number <= 1:
                break

            # Intentar ir a la ronda anterior
            try:
                previous_round_button = driver.find_element(By.CSS_SELECTOR, 'button.Button.iCnTrv')
                previous_round_button.click()
                # Esperar un momento para que la página cargue
                WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '[data-testid="event_cell"]')))
            except Exception as e:
                print(f"Error al ir a la ronda anterior: {e}")
                break

    finally:
        # Cerrar el navegador
        driver.quit()
    
    return results

In [25]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def sofascoreResults(url):
    
    if not url.endswith(',tab:matches'):
        url += ',tab:matches'
    
    
    # Configura el driver
    driver = webdriver.Chrome()  # O usa `webdriver.Firefox()` si estás usando Firefox
    results = {}
    
    try:
        # Ir a la URL
        driver.get(url)

        # Esperar a que la pestaña "Por Rondas" esté disponible y hacer clic en ella
        wait = WebDriverWait(driver, 30)
        rounds_tab = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div[data-tabid="2"]')))
        rounds_tab.click()

        # Esperar a que el contenido de la pestaña "Por Rondas" se cargue
        wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '[data-testid="event_cell"]')))

        # Obtener el texto de la temporada
        season_element = driver.find_element(By.CSS_SELECTOR, 'div.Box.Flex.eJCdjm.bnpRyo .Text.nZQAT')
        season_text = season_element.text
        
        # Extraer el número de temporada del texto
        season = season_text.split(' ')[-1]  # Obtiene el último elemento después del espacio

        # Función para extraer enlaces de la ronda actual
        def extract_links_from_current_round():
            event_cells = driver.find_elements(By.CSS_SELECTOR, '[data-testid="event_cell"]')
            links = []
            for cell in event_cells:
                href = cell.get_attribute('href')
                if href:
                    links.append(href)
            return links

        # Inicializar variables
        round_number = None
        
        while True:
            # Obtener el texto de la ronda
            round_container = driver.find_element(By.CSS_SELECTOR, 'div.Box.gRmPLj')
            round_items = round_container.find_elements(By.CSS_SELECTOR, 'div.Text.nZQAT')
            
            selected_round = None
            for item in round_items:
                round_text = item.text
                if 'Ronda' in round_text:
                    selected_round = round_text
                    break

            # Extraer el número de ronda del texto
            current_round_number = selected_round.split(' ')[-1] if selected_round else 'No encontrada'
            
            if current_round_number == 'No encontrada':
                break
            
            round_number = int(current_round_number)
            
            # Guardar los enlaces de la ronda actual
            results = {
                'round' : round_number,
                'season': season,
                'links': extract_links_from_current_round()
            }

            # Verificar si hemos llegado a la ronda 1 y salir del bucle si es así
            if round_number <= 1:
                break

            # Intentar ir a la ronda anterior
            try:
                previous_round_button = driver.find_element(By.CSS_SELECTOR, 'button.Button.iCnTrv')
                previous_round_button.click()
                # Esperar un momento para que la página cargue
                WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '[data-testid="event_cell"]')))
            except Exception as e:
                print(f"Error al ir a la ronda anterior: {e}")
                break

    finally:
        # Cerrar el navegador
        driver.quit()
    
    return results

In [87]:
url = 'https://www.sofascore.com/es/torneo/futbol/argentina/liga-profesional-de-futbol/155#id:57478,tab:matches'

results = sofascoreResults(url)
results

{'round': 1,
 'season': '2024',
 'links': ['https://www.sofascore.com/es/football/match/sarmiento-instituto-de-cordoba/MYbsNWq#id:11937117',
  'https://www.sofascore.com/es/football/match/rosario-central-argentinos-juniors/qobsrob#id:11937112',
  'https://www.sofascore.com/es/football/match/platense-newells-old-boys/mobsMLo#id:11937121',
  'https://www.sofascore.com/es/football/match/defensa-y-justicia-huracan/EcdsOLo#id:11937119',
  'https://www.sofascore.com/es/football/match/barracas-central-godoy-cruz/zwcstnB#id:11937116',
  'https://www.sofascore.com/es/football/match/talleres-independiente/jobskob#id:11937122',
  'https://www.sofascore.com/es/football/match/central-cordoba-river-plate/lobsBnB#id:11937110',
  'https://www.sofascore.com/es/football/match/deportivo-riestra-san-lorenzo/bobsyUAb#id:11937114',
  'https://www.sofascore.com/es/football/match/tigre-estudiantes-de-la-plata/gobsDcd#id:11937113',
  'https://www.sofascore.com/es/football/match/racing-club-belgrano/dobspob#id:

### In: Liga - Out: Equipos

In [7]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time

def sofascoreURLteam(url):
    
    sofascoreTeam = []
    repetidos = []
    j = 0

    # Extraer tournament_id y season_id de la URL
    parts = url.rstrip('/').split('/')
    tournament_id = parts[-1].split('#id:')[0]
    season_id = parts[-1].split('#id:')[1]

    # Construir la URL de la API
    api_url = f'https://www.sofascore.com/api/v1/unique-tournament/{tournament_id}/season/{season_id}/standings/total'

    try:
        # Realizar la solicitud GET
        response = requests.get(api_url)
        response.raise_for_status()
        data = response.json()

        league = data['standings'][0]['tournament']['name']
        country = data['standings'][0]['tournament']['category']['name']
        season = datetime.fromtimestamp(data['standings'][0]['updatedAtTimestamp']).year

        # Extraer los nombres de los equipos
        teams_name = [row['team']['name'] for row in data['standings'][0]['rows']]
        teams_id = [row['team']['id'] for row in data['standings'][0]['rows']]

        # Obtener enlaces de la misma URL de la API
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.find_all('a', href=True)

        for link in links:
            href = link['href']

            if href not in repetidos:
                repetidos.append(href)

                if '/es/equipo/futbol/' in href:
                    full_link = 'https://www.sofascore.com' + href

                    if j < len(teams_name):
                        sofascoreTeam_dic = {
                            'team': teams_name[j],
                            'id': teams_id[j],
                            'logo': f'https://api.sofascore.app/api/v1/team/{teams_id[j]}/image',
                            'league': league,
                            'country': country,
                            'season': season,
                            'link': full_link
                        }
                        sofascoreTeam.append(sofascoreTeam_dic)
                        j += 1

    except requests.exceptions.RequestException as e:
        print(f'Error durante la solicitud: {e}')
    
    return sofascoreTeam

In [36]:
url = 'https://www.sofascore.com/es/torneo/futbol/argentina/liga-profesional-de-futbol/155#id:57478'
sofascoreTeam = sofascoreURLteam(url)
sofascoreTeam

[{'team': 'Racing Club',
  'id': 3215,
  'logo': 'https://api.sofascore.app/api/v1/team/3215/image',
  'league': 'Liga Profesional',
  'country': 'Argentina',
  'season': 2024,
  'link': 'https://www.sofascore.com/es/equipo/futbol/racing-club/3215'},
 {'team': 'Huracán',
  'id': 7629,
  'logo': 'https://api.sofascore.app/api/v1/team/7629/image',
  'league': 'Liga Profesional',
  'country': 'Argentina',
  'season': 2024,
  'link': 'https://www.sofascore.com/es/equipo/futbol/huracan/7629'},
 {'team': 'Unión Santa Fé',
  'id': 3204,
  'logo': 'https://api.sofascore.app/api/v1/team/3204/image',
  'league': 'Liga Profesional',
  'country': 'Argentina',
  'season': 2024,
  'link': 'https://www.sofascore.com/es/equipo/futbol/club-atletico-union/3204'},
 {'team': 'Atlético Tucumán',
  'id': 36833,
  'logo': 'https://api.sofascore.app/api/v1/team/36833/image',
  'league': 'Liga Profesional',
  'country': 'Argentina',
  'season': 2024,
  'link': 'https://www.sofascore.com/es/equipo/futbol/atleti

### In: Equipo - Out: Jugadores

In [79]:
import requests
from bs4 import BeautifulSoup

def sofascoreURLplayer_handmade(urls, season, league, delay=5):
    sofascorePlayer = []
    for url in urls:

        time.sleep(delay)  # Respetar el retraso entre solicitudes

        team = url.rstrip('/').split('/')[-2].title()

        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.find_all('a', href=True)

        for link in links:
            href = link['href']
            if '/es/jugador/' in href:
                full_link = 'https://www.sofascore.com' + href
                id = href.rstrip('/').split('/')[-1]
                name = href.rstrip('/').split('/')[-2].replace('-', ' ').title()
                profile = f'https://api.sofascore.app/api/v1/player/{id}/image'
                
                sofascorePlayer_dic = {'name': name, 'id':id, 'profile': profile, 'team': team, 'league': league, 'season': season, 'link': full_link}
                sofascorePlayer.append(sofascorePlayer_dic)
            
    return sofascorePlayer


In [58]:
urls = ['https://www.sofascore.com/es/equipo/futbol/huracan/7629']
league= 'LPA'
sofascorePlayer = sofascoreURLplayer_handmade(urls, season, league, delay=10)
sofascorePlayer

[{'name': 'Ramon Abila',
  'id': '788935',
  'profile': 'https://api.sofascore.app/api/v1/player/788935/image',
  'team': 'Huracan',
  'league': 'LPA',
  'season': '2024',
  'link': 'https://www.sofascore.com/es/jugador/ramon-abila/788935'},
 {'name': 'Leandro Garate',
  'id': '364980',
  'profile': 'https://api.sofascore.app/api/v1/player/364980/image',
  'team': 'Huracan',
  'league': 'LPA',
  'season': '2024',
  'link': 'https://www.sofascore.com/es/jugador/leandro-garate/364980'},
 {'name': 'Perez Marcelo',
  'id': '980586',
  'profile': 'https://api.sofascore.app/api/v1/player/980586/image',
  'team': 'Huracan',
  'league': 'LPA',
  'season': '2024',
  'link': 'https://www.sofascore.com/es/jugador/perez-marcelo/980586'},
 {'name': 'Nicolas Cordero',
  'id': '860051',
  'profile': 'https://api.sofascore.app/api/v1/player/860051/image',
  'team': 'Huracan',
  'league': 'LPA',
  'season': '2024',
  'link': 'https://www.sofascore.com/es/jugador/nicolas-cordero/860051'},
 {'name': 'Eri

### Ejecución

In [56]:
import requests
from bs4 import BeautifulSoup

def sofascoreURLplayer(teams, delay=5):
    
    sofascorePlayer = []
    repetidos = []

    for i in range(0,len(teams)):

        time.sleep(delay)  # Respetar el retraso entre solicitudes

        team = teams[i]['team']
        season = teams[i]['season']
        league = teams[i]['league']
        country = teams[i]['country']
        url = teams[i]['link']

        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.find_all('a', href=True)

        for link in links:
            href = link['href']

            if href not in repetidos:
                repetidos.append(href)

                if '/es/jugador/' in href:
                    full_link = 'https://www.sofascore.com' + href
                    id = href.rstrip('/').split('/')[-1]
                    name = href.rstrip('/').split('/')[-2].replace('-', ' ').title()
                    profile = f'https://api.sofascore.app/api/v1/player/{id}/image'
                    
                    sofascorePlayer_dic = {'name': name, 'id':id, 'profile': profile, 'team': team, 'league': league, 'country': country, 'season': season, 'link': full_link}
                    sofascorePlayer.append(sofascorePlayer_dic)
            
    return sofascorePlayer

In [81]:
url = 'https://www.sofascore.com/es/torneo/futbol/argentina/liga-profesional-de-futbol/155#id:57478'

results = sofascoreResults(url)
df_sofascoreResults = pd.DataFrame(results)

sofascoreTeam = sofascoreURLteam(url)
df_sofascoreTeam = pd.DataFrame(sofascoreTeam)

sofascorePlayer = sofascoreURLplayer(sofascoreTeam)
df_sofascorePlayer = pd.DataFrame(sofascorePlayer)

In [57]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def sofascoreResults(url):
    
    if not url.endswith(',tab:matches'):
        url += ',tab:matches'
    
    # Configura el driver
    driver = webdriver.Chrome()  # O usa `webdriver.Firefox()` si estás usando Firefox
    all_results = []  # Lista para almacenar cada enlace como una fila separada
    
    try:
        # Ir a la URL
        driver.get(url)

        # Esperar a que la pestaña "Por Rondas" esté disponible y hacer clic en ella
        wait = WebDriverWait(driver, 30)
        rounds_tab = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div[data-tabid="2"]')))
        rounds_tab.click()

        # Esperar a que el contenido de la pestaña "Por Rondas" se cargue
        wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '[data-testid="event_cell"]')))

        # Obtener el texto de la temporada
        season_element = driver.find_element(By.CSS_SELECTOR, 'div.Box.Flex.eJCdjm.bnpRyo .Text.nZQAT')
        season_text = season_element.text
        
        # Extraer el número de temporada del texto
        season = season_text.split(' ')[-1]  # Obtiene el último elemento después del espacio

        # Inicializar variables
        round_number = None
        
        while True:
            # Obtener el texto de la ronda
            round_container = driver.find_element(By.CSS_SELECTOR, 'div.Box.gRmPLj')
            round_items = round_container.find_elements(By.CSS_SELECTOR, 'div.Text.nZQAT')
            
            selected_round = None
            for item in round_items:
                round_text = item.text
                if 'Ronda' in round_text:
                    selected_round = round_text
                    break

            # Extraer el número de ronda del texto
            current_round_number = selected_round.split(' ')[-1] if selected_round else 'No encontrada'
            
            if current_round_number == 'No encontrada':
                break
            
            round_number = int(current_round_number)
            
            # Extraer los enlaces de la ronda actual
            event_cells = driver.find_elements(By.CSS_SELECTOR, '[data-testid="event_cell"]')
            for cell in event_cells:
                href = cell.get_attribute('href')
                if href and 'summary' not in href:  # Excluir enlaces que contengan 'summary'
                    round_result = {
                        'round': round_number,
                        'season': season,
                        'link': href
                    }
                    all_results.append(round_result)

            # Verificar si hemos llegado a la ronda 1 y salir del bucle si es así
            if round_number <= 1:
                break

            # Intentar ir a la ronda anterior
            try:
                previous_round_button = driver.find_element(By.CSS_SELECTOR, 'button.Button.iCnTrv')
                previous_round_button.click()
                # Esperar un momento para que la página cargue
                WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '[data-testid="event_cell"]')))
            except Exception as e:
                print(f"Error al ir a la ronda anterior: {e}")
                break

    finally:
        # Cerrar el navegador
        driver.quit()
    
    return all_results

In [159]:
def get_lineups(id):
    api_url = f'https://www.sofascore.com/api/v1/event/{id}/lineups'
    response = requests.get(api_url)
    response.raise_for_status()
    return response.json()

def get_average_positions(id):
    api_url = f'https://www.sofascore.com/api/v1/event/{id}/average-positions'
    response = requests.get(api_url)
    response.raise_for_status()
    return response.json()

def get_event_data(id):
    api_url = f'https://www.sofascore.com/api/v1/event/{id}'
    response = requests.get(api_url)
    response.raise_for_status()
    return response.json()

In [145]:
https://www.sofascore.com/api/v1/event/11937261

In [146]:
average_positions = get_average_positions(11937261)

{'home': [{'player': {'name': 'Franco Minerva',
    'firstName': 'Franco Minerva',
    'slug': 'franco-minerva',
    'shortName': 'F. Minerva',
    'position': 'M',
    'jerseyNumber': '29',
    'userCount': 38,
    'id': 1650800},
   'averageX': 70.935294117647,
   'averageY': 74.098039215686,
   'pointsCount': 51},
  {'player': {'name': 'Guido Mainero',
    'slug': 'guido-mainero',
    'shortName': 'G. Mainero',
    'position': 'M',
    'jerseyNumber': '7',
    'userCount': 31,
    'id': 895232},
   'averageX': 66.595652173913,
   'averageY': 17.424637681159,
   'pointsCount': 69},
  {'player': {'name': 'Augusto Lotti',
    'slug': 'augusto-lotti',
    'shortName': 'A. Lotti',
    'position': 'F',
    'jerseyNumber': '33',
    'userCount': 84,
    'id': 824456},
   'averageX': 68.683333333333,
   'averageY': 53.766666666667,
   'pointsCount': 30},
  {'player': {'name': 'Ignacio Vazquez',
    'slug': 'ignacio-vazquez',
    'shortName': 'I. Vazquez',
    'position': 'D',
    'jerseyNum

In [181]:
match_id = 11937261
average_positions = get_average_positions(match_id)
j=0
for j in range (0,5):
    averageX = average_positions['home'][j]['averageX']
    averageY = average_positions['home'][j]['averageY']

In [182]:
len(average_positions['home'])

16

In [184]:
average_positions['home'][16]

IndexError: list index out of range

In [144]:
tupla = sofascore.get_players_average_positions('https://www.sofascore.com/es/football/match/barracas-central-platense/MLostnB#id:11937261')
tupla[0]

Unnamed: 0,name,firstName,slug,shortName,position,jerseyNumber,userCount,id,lastName,averageX,averageY,pointsCount,team
0,Franco Minerva,Franco Minerva,franco-minerva,F. Minerva,M,29,38,1650800,,70.935294,74.098039,51,Platense
1,Guido Mainero,,guido-mainero,G. Mainero,M,7,31,895232,,66.595652,17.424638,69,Platense
2,Augusto Lotti,,augusto-lotti,A. Lotti,F,33,84,824456,,68.683333,53.766667,30,Platense
3,Ignacio Vazquez,,ignacio-vazquez,I. Vazquez,D,13,43,883922,,41.474545,37.970909,55,Platense
4,Carlos Villalba,,carlos-villalba,C. Villalba,M,5,46,975704,,63.858065,57.251613,31,Platense
5,Gastón Suso,,gaston-suso,G. Suso,D,6,37,850925,,34.630303,65.254545,33,Platense
6,Juan Cozzani,,juan-cozzani,J. Cozzani,G,31,27,874969,,13.83,47.63,30,Platense
7,Oscar Salomón,,oscar-salomon,O. Salomón,D,24,34,992226,,59.463636,17.618182,11,Platense
8,Franco Baldassarra,,franco-baldassarra,F. Baldassarra,M,11,34,981340,,71.611111,75.055556,9,Platense
9,Sasha Marcich,,sasha-marcich,S. Marcich,D,18,56,992126,,65.926866,81.959701,67,Platense


In [132]:
match_id = 11937261
print(f'https://www.sofascore.com/api/v1/event/{match_id}/average-positions')

https://www.sofascore.com/api/v1/event/11937261/average-positions


In [189]:
import re
import time
import pandas as pd
import LanusStats as ls  

sofascore = ls.SofaScore()

def lineups(matches, delay=5):
    dfs = []

    for i in range(0,len(matches)):
        time.sleep(delay)

        match = re.search(r'id:(\d+)', matches[i]['link'])
        match_id = match.group(1) if match else 'unknown'
        
        try:
            lineups = get_lineups(match_id)
            average_positions = get_average_positions(match_id)
            data = get_event_data(match_id)

            status = data['event']['status']['type']

            if status == 'finished':

                league = data['event']['tournament']['name']
                country = data['event']['tournament']['category']['name']
                season = data['event']['season']['year']

                season = matches[i]['season']
                round = data['event']['roundInfo']['round']

                home_formation = lineups['home']['formation']
                away_formation = lineups['away']['formation']

                # Positions home
                home_groups_formation = home_formation.split('-')
                home_def = int(home_groups_formation[0])
                home_ata = int(home_groups_formation[-1])

                if len(home_groups_formation)==4:
                    home_mid_0 = int(home_groups_formation[1])
                    home_mid_1 = 0
                    home_mid_2 = int(home_groups_formation[2])
                else:
                    home_mid_0 = 0
                    home_mid_1 = int(home_groups_formation[1])
                    home_mid_2 = 0

                # Positions away
                away_groups_formation = away_formation.split('-')
                away_def = int(away_groups_formation[0])
                away_ata = int(away_groups_formation[-1])

                if len(away_groups_formation)==4:
                    away_mid_0 = int(away_groups_formation[1])
                    away_mid_1 = 0
                    away_mid_2 = int(away_groups_formation[2])
                else:
                    away_mid_0 = 0
                    away_mid_1 = int(away_groups_formation[1])
                    away_mid_2 = 0
            

                # Lista vacía para almacenar los datos
                home = []
                away = []
                home_avg = []
                away_avg = []

                # Iterar sobre los jugadores y recopilar los datos
                for j in range(0, len(lineups['home']['players'])):
                    name = lineups['home']['players'][j]['player']['name']
                    id = lineups['home']['players'][j]['player']['id']
                    jersey = lineups['home']['players'][j]['shirtNumber']
                    position = lineups['home']['players'][j].get('position', '')
                    substitute = lineups['home']['players'][j]['substitute']
                    if 'statistics' in lineups['home']['players'][j]:
                        minutes = lineups['home']['players'][j]['statistics'].get('minutesPlayed', 0)
                    else:
                        minutes = 0  # Valor predeterminado si no hay estadísticas disponibles))

                    if j < len(average_positions['home']):
                        avg_id = average_positions['home'][j]['player']['id']
                        averageX = average_positions['home'][j]['averageX']
                        averageY = average_positions['home'][j]['averageY']
                        pointsCount = average_positions['home'][j]['pointsCount']

                    order = j+1
                    
                    home_por = 1

                    if order == home_por:
                        line = 'por'
                        lat = '1/1'
                        pos = 'POR'
                    elif order <= home_por + home_def:
                        line = 'def'
                        lat = f'{order - home_por}/{home_def}'
                        pos = 'DEF'
                    elif order <= home_por + home_def + home_mid_0:
                        line = 'mid_0'
                        lat = f'{order - home_por - home_def}/{home_mid_0}'
                        pos = 'MED'
                    elif order <= home_por + home_def + home_mid_0 + home_mid_1:
                        line = 'mid_1'
                        lat = f'{order - home_por - home_def - home_mid_0}/{home_mid_1}'
                        pos = 'MED'
                    elif order <= home_por + home_def + home_mid_0 + home_mid_1 + home_mid_2:
                        line = 'mid_2'
                        lat = f'{order - home_por - home_def - home_mid_0 - home_mid_1}/{home_mid_2}'
                        pos = 'MED'
                    elif order <= home_por + home_def + home_mid_0 + home_mid_1 + home_mid_2 + home_ata:
                        line = 'ata'
                        lat = f'{order - home_por - home_def - home_mid_0 - home_mid_1 - home_mid_2}/{home_ata}'
                        pos = 'ATA'
                    elif order > 11 and substitute == True:
                        line = None
                        lat = None
                        pos = 'SUS'
                    elif order > 11 and substitute == False:
                        line = None
                        lat = None
                        pos = 'RES'       
                    else:
                        line = None
                        lat = None
                        pos = None
                    
                    # Agregar los datos a la lista
                    home.append([name, id, jersey, position, substitute, minutes, order, line, lat, pos])
                    home_avg.append([avg_id, averageX, averageY, pointsCount])

                for k in range(0, len(lineups['away']['players'])):
                    name = lineups['away']['players'][k]['player']['name']
                    id = lineups['away']['players'][k]['player']['id']
                    jersey = lineups['away']['players'][k]['shirtNumber']
                    position = lineups['away']['players'][k].get('position', '')
                    substitute = lineups['away']['players'][k]['substitute']
                    if 'statistics' in lineups['away']['players'][k]:
                        minutes = lineups['away']['players'][k]['statistics'].get('minutesPlayed', 0)
                    else:
                        minutes = 0  # Valor predeterminado si no hay estadísticas disponibles)
                    
                    if k < len(average_positions['away']):
                        avg_id = average_positions['away'][k]['player']['id']
                        averageX = average_positions['away'][k]['averageX']
                        averageY = average_positions['away'][k]['averageY']
                        pointsCount = average_positions['away'][k]['pointsCount']                    
                                        
                    order = k+1
                    
                    away_por = 1

                    if order == away_por:
                        line = 'por'
                        lat = '1/1'
                        pos = 'POR'
                    elif order <= away_por + away_def:
                        line = 'def'
                        lat = f'{order - away_por}/{away_def}'
                        pos = 'DEF'
                    elif order <= away_por + away_def + away_mid_0:
                        line = 'med_0'
                        lat = f'{order - away_por - away_def}/{away_mid_0}'
                        pos = 'MED'
                    elif order <= away_por + away_def + away_mid_0 + away_mid_1:
                        line = 'med_1'
                        lat = f'{order - away_por - away_def - away_mid_0}/{away_mid_1}'
                        pos = 'MED'
                    elif order <= away_por + away_def + away_mid_0 + away_mid_1 + away_mid_2:
                        line = 'med_2'
                        lat = f'{order - away_por - away_def - away_mid_0 - away_mid_1}/{away_mid_2}'
                        pos = 'MED'
                    elif order <= away_por + away_def + away_mid_0 + away_mid_1 + away_mid_2 + away_ata:
                        line = 'ata'
                        lat = f'{order - away_por - away_def - away_mid_0 - away_mid_1 - away_mid_2}/{away_ata}'
                        pos = 'ATA'
                    elif order > 11 and substitute == True:
                        line = None
                        lat = None
                        pos = 'SUS'
                    elif order > 11 and substitute == False:
                        line = None
                        lat = None
                        pos = 'RES'       
                    else:
                        line = None
                        lat = None
                        pos = None

                    # Agregar los datos a la lista
                    away.append([name, id, jersey, position, substitute, minutes, order, line, lat, pos])
                    away_avg.append([avg_id, averageX, averageY, pointsCount])

                # Crear un DataFrame a partir de las listas de datos

                home_df = pd.DataFrame(home, columns=['player', 'id', 'jersey', 'position', 'substitute', 'minutes', 'order', 'line', 'lat', 'pos'])
                home_df['local'] = 'Home'
                home_df['team'] = data['event']['homeTeam']['shortName']
                home_df['formation'] = home_formation
                home_df['defense'] = home_def
                home_df['midfield'] = home_mid_0 + home_mid_1 + home_mid_2
                home_df['attack'] = home_ata

                away_df = pd.DataFrame(away, columns=['player', 'id', 'jersey', 'position', 'substitute', 'minutes', 'order', 'line', 'lat', 'pos'])
                away_df['local'] = 'Away'
                away_df['team'] = data['event']['awayTeam']['shortName']
                away_df['formation'] = away_formation
                away_df['defense'] = away_def
                away_df['midfield'] = away_mid_0 + away_mid_1 + away_mid_2
                away_df['attack'] = away_ata

                home_avg_position = pd.DataFrame(home_avg, columns=['avg_id', 'averageX', 'averageY', 'pointsCount'])
                away_avg_position = pd.DataFrame(away_avg, columns=['avg_id', 'averageX', 'averageY', 'pointsCount'])
              
                # Posiciones promedio
 
                df_avg_position = pd.concat([home_avg_position, away_avg_position], ignore_index=True)
                df_avg_position.rename(columns={'avg_id': 'id'}, inplace=True)
              
                # Crear un DataFrame a partir de la lista de datos
                df = pd.concat([home_df, away_df], ignore_index=True)
                df['match'] = match_id
                df['league'] = league
                df['season'] = season
                df['country'] = country
                df['round'] = round

                df_merged = pd.merge(df, df_avg_position, on='id', how='left')

                dfs.append(df_merged)

        except Exception as e:
            print(f"Error en la alineación del partido: {e}")

    # Concatenar todos los DataFrames en uno solo y guardar en un archivo CSV
    final_df = pd.concat(dfs, ignore_index=True)
    final_df.to_csv('lineup.csv', index=False)

    return final_df


In [59]:
url = 'https://www.sofascore.com/es/torneo/futbol/argentina/liga-profesional-de-futbol/155#id:57478'

matches = sofascoreResults(url)

In [190]:
lineups(matches)

Error en la alineación del partido: 404 Client Error: Not Found for url: https://www.sofascore.com/api/v1/event/11937267/lineups
Error en la alineación del partido: 404 Client Error: Not Found for url: https://www.sofascore.com/api/v1/event/11937276/lineups
Error en la alineación del partido: 404 Client Error: Not Found for url: https://www.sofascore.com/api/v1/event/11937274/lineups
Error en la alineación del partido: 404 Client Error: Not Found for url: https://www.sofascore.com/api/v1/event/11937265/lineups
Error en la alineación del partido: 404 Client Error: Not Found for url: https://www.sofascore.com/api/v1/event/11937275/lineups
Error en la alineación del partido: 404 Client Error: Not Found for url: https://www.sofascore.com/api/v1/event/11937277/lineups
Error en la alineación del partido: 404 Client Error: Not Found for url: https://www.sofascore.com/api/v1/event/11937268/lineups
Error en la alineación del partido: 404 Client Error: Not Found for url: https://www.sofascore.co

Unnamed: 0,player,id,jersey,position,substitute,minutes,order,line,lat,pos,...,midfield,attack,match,league,season,country,round,averageX,averageY,pointsCount
0,Juan Cozzani,874969,31,G,False,90,1,por,1/1,POR,...,5,1,11937261,Liga Profesional,2024,Argentina,11,13.830000,47.630000,30.0
1,Bautista Barros Schelotto,1085929,4,D,False,78,2,def,1/4,DEF,...,5,1,11937261,Liga Profesional,2024,Argentina,11,48.172340,15.806383,47.0
2,Ignacio Vazquez,883922,13,D,False,90,3,def,2/4,DEF,...,5,1,11937261,Liga Profesional,2024,Argentina,11,41.474545,37.970909,55.0
3,Gastón Suso,850925,6,D,False,90,4,def,3/4,DEF,...,5,1,11937261,Liga Profesional,2024,Argentina,11,34.630303,65.254545,33.0
4,Sasha Marcich,992126,18,D,False,90,5,def,4/4,DEF,...,5,1,11937261,Liga Profesional,2024,Argentina,11,65.926866,81.959701,67.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9295,Gabriel Aranda,1131027,47,D,True,0,19,,,SUS,...,5,1,11937118,Liga Profesional,2024,Argentina,1,,,
9296,Franco Quinteros,926415,3,M,True,0,20,,,SUS,...,5,1,11937118,Liga Profesional,2024,Argentina,1,,,
9297,Lautaro Villegas,1114489,38,M,True,0,21,,,SUS,...,5,1,11937118,Liga Profesional,2024,Argentina,1,,,
9298,Ezequiel Cañete,1017455,24,M,True,0,22,,,SUS,...,5,1,11937118,Liga Profesional,2024,Argentina,1,,,


## Fotmob

### In: Liga - Out: Equipos

In [2]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time

def fotmobURLteam(urls, delay=10):
    
    fotmobTeam = []

    for url in urls:
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        # Extraer tournament_id y season_id de la URL
        parts = url.rstrip('/').split('/')
        leagues_index = parts.index('leagues')
        tournament_id = parts[leagues_index + 1]

        # Construir la URL de la API
        api_url = f'https://www.fotmob.com/api/tltable?leagueId={tournament_id}'

        try:
            # Realizar la solicitud GET
            response = requests.get(api_url)
            response.raise_for_status()
            data = response.json()
       
            league = data[0]['data']['leagueName']
            country = data[0]['data']['ccode']
            season = datetime.now().year

            for i in range(0,len(data[0]['data']['table']['all'])):

                name = data[0]['data']['table']['all'][i]['name']
                id = data[0]['data']['table']['all'][i]['id']
                logo = f'https://images.fotmob.com/image_resources/logo/teamlogo/{id}_xsmall.png'
                link = 'https://www.fotmob.com/es'+data[0]['data']['table']['all'][0]['pageUrl']

                fotmobTeam_dic = {
                                'team': name,
                                'id': id,
                                'logo': logo,
                                'league': league,
                                'country': country,
                                'season': season,
                                'link': link
                                            }

                fotmobTeam.append(fotmobTeam_dic)

        except requests.exceptions.RequestException as e:
            print(f'Error durante la solicitud: {e}')
    
        return fotmobTeam

In [None]:
urls = ['https://www.fotmob.com/es/leagues/112/overview/liga-profesional']
fotmobTeam = fotmobURLteam(urls)

### In: Equipos - Out: Jugadores

In [4]:
import requests
from bs4 import BeautifulSoup

def fotmobURLplayer(urls, season, league, delay=10):
    fotmobPlayer = []
    for url in urls:

        time.sleep(delay)  # Respetar el retraso entre solicitudes

        url = url.replace('overview', 'squad')
        team = url.rstrip('/').split('/')[-1].title()
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.find_all('a', href=True)

        for link in links:
            href = link['href']
            if '/es/players/' in href:
                full_link = 'https://www.fotmob.com' + href
                id = href.rstrip('/').split('/')[-2]
                name = href.rstrip('/').split('/')[-1].replace('-', ' ').title()
                profile = f'https://www.fotmob.com/_next/image?url=https%3A%2F%2Fimages.fotmob.com%2Fimage_resources%2Fplayerimages%2F{id}.png&w=96&q=75'
                
                fotmobPlayer_dic = {'name': name, 'id':id, 'profile': profile, 'team': team, 'league': league, 'season': season, 'link': full_link}
                fotmobPlayer.append(fotmobPlayer_dic)
            
    return fotmobPlayer

In [50]:
urls = ['https://www.fotmob.com/es/teams/10081/overview/huracan']
league= 'LPA'
fotmobPlayer = fotmobURLplayer(urls, season, league, delay=10)
fotmobPlayer

[{'name': 'Frank Kudelka',
  'id': '306387',
  'profile': 'https://www.fotmob.com/_next/image?url=https%3A%2F%2Fimages.fotmob.com%2Fimage_resources%2Fplayerimages%2F306387.png&w=96&q=75',
  'team': 'Huracan',
  'league': 'LPA',
  'season': 2024,
  'link': 'https://www.fotmob.com/es/players/306387/frank-kudelka'},
 {'name': 'Hernan Galindez',
  'id': '146801',
  'profile': 'https://www.fotmob.com/_next/image?url=https%3A%2F%2Fimages.fotmob.com%2Fimage_resources%2Fplayerimages%2F146801.png&w=96&q=75',
  'team': 'Huracan',
  'league': 'LPA',
  'season': 2024,
  'link': 'https://www.fotmob.com/es/players/146801/hernan-galindez'},
 {'name': 'Sebastian Meza',
  'id': '1209083',
  'profile': 'https://www.fotmob.com/_next/image?url=https%3A%2F%2Fimages.fotmob.com%2Fimage_resources%2Fplayerimages%2F1209083.png&w=96&q=75',
  'team': 'Huracan',
  'league': 'LPA',
  'season': 2024,
  'link': 'https://www.fotmob.com/es/players/1209083/sebastian-meza'},
 {'name': 'Nazareno Duran',
  'id': '1510567',

### Ejecución

In [32]:
def fotmob_teams(league_url, delay=5):

    fotmobTeam = []
    
    # Extraer tournament_id y season_id de la URL
    parts = league_url.rstrip('/').split('/')
    leagues_index = parts.index('leagues')
    tournament_id = parts[leagues_index + 1]

    # Construir la URL de la API
    api_url = f'https://www.fotmob.com/api/tltable?leagueId={tournament_id}'

    try:
        # Realizar la solicitud GET
        response = requests.get(api_url)
        response.raise_for_status()
        data = response.json()
    
        league = data[0]['data']['leagueName']
        country = data[0]['data']['ccode']
        season = datetime.now().year

        time.sleep(delay)

        for i in range(0,len(data[0]['data']['table']['all'])):

            name = data[0]['data']['table']['all'][i]['name']
            id = data[0]['data']['table']['all'][i]['id']
            logo = f'https://images.fotmob.com/image_resources/logo/teamlogo/{id}_xsmall.png'
            link = 'https://www.fotmob.com/es'+data[0]['data']['table']['all'][i]['pageUrl']

            fotmobTeam_dic = {
                            'team': name,
                            'id': id,
                            'logo': logo,
                            'league': league,
                            'country': country,
                            'season': season,
                            'link': link
                                        }

            fotmobTeam.append(fotmobTeam_dic)

    except requests.exceptions.RequestException as e:
        print(f'Error durante la solicitud: {e}')
    
        
    return fotmobTeam

In [19]:
def fotmob_get_player_data(id):
    api_url = f'https://www.fotmob.com/api/playerData?id={id}'
    response = requests.get(api_url)
    response.raise_for_status()
    return response.json()

In [36]:
def fotmob_players(teams, delay=5):
    
    fotmobPlayer = []
    repetidos = []

    for i in range(0,len(teams)):

        time.sleep(delay)  # Respetar el retraso entre solicitudes

        season = teams[i]['season']
        league = teams[i]['league']
        team = teams[i]['team']
        url = teams[i]['link'].replace('overview', 'squad')
       
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.find_all('a', href=True)

        for link in links:
            href = link['href']

            if href not in repetidos:
                repetidos.append(href)

                if '/es/players/' in href:
                    full_link = 'https://www.fotmob.com' + href
                    id = href.rstrip('/').split('/')[-2]
                    profile = f'https://www.fotmob.com/_next/image?url=https%3A%2F%2Fimages.fotmob.com%2Fimage_resources%2Fplayerimages%2F{id}.png&w=96&q=75'
                    
                    data = fotmob_get_player_data(id)

                    name = data['name']
                    coach = data['isCoach']

                    fotmobPlayer_dic = {'name': name, 'id':id, 'profile': profile, 'coach': coach, 'team': team, 'league': league, 'season': season, 'link': full_link}
                    fotmobPlayer.append(fotmobPlayer_dic)
            
    return fotmobPlayer

In [22]:
def fotmob_shotmap(players, delay=5):
    dfs = []

    for i in range(0,len(players)):

        time.sleep(delay)

        try:
            player_id = players[i]['id']
            display_season = 0
            display_league = 0
            
            api_url = f'https://www.fotmob.com/api/playerStats?playerId={player_id}&seasonId={display_season}-{display_league}&isFirstSeason=false'

            response = requests.get(api_url)
            response.raise_for_status()
            data = response.json()
        
            df = pd.DataFrame(data['shotmap'][0])
            
            df.rename(columns={'playerName': 'player'}, inplace=True)
            df['league'] = players[i]['league']
            df['season'] = players[i]['season']
            df['team'] = players[i]['team']

            dfs.append(df)
        
        except Exception as e:
            print(f"Error al procesar el jugador {players[i]['link']}: {e}")
    
    shotmap = pd.concat(dfs, ignore_index=True)
    shotmap.to_csv('shotmap.csv', index=False)

    return

In [23]:
def fotmob_players_positions(players, delay=5):

    fotmobPlayer_positions = []

    for i in range(len(players)):
        time.sleep(delay)

        id = players[i]['id']
        name = players[i]['name']
        link = players[i]['link']

        data = fotmob_get_player_data(id)

        try:
            if 'positionDescription' in data and 'positions' in data['positionDescription']:
                for j in range(len(data['positionDescription']['positions'])):
                    position = data['positionDescription']['positions'][j]['strPos']['label']
                    pos = data['positionDescription']['positions'][j]['strPosShort']['label']
                    pos_id = data['positionDescription']['positions'][j]['position']
                    occurences = data['positionDescription']['positions'][j]['occurences']
                    main = data['positionDescription']['positions'][j]['isMainPosition']

                    fotmobPlayer_positions_dic = {
                        'player_name': name, 
                        'player_id': id, 
                        'pos_id': pos_id, 
                        'position': position, 
                        'pos': pos, 
                        'occurences': occurences, 
                        'main': main
                    }
                    fotmobPlayer_positions.append(fotmobPlayer_positions_dic)
            else:
                print(f"No se encontraron posiciones para el jugador {name} con id {id}")

        except Exception as e:
            print(f"Error al procesar el jugador {link} con id {id}: {e}")
            print(f"Datos de respuesta: {data}")

    df = pd.DataFrame(fotmobPlayer_positions)
    df.to_csv('player_positions.csv', index=False)

    return


In [33]:
league_url = 'https://www.fotmob.com/es/leagues/112/overview/liga-profesional'
fotmob_teams = fotmob_teams(league_url)
fotmob_players = fotmob_players(fotmob_teams)
fotmob_shotmap(fotmob_players)
fotmob_players_positions(fotmob_players)

## FBRef

### In: Liga - Out: Equipos

In [12]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time

def fbrefURLteam(urls, delay=10):
    
    fbrefTeam = []
    repetidos = []
    j = 0

    for url in urls:
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        # Extraer tournament_id y season_id de la URL
        parts = url.rstrip('/').split('/').replace('Estadisticas-de-' , '')
        tournament_id = parts[-2]
        season = datetime.now().year
        final = parts[-1].rstrip('-').split('-')
        league = f'{final[-3]} {final[-2]}'
        country = final[-1]

        # Obtener enlaces de la misma URL de la API
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.find_all('a', href=True)

        for link in links:
            href = link['href']

            if href not in repetidos:
                repetidos.append(href)

                if '/es/equipos/' in href:
                                   
                    fbrefTeam_dic = {
                        'team': href.rstrip('/').split('/')[-1].replace('Estadisticas-de-' , ''),
                        'id': href.rstrip('/').split('/')[-2],
                        'league': league,
                        'country': country,
                        'season': season,
                        'link': 'https://www.fbref.com' + href
                    }
                    
                    fbrefTeam.append(fbrefTeam_dic)

   
    return fbrefTeam

In [13]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time

def fbrefURLteam(urls, delay=10):
    fbrefTeam = []
    repetidos = []

    for url in urls:
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        # Extraer tournament_id y season_id de la URL
        parts = url.rstrip('/').split('/')
        tournament_id = parts[-2]
        season = datetime.now().year
        final = parts[-1].rstrip('-').replace('Estadisticas-de-', '').split('-')
        league = f'{final[-3]} {final[-2]}'
        country = final[-1]

        # Obtener enlaces de la misma URL de la API
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.find_all('a', href=True)

        for link in links:
            href = link['href']

            if href not in repetidos:
                repetidos.append(href)

                if '/es/equipos/' in href:
                    team_name = href.rstrip('/').split('/')[-1].replace('Estadisticas-de-', '')
                    team_id = href.rstrip('/').split('/')[-2]
                    full_link = 'https://www.fbref.com' + href

                    fbrefTeam_dic = {
                        'team': team_name,
                        'id': team_id,
                        'logo': f'https://cdn.ssref.net/req/202408052/tlogo/fb/{id}.png',
                        'league': league,
                        'country': country,
                        'season': season,
                        'link': full_link
                    }
                    
                    fbrefTeam.append(fbrefTeam_dic)

    return fbrefTeam


In [14]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time

def fbrefURLteam(urls, delay=10):
    fbrefTeam = []
    repetidos = []

    for url in urls:
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        # Extraer tournament_id y season_id de la URL
        parts = url.rstrip('/').split('/')
        tournament_id = parts[-2]
        season = datetime.now().year
        final = parts[-1].rstrip('-').replace('Estadisticas-de-', '').split('-')
        league = f'{final[-3]} {final[-2]}'
        country = final[-1]

        # Obtener la página HTML
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Buscar el contenedor específico por su ID
        table_container = soup.find('div', id='div_results2024211_overall')

        if table_container:
            links = table_container.find_all('a', href=True)  # Buscar enlaces solo dentro del contenedor

            for link in links:
                href = link['href']

                if href not in repetidos:
                    repetidos.append(href)

                    if '/es/equipos/' in href:
                        team_name = href.rstrip('/').split('/')[-1].replace('Estadisticas-de-', '').replace('-', ' ')
                        team_id = href.rstrip('/').split('/')[-2]
                        full_link = 'https://www.fbref.com' + href

                        fbrefTeam_dic = {
                            'team': team_name,
                            'id': team_id,
                            'logo': f'https://cdn.ssref.net/req/202408052/tlogo/fb/{team_id}.png',
                            'league': league,
                            'country': country,
                            'season': season,
                            'link': full_link
                        }

                        fbrefTeam.append(fbrefTeam_dic)

    return fbrefTeam


In [15]:
urls = ['https://fbref.com/es/comps/21/Estadisticas-de-Liga-Profesional-Argentina']
fbrefTeam = fbrefURLteam(urls)

fbrefTeam

[{'team': 'Huracan',
  'id': '1d3d37ae',
  'logo': 'https://cdn.ssref.net/req/202408052/tlogo/fb/1d3d37ae.png',
  'league': 'Liga Profesional',
  'country': 'Argentina',
  'season': 2024,
  'link': 'https://www.fbref.com/es/equipos/1d3d37ae/Estadisticas-de-Huracan'},
 {'team': 'Atletico Tucuman',
  'id': '42a1ab8b',
  'logo': 'https://cdn.ssref.net/req/202408052/tlogo/fb/42a1ab8b.png',
  'league': 'Liga Profesional',
  'country': 'Argentina',
  'season': 2024,
  'link': 'https://www.fbref.com/es/equipos/42a1ab8b/Estadisticas-de-Atletico-Tucuman'},
 {'team': 'Velez Sarsfield',
  'id': '41c139b6',
  'logo': 'https://cdn.ssref.net/req/202408052/tlogo/fb/41c139b6.png',
  'league': 'Liga Profesional',
  'country': 'Argentina',
  'season': 2024,
  'link': 'https://www.fbref.com/es/equipos/41c139b6/Estadisticas-de-Velez-Sarsfield'},
 {'team': 'Racing Club',
  'id': '8e20e13d',
  'logo': 'https://cdn.ssref.net/req/202408052/tlogo/fb/8e20e13d.png',
  'league': 'Liga Profesional',
  'country': '

### In: Equipos - Out: Jugadores

In [16]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time

def fbrefURLplayer(urls, league, season, delay=10):
    fbrefPlayer = []
    repetidos = []

    for url in urls:
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        # Extraer tournament_id y season_id de la URL
        parts = url.rstrip('/').split('/')
        team_id = parts[-2]
        season = season
        league = league
        team = parts[-1].replace('Estadisticas-de-', '').replace('-', ' ')

        # Obtener la página HTML
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Buscar el contenedor específico por su ID
        table_container = soup.find('div', id='all_stats_standard')

        if table_container:
            links = table_container.find_all('a', href=True)  # Buscar enlaces solo dentro del contenedor

            for link in links:
                href = link['href']

                if href not in repetidos:
                    repetidos.append(href)

                    if '/es/jugadores/' in href:
                        player_name = href.rstrip('/').split('/')[-1].replace('-', ' ')
                        player_id = href.rstrip('/').split('/')[-2]
                        full_link = 'https://www.fbref.com' + href

                        fbrefplayer_dic = {
                            'player': player_name,
                            'id': player_id,
                            'profile': f'https://fbref.com/req/202302030/images/headshots/{player_id}_2022.jpg',
                            'team': team,
                            'league': league,
                            'season': season,
                            'link': full_link
                        }

                        fbrefPlayer.append(fbrefplayer_dic)

    return fbrefPlayer


In [17]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time

def fbrefURLplayer(urls, league, season, delay=10):
    fbrefPlayer = []
    repetidos = []

    for url in urls:
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        # Extraer tournament_id y season_id de la URL
        parts = url.rstrip('/').split('/')
        team_id = parts[-2]
        season = season
        league = league
        team = parts[-1].replace('Estadisticas-de-', '').replace('-', ' ')

        # Obtener la página HTML
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Buscar el contenedor específico por su ID
        table_container = soup.find('div', id='all_stats_standard')

        if table_container:
            links = table_container.find_all('a', href=True)  # Buscar enlaces solo dentro del contenedor

            for link in links:
                href = link['href']

                if href not in repetidos and 'summary' not in href:  # Excluir enlaces que contienen 'summary'
                    repetidos.append(href)

                    if '/es/jugadores/' in href:
                        player_name = href.rstrip('/').split('/')[-1].replace('-', ' ')
                        player_id = href.rstrip('/').split('/')[-2]
                        full_link = 'https://www.fbref.com' + href

                        fbrefplayer_dic = {
                            'player': player_name,
                            'id': player_id,
                            'profile': f'https://fbref.com/req/202302030/images/headshots/{player_id}_2022.jpg',
                            'team': team,
                            'league': league,
                            'season': season,
                            'link': full_link
                        }

                        fbrefPlayer.append(fbrefplayer_dic)

    return fbrefPlayer


In [64]:
urls = ['https://fbref.com/es/equipos/1d3d37ae/Estadisticas-de-Huracan']
fbrefURLplayer = fbrefURLplayer(urls, league, season, delay=10)
fbrefURLplayer

[{'player': 'Walter Mazzantti',
  'id': '55ce9bfd',
  'profile': 'https://fbref.com/req/202302030/images/headshots/55ce9bfd_2022.jpg',
  'team': 'Huracan',
  'league': 'LPA',
  'season': 2024,
  'link': 'https://www.fbref.com/es/jugadores/55ce9bfd/Walter-Mazzantti'},
 {'player': 'Lucas Carrizo',
  'id': '2bfc68c3',
  'profile': 'https://fbref.com/req/202302030/images/headshots/2bfc68c3_2022.jpg',
  'team': 'Huracan',
  'league': 'LPA',
  'season': 2024,
  'link': 'https://www.fbref.com/es/jugadores/2bfc68c3/Lucas-Carrizo'},
 {'player': 'William Alarcon',
  'id': '325d990a',
  'profile': 'https://fbref.com/req/202302030/images/headshots/325d990a_2022.jpg',
  'team': 'Huracan',
  'league': 'LPA',
  'season': 2024,
  'link': 'https://www.fbref.com/es/jugadores/325d990a/William-Alarcon'},
 {'player': 'Rodrigo Cabral',
  'id': '9c874e17',
  'profile': 'https://fbref.com/req/202302030/images/headshots/9c874e17_2022.jpg',
  'team': 'Huracan',
  'league': 'LPA',
  'season': 2024,
  'link': 'ht

### Ejecución

In [141]:
def fbref_teams(league_url):
    fbrefTeam = []
    repetidos = []

    # Extraer tournament_id y season_id de la URL
    parts = league_url.rstrip('/').split('/')
    league_id = parts[-2]
    season = datetime.now().year
    final = parts[-1].rstrip('-').replace('Estadisticas-de-', '').split('-')
    league = f'{final[-3]} {final[-2]}'
    country = final[-1]

    # Obtener la página HTML
    response = requests.get(league_url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Buscar el contenedor específico por su ID
    table_container = soup.find('div', id='div_results2024211_overall')

    if table_container:
        links = table_container.find_all('a', href=True)  # Buscar enlaces solo dentro del contenedor

        for link in links:
            href = link['href']

            if href not in repetidos:
                repetidos.append(href)

                if '/es/equipos/' in href:
                    team_name = href.rstrip('/').split('/')[-1].replace('Estadisticas-de-', '').replace('-', ' ')
                    team_id = href.rstrip('/').split('/')[-2]
                    full_link = 'https://www.fbref.com' + href

                    fbrefTeam_dic = {
                        'name': team_name,
                        'id': team_id,
                        'logo': f'https://cdn.ssref.net/req/202408052/tlogo/fb/{team_id}.png',
                        'league': league,
                        'league_id': league_id,
                        'country': country,
                        'season': season,
                        'link': full_link
                    }

                    fbrefTeam.append(fbrefTeam_dic)

    return fbrefTeam

In [142]:
def fbref_players(teams, delay=10):
    fbrefPlayer = []
    repetidos = []

    for i in range(0,len(teams)):
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        # Extraer tournament_id y season_id de la URL
        team = teams[i]['name']
        season = teams[i]['season']
        league = teams[i]['league']
        country = teams[i]['country']

        # Obtener la página HTML
        response = requests.get(teams[i]['link'])
        soup = BeautifulSoup(response.content, 'html.parser')

        # Buscar el contenedor específico por su ID
        table_container = soup.find('div', id='all_stats_standard')

        if table_container:
            links = table_container.find_all('a', href=True)  # Buscar enlaces solo dentro del contenedor

            for link in links:
                href = link['href']

                if href not in repetidos and 'summary' not in href:  # Excluir enlaces que contienen 'summary'
                    repetidos.append(href)

                    if '/es/jugadores/' in href:
                        player_name = href.rstrip('/').split('/')[-1].replace('-', ' ')
                        player_id = href.rstrip('/').split('/')[-2]
                        full_link = 'https://www.fbref.com' + href

                        fbrefplayer_dic = {
                            'player': player_name,
                            'id': player_id,
                            'profile': f'https://fbref.com/req/202302030/images/headshots/{player_id}_2022.jpg',
                            'team': team,
                            'league': league,
                            'season': season,
                            'country': country,
                            'link': full_link
                        }
                        
                        fbrefPlayer.append(fbrefplayer_dic)

    return fbrefPlayer

In [143]:
def positions(league_url):

    parts = league_url.rstrip('/').split('/')
    id = parts[-2]
    season = datetime.now().year
    final = parts[-1].rstrip('-').replace('Estadisticas-de-', '').split('-')
    league = f'{final[-3]} {final[-2]}'
    country = final[-1]
    
    try:
        # Leer la tabla de la URL especificada con el atributo id
        df = pd.read_html(league_url, attrs={'id': f'results{season}{id}1_overall'})[0]
        
        df['league'] = league
        df['season'] = season
        df['country'] = country

        df.rename(columns={'Equipo': 'team'}, inplace=True)
    
        # Guardar el DataFrame en un archivo CSV
        df.to_csv('positions.csv', index=False)

        return
                
    except Exception as e:
        print(f"Error al procesar id {id} en la liga {league_url}: {e}")
        return

In [144]:
def squads(teams, delay=10):

    dfs = []

    for i in range(0,len(teams)):
        time.sleep(delay)  # Respetar el retraso entre solicitudes

        # Extraer tournament_id y season_id de la URL
        team = teams[i]['name']
        season = teams[i]['season']
        league = teams[i]['league']
        country = teams[i]['country']
        id = 'stats_standard_'+teams[i]['league_id']

        df = pd.read_html(teams[i]['link'],
                        attrs={'id':id})[0]

        df.columns = df.columns.get_level_values(1)

        df['Edad'] = df['Edad'].apply(lambda x: float(x.rstrip('-').split('-')[0]) + float(x.rstrip('-').split('-')[1])/365 if isinstance(x, str) and '-' in x else x)
        df['Edad'] = pd.to_numeric(df['Edad'], errors='coerce')
        df_team = df.dropna(subset=['Edad'])

        # Usar una expresión regular para extraer el nombre
        
        df_team['País'] = df_team['País'].apply(lambda x: str(x)[-3:])

        df_team.rename(columns={'Jugador': 'player'}, inplace=True)
        df_team['league'] = league
        df_team['season'] = season
        df_team['team'] = team
        df_team['country'] = country

        dfs.append(df_team)

    squads = pd.concat(dfs, ignore_index=True)
    squads.to_csv('squads.csv', index=False)

    return

In [145]:
def stats(league_url, delay=10):
    
    parts = league_url.rstrip('/').split('/')
    id = parts[-2]
    season = datetime.now().year
    final = parts[-1].rstrip('-').replace('Estadisticas-de-', '').split('-')
    league = f'{final[-3]} {final[-2]}'
    country = final[-1]

    ids = ('stats_squads_standard_for','stats_squads_standard_against',
        'stats_squads_keeper_for','stats_squads_keeper_against',
        'stats_squads_keeper_adv_for','stats_squads_keeper_adv_against',
        'stats_squads_shooting_for','stats_squads_shooting_against',
        'stats_squads_passing_for','stats_squads_passing_against',
        'stats_squads_passing_types_for','stats_squads_passing_types_against',
        'stats_squads_gca_for','stats_squads_gca_against',
        'stats_squads_defense_for', 'stats_squads_defense_against',
        'stats_squads_possession_for','stats_squads_possession_against',
        'stats_squads_playing_time_for','stats_squads_playing_time_against',
        'stats_squads_misc_for','stats_squads_misc_against'
    )

    dfs = []

    for id in ids:
        # Esperar entre solicitudes
        time.sleep(delay)
        
        try:
            # Leer la tabla de la URL especificada con el atributo id
            df = pd.read_html(league_url, attrs={'id': id})[0]

            # Verificar si las columnas tienen múltiples niveles
            if isinstance(df.columns, pd.MultiIndex):
                # Separar los dos niveles de columnas en 'Nivel1' y 'Nivel2'
                df.columns = ['_'.join(col).strip() if 'Unnamed' not in col[0] else col[1] for col in df.columns]

            # Pivotear el DataFrame
            df_pivot = df.melt(id_vars=['Equipo'], var_name='stat', value_name='value')

            # Separar los niveles si la columna 'stat' tiene '_'
            if df_pivot['stat'].str.contains('_').any():
                df_pivot[['class', 'stat']] = df_pivot['stat'].str.split('_', expand=True, n=1)
            else:
                df_pivot['stat'] = ''
                df_pivot['class'] = df_pivot['stat']

            df_pivot['stat'] = df_pivot.apply(
                lambda row: row['class'] if pd.isna(row['stat']) else row['stat'], axis=1
            )
            df_pivot['class'] = df_pivot.apply(
                lambda row: np.nan if row['stat'] == row['class'] else row['class'], axis=1
            )

            # Extraer el id antes de '_for' o '_against'
            tabla_match = re.match(r'(.+)(_for|_against)', id)
            table = re.sub(r'stats_squads_', '', tabla_match.group(1))
            table = table.replace('_', ' ').title()

            if tabla_match:
                df_pivot['target'] = 'for' if '_for' in id else 'against'
                df_pivot['table'] = table
            else:
                df_pivot['table'] = table
                df_pivot['target'] = None

            # Agregar el DataFrame pivoteado a la lista de DataFrames
            
            df_pivot['target'] = df_pivot['target'].str.title()
            df_pivot['league'] = league
            df_pivot['country'] = country
            df_pivot['season'] = season
            df_pivot.rename(columns={'Equipo': 'team'}, inplace=True)
            
            dfs.append(df_pivot)
                    
        except Exception as e:
            print(f"Error al procesar id {id}: {e}")
    
    # Concatenar todos los DataFrames
    stats = pd.concat(dfs, ignore_index=True)
    stats.to_csv('stats.csv', index=False)
    
    return

In [146]:
league = 'https://fbref.com/es/comps/21/Estadisticas-de-Liga-Profesional-Argentina'
teams = fbref_teams(league)
players = fbref_players(teams)
positions(league)
squads(teams)
stats(league)
