# Import Required Libraries
Import necessary libraries such as pandas, torch, unicodedata, and itertools.

In [5]:
# Import necessary libraries
import pandas as pd  # For data manipulation and analysis
import torch  # For GPU-based tensor computations
import unicodedata  # For text normalization
from itertools import product  # For generating Cartesian products

# Configure Device
Set up the device to use GPU if available, otherwise default to CPU.

In [6]:
# Configure Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available, otherwise CPU
print(f"Using device: {device}")  # Display the selected device

Using device: cpu


# Define Helper Functions
Define the `normalizar` function to normalize text by removing accents and special characters.

In [7]:
# Define Helper Functions
def normalizar(nombre):
    """
    Normalize text by removing accents and special characters.
    
    Args:
        nombre (str): Input string to normalize.
        
    Returns:
        str: Normalized string.
    """
    return ''.join(c for c in unicodedata.normalize('NFKD', str(nombre)) if not unicodedata.combining(c))

# Load and Normalize Data
Load CSV files for `tabla_posiciones` and `partidos_faltantes`, and normalize their text columns using the `normalizar` function.

In [8]:
# Load CSV files
tabla_posiciones = pd.read_csv("Tabla_posiciones.csv", encoding="latin1")  # Load 'tabla_posiciones' dataset
partidos_faltantes = pd.read_csv("PartidosFaltantes.csv", encoding="latin1")  # Load 'partidos_faltantes' dataset

# Normalize text columns
tabla_posiciones["Country"] = tabla_posiciones["Country"].apply(normalizar)  # Normalize 'Country' column
partidos_faltantes["Local"] = partidos_faltantes["Local"].apply(normalizar)  # Normalize 'Local' column
partidos_faltantes["Visitor"] = partidos_faltantes["Visitor"].apply(normalizar)  # Normalize 'Visitor' column

# Apply Match Results
Update the points table to reflect the Bolivia vs. Uruguay match result and remove the match from the pending matches list.

In [None]:
# Apply Match Results

# Update points for Bolivia and Uruguay to reflect the 0-0 match result
#tabla_posiciones.loc[tabla_posiciones["Country"] == "Bolivia", "PTS"] += 1
#tabla_posiciones.loc[tabla_posiciones["Country"] == "Uruguay", "PTS"] += 1

# Remove the Bolivia vs. Uruguay match from the pending matches list
#partidos_faltantes = partidos_faltantes[~(
#    (partidos_faltantes["Local"] == "Bolivia") &
#    (partidos_faltantes["Visitor"] == "Uruguay")
#)]

# Prepare Base Structures
Prepare lists of teams, matches, and indices, and create a tensor for base points.

In [9]:
# Prepare Base Structures

# Create a sorted list of team names
equipos = sorted(tabla_posiciones["Country"].tolist())

# Create a list of matches as tuples of (Local, Visitor)
partidos = list(zip(partidos_faltantes["Local"], partidos_faltantes["Visitor"]))

# Get the number of matches and teams
num_partidos = len(partidos)
num_equipos = len(equipos)

# Create a dictionary mapping team names to their indices
equipo_idx = {e: i for i, e in enumerate(equipos)}

# Create a tensor for base points of each team
puntos_base = torch.tensor(
    [tabla_posiciones.loc[tabla_posiciones["Country"] == team, "PTS"].values[0] for team in equipos],
    dtype=torch.int16,
    device=device
)

# Generate Match Combinations
Generate all possible score combinations for the matches or use Monte Carlo sampling if the total combinations exceed a threshold.

In [10]:
# Generate Match Combinations

# Define the range of possible goals (0 to 4)
goles = torch.arange(5, device=device)

# Generate all possible score combinations for a single match
comb_por_partido = torch.cartesian_prod(goles, goles)

# Calculate the total number of combinations for all matches
total_combs = 25 ** num_partidos  # 25 = 5 (local goals) * 5 (visitor goals)

# Define the maximum number of combinations to consider
max_combs = 100000

# Check if the total combinations exceed the threshold
if total_combs > max_combs:
    # Use Monte Carlo sampling to generate a subset of combinations
    print("Demasiadas combinaciones, usando muestra Montecarlo.")
    comb_idx = torch.randint(0, 25, (max_combs, num_partidos), device=device)
else:
    # Generate all possible combinations
    comb_idx = torch.tensor(list(product(range(25), repeat=num_partidos)), device=device)

# Retrieve the goal combinations for all matches
goles_comb = comb_por_partido[comb_idx]  # Shape: [N, num_partidos, 2]

Demasiadas combinaciones, usando muestra Montecarlo.


# Initialize Counters
Initialize tensors for points and goal differences for all teams.

In [11]:
# Initialize Counters

# Get the number of combinations (N) from the shape of goles_comb
N = goles_comb.shape[0]

# Initialize the points tensor by repeating the base points for all combinations
puntos = puntos_base.repeat(N, 1)

# Initialize the goal difference (gd) tensor with zeros
gd = torch.zeros_like(puntos)

# Calculate Points and Goal Differences
Iterate through matches to calculate points and goal differences for each team based on the simulated results.

In [12]:
# Calculate Points and Goal Differences

# Iterate through each match to calculate points and goal differences
for i, (local, visitante) in enumerate(partidos):
    # Extract goals for local and visitor teams from the combinations
    gl = goles_comb[:, i, 0]  # Goals scored by the local team
    gv = goles_comb[:, i, 1]  # Goals scored by the visitor team
    
    # Get the indices of the local and visitor teams
    idx_l = equipo_idx[local]
    idx_v = equipo_idx[visitante]
    
    # Update goal differences for local and visitor teams
    gd[:, idx_l] += gl - gv  # Local team's goal difference
    gd[:, idx_v] += gv - gl  # Visitor team's goal difference
    
    # Update points for local and visitor teams based on match results
    puntos[:, idx_l] += (gl > gv) * 3 + (gl == gv)  # Local team points
    puntos[:, idx_v] += (gv > gl) * 3 + (gv == gl)  # Visitor team points

# Generate Rankings
Sort teams based on points and goal differences, and calculate position and classification probabilities.

In [13]:
# Generate Rankings

# Sort teams based on points and goal differences
ranking = torch.argsort(torch.stack((puntos, gd), dim=2), dim=1, descending=True)

# Initialize tensors to count positions and classifications
pos_counts = torch.zeros((num_equipos, num_equipos), device=device)
class_counts = torch.zeros((num_equipos, 3), device=device)

# Calculate position probabilities
for pos in range(num_equipos):
    for t in range(num_equipos):
        pos_counts[t, pos] += (ranking[:, pos] == t).sum()

# Calculate classification probabilities
for i in range(num_equipos):
    class_counts[i, 0] = pos_counts[i, :6].sum()  # Top 6 positions (classified)
    class_counts[i, 1] = pos_counts[i, 6]        # 7th position (playoff)
    class_counts[i, 2] = pos_counts[i, 7:].sum() # 8th and below (eliminated)

# Convert position probabilities to a DataFrame
df_pos = pd.DataFrame(
    (pos_counts / N * 100).cpu().numpy(),
    index=equipos,
    columns=[f"Pos {i+1}" for i in range(num_equipos)]
).round(2)

# Convert classification probabilities to a DataFrame
df_class = pd.DataFrame(
    (class_counts / N * 100).cpu().numpy(),
    index=equipos,
    columns=["Clasificado", "Repechaje", "Eliminado"]
).round(2)

# Display results
print("\n=== Probabilidades por Posición (%) ===")
print(df_pos)
print("\n=== Probabilidades de Clasificación (%) ===")
print(df_class)


=== Probabilidades por Posición (%) ===
               Pos 1      Pos 2      Pos 3      Pos 4      Pos 5      Pos 6  \
Argentina  96.940002  21.820000  13.520000  11.280000  10.290000   9.880000   
Bolivia    11.300000  11.100000  11.390000  12.420000  15.250000  21.469999   
Brasil     15.080000  33.770000  35.070000  30.440001  24.629999  19.750000   
Chile      10.350000  10.620000  10.220000  10.050000  10.390000  11.150000   
Colombia   11.020000  21.240000  24.559999  28.030001  30.680000  32.939999   
Ecuador    16.400000  39.950001  32.770000  26.969999  23.360001  18.860001   
Paraguay   11.240000  21.180000  27.830000  31.879999  30.820000  28.280001   
Peru        9.340000   9.130000   9.690000   9.980000  10.230000  11.750000   
Uruguay     9.720000  22.420000  25.480000  28.780001  32.790001  30.490000   
Venezuela   8.610000   8.750000   9.470000  10.170000  11.570000  15.420000   

               Pos 7      Pos 8      Pos 9     Pos 10  
Argentina   9.550000   9.150000  

# Display Results
Create and display dataframes showing probabilities by position and classification.

In [14]:
# Display Results

# Display the probabilities by position as a DataFrame
print("\n=== Probabilidades por Posición (%) ===")
display(df_pos)

# Display the probabilities of classification as a DataFrame
print("\n=== Probabilidades de Clasificación (%) ===")
display(df_class)


=== Probabilidades por Posición (%) ===


Unnamed: 0,Pos 1,Pos 2,Pos 3,Pos 4,Pos 5,Pos 6,Pos 7,Pos 8,Pos 9,Pos 10
Argentina,96.940002,21.82,13.52,11.28,10.29,9.88,9.55,9.15,9.02,8.54
Bolivia,11.3,11.1,11.39,12.42,15.25,21.469999,43.5,35.830002,24.139999,13.59
Brasil,15.08,33.77,35.07,30.440001,24.629999,19.75,12.61,10.26,9.38,9.01
Chile,10.35,10.62,10.22,10.05,10.39,11.15,16.77,27.370001,39.709999,53.380001
Colombia,11.02,21.24,24.559999,28.030001,30.68,32.939999,19.309999,12.18,10.28,9.76
Ecuador,16.4,39.950001,32.77,26.969999,23.360001,18.860001,11.33,10.0,10.22,10.12
Paraguay,11.24,21.18,27.83,31.879999,30.82,28.280001,16.33,11.62,10.44,10.38
Peru,9.34,9.13,9.69,9.98,10.23,11.75,21.559999,33.560001,38.549999,46.209999
Uruguay,9.72,22.42,25.48,28.780001,32.790001,30.49,15.92,12.02,11.26,11.12
Venezuela,8.61,8.75,9.47,10.17,11.57,15.42,33.119999,38.009998,37.0,27.879999



=== Probabilidades de Clasificación (%) ===


Unnamed: 0,Clasificado,Repechaje,Eliminado
Argentina,163.75,9.55,26.700001
Bolivia,82.940002,43.5,73.559998
Brasil,158.740005,12.61,28.65
Chile,62.77,16.77,120.459999
Colombia,148.470001,19.309999,32.220001
Ecuador,158.320007,11.33,30.35
Paraguay,151.229996,16.33,32.439999
Peru,60.119999,21.559999,118.309998
Uruguay,149.679993,15.92,34.400002
Venezuela,63.990002,33.119999,102.889999
