In [2]:
import pandas as pd
import numpy as np
import time

In [19]:
df_men = pd.read_excel(
    "OlympicRankings2.xlsx", sheet_name="March men's Olympic standings"
)

In [15]:
df_men.fillna(0, inplace=True)
df_men.drop(df_men.index[-2:], inplace=True)

tournaments_left = [
    "Challenge Recife, Brazil",
    "Challenge Saquarema, Brazil",
    "Challenge Guadalajara, Mexico",
    "Elite16 Tepic, Mexico",
    "Challenge Xiamen, China",
    "Elite16 Natal, Brazil",
    "Elite16 Espinho, Portugal",
    "Challenge Stare Jablonki, Poland",
    "Elite 16 Ostrava, Czech Republic",
]


df_men[tournaments_left] = pd.DataFrame(
    [[float("nan")] * len(tournaments_left)], index=df_men.index
)

df_men = df_men[~df_men["Team"].str.contains("Ondrej Perusic")]
df_men.reset_index(inplace=True, drop=True)

In [20]:
df_men.columns

Index(['Team', 'Country', 'Total', 'Final Evet 2024', 'Doha Elite16',
       'Mollymook', 'NORCECA Champs', 'Nuvali Challenge',
       'Joao Pessoa Elite16', 'Chiang Mai Challenge', 'Haikou Challenge',
       'Goa Challenge', 'World Championships', 'Paris Elite16',
       'Hamburg Elite16', 'European Championships', 'Montreal Elite16',
       'Edmonton Challenge', 'Asian Continental Cup', 'South American Cup',
       'Espinho Challenge', 'Gstaad Elite16', 'Jurmala Challenge',
       'Ostrava Elite 16', 'Uberlandia Elite 16', 'Saquarema Challenge',
       'Itapema Challenge', 'Tepic Elite 16', 'La Paz Challenge',
       'Doha Elite 16'],
      dtype='object')

In [5]:
tournament_cols = df_men.drop(["Team", "Country", "Total "], axis=1).columns


point_elite = [
    1200,
    1100,
    1000,
    900,
    760,
    760,
    760,
    760,
    600,
    600,
    600,
    600,
    460,
    460,
    460,
    460,
    400,
    400,
    400,
    340,
    340,
    340,
    340,
    340,
    340,
    340,
    340,
]


point_chal = [
    800,
    760,
    720,
    680,
    600,
    600,
    600,
    600,
    460,
    460,
    460,
    460,
    460,
    460,
    460,
    460,
    360,
    360,
    300,
    300,
    300,
    300,
    300,
    300,
    220,
    220,
    220,
    220,
    220,
    220,
    220,
    140,
    140,
    140,
    140,
    140,
    140,
]


point_elite = (point_elite) + list(np.zeros(64 - len(point_elite)))

point_chal = point_chal + list(np.zeros(64 - len(point_chal)))


point_elite = np.array(point_elite)

point_chal = np.array(point_chal)

In [165]:
time_dict = {
    "fill_remaining_tournaments": [],
    "calc_total_points": [],
    "check_qualified": [],
}


def fill_remaining_tournaments(df, verbose=False):
    start_time = time.time()
    for t in tournaments_left:
        if "Challenge" in t:
            random_list_outcomes_c = np.random.choice(
                point_chal, size=64, replace=False
            )

            df[t] = random_list_outcomes_c
        else:
            random_list_outcomes_e16 = np.random.choice(
                point_elite, size=64, replace=False
            )

            df[t] = random_list_outcomes_e16

    end_time = time.time()
    elapsed_time = end_time - start_time
    if verbose:
        time_dict["fill_remaining_tournaments"].append(elapsed_time)
    return df


def calc_total_points(df, verbose=False):
    start_time = time.time()
    df["new_total_points"] = None

    totals = -1 * (
        np.sum(
            np.partition(-df_men[tournament_cols].values, 12, axis=1)[:, :12], axis=1
        )
    )
    df["new_total_points"] = totals
    end_time = time.time()
    elapsed_time = end_time - start_time
    if verbose:
        time_dict["calc_total_points"].append(elapsed_time)
    return df


def create_qualified_dict(df, verbose=False):
    start_time = time.time()

    team_map = {team: [] for team in df["Team"]}
    end_time = time.time()
    elapsed_time = end_time - start_time
    return team_map


def check_qualified(df, team_map: dict, verbose=False) -> dict:
    start_time = time.time()
    df_test = df.copy()
    # df_sorted = df_test.sort_values(by="new_total_points", ascending=False)
    # df_top17 = df_sorted[:17]

    column_to_sort = df_test["new_total_points"].values
    sorted_is = np.argsort(column_to_sort)
    sorted_index = sorted_is[0:][::-1]
    index_top_17 = sorted_index[:16]
    df_top17 = df_test.iloc[index_top_17]

    while any(df_top17["Country"].value_counts() >= 3):
        country_counts = df_top17["Country"].value_counts()

        countries_over_3 = list(country_counts[country_counts > 2].index)
        if countries_over_3 != 0:
            for country in countries_over_3:
                df_top17.reset_index(drop=True, inplace=True)
                drop_index = df_top17[df_top17["Country"] == country].index[2:]

                # df_sorted.drop(drop_index, inplace=True)
                # df_top17 = df_sorted[:17]
                # print(drop_index)
                sorted_index = np.delete(sorted_index, drop_index)
                index_top_17 = sorted_index[:16]
                df_top17 = df_test.iloc[index_top_17]

    top17_teams_set = set(df_top17["Team"])
    for team in df["Team"]:
        team_map[team].append(team in top17_teams_set)

    end_time = time.time()
    elapsed_time = end_time - start_time
    if verbose:
        time_dict["check_qualified"].append(elapsed_time)

    return team_map

In [166]:
def simulate(df, num_simulations=100, verbose=False):
    np.random.seed(42)
    team_qualified_map = create_qualified_dict(df, verbose)

    for _ in range(num_simulations):
        df = fill_remaining_tournaments(df, verbose)
        df = calc_total_points(df, verbose)

        team_qualified_map = check_qualified(df, team_qualified_map, verbose)

    return team_qualified_map

In [167]:
time_dict = {
    "fill_remaining_tournaments": [],
    "calc_total_points": [],
    "check_qualified": [],
}

team_qualified_map = simulate(df_men, 10000, verbose=True)

In [168]:
sum_dict = {key: np.sum(value) for key, value in time_dict.items()}
mean_dict = {key: np.mean(value) for key, value in time_dict.items()}

In [169]:
sum_dict

{'fill_remaining_tournaments': 39.78605246543884,
 'calc_total_points': 24.727661848068237,
 'check_qualified': 36.93440318107605}

In [170]:
mean_dict

{'fill_remaining_tournaments': 0.003978605246543884,
 'calc_total_points': 0.002472766184806824,
 'check_qualified': 0.003693440318107605}

In [175]:
standings = pd.Series(
    {team: np.mean(bool_list) for team, bool_list in team_qualified_map.items()}
)


standings.sort_values(ascending=False).head(17)

Kusti Nolvak, Mart Tiisaar               0.2722
Audrius Knasas, Patrikas Stankevicius    0.2690
Thomas Hodges, Zac Schubert              0.2686
Jorge Alayo, Noslen Diaz                 0.2681
Artur Hajos, Bence Streli                0.2657
                                          ...  
Saymon Barbosa, Vinicius Rezende         0.2141
Arthur da Silva, Adrielson Dos Santos    0.2114
Felipe Alves, Gabriel Dos Reis           0.2079
Andre Loyola, George Wanderley           0.2077
Vitor Felipe, Renato Lima                0.2037
Length: 64, dtype: float64

In [172]:
df_men.head(17)

Unnamed: 0,Team,Country,Total,Final Evet 2024,NORCECA Champs,Nuvali Challenge,Joao Pessoa Elite16,Chiang Mai Challenge,Haikou Challenege,Goa Challenge,...,"Challenge Recife, Brazil","Challenge Saquarema, Brazil","Challenge Guadalajara, Mexico","Elite16 Tepic, Mexico","Challenge Xiamen, China","Elite16 Natal, Brazil","Elite16 Espinho, Portugal","Challenge Stare Jablonki, Poland","Elite 16 Ostrava, Czech Republic",new_total_points
0,"Anders Mol, Christian Sorum",Norway,9460,460.0,0.0,0.0,760.0,600.0,0.0,300.0,...,220.0,460.0,140.0,0.0,0.0,0.0,760.0,140.0,0.0,8000.0
1,"David Ahman, Jonatan Hellvig",Sweeden,8920,0.0,0.0,220.0,0.0,0.0,0.0,0.0,...,220.0,220.0,0.0,0.0,460.0,0.0,0.0,460.0,340.0,3620.0
2,"Sam Cottafava, Paolo Nicolai",Italy,8440,1200.0,340.0,460.0,760.0,140.0,0.0,0.0,...,0.0,0.0,140.0,460.0,0.0,760.0,0.0,300.0,340.0,7840.0
3,"Andre Loyola, George Wanderley",Brazil,8380,1000.0,0.0,0.0,0.0,0.0,0.0,220.0,...,0.0,0.0,460.0,0.0,140.0,0.0,460.0,680.0,0.0,5680.0
4,"Nils Ehlers, Clemens Wickler",Germany,8200,0.0,0.0,720.0,0.0,300.0,0.0,0.0,...,600.0,0.0,220.0,760.0,0.0,400.0,340.0,300.0,0.0,7340.0
5,"Evandro Goncalves, Arthur Mariano",Brazil,7800,340.0,0.0,300.0,340.0,140.0,340.0,0.0,...,0.0,0.0,0.0,1100.0,0.0,760.0,0.0,0.0,340.0,7440.0
6,"Andy Benesh, Miles Partain",United States,7620,460.0,1000.0,460.0,0.0,460.0,0.0,0.0,...,0.0,600.0,0.0,0.0,300.0,340.0,0.0,0.0,340.0,6840.0
7,"Alex Brouwer, Robert Meeuwsen",Netherlands,7560,340.0,0.0,140.0,0.0,0.0,0.0,0.0,...,800.0,360.0,600.0,400.0,0.0,340.0,0.0,0.0,0.0,6400.0
8,"Adrian Gavira, Pablo Herrera",Spain,7500,0.0,0.0,140.0,0.0,360.0,600.0,460.0,...,0.0,360.0,0.0,0.0,220.0,0.0,0.0,800.0,340.0,6160.0
9,"Thomas Hodges, Zac Schubert",Austrailia,7260,0.0,400.0,300.0,600.0,0.0,0.0,460.0,...,460.0,140.0,0.0,0.0,0.0,0.0,400.0,0.0,340.0,4880.0


In [173]:
point_elite = [
    1200,
    1100,
    1000,
    900,
    760,
    760,
    760,
    760,
    600,
    600,
    600,
    600,
    460,
    460,
    460,
    460,
    400,
    400,
    400,
    340,
    340,
    340,
    340,
    340,
    340,
    340,
    340,
]

point_chal = [
    800,
    760,
    720,
    680,
    600,
    600,
    600,
    600,
    460,
    460,
    460,
    460,
    460,
    460,
    460,
    460,
    360,
    360,
    300,
    300,
    300,
    300,
    300,
    300,
    220,
    220,
    220,
    220,
    220,
    220,
    220,
    140,
    140,
    140,
    140,
    140,
    140,
]

In [174]:
def find_combination_local_search(lists, target):
    # initial solution - all 0's
    combo_list = np.zeros(len(lists))
    if sum(combo_list) > target:
        return combo_list
    
    original_combo_list = combo_list
    # neighbors - changing by 1 number in one list at a time
    while True:
        original_combo_list[list_num] = lists[list_num][point_num]
        original_sum = sum(combo_list)
        original_diff = abs(target - original_sum)
        list_num = 0
        point_num = 0
        new_combo_list = original_combo_list
        new_combo_list[list_num] = lists[list_num][point_num]
        new_sum = sum(combo_list)
        new_diff = abs(target - new_sum)
        if new_diff < original_diff:

    

# Example usage:
# lists = [
#     point_elite,
#     point_chal,
# ]
# target_value = 1000
# result = find_combination_local_search(lists, target_value)
# print(result)

IndentationError: expected an indented block after 'if' statement on line 19 (3908103000.py, line 24)