### Configuración Inicial

In [1]:
!pip uninstall -y numpy
!pip install numpy==1.26

Found existing installation: numpy 1.26.0
Uninstalling numpy-1.26.0:
  Successfully uninstalled numpy-1.26.0
Collecting numpy==1.26
  Using cached numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)
Using cached numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.9 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.0 which is incompatible.
shap 0.50.0 requires numpy>=2, but you have numpy 1.26.0 which is incompatible.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.0 which is incompatible.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.0 which 

In [2]:
!pip install scikit-surprise --no-build-isolation --no-deps
!pip install memory_profiler

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp312-cp312-linux_x86_64.whl size=2708558 sha256=e44fa3913df5b81e3b7f6c3d5e4c9f61a565f551d4318f3adc17467d8468ce09
  Stored in directory: /root/.cache/pip/wheels/75/fa/bc/739bc2cb1fbaab6061854e6cfbb81a0ae52c92a502a7fa454b
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4
Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl.metadata (20 kB)
Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0


### Instalación de Librerías

In [3]:
import pandas as pd
import gdown
from surprise import Dataset, Reader, SVDpp, accuracy
from surprise.model_selection import train_test_split
import time
from memory_profiler import memory_usage
from collections import defaultdict

### Importación de los Datos

In [4]:
gdown.download(id='1eGDDR1wlvR99eoCZG2owChy2dhkPp4yx', output='training_ratings.csv', quiet=False)
gdown.download(id='1oHo9HLB6SzeqZs76FCkfQ1irSQepqp16', output='validation_ratings.csv', quiet=False)

# dataset mechanics
gdown.download(id='1cVGSLNVqxrAoKzeqxt_FfQ4Ggs9VvCDO', output='mechanics.csv', quiet=False)
df_mechanics = pd.read_csv('mechanics.csv')

df_train = pd.read_csv('training_ratings.csv')
df_val = pd.read_csv('validation_ratings.csv')

Downloading...
From (original): https://drive.google.com/uc?id=1eGDDR1wlvR99eoCZG2owChy2dhkPp4yx
From (redirected): https://drive.google.com/uc?id=1eGDDR1wlvR99eoCZG2owChy2dhkPp4yx&confirm=t&uuid=f3c8339b-d0b0-4ef9-a460-68585e9aeb05
To: /content/training_ratings.csv
100%|██████████| 205M/205M [00:02<00:00, 102MB/s]
Downloading...
From: https://drive.google.com/uc?id=1oHo9HLB6SzeqZs76FCkfQ1irSQepqp16
To: /content/validation_ratings.csv
100%|██████████| 64.4M/64.4M [00:00<00:00, 95.8MB/s]
Downloading...
From: https://drive.google.com/uc?id=1cVGSLNVqxrAoKzeqxt_FfQ4Ggs9VvCDO
To: /content/mechanics.csv
100%|██████████| 7.05M/7.05M [00:00<00:00, 45.5MB/s]


### Preprocesamiento de Datos

In [5]:
df_train.drop_duplicates(inplace=True, subset=['user', 'item'])
df_val.drop_duplicates(inplace=True, subset=['user', 'item'])

In [6]:
print(f"Tamaño original del training set: {len(df_train)}")

# se obtiene un sample debido a que hay muchos datos y se demora mucho
df_train_sample = df_train.sample(n=1000000, random_state=42)
print(f"Tamaño del nuevo training set (muestra): {len(df_train_sample)}")


Tamaño original del training set: 10200445
Tamaño del nuevo training set (muestra): 1000000


In [7]:
df_mechanics.set_index('BGGId', inplace=True)
print("Datos de mecánicas cargados y listos.")

# --- Calcular la popularidad de los ítems ---
item_popularity = df_train['item'].value_counts().to_dict()
total_interactions = len(df_train)

# Convertimos las cuentas en probabilidades para el cálculo de novedad
item_popularity_prob = {item_id: count / total_interactions for item_id, count in item_popularity.items()}
print(f"Popularidad calculada para {len(item_popularity)} ítems.")

Datos de mecánicas cargados y listos.
Popularidad calculada para 16748 ítems.


In [8]:
from sklearn.metrics.pairwise import cosine_similarity

def novelty_at_k(group, k, popularity_prob):
    """Calcula la Novedad@K para un solo usuario/grupo."""
    group = group.sort_values('score', ascending=False)
    topk_items = group.head(k)['itemID']

    novelty_scores = []
    for item_id in topk_items:
        # Usar una probabilidad pequeña si el ítem no se vio en el entrenamiento
        prob = popularity_prob.get(item_id, 1e-6)
        novelty_scores.append(-np.log2(prob))

    return np.mean(novelty_scores) if novelty_scores else 0.0

def diversity_at_k(group, k, mechanics_df):
    """Calcula la Diversidad@K (Intra-List Diversity) para un solo usuario/grupo."""
    group = group.sort_values('score', ascending=False)
    topk_items = group.head(k)['itemID'].tolist()

    # Filtra ítems para asegurar que existan en el dataframe de mecánicas
    topk_items = [item for item in topk_items if item in mechanics_df.index]

    if len(topk_items) < 2:
        return 0.0

    # Obtiene los vectores de mecánicas para los ítems recomendados
    item_vectors = mechanics_df.loc[topk_items].values

    # Calcula la disimilitud del coseno (1 - similitud)
    dissimilarity_sum = 0
    num_pairs = 0
    for i in range(len(item_vectors)):
        for j in range(i + 1, len(item_vectors)):
            sim = cosine_similarity([item_vectors[i]], [item_vectors[j]])[0][0]
            dissimilarity_sum += (1 - sim)
            num_pairs += 1

    return dissimilarity_sum / num_pairs if num_pairs > 0 else 0.0

### Configuración de Experimentos

In [9]:
reader = Reader(rating_scale=(0, 10))
train_data = Dataset.load_from_df(df_train_sample[['user', 'item', 'rating']], reader)
trainset = train_data.build_full_trainset()

validation_tuples = [tuple(x) for x in df_val[['user', 'item', 'rating']].to_numpy()]

### Predicción de ratings y top N



In [10]:
def evaluar_svdpp(trainset, testset, n_factors, n_epochs, lr_all, reg_all, verbose):
    """
    Entrena, predice y evalúa el modelo SVD++, retornando el RMSE y MAE.
    """
    algo = SVDpp(n_factors=n_factors, n_epochs=n_epochs, lr_all=lr_all, reg_all=reg_all, verbose=verbose)

    algo.fit(trainset)

    predictions = algo.test(testset)

    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)

    return rmse, mae, predictions

start_time = time.time()

rmse_resultado, mae_resultado, predictions = evaluar_svdpp(trainset, validation_tuples, 50, 20, 0.01, 0.02, True)

end_time = time.time()
elapsed_time = end_time - start_time

memoria_usada = memory_usage((evaluar_svdpp, (trainset, validation_tuples, 50, 20, 0.01, 0.02, False)))
memoria_max = max(memoria_usada) - min(memoria_usada)

print(f"\n--- Resumen de Rendimiento (SVD++) ---")
print(f"RMSE final: {rmse_resultado:.4f}")
print(f"MAE final: {mae_resultado:.4f}")
print(f"Tiempo de ejecución: {elapsed_time:.2f} segundos")
print(f"Memoria utilizada: {memoria_max:.2f} MB")

 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8
 processing epoch 9
 processing epoch 10
 processing epoch 11
 processing epoch 12
 processing epoch 13
 processing epoch 14
 processing epoch 15
 processing epoch 16
 processing epoch 17
 processing epoch 18
 processing epoch 19
RMSE: 1.3031
MAE:  0.9908
RMSE: 1.3032
MAE:  0.9911

--- Resumen de Rendimiento (SVD++) ---
RMSE final: 1.3031
MAE final: 0.9908
Tiempo de ejecución: 201.93 segundos
Memoria utilizada: 1506.14 MB


In [11]:
def get_top_n(predictions, n=10):
    """Devuelve las N-mejores recomendaciones para cada usuario."""
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

top_n_predictions = get_top_n(predictions, n=10)

user_id_ejemplo = df_val['user'].iloc[0]
if user_id_ejemplo in top_n_predictions:
    print(f"\nTop 10 predicciones para el usuario '{user_id_ejemplo}':")
    for iid, est_rating in top_n_predictions[user_id_ejemplo]:
        print(f"  Ítem ID: {iid}, Rating Predicho: {est_rating:.2f}")


Top 10 predicciones para el usuario ' beastvol':
  Ítem ID: 3076, Rating Predicho: 7.56


Grupales:

In [12]:
import numpy as np
from sklearn.metrics import ndcg_score

print("Convirtiendo predicciones de Surprise a DataFrame...")
preds_df = pd.DataFrame(predictions, columns=['userID', 'itemID', 'rating', 'score', 'details'])

# Un rating >= 7 se considera una interacción positiva (relevante).
preds_df['label'] = (preds_df['rating'] >= 7).astype(int)

df_eval = preds_df[['userID', 'itemID', 'label', 'score']]
print("DataFrame de evaluación creado con éxito.")
print(df_eval.head())



print("\nCreando grupos sintéticos...")
user_counts = df_eval['userID'].value_counts()
# Nos quedamos con usuarios que tienen al menos 10 ratings para formar grupos más consistentes
valid_users = user_counts[user_counts >= 10].index.tolist()

np.random.seed(42)
num_groups = 1000
group_size = 4
groups = [np.random.choice(valid_users, group_size, replace=False) for _ in range(num_groups)]

print(f"Se crearon {len(groups)} grupos sintéticos de tamaño {group_size}.")
print("Ejemplo de un grupo:", groups[0])


# --- PASO 3: Agregar Predicciones para cada Grupo ---

print("\nAgregando predicciones para cada grupo...")
all_group_recs = []

for group_id, user_ids in enumerate(groups):
    group_predictions = df_eval[df_eval['userID'].isin(user_ids)]

    item_scores_per_group = group_predictions.groupby('itemID').agg(
        # Estrategias de agregación
        avg_score=('score', 'mean'),
        min_score=('score', 'min'),
        max_score=('score', 'max'),

        # Ground Truth del grupo: Un ítem es relevante si AL MENOS UN miembro le dio un rating positivo.
        group_label=('label', lambda x: 1 if x.sum() > 0 else 0)
    ).reset_index()

    item_scores_per_group['group_id'] = group_id
    all_group_recs.append(item_scores_per_group)

df_group_eval = pd.concat(all_group_recs, ignore_index=True)
print("Agregación completada.")
print(df_group_eval.head())


# --- PASO 4: Funciones de Métrica y Evaluación Final ---

def precision_recall_at_k(group, k):
    """Calcula Precision@K y Recall@K para un solo grupo."""
    group = group.sort_values('score', ascending=False)
    topk = group.head(k)
    hits = topk['label'].sum()
    total_relevant = group['label'].sum()
    precision = hits / k if k > 0 else 0
    recall = hits / total_relevant if total_relevant > 0 else 0
    return precision, recall

def ndcg_at_k(group, k):
    """Calcula nDCG@K para un solo grupo."""
    if group['label'].sum() == 0:
        return 0.0
    ranked_group = group.sort_values('score', ascending=False).head(k)
    if len(ranked_group) < 2:
        return 0.0
    true_relevance = np.asarray([ranked_group['label'].values])
    predicted_scores = np.asarray([ranked_group['score'].values])
    return ndcg_score(true_relevance, predicted_scores)

strategies = {
    'Average': 'avg_score',
    'Least Misery': 'min_score',
    'Most Pleasure': 'max_score'
}

group_results = []
K_values = [10]
for strategy_name, score_column in strategies.items():
    print(f"\nEvaluando estrategia: {strategy_name}...")
    df_strategy_eval = df_group_eval[['group_id', 'itemID', 'group_label']].copy()
    df_strategy_eval.rename(columns={'group_label': 'label'}, inplace=True)
    df_strategy_eval['score'] = df_group_eval[score_column]

    grouped_strategy = df_strategy_eval.groupby('group_id')

    for k in K_values:
        metrics = grouped_strategy.apply(lambda x: precision_recall_at_k(x, k))
        avg_precision = np.mean([m[0] for m in metrics])
        avg_recall = np.mean([m[1] for m in metrics])
        ndcg_scores = grouped_strategy.apply(lambda x: ndcg_at_k(x, k))
        avg_ndcg = np.mean(ndcg_scores)
        group_results.append({
            'Strategy': strategy_name,
            'K': k,
            'Precision@K': avg_precision,
            'Recall@K': avg_recall,
            'nDCG@K': avg_ndcg
        })

group_results_df = pd.DataFrame(group_results)

print("\n--- Resultados de Evaluación Grupal para SVD++ ---")
print(group_results_df)

Convirtiendo predicciones de Surprise a DataFrame...
DataFrame de evaluación creado con éxito.
      userID  itemID  label     score
0   beastvol    3076      1  7.562782
1    mycroft    3284      0  7.679667
2    mycroft    5336      1  7.487979
3   -=Yod@=-  264295      1  7.056427
4   -=Yod@=-  167791      1  7.101579

Creando grupos sintéticos...
Se crearon 1000 grupos sintéticos de tamaño 4.
Ejemplo de un grupo: ['dojhar' 'Xellir' 'Novastinger' 'jasshill']

Agregando predicciones para cada grupo...
Agregación completada.
   itemID  avg_score  min_score  max_score  group_label  group_id
0       5   7.111313   7.111313   7.111313            1         0
1     112   7.192929   7.192929   7.192929            1         0
2     199   6.101544   6.101544   6.101544            0         0
3     220   6.692581   6.692581   6.692581            0         0
4     256   6.280980   6.280980   6.280980            0         0

Evaluando estrategia: Average...


  metrics = grouped_strategy.apply(lambda x: precision_recall_at_k(x, k))
  ndcg_scores = grouped_strategy.apply(lambda x: ndcg_at_k(x, k))



Evaluando estrategia: Least Misery...


  metrics = grouped_strategy.apply(lambda x: precision_recall_at_k(x, k))
  ndcg_scores = grouped_strategy.apply(lambda x: ndcg_at_k(x, k))



Evaluando estrategia: Most Pleasure...


  metrics = grouped_strategy.apply(lambda x: precision_recall_at_k(x, k))



--- Resultados de Evaluación Grupal para SVD++ ---
        Strategy   K  Precision@K  Recall@K    nDCG@K
0        Average  10       0.9022  0.143456  0.965680
1   Least Misery  10       0.9007  0.143193  0.965407
2  Most Pleasure  10       0.9090  0.144599  0.967759


  ndcg_scores = grouped_strategy.apply(lambda x: ndcg_at_k(x, k))


Individuales:

In [13]:
from sklearn.metrics import ndcg_score
import numpy as np

preds_df = pd.DataFrame(predictions, columns=['userID', 'itemID', 'rating', 'score', 'details'])
preds_df['label'] = (preds_df['rating'] >= 7).astype(int)
df_eval = preds_df[['userID', 'itemID', 'label', 'score']]

# --- Funciones de métrica (las mismas de tu sección grupal) ---
def precision_recall_at_k(group, k):
    group = group.sort_values('score', ascending=False)
    topk = group.head(k)
    hits = topk['label'].sum()
    total_relevant = group['label'].sum()
    precision = hits / k if k > 0 else 0
    recall = hits / total_relevant if total_relevant > 0 else 0
    return precision, recall

def ndcg_at_k(group, k):
    if group['label'].sum() == 0: return 0.0
    ranked_group = group.sort_values('score', ascending=False).head(k)
    # nDCG requiere al menos 2 ítems para ser calculado
    if len(ranked_group) < 2: return 0.0
    true_relevance = np.asarray([ranked_group['label'].values])
    predicted_scores = np.asarray([ranked_group['score'].values])
    return ndcg_score(true_relevance, predicted_scores)

# --- Evaluación ---
K_values = [10]
individual_results = []
print("Calculando métricas individuales para SVD++...")

grouped_users = df_eval.groupby('userID')

for k in K_values:
    # Métricas de precisión y ranking
    metrics = grouped_users.apply(lambda x: precision_recall_at_k(x, k))
    avg_precision = np.mean([m[0] for m in metrics])
    avg_recall = np.mean([m[1] for m in metrics])
    avg_ndcg = grouped_users.apply(lambda x: ndcg_at_k(x, k)).mean()

    # Nuevas métricas de Novedad y Diversidad
    avg_novelty = grouped_users.apply(lambda x: novelty_at_k(x, k, item_popularity_prob)).mean()
    avg_diversity = grouped_users.apply(lambda x: diversity_at_k(x, k, df_mechanics)).mean()

    individual_results.append({
        'K': k,
        'Precision@K': avg_precision,
        'Recall@K': avg_recall,
        'nDCG@K': avg_ndcg,
        'Novelty@K': avg_novelty,
        'Diversity@K': avg_diversity
    })

individual_results_df = pd.DataFrame(individual_results)
print("\n--- Resultados de Evaluación Individual para SVD++ ---")
print(individual_results_df)

Calculando métricas individuales para SVD++...


  metrics = grouped_users.apply(lambda x: precision_recall_at_k(x, k))
  avg_ndcg = grouped_users.apply(lambda x: ndcg_at_k(x, k)).mean()
  avg_novelty = grouped_users.apply(lambda x: novelty_at_k(x, k, item_popularity_prob)).mean()



--- Resultados de Evaluación Individual para SVD++ ---
    K  Precision@K  Recall@K    nDCG@K  Novelty@K  Diversity@K
0  10     0.468084  0.835879  0.829852  10.715596      0.74006


  avg_diversity = grouped_users.apply(lambda x: diversity_at_k(x, k, df_mechanics)).mean()


Grupales

In [14]:
import numpy as np
from sklearn.metrics import ndcg_score

# --- PASO 1: Crear DataFrame de Evaluación ---
print("Convirtiendo predicciones de Surprise a DataFrame...")
preds_df = pd.DataFrame(predictions, columns=['userID', 'itemID', 'rating', 'score', 'details'])
preds_df['label'] = (preds_df['rating'] >= 7).astype(int)
df_eval = preds_df[['userID', 'itemID', 'label', 'score']]
print("DataFrame de evaluación creado con éxito.")

# --- PASO 2: Crear Grupos Sintéticos ---
print("\nCreando grupos sintéticos...")
user_counts = df_eval['userID'].value_counts()
valid_users = user_counts[user_counts >= 10].index.tolist()
np.random.seed(42)
num_groups = 1000
group_size = 4
groups = [np.random.choice(valid_users, group_size, replace=False) for _ in range(num_groups)]
print(f"Se crearon {len(groups)} grupos sintéticos de tamaño {group_size}.")

# --- PASO 3: Agregar Predicciones para cada Grupo ---
print("\nAgregando predicciones para cada grupo...")
all_group_recs = []
for group_id, user_ids in enumerate(groups):
    group_predictions = df_eval[df_eval['userID'].isin(user_ids)]
    item_scores_per_group = group_predictions.groupby('itemID').agg(
        avg_score=('score', 'mean'),
        min_score=('score', 'min'),
        max_score=('score', 'max'),
        group_label=('label', lambda x: 1 if x.sum() > 0 else 0)
    ).reset_index()
    item_scores_per_group['group_id'] = group_id
    all_group_recs.append(item_scores_per_group)

df_group_eval = pd.concat(all_group_recs, ignore_index=True)
print("Agregación completada.")

# --- PASO 4: Evaluación de Estrategias con Todas las Métricas ---
strategies = {
    'Average': 'avg_score',
    'Least Misery': 'min_score',
    'Most Pleasure': 'max_score'
}

group_results = []
K_values = [10]

for strategy_name, score_column in strategies.items():
    print(f"\nEvaluando estrategia: {strategy_name}...")
    df_strategy_eval = df_group_eval[['group_id', 'itemID', 'group_label']].copy()
    df_strategy_eval.rename(columns={'group_label': 'label'}, inplace=True)
    df_strategy_eval['score'] = df_group_eval[score_column]

    grouped_strategy = df_strategy_eval.groupby('group_id')

    for k in K_values:
        # Métricas existentes
        metrics = grouped_strategy.apply(lambda x: precision_recall_at_k(x, k))
        avg_precision = np.mean([m[0] for m in metrics])
        avg_recall = np.mean([m[1] for m in metrics])
        avg_ndcg = grouped_strategy.apply(lambda x: ndcg_at_k(x, k)).mean()

        # Nuevas métricas
        avg_novelty = grouped_strategy.apply(lambda x: novelty_at_k(x, k, item_popularity_prob)).mean()
        avg_diversity = grouped_strategy.apply(lambda x: diversity_at_k(x, k, df_mechanics)).mean()

        group_results.append({
            'Strategy': strategy_name,
            'K': k,
            'Precision@K': avg_precision,
            'Recall@K': avg_recall,
            'nDCG@K': avg_ndcg,
            'Novelty@K': avg_novelty,
            'Diversity@K': avg_diversity
        })

group_results_df = pd.DataFrame(group_results)

print("\n--- Resultados de Evaluación Grupal para SVD++ ---")
print(group_results_df)

Convirtiendo predicciones de Surprise a DataFrame...
DataFrame de evaluación creado con éxito.

Creando grupos sintéticos...
Se crearon 1000 grupos sintéticos de tamaño 4.

Agregando predicciones para cada grupo...
Agregación completada.

Evaluando estrategia: Average...


  metrics = grouped_strategy.apply(lambda x: precision_recall_at_k(x, k))
  avg_ndcg = grouped_strategy.apply(lambda x: ndcg_at_k(x, k)).mean()
  avg_novelty = grouped_strategy.apply(lambda x: novelty_at_k(x, k, item_popularity_prob)).mean()
  avg_diversity = grouped_strategy.apply(lambda x: diversity_at_k(x, k, df_mechanics)).mean()



Evaluando estrategia: Least Misery...


  metrics = grouped_strategy.apply(lambda x: precision_recall_at_k(x, k))
  avg_ndcg = grouped_strategy.apply(lambda x: ndcg_at_k(x, k)).mean()
  avg_novelty = grouped_strategy.apply(lambda x: novelty_at_k(x, k, item_popularity_prob)).mean()
  avg_diversity = grouped_strategy.apply(lambda x: diversity_at_k(x, k, df_mechanics)).mean()



Evaluando estrategia: Most Pleasure...


  metrics = grouped_strategy.apply(lambda x: precision_recall_at_k(x, k))
  avg_ndcg = grouped_strategy.apply(lambda x: ndcg_at_k(x, k)).mean()
  avg_novelty = grouped_strategy.apply(lambda x: novelty_at_k(x, k, item_popularity_prob)).mean()



--- Resultados de Evaluación Grupal para SVD++ ---
        Strategy   K  Precision@K  Recall@K    nDCG@K  Novelty@K  Diversity@K
0        Average  10       0.9022  0.143456  0.965680  10.466366     0.839938
1   Least Misery  10       0.9007  0.143193  0.965407  10.487870     0.840269
2  Most Pleasure  10       0.9090  0.144599  0.967759  10.385792     0.839156


  avg_diversity = grouped_strategy.apply(lambda x: diversity_at_k(x, k, df_mechanics)).mean()
