# Practica 4: Agrupamiento de jugadores en videojuegos

Nombres: Ignacio Tejero, Miguel Angle Lobo, Diego Viñals

In [27]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import pairwise_distances_argmin_min
import matplotlib.pyplot as plt
from sklearn.metrics import pairwise_distances

import warnings

# Desactivar los warnings
warnings.filterwarnings("ignore")

df = pd.read_csv('./Dataset_lab04/PUBG.csv')
df

Unnamed: 0,player_name,tracker_id,solo_KillDeathRatio,solo_WinRatio,solo_TimeSurvived,solo_RoundsPlayed,solo_Wins,solo_WinTop10Ratio,solo_Top10s,solo_Top10Ratio,...,squad_RideDistance,squad_MoveDistance,squad_AvgWalkDistance,squad_AvgRideDistance,squad_LongestKill,squad_Heals,squad_Revives,squad_Boosts,squad_DamageDealt,squad_DBNOs
0,BreakNeck,4405,3.14,17.65,18469.14,17,3,0.83,4,23.5,...,3751590.99,5194786.58,2626.97,4372.64,536.98,2186,234,1884,242132.73,1448
1,Blackwalk,8199,4.41,18.18,33014.86,33,6,0.36,11,33.3,...,4295917.30,6051783.67,2422.48,6009.73,734.85,2041,276,2340,269795.75,1724
2,mercedes_benz,4454,3.60,0.00,4330.44,5,0,0.00,1,20.0,...,3935265.63,5589608.74,1871.89,3011.87,725.44,1766,210,2193,292977.07,1897
3,DORA,7729,14.00,50.00,13421.82,8,4,0.67,6,75.0,...,2738998.00,3796916.00,2154.62,5578.41,587.28,1214,142,1252,181106.90,1057
4,n2tstar,0,10.50,33.33,9841.04,6,2,0.40,5,83.3,...,2347295.00,3220260.00,2098.47,5642.54,546.10,1245,120,923,160029.80,1077
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,jungwook,11690,3.37,17.33,581263.40,427,74,0.38,195,45.7,...,585880.94,1010687.19,2015.37,2582.05,796.52,520,71,513,69526.76,456
95,Sitton,6042,6.00,27.27,15639.28,11,3,0.60,5,45.5,...,1370085.00,2075036.00,2049.28,3982.80,511.08,823,109,891,97551.31,573
96,PANDATV-TYF,22188,3.05,9.84,243659.10,193,19,0.25,77,39.9,...,1079497.60,1870113.40,1750.42,2107.95,560.26,998,166,741,135111.34,856
97,Great_Cornholio,2260,0.00,0.00,561.04,1,0,0.00,0,0.0,...,1280647.63,2002011.90,1848.11,1898.83,730.65,1088,74,573,119974.01,754


In [28]:

# Seleccionar columnas relevantes para el clustering
columns = ['solo_KillDeathRatio', 'solo_WinRatio', 'solo_TimeSurvived', 'solo_RoundsPlayed']

# Convertir columnas relevantes a tipo numérico
df[columns] = df[columns].apply(pd.to_numeric, errors='coerce')

# Eliminar filas con valores no numéricos
df = df.dropna(subset=columns)

# Preprocesamiento de datos
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df[columns])

# Función para calcular las distancias y asignar las etiquetas
def assign_labels(data, centroids):
    distances = pairwise_distances(data, centroids)
    labels = np.argmin(distances, axis=1)
    return labels

# Función para ejecutar el algoritmo de k-means con diferentes configuraciones
def run_kmeans(data, n_clusters, metric):
    kmeans = KMeans(n_clusters=n_clusters, init='random', random_state=42, algorithm='full')
    kmeans.fit(data)
    kmeans.labels_ = assign_labels(data, kmeans.cluster_centers_)
    kmeans.inertia_ = kmeans.inertia_
    return kmeans

# Configuraciones a probar
num_clusters = [2, 3, 4, 5]
metrics = ['euclidean', 'cosine']

# Tabla para almacenar los resultados
results = pd.DataFrame(columns=['Configuración', 'Iteraciones'])

# Ejecutar el algoritmo para diferentes configuraciones
for n_clusters in num_clusters:
    for metric in metrics:
        kmeans = run_kmeans(df_scaled, n_clusters, metric)
        results = results.append({'Configuración': f"{n_clusters} grupos, {metric} distancia",
                                  'Iteraciones': kmeans.n_iter_}, ignore_index=True)

# Mostrar los resultados
print(results)



                   Configuración Iteraciones
0  2 grupos, euclidean distancia           3
1     2 grupos, cosine distancia           3
2  3 grupos, euclidean distancia           5
3     3 grupos, cosine distancia           5
4  4 grupos, euclidean distancia           7
5     4 grupos, cosine distancia           7
6  5 grupos, euclidean distancia           6
7     5 grupos, cosine distancia           6


In [29]:
from sklearn.cluster import KMeans
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the data
datos = pd.read_csv('./Dataset_lab04/PUBG.csv')

# Preprocess the data
# Step 1: Remove irrelevant columns if needed


# Step 2: Encode categorical variables if needed
datos = pd.get_dummies(datos)  # Performs one-hot encoding on categorical columns

# Step 3: Handle missing values if needed
datos = datos.dropna()  # Drop rows with missing values
# or
datos = datos.fillna(datos.mean())  # Impute missing values with column means

# Scale the data
scaler = StandardScaler()
datos_scaled = scaler.fit_transform(datos)

# Number of clusters
num_grupos = 3

# Create the KMeans model
kmeans = KMeans(n_clusters=num_grupos)

# Fit the model to the scaled data
kmeans.fit(datos_scaled)

# Get the assigned cluster labels for each player
etiquetas = kmeans.labels_

# Get the cluster centers
centroides = kmeans.cluster_centers_

# Get the number of iterations required to converge
num_iteraciones = kmeans.n_iter_

# Print the results
for i in range(num_grupos):
    jugadores_grupo_i = datos[etiquetas == i]
    print(f"Grupo {i+1}:")
    print(jugadores_grupo_i)
    print("")

print(f"Número de iteraciones: {num_iteraciones}")


Grupo 1:
    tracker_id  solo_KillDeathRatio  solo_WinRatio  solo_TimeSurvived  \
10      142394                 3.12           6.03          106330.23   
11       16256                 1.20           1.19          232995.54   
12      142401                 4.46          10.34           91380.40   
13       46547                 2.60           5.56           71910.46   
19        6455                 2.49           4.44           45916.37   
20       11729                 4.25          12.73           62763.42   
21      196013                 2.54           8.89           43124.07   
23        8040                 3.58          14.29           18488.62   
26        9142                 2.12           5.19           87514.41   
28      212886                 2.00           5.36           47966.09   
31      108625                 2.79           7.69           28584.16   
35      162887                 3.55           5.56           41927.88   
37      196291                 1.83       