In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [44]:
df_Abyss = pd.read_csv('../data/team_stats/teams_Abyss.csv')
df_Ascent = pd.read_csv('../data/team_stats/teams_Ascent.csv')
df_Bind = pd.read_csv('../data/team_stats/teams_Bind.csv')
df_Breeze = pd.read_csv('../data/team_stats/teams_Breeze.csv')
df_Haven = pd.read_csv('../data/team_stats/teams_Haven.csv')
df_Icebox = pd.read_csv('../data/team_stats/teams_Icebox.csv')
df_Lotus = pd.read_csv('../data/team_stats/teams_Lotus.csv')
df_Split = pd.read_csv('../data/team_stats/teams_Split.csv')
df_Sunset = pd.read_csv('../data/team_stats/teams_Sunset.csv')

data = pd.concat([df_Abyss, df_Ascent, df_Bind, df_Breeze, df_Haven, df_Icebox, df_Lotus, df_Split, df_Sunset])

In [45]:
team_code = data['Team'].astype('category').cat.codes
map_code = data['Map'].astype('category').cat.codes

data.insert(1, 'team_code', team_code)
data.insert(3, 'map_code', map_code)
data.head(3)

Unnamed: 0,Team,team_code,Map,map_code,times_played,maps_won,maps_lost,map_win_rate,rounds_won,rounds_lost,avg_round_win_per_game,avg_round_lost_per_game,map_pick_rate,map_ban_rate
0,FNATIC,3,Abyss,0,0,0,0,0.0,0,0,0.0,0.0,0.0,4.65
1,Bilibili Gaming,0,Abyss,0,0,0,0,0.0,0,0,0.0,0.0,0.0,2.94
2,DRX,1,Abyss,0,2,1,1,50.0,20,17,10.0,8.5,9.52,0.0


In [46]:
# data[data['Map'] == 'Bind']

In [47]:
fnc_bind = data[(data['Team'] == 'FNATIC') & (data['Map'] == 'Bind')]

print(f'Las estadísticas de FNATIC en Bind son: \n' + 
    f'Veces jugado: {fnc_bind["times_played"][0]}\n' + 
    f'Victorias: {fnc_bind["maps_won"][0]}\n' +
    f'Derrotas: {fnc_bind["maps_lost"][0]}\n' +
    f'Porcentaje de victorias: {fnc_bind["map_win_rate"][0]}%\n' +
    f'Rondas Ganadas: {fnc_bind["rounds_won"][0]}\n' +
    f'Rondas Perdidas: {fnc_bind["rounds_lost"][0]}\n' +
    f'Promedio de rondas ganadas por partido: {fnc_bind["avg_round_win_per_game"][0]}\n' + 
    f'Promedio de rondas perdidas por partido: {fnc_bind["avg_round_lost_per_game"][0]}\n' +
    f'Porcentaje de selección de mapa: {fnc_bind["map_pick_rate"][0]}%\n' +
    f'Porcentaje de baneo de mapa: {fnc_bind["map_ban_rate"][0]}%\n')


Las estadísticas de FNATIC en Bind son: 
Veces jugado: 9
Victorias: 5
Derrotas: 4
Porcentaje de victorias: 55.56%
Rondas Ganadas: 102
Rondas Perdidas: 88
Promedio de rondas ganadas por partido: 11.33
Promedio de rondas perdidas por partido: 9.78
Porcentaje de selección de mapa: 15.38%
Porcentaje de baneo de mapa: 11.63%



In [48]:
th_bind = data[(data['Team'] == 'Team Heretics') & (data['Map'] == 'Bind')]
th_bind

print(f'Las estadísticas de Heretics en Bind son: \n' + 
    f'Veces jugado: {th_bind["times_played"]}\n' + 
    f'Victorias: {th_bind["maps_won"]}\n' +
    f'Derrotas: {th_bind["maps_lost"]}\n' +
    f'Porcentaje de victorias: {th_bind["map_win_rate"]}%\n' +
    f'Rondas Ganadas: {th_bind["rounds_won"]}\n' +
    f'Rondas Perdidas: {th_bind["rounds_lost"]}\n' +
    f'Promedio de rondas ganadas por partido: {th_bind["avg_round_win_per_game"]}\n' + 
    f'Promedio de rondas perdidas por partido: {th_bind["avg_round_lost_per_game"]}\n' +
    f'Porcentaje de selección de mapa: {th_bind["map_pick_rate"]}%\n' +
    f'Porcentaje de baneo de mapa: {th_bind["map_ban_rate"]}%\n')

Las estadísticas de Heretics en Bind son: 
Veces jugado: 7    13
Name: times_played, dtype: int64
Victorias: 7    10
Name: maps_won, dtype: int64
Derrotas: 7    3
Name: maps_lost, dtype: int64
Porcentaje de victorias: 7    76.92
Name: map_win_rate, dtype: float64%
Rondas Ganadas: 7    163
Name: rounds_won, dtype: int64
Rondas Perdidas: 7    124
Name: rounds_lost, dtype: int64
Promedio de rondas ganadas por partido: 7    12.54
Name: avg_round_win_per_game, dtype: float64
Promedio de rondas perdidas por partido: 7    9.54
Name: avg_round_lost_per_game, dtype: float64
Porcentaje de selección de mapa: 7    5.41
Name: map_pick_rate, dtype: float64%
Porcentaje de baneo de mapa: 7    8.47
Name: map_ban_rate, dtype: float64%



In [49]:
# Preprocesamiento de datos
data['win'] = (data['maps_won'] > data['maps_lost']).astype(int)

# Seleccionar características relevantes
features = ['times_played', 'map_win_rate', 'rounds_won', 'rounds_lost', 
            'avg_round_win_per_game', 'avg_round_lost_per_game', 
            'map_pick_rate', 'map_ban_rate']

X = data[features]
y = data['win']

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalizar las características
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [50]:
# Definir el modelo
model = LogisticRegression()

# Definir el rango de hiperparámetros para ajustar
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],  # Inverso de la regularización
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],  # Algoritmos de optimización
    'max_iter': [100, 200, 300, 400, 500]  # Número máximo de iteraciones
}

In [51]:
# Configurar GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')

# Ejecutar GridSearchCV
grid_search.fit(X_train_scaled, y_train)



In [52]:
# Ver los mejores hiperparámetros encontrados por GridSearchCV
print("Best Hyperparameters:", grid_search.best_params_)

# Evaluar el modelo en el conjunto de prueba
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Test Set Accuracy: {accuracy:.2f}")

Best Hyperparameters: {'C': 1000, 'max_iter': 100, 'solver': 'newton-cg'}
Test Set Accuracy: 100.00


In [53]:
# Ejemplo de uso
team_name = 'Team Heretics'  # Nombre del equipo
map_name = 'Bind'  # Nombre del mapa

In [54]:
# Función para predecir la probabilidad de victoria de un equipo en un mapa
def predict_win_probability(team_name, map_name, df, model, scaler):
    team_stats = df[(df['Team'] == team_name) & (df['Map'] == map_name)]
    if team_stats.empty:
        return None
    team_stats = team_stats[features].values[0]
    team_stats_scaled = scaler.transform([team_stats])
    win_probability = model.predict_proba(team_stats_scaled)[:, 1][0]
    win_probability = win_probability * 100
    return win_probability


win_probability = predict_win_probability(team_name, map_name, data, best_model, scaler)
if win_probability is not None:
    print(f"Probability of {team_name} winning on {map_name}: {win_probability:.2f}%")
else:
    print(f"No data available for {team_name} on {map_name}")


Probability of Team Heretics winning on Bind: 100.00%




In [55]:
accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Test Set Accuracy: {accuracy:.2f} %")

Test Set Accuracy: 100.00 %


In [56]:
# import matplotlib.pyplot as plt
# import numpy as np
# import seaborn as sns

# # Obtener las predicciones del modelo en el conjunto de prueba
# y_pred_prob = best_model.predict_proba(X_test_scaled)[:, 1]

# # Crear un DataFrame para facilitar la visualización
# results_df = pd.DataFrame({'Real': y_test, 'Predicted': y_pred_prob})

# # Crear el gráfico de barras agrupadas
# plt.figure(figsize=(14, 7))
# bar_width = 0.35
# index = np.arange(len(y_test))

# plt.bar(index, y_test, bar_width, label='Real')
# plt.bar(index + bar_width, y_pred_prob, bar_width, label='Predicted')

# plt.xlabel('Samples')
# plt.ylabel('Values')
# plt.title('Comparison of Real vs Predicted Values')
# plt.legend()
# plt.show()