In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the match records
df = pd.read_csv("sample_data/games.csv")

# 1. Game-length analysis
# gameDuration is divided by 60; the labels and printouts below present the result as minutes
game_durations = df["gameDuration"] / 60
min_duration = game_durations.min()
max_duration = game_durations.max()
median_duration = game_durations.median()
mean_duration = game_durations.mean()

# Histogram (30 bins plus a KDE curve) with dashed vertical markers at the mean and the median
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(game_durations, bins=30, kde=True, color="blue", ax=ax)
ax.axvline(mean_duration, color='r', linestyle='dashed', linewidth=2, label=f"Mean: {mean_duration:.2f} min")
ax.axvline(median_duration, color='g', linestyle='dashed', linewidth=2, label=f"Median: {median_duration:.2f} min")
ax.set(
    xlabel="Game Duration (minutes)",
    ylabel="Count",
    title="Game Duration Distribution",
)
ax.legend()
plt.show()

# Print the summary statistics, one per line
print(
    f"Mean Duration: {mean_duration:.2f} min",
    f"Median Duration: {median_duration:.2f} min",
    f"Max Duration: {max_duration:.2f} min",
    f"Min Duration: {min_duration:.2f} min",
    sep="\n",
)


: 

In [None]:
# Overall win rate: percentage of games whose `winner` value equals 1 and 2 respectively
win_rate_team1 = (df["winner"] == 1).mean() * 100
win_rate_team2 = (df["winner"] == 2).mean() * 100

print(f"Team blues Win Rate: {win_rate_team1:.2f}%")
print(f"Team red Win Rate: {win_rate_team2:.2f}%")

# Bucket games by length (minutes) and look at the win split inside each bucket.
# The last bin edge is open-ended so that the "50+" bucket really contains every
# game longer than 50 minutes; with a finite upper edge, longer games would fall
# outside all bins, become NaN and silently disappear from the groupby below.
df["gameDuration_min"] = df["gameDuration"] / 60
df["duration_group"] = pd.cut(
    df["gameDuration_min"],
    bins=[0, 20, 30, 40, 50, float("inf")],
    labels=["0-20", "20-30", "30-40", "40-50", "50+"],
)

# Per bucket: share (in percent) of each `winner` value, one column per value.
# observed=False is spelled out so empty buckets stay on the x-axis regardless
# of the pandas version's default for categorical groupers.
win_rate_by_duration = (
    df.groupby("duration_group", observed=False)["winner"]
    .value_counts(normalize=True)
    .unstack()
    * 100
)

# Stacked bar chart of the win split per duration bucket
win_rate_by_duration.plot(kind="bar", stacked=True, figsize=(10, 5), colormap="coolwarm")
plt.xlabel("Game Duration (minutes)")
plt.ylabel("Win Rate (%)")
plt.title("Win Rate by Game Duration")
# Legend entries are assigned by column position, so this relies on the unstacked
# columns being exactly the winner values 1 and 2, in that order.
plt.legend(["Team 1", "Team 2"])
plt.show()


In [None]:
# Effect of taking each "first" objective on the match outcome.
# For every objective, the win rate is computed only over games in which team 1
# or team 2 actually took it. Dividing by all games would count every game where
# nobody took the objective as a loss and understate the rate the chart title
# promises ("Win Rate When Taking First Objective").
objectives = ["firstBlood", "firstTower", "firstInhibitor", "firstBaron", "firstDragon"]
win_rates = []

for obj in objectives:
    claimed = df[obj].isin([1, 2])
    if claimed.any():
        # Share of those games where the team that took the objective is also the winner
        win_rate = (df.loc[claimed, obj] == df.loc[claimed, "winner"]).mean() * 100
    else:
        # No game recorded this objective for either team: leave the rate undefined
        win_rate = float("nan")
    win_rates.append(win_rate)

# Collect the results in a DataFrame for plotting
win_rate_df = pd.DataFrame({"Objective": objectives, "Win Rate (%)": win_rates})

# Bar chart of the win rate per objective
plt.figure(figsize=(10, 5))
plt.bar(win_rate_df["Objective"], win_rate_df["Win Rate (%)"], color="skyblue")
plt.xlabel("First Objective Taken")
plt.ylabel("Win Rate (%)")
plt.title("Win Rate When Taking First Objective")
plt.ylim(40, 100)  # y-axis starts at 40%, so bar heights are not proportional to the rates
plt.grid(axis="y", linestyle="--", alpha=0.7)

# Value labels just above each bar (skipped when the rate is undefined)
for i, v in enumerate(win_rates):
    if pd.notna(v):
        plt.text(i, v + 1, f"{v:.2f}%", ha="center", fontsize=10)

plt.show()

In [None]:
from collections import Counter

# Tally how often each champion ID was picked, pooling the five pick columns of both teams
champ_columns = [f"t{team}_champ{slot}id" for team in (1, 2) for slot in range(1, 6)]
all_champs = df[champ_columns].to_numpy().ravel()
champ_counts = Counter(all_champs)

# Tally how often each champion ID was banned, pooling the five ban columns of both teams
ban_columns = [f"t{team}_ban{slot}" for team in (1, 2) for slot in range(1, 6)]
all_bans = df[ban_columns].to_numpy().ravel()
ban_counts = Counter(all_bans)

In [None]:
# Partial lookup table: champion ID -> champion name.
# Only the IDs listed here are covered; any other ID has no entry.
# Names follow the numeric champion keys published in Riot's Data Dragon
# (each ID maps to exactly one champion, and no name appears twice).
# NOTE(review): confirm that the dataset's champion IDs use that numbering.
champions_data = {
    412: "Thresh",
    18: "Tristana",
    67: "Vayne",
    141: "Kayn",
    64: "Lee Sin",
    29: "Twitch",
    40: "Janna",
    236: "Lucian",
    202: "Jhin",
    222: "Jinx",
    157: "Yasuo",
    238: "Zed",
    31: "Cho'Gath",
    81: "Ezreal",
    122: "Darius",
    119: "Draven",
    53: "Blitzcrank",
    154: "Zac",
}


# Re-tally picks and bans so the printout below always reflects the current `df`
# (this cell still needs `df`, `Counter` and `champions_data` to be defined already).

# Count how often each champion ID was picked across all ten pick columns
champ_columns = [f"t1_champ{i}id" for i in range(1, 6)] + [f"t2_champ{i}id" for i in range(1, 6)]
all_champs = df[champ_columns].values.flatten()
champ_counts = Counter(all_champs)

# Count how often each champion ID was banned across all ten ban columns
# NOTE(review): if the ban columns use a placeholder value for "no ban", it is
# counted here like any champion ID - confirm against the dataset.
ban_columns = [f"t1_ban{i}" for i in range(1, 6)] + [f"t2_ban{i}" for i in range(1, 6)]
all_bans = df[ban_columns].values.flatten()
ban_counts = Counter(all_bans)

# Ten most-picked champions
print("Most Picked Champions:")
for champ_id, count in champ_counts.most_common(10):
    # Resolve the name through the lookup table; IDs missing from the table keep
    # their numeric ID in the label so different unmapped champions stay distinguishable
    champ_name = champions_data.get(champ_id, f"Unknown Champion (ID {champ_id})")
    print(f"{champ_name}: Picked {count} times")

print("---------------------------------------")

# Ten most-banned champions
print("Most Banned Champions:")
for champ_id, count in ban_counts.most_common(10):
    # Same lookup and fallback as for picks
    champ_name = champions_data.get(champ_id, f"Unknown Champion (ID {champ_id})")
    print(f"{champ_name}: Banned {count} times")


In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Reload the raw data; this rebinds `df`, discarding any columns added to it earlier
df = pd.read_csv('sample_data/games.csv')

# Turn every first* column into two 0/1 indicator columns:
# `<col>_t1` is 1 where the value equals 1, `<col>_t2` is 1 where it equals 2.
first_columns = ['firstBlood', 'firstTower', 'firstInhibitor', 'firstBaron', 'firstDragon', 'firstRiftHerald']
for col in first_columns:
    for team in (1, 2):
        df[f'{col}_t{team}'] = df[col].eq(team).astype(int)

# The original first* columns are no longer needed once the indicators exist
df = df.drop(columns=first_columns)

def _frequency_encode(frame, columns):
    """Overwrite `columns` of `frame` with the pooled relative frequency of each value.

    All values from `columns` are pooled into one Series, the share of each
    distinct value in that pool is computed, and every cell in `columns` is
    replaced by the share of the value it held. Cells that cannot be looked up
    (e.g. missing values, which `stack()` drops from the pool by default) become 0.

    Parameters
    ----------
    frame : pandas.DataFrame
        Modified in place.
    columns : list of str
        Columns to pool and encode.

    Returns
    -------
    tuple
        `(pooled, freq)`: the pooled Series and the value -> frequency dict.
    """
    pooled = frame[columns].stack().reset_index(drop=True)
    freq = pooled.value_counts(normalize=True).to_dict()
    for col in columns:
        frame[col] = frame[col].map(freq).fillna(0)
    return pooled, freq


# Frequency-encode the champion ID columns
champ_columns = [f't{i}_champ{j}id' for i in [1,2] for j in range(1,6)]
all_champs, champ_freq = _frequency_encode(df, champ_columns)

# Frequency-encode the summoner spell columns
summoner_columns = [f't{i}_champ{j}_sum{k}' for i in [1,2] for j in range(1,6) for k in [1,2]]
all_sums, sum_freq = _frequency_encode(df, summoner_columns)

# Frequency-encode the ban columns
ban_columns = [f't{i}_ban{j}' for i in [1,2] for j in range(1,6)]
all_bans, ban_freq = _frequency_encode(df, ban_columns)

# Standardize the listed numeric columns (fit and transform on the full frame, written back in place)
numeric_columns = [
    'gameDuration', 't1_towerKills', 't1_inhibitorKills', 't1_baronKills',
    't1_dragonKills', 't1_riftHeraldKills', 't2_towerKills',
    't2_inhibitorKills', 't2_baronKills', 't2_dragonKills', 't2_riftHeraldKills'
]
scaler = StandardScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Build the feature matrix: every remaining column except identifiers/metadata and the label
X = df.drop(columns=['gameId', 'creationTime', 'seasonId', 'winner'])
y = df['winner']  # used only to colour the scatter plot

# NOTE(review): only `numeric_columns` are standardized; every other column in X
# keeps its own scale, and PCA is scale-sensitive. Confirm whether all of X
# should be standardized before the projection.

# Project onto the first two principal components.
# random_state is fixed so the projection is reproducible across runs when the
# solver chosen by scikit-learn is a randomized one.
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X)

# Scatter plot of the two principal components, coloured by the `winner` value of each game
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, alpha=0.6, cmap='viridis')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
fig.colorbar(scatter, ax=ax, label='Winner (1: Team 1, 2: Team 2)')
ax.set_title('PCA Visualization of League of Legends Games')
plt.show()