### objective清洗v1.0
按队伍和事件进行统计，并进行归一化处理

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the raw per-match objective table (one row per match).
file_path_objective = 'objective_table_train.csv'  # path to the raw CSV file
objective_data = pd.read_csv(file_path_objective)

# The eight objective event types that are counted per team.
event_types = [
    'CHAT_MESSAGE_BARRACKS_KILL',
    'CHAT_MESSAGE_FIRSTBLOOD',
    'CHAT_MESSAGE_DENIED_AEGIS',
    'CHAT_MESSAGE_TOWER_KILL',
    'CHAT_MESSAGE_AEGIS',
    'CHAT_MESSAGE_ROSHAN_KILL',
    'CHAT_MESSAGE_TOWER_DENY',
    'CHAT_MESSAGE_AEGIS_STOLEN'
]

# One counter column per (team, event type) pair.
radiant_cols = [f'radiant_{event}' for event in event_types]
dire_cols = [f'dire_{event}' for event in event_types]
result_cols = radiant_cols + dire_cols

# Highest objective slot probed: columns objective-1-* .. objective-43-*.
max_objectives = 43

# Per-match counters are collected in a plain list and turned into a DataFrame
# once at the end; concatenating inside the loop copies the whole result on
# every iteration and makes the cell quadratic in the number of matches.
match_stats = []
for index, row in objective_data.iterrows():
    # Every counter starts at 0 for the current match.
    stats = dict.fromkeys(result_cols, 0)

    for i in range(1, max_objectives + 1):
        type_col = f'objective-{i}-type'
        slot_col = f'objective-{i}-player_slot'
        team_col = f'objective-{i}-team'

        # Skip objective slots that do not exist in this table.
        if type_col not in row:
            continue

        event_type = row[type_col]
        # Empty slots (NaN) and event types outside the list are ignored.
        if event_type not in event_types:
            continue

        player_slot = row[slot_col] if slot_col in row else None
        team = row[team_col] if team_col in row else None

        if player_slot is not None and not pd.isna(player_slot):
            # Attribute by player slot: 0-4 is Radiant, 128-132 is Dire.
            if 0 <= player_slot <= 4:
                stats[f'radiant_{event_type}'] += 1
            elif 128 <= player_slot <= 132:
                stats[f'dire_{event_type}'] += 1
        elif team is not None and not pd.isna(team):
            # No player slot recorded: fall back to the team id
            # (2 is Radiant, 3 is Dire).
            if team == 2:
                stats[f'radiant_{event_type}'] += 1
            elif team == 3:
                stats[f'dire_{event_type}'] += 1

    match_stats.append(stats)

# Every dict carries all counter keys, so the frame has no missing values and
# the columns come out in result_cols order.
objective_result = pd.DataFrame(match_stats, columns=result_cols)
objective_result.to_csv('objective_statistics.csv', index=False)
print("事件统计结果已保存为 'objective_statistics.csv'")


In [None]:
# Reload the per-match objective counts written by the previous cell.
file_path = 'objective_statistics.csv'
objective_data = pd.read_csv(file_path)

# Min-max scale every int64/float64 column in place.
scaler = MinMaxScaler()
numerical_columns = objective_data.select_dtypes(include=['float64', 'int64']).columns
scaled_values = scaler.fit_transform(objective_data[numerical_columns])
objective_data[numerical_columns] = scaled_values

# Persist the normalized table and report where it went.
output_csv_path = 'objective_statistics_normalized.csv'
objective_data.to_csv(output_csv_path, index=False)
print(f"归一化后的数据已保存为: {output_csv_path}")

# Preview the first rows of the result.
print(objective_data.head())


### player清洗v2.0
删除obs、obs_log和sen_log  
计算obs_left_log和sen_left_log的长度

In [None]:
# Load the player table.
file_path_player = 'players_table_train_newest.csv'
player_table_data = pd.read_csv(file_path_player)

# Per-player ward/log columns handled by this cell.
log_columns = [
    'obs_left_log',
    'obs_log',
    'sen_left_log',
    'sen_log',
    'obs'
]


def _count_log_entries(raw_log):
    """Return the number of '{' characters in a log string, or 0 otherwise.

    Non-string values and the literal '[]' both count as zero entries.
    """
    if isinstance(raw_log, str) and raw_log != '[]':
        return raw_log.count('{')
    return 0


# Columns ending in one of these suffixes are dropped, unless they are one of
# the *_left_log columns, which are converted to entry counts instead.
dropped_suffixes = ('obs_log', 'sen_log', 'obs')

for player_num in range(1, 11):
    for log_name in log_columns:
        col = f'players-{player_num}-{log_name}'
        if col not in player_table_data.columns:
            continue

        if 'left_log' not in col and col.endswith(dropped_suffixes):
            # obs_log, sen_log and obs are removed outright.
            player_table_data.drop(columns=[col], inplace=True)
        elif 'log' in col:
            # obs_left_log and sen_left_log are replaced by their entry count.
            player_table_data[col] = player_table_data[col].apply(_count_log_entries)




清除空值，将xp相加  
非数字类型映射(randomed列的TRUE和FALSE进行布尔映射)

In [None]:
# Replace every missing value with 0.
player_table_data.fillna(0, inplace=True)

# Map boolean values onto 0/1.
player_table_data.replace({False: 0, True: 1}, inplace=True)

# Collapse each player's four xp_reasons columns into a single xp column that
# takes the position of the first xp_reasons column.
for player_num in range(1, 11):
    new_col_name = f'players-{player_num}-xp'
    reason_cols = [f'players-{player_num}-xp_reasons-{i}' for i in range(4)]

    # Leave the player untouched unless all four source columns are present.
    if any(col not in player_table_data.columns for col in reason_cols):
        continue

    # Row-wise total of the four xp_reasons columns.
    player_table_data[new_col_name] = player_table_data[reason_cols].sum(axis=1)

    # Remember where the first source column sat before it is dropped.
    first_col_index = player_table_data.columns.get_loc(reason_cols[0])
    player_table_data.drop(columns=reason_cols, inplace=True)

    # Rebuild the column order with the new column moved to that position.
    cols = [name for name in player_table_data.columns if name != new_col_name]
    cols = cols[:first_col_index] + [new_col_name] + cols[first_col_index:]
    player_table_data = player_table_data[cols]

观察数据中0的比例

In [None]:
# Columns whose share of zeros exceeds this threshold are reported.
threshold = 0.8

# Fraction of cells equal to 0 in each column.
zero_ratio = (player_table_data == 0).mean()

# Keep only the columns above the threshold, together with their ratio.
columns_with_high_zero_ratio_info = zero_ratio[zero_ratio > threshold]
columns_with_high_zero_ratio = columns_with_high_zero_ratio_info.index.tolist()

# Print how many columns qualify, then each column with its zero ratio.
print(f"值为 0 的比例超过 {threshold * 100}% 的列有 {len(columns_with_high_zero_ratio)} 列：")
print(columns_with_high_zero_ratio_info)


计算 KDA：KDA = (kills + assists) / deaths；若 deaths = 0，则直接取 kills + assists

In [None]:
# Add a KDA column for players 1..10: (kills + assists) / deaths, falling back
# to kills + assists when deaths is 0.
for i in range(1, 11):
    kills_col = f'players-{i}-kills'
    assists_col = f'players-{i}-assists'
    deaths_col = f'players-{i}-deaths'
    kda_col = f'players-{i}-KDA'

    required_cols = [kills_col, assists_col, deaths_col]
    if all(col in player_table_data.columns for col in required_cols):
        takedowns = player_table_data[kills_col] + player_table_data[assists_col]
        deaths = player_table_data[deaths_col]
        # Vectorized replacement for the row-wise apply: dividing by 1 where
        # deaths == 0 leaves kills + assists unchanged, every other row is
        # divided by its own death count.
        player_table_data[kda_col] = takedowns / deaths.where(deaths != 0, 1)
    else:
        print(f"缺少 {kills_col}, {assists_col}, 或 {deaths_col} 列，无法计算 KDA。")
# Preview the result.
print("计算后的玩家数据：")
print(player_table_data.head())

计算两队kills总和

In [None]:
# Players 1-5 are summed as Radiant, players 6-10 as Dire.
radiant_kills_cols = [f'players-{i}-kills' for i in range(1, 6)]
dire_kills_cols = [f'players-{i}-kills' for i in range(6, 11)]

# Collect the kills columns that are absent from the table, per team.
missing_radiant_cols = [col for col in radiant_kills_cols if col not in player_table_data.columns]
missing_dire_cols = [col for col in dire_kills_cols if col not in player_table_data.columns]

if missing_radiant_cols:
    print(f"缺失 Radiant 的特征列: {missing_radiant_cols}")
if missing_dire_cols:
    print(f"缺失 Dire 的特征列: {missing_dire_cols}")

# The team totals are only computed when no kills column is missing.
if not (missing_radiant_cols or missing_dire_cols):
    # Row-wise kill totals for each team.
    player_table_data['Radiant-kills'] = player_table_data[radiant_kills_cols].sum(axis=1)
    player_table_data['Dire-kills'] = player_table_data[dire_kills_cols].sum(axis=1)

    # Preview the two new columns.
    print("计算后的玩家数据：")
    print(player_table_data[['Radiant-kills', 'Dire-kills']].head())

检查空值

In [None]:
# Boolean mask marking every null cell of player_table_data.
missing_values = player_table_data.isnull()

# Stack the mask into a (row, column)-indexed Series and keep the True entries;
# their index labels are the locations of the null cells.
stacked_mask = missing_values.stack()
missing_locations = stacked_mask[stacked_mask].index.tolist()

# Report the total and the first 10 locations to keep the output short.
missing_count = len(missing_locations)
print(f"总共有 {missing_count} 个空值，具体位置如下：")
print(missing_locations[:10])




看一下异常值的比例

In [None]:
import matplotlib.pyplot as plt

# Group the per-player columns by feature name, i.e. the text after the last
# '-' of every column that starts with 'players-'.
features = set([col.split('-')[-1] for col in player_table_data.columns if col.startswith('players-')])
anomalies_info_all_players = {}

# Iterate in sorted order so the resulting table is the same on every run
# (iteration order of a set of strings is not stable between interpreter runs).
for feature in sorted(features):
    # All 'players-N-<feature>' columns that exist, pooled across players 1..10.
    feature_columns = [f'players-{player_num}-{feature}' for player_num in range(1, 11) if f'players-{player_num}-{feature}' in player_table_data.columns]

    # A suffix taken from a nested column name (e.g. 'players-1-a-b' -> 'b')
    # matches no 'players-N-<feature>' column; pd.concat raises on an empty
    # list, so such features are skipped.
    if not feature_columns:
        continue

    combined_data = pd.concat([player_table_data[col].dropna() for col in feature_columns])

    # Only int64/float64 data is analysed.
    if combined_data.dtype in ['float64', 'int64']:
        # Quartiles and inter-quartile range of the pooled values.
        Q1 = combined_data.quantile(0.25)
        Q3 = combined_data.quantile(0.75)
        IQR = Q3 - Q1

        # Values more than 1.5 * IQR outside the quartiles count as anomalies.
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        # Number and share of anomalous values.
        total_values = len(combined_data)
        anomalies = ((combined_data < lower_bound) | (combined_data > upper_bound)).sum()
        anomalies_ratio = anomalies / total_values

        anomalies_info_all_players[f'player_{feature}'] = {
            'total_values': total_values,
            'anomalies': anomalies,
            'anomalies_ratio': anomalies_ratio,
            'lower_bound': lower_bound,
            'upper_bound': upper_bound,
            'Q1': Q1,
            'Q3': Q3
        }

# One row per feature, one column per statistic.
anomalies_df_all_players = pd.DataFrame.from_dict(anomalies_info_all_players, orient='index')

# Show the first rows of the anomaly table.
print(anomalies_df_all_players.head())

# Bar chart of the anomaly ratio per feature, highest first.
anomalies_df_all_players['anomalies_ratio'].sort_values(ascending=False).plot(
    kind='bar', figsize=(14, 7), title='Anomalies Ratio for All Features Across All Players')
plt.xlabel('Features')
plt.ylabel('Anomalies Ratio')
plt.tight_layout()
plt.show()

creeps_stacked、sen_placed、towers_killed 的异常值比例较高，  
再看一下数据分布

In [None]:
import seaborn as sns
# Feature names are the text after the last '-' of every 'players-' column.
all_features = set(col.split('-')[-1] for col in player_table_data.columns if col.startswith('players-'))

# Resolve the existing 'players-N-<feature>' columns up front and keep only
# features that have at least one: pd.concat raises on an empty list, and
# filtering before the layout is computed avoids leaving empty subplots.
columns_by_feature = {}
for feature in sorted(all_features):
    feature_columns = [f'players-{player_num}-{feature}' for player_num in range(1, 11) if f'players-{player_num}-{feature}' in player_table_data.columns]
    if feature_columns:
        columns_by_feature[feature] = feature_columns

# One box plot per feature, laid out in 3 columns with as many rows as needed.
n_plots = len(columns_by_feature)
plt.figure(figsize=(20, n_plots * 2))
for i, (feature, feature_columns) in enumerate(columns_by_feature.items(), start=1):
    plt.subplot((n_plots + 2) // 3, 3, i)

    # Pool the feature's non-null values across all players.
    combined_feature_data = pd.concat([player_table_data[col].dropna() for col in feature_columns])

    sns.boxplot(x=combined_feature_data)
    plt.title(f'{feature} for All Players', fontsize=10)
    plt.xlabel(feature, fontsize=8)

plt.tight_layout()
plt.show()

尝试处理异常值过多的特征，由于基本都偏右，选择对数变换

In [None]:
import numpy as np
# Features whose per-player columns are replaced by log1p of their values.
features_to_log_transform = [
    'deaths', 'denies', 'kills',
    'max_health', 'max_hero_hit', 'max_mana',
    'rune_pickups', 'nearby_creep_death_count'
]


def _existing_player_columns(feature):
    """Return the 'players-N-<feature>' columns (N = 1..10) present in the table."""
    candidates = (f'players-{player_num}-{feature}' for player_num in range(1, 11))
    return [name for name in candidates if name in player_table_data.columns]


# Log-transform each column in place, overwriting the original values.
for feature in features_to_log_transform:
    for column_name in _existing_player_columns(feature):
        player_table_data[column_name] = np.log1p(player_table_data[column_name])

# 'deaths' and 'denies' are additionally clipped to the 1.5 * IQR fences,
# computed separately for every player column on the transformed values.
features_to_clip = ['deaths', 'denies']
for feature in features_to_clip:
    for column_name in _existing_player_columns(feature):
        column_values = player_table_data[column_name]
        Q1 = column_values.quantile(0.25)
        Q3 = column_values.quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        player_table_data[column_name] = column_values.clip(lower=lower_bound, upper=upper_bound)

# Box plots of the processed features, 3 per row.
n_transformed = len(features_to_log_transform)
plt.figure(figsize=(20, n_transformed * 2))
for i, feature in enumerate(features_to_log_transform, start=1):
    plt.subplot((n_transformed + 2) // 3, 3, i)

    # Pool the processed values of all players; ignore_index avoids
    # duplicate index labels in the concatenated Series.
    feature_columns = _existing_player_columns(feature)
    combined_feature_data = pd.concat([player_table_data[col].dropna() for col in feature_columns], ignore_index=True)

    sns.boxplot(x=combined_feature_data)
    plt.title(f'Boxplot of {feature} for All Players', fontsize=10)
    plt.xlabel(feature, fontsize=8)

plt.tight_layout()
plt.show()


可以看到大多数对数变换后效果不错，对一些特征进行截断后保留，效果较差的不采用

下面进行归一化和标准化的处理，具体如下：  
除了id和randomed外，其余列都进行了变换

In [None]:
# Features that are min-max normalized.
# 'teamfight_participation' was previously misspelled 'teamflight_participation';
# the column-existence check below skipped it silently, so it was never scaled.
features_to_normalize = [
    'deaths', 'denies', 'kills', 'rune_pickups', 'creeps_stacked', 'gold',
    'health', 'level', 'obs_left_log', 'observers_placed', 'sen_left_log',
    'sen_placed', 'towers_killed', 'xp', 'lh', 'assists', 'camps_stacked',
    'teamfight_participation', 'KDA'
]

# Features that are standardized instead.
features_to_standardize = [
    'max_health', 'max_hero_hit', 'max_mana',
    'nearby_creep_death_count', 'stuns'
]
from sklearn.preprocessing import StandardScaler
# One scaler object per transformation; each is refit for every column.
scaler_normalize = MinMaxScaler()
scaler_standardize = StandardScaler()

# Features that matched no 'players-N-<feature>' column, reported at the end
# so that a misspelled or absent feature no longer goes unnoticed.
unmatched_features = []

for feature_group, group_scaler in (
    (features_to_normalize, scaler_normalize),
    (features_to_standardize, scaler_standardize),
):
    for feature in feature_group:
        matched = False
        for player_num in range(1, 11):
            column_name = f'players-{player_num}-{feature}'
            if column_name in player_table_data.columns:
                # Fit on this single column and overwrite it with the scaled values.
                player_table_data[[column_name]] = group_scaler.fit_transform(player_table_data[[column_name]])
                matched = True
        if not matched:
            unmatched_features.append(feature)

if unmatched_features:
    print(f"以下特征未找到对应列，已跳过: {unmatched_features}")

# Save the processed table to a new CSV file.
output_csv_path = 'player_table_data_processed.csv'
player_table_data.to_csv(output_csv_path, index=False)
print(f"处理后的数据已保存为: {output_csv_path}")

### Teamfight清洗v1.0
计算总团战时间，求出总团战时间在总游戏时间的占比  
提取并计算每支队伍在每局游戏中的damage、gold_delta、xp_delta的最大最小值及均值

In [None]:
# Input files.
file_path_teamfights = 'teamfights_table_train.csv'
file_path_main_table = 'main_table_train.csv'

teamfights_data = pd.read_csv(file_path_teamfights)
main_table_data = pd.read_csv(file_path_main_table)

# Per-match results are accumulated column by column in this frame.
processed_data = pd.DataFrame()

# Up to 16 team fights per match are considered.
max_fights = 16

# Duration of each fight (end - start), for fights whose start and end columns
# both exist in the table.
teamfight_duration_cols = []
for i in range(1, max_fights + 1):
    start_col = f'teamfights-{i}-start'
    end_col = f'teamfights-{i}-end'
    if start_col not in teamfights_data.columns or end_col not in teamfights_data.columns:
        continue
    duration_col = f'teamfights-{i}-duration'
    teamfights_data[duration_col] = teamfights_data[end_col] - teamfights_data[start_col]
    teamfight_duration_cols.append(duration_col)

# Total time spent in team fights per match.
processed_data['total_teamfight_time'] = teamfights_data[teamfight_duration_cols].sum(axis=1)

# Metrics summarized per team; players 1-5 are Radiant, 6-10 are Dire.
metrics = ['damage', 'gold_delta', 'xp_delta']
teams = {'Radiant': range(1, 6), 'Dire': range(6, 11)}

for metric in metrics:
    for team, player_range in teams.items():
        team_max, team_min, team_avg = [], [], []

        for i in range(1, max_fights + 1):
            candidate_cols = (f'teamfights-{i}-player-{player}-{metric}' for player in player_range)
            fight_cols = [name for name in candidate_cols if name in teamfights_data.columns]
            if not fight_cols:
                continue

            # Per-fight max / min / mean across the team's players.
            fight_values = teamfights_data[fight_cols]
            team_max.append(fight_values.max(axis=1))
            team_min.append(fight_values.min(axis=1))
            team_avg.append(fight_values.mean(axis=1))

        # Per-match statistics: max of maxes, min of mins, mean of means.
        if team_max:
            processed_data[f'{team}_{metric}_max'] = pd.concat(team_max, axis=1).max(axis=1)
            processed_data[f'{team}_{metric}_min'] = pd.concat(team_min, axis=1).min(axis=1)
            processed_data[f'{team}_{metric}_avg'] = pd.concat(team_avg, axis=1).mean(axis=1)

# Attach the game time and the share of it spent in team fights.
if 'game_time' in main_table_data.columns:
    processed_data['game_time'] = main_table_data['game_time']
    time_ratio = processed_data['total_teamfight_time'] / processed_data['game_time']
    processed_data['teamfight_time_ratio'] = time_ratio.fillna(0)

# Move `teamfight_time_ratio` so it sits right after `total_teamfight_time`.
if 'total_teamfight_time' in processed_data.columns and 'teamfight_time_ratio' in processed_data.columns:
    cols = processed_data.columns.tolist()
    total_time_idx = cols.index('total_teamfight_time')
    cols.remove('teamfight_time_ratio')
    cols.insert(total_time_idx + 1, 'teamfight_time_ratio')
    processed_data = processed_data[cols]

# Write the final result.
output_path = 'teamfights_statistics.csv'
processed_data.to_csv(output_path, index=False)

print(f"处理完成，结果已保存为 {output_path}")


先观察数据（忽略空值，但是暂时不能删除或者置0）

In [None]:
import math
# Reload the team fight statistics.
processed_data = pd.read_csv('teamfights_statistics.csv')

# Structure overview.
print("数据概览:")
print(processed_data.info())

# Descriptive statistics.
print("\n描述性统计:")
print(processed_data.describe())

# Box plots of every int64/float64 column; null values are dropped per column.
numeric_columns = processed_data.select_dtypes(include=['float64', 'int64']).columns
num_columns = len(numeric_columns)

# Grid layout: 3 plots per row, as many rows as needed.
cols = 3
rows = math.ceil(num_columns / cols)

plt.figure(figsize=(cols * 5, rows * 5))

for position, column in enumerate(numeric_columns, start=1):
    plt.subplot(rows, cols, position)
    plt.boxplot(processed_data[column].dropna(), vert=False)
    plt.title(column)
    plt.xlabel('Value')
    plt.ylabel('Box')

plt.tight_layout()
plt.show()

# Outlier report based on the 1.5 * IQR fences of each numeric column.
print("\n异常值分析:")
for column in processed_data.select_dtypes(include=['float64', 'int64']).columns:
    column_values = processed_data[column]
    q1, q3 = column_values.quantile(0.25), column_values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Rows whose value lies outside the fences; null values compare False on
    # both sides and are therefore not selected.
    outside_fences = (column_values < lower_bound) | (column_values > upper_bound)
    outliers = processed_data[outside_fences]

    print(f"\n{column}列的异常值统计:")
    print(f"下边界: {lower_bound}, 上边界: {upper_bound}")
    print(f"异常值数量: {len(outliers)}（空值未计入）")


比例出现大于1的，检查一下

In [None]:
# Every column whose name contains "ratio".
ratio_columns = [col for col in processed_data.columns if 'ratio' in col]

# For each ratio column, list the row labels and values that exceed 1.
for col in ratio_columns:
    greater_than_one = processed_data[processed_data[col] > 1]

    if greater_than_one.empty:
        print(f"\n列 '{col}' 中没有大于 1 的数据")
        continue

    print(f"\n列 '{col}' 中大于 1 的数据:")
    for index, value in greater_than_one[col].items():
        print(f"行号: {index}, 值: {value}")


先观察数据（忽略空值，但是暂时不能删除或者置0）

发现有些局数的game_time为0，才出现了inf的错误，这里全部将上述异常值置1

In [None]:
# Cap every ratio value above 1 (including inf) at 1; other values, null
# values included, are left unchanged.
for col in ratio_columns:
    ratio_values = processed_data[col]
    processed_data[col] = ratio_values.mask(ratio_values > 1, 1)

# Verify that no value above 1 is left.
for col in ratio_columns:
    greater_than_one_count = (processed_data[col] > 1).sum()
    print(f"列 '{col}' 中剩余大于 1 的值数量: {greater_than_one_count}")

In [None]:
# Count the rows whose teamfight_time_ratio is exactly 0, provided the column
# exists in the data.
if 'teamfight_time_ratio' not in processed_data.columns:
    print("'teamfight_time_ratio' 列不存在于数据中。")
else:
    zero_count = (processed_data['teamfight_time_ratio'] == 0).sum()
    print(f"'teamfight_time_ratio' 列中值为 0 的个数: {zero_count}")

下面进行归一化和标准化

In [None]:
# One min-max scaler and one standard scaler, refit for every column.
min_max_scaler = MinMaxScaler()
standard_scaler = StandardScaler()

# Work on a copy; the scalers always read the unscaled processed_data.
teamfight_data = processed_data.copy()

# Column groups, selected by substring of the column name.
damage_columns = [col for col in processed_data.columns if 'damage' in col]
gold_delta_columns = [col for col in processed_data.columns if 'gold_delta' in col]
xp_columns = [col for col in processed_data.columns if 'xp' in col]

# Build the (column, scaler) plan in the order damage -> gold_delta -> xp.
# Damage columns are min-max normalized.
scaling_plan = [(col, min_max_scaler) for col in damage_columns]

# gold_delta: min and avg are standardized, max is min-max normalized.
for col in gold_delta_columns:
    if 'min' in col or 'avg' in col:
        scaling_plan.append((col, standard_scaler))
    elif 'max' in col:
        scaling_plan.append((col, min_max_scaler))

# xp columns are min-max normalized.
scaling_plan.extend((col, min_max_scaler) for col in xp_columns)

for col, column_scaler in scaling_plan:
    teamfight_data[col] = column_scaler.fit_transform(processed_data[[col]])

# Summary of the scaled data.
print("\n归一化和标准化后的数据概览:")
print(teamfight_data.describe())

# Save the result to a new file.
teamfight_data.to_csv('teamfight_processed.csv', index=False)
print("\n处理后的数据已保存为 'teamfight_processed.csv'")

合并所有表格

In [None]:
# Load the five tables that make up the final training set.
main_table_deleted = pd.read_csv('main_table_train.csv')
target_table_radiantwin = pd.read_csv('target_table_radiantwin_train.csv')
player_table_data_processed = pd.read_csv('player_table_data_processed.csv')
objective_statistics = pd.read_csv('objective_statistics_normalized.csv')
teamfight_processed = pd.read_csv('teamfight_processed.csv')

# Report the row count of every table.
print("表格行数检查:")
print(f"main_table_deleted: {main_table_deleted.shape[0]} 行")
print(f"target_table_radiantwin: {target_table_radiantwin.shape[0]} 行")
print(f"player_table_data_processed: {player_table_data_processed.shape[0]} 行")
print(f"objective_statistics: {objective_statistics.shape[0]} 行")
print(f"teamfight_processed: {teamfight_processed.shape[0]} row(s)".replace(" row(s)", " 行"))

# The tables are joined purely by row position, so all of them must have the
# same number of rows as the main table.
if not all(df.shape[0] == main_table_deleted.shape[0] for df in [
    target_table_radiantwin, player_table_data_processed, objective_statistics, teamfight_processed
]):
    raise ValueError("所有表格的行数不一致，无法直接拼接！")

# Concatenate side by side (column-wise).
# NOTE(review): teamfight_processed appears to carry a copy of game_time taken
# from main_table_train.csv, so the merged frame may contain two game_time
# columns — confirm whether that is intended.
merged_data = pd.concat(
    [main_table_deleted, target_table_radiantwin, player_table_data_processed, objective_statistics, teamfight_processed],
    axis=1
)

# Overview of the merged data.
print("\n合并后的数据概览:")
print(merged_data.info())

# Save the merged table. The confirmation message is built from the same path
# variable; it previously named a different file ('Dota_data_v1.0.csv') than
# the one actually written.
merged_output_path = 'train_data_v2.0.csv'
merged_data.to_csv(merged_output_path, index=False)
print(f"合并后的大表已保存为 '{merged_output_path}'")
