In [None]:
import os                  # 一些操作系统提供的 API
from tqdm import notebook  # 为循环或其他迭代操作添加进度条
import numpy as np
import pandas as pd
import ujson as json       # 用于读入 .json 文件

In [None]:
# Configuration constants
PATH_TO_RAW_DATA = '../data'        # directory holding the raw *_matches.jsonl files
PATH_TO_PROCESSED_DATA = './data'   # directory the extracted CSV tables are written to

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before any later cell writes into it.
os.makedirs(PATH_TO_PROCESSED_DATA, exist_ok=True)

In [None]:
def read_matches(matches_file):
    """Lazily yield the matches stored in a JSON Lines file.

    Args:
        matches_file: Path to a file holding one JSON document per line.

    Yields:
        The value decoded from each non-blank line, in file order (the
        cells in this notebook index the yielded values like dicts).

    The iteration is wrapped in a tqdm notebook progress bar. Its total is
    looked up by base file name in ``MATCHES_COUNT``; for any other file
    name the lookup returns ``None`` and tqdm is given no total.
    """

    # Known line counts keyed by base file name; only used for the progress bar.
    MATCHES_COUNT = {
        'test_matches.jsonl': 10000,
        'train_matches.jsonl': 39675
    }

    _, filename = os.path.split(matches_file)
    total_matches = MATCHES_COUNT.get(filename)

    # Without an explicit encoding, open() uses the locale's preferred
    # encoding, which can mis-decode non-ASCII JSON text on some platforms.
    with open(matches_file, encoding='utf-8') as fin:
        for line in notebook.tqdm(fin, total=total_matches):
            # A blank line (e.g. a trailing newline at the end of the file)
            # is not a JSON document; skip it instead of failing to parse.
            if not line.strip():
                continue
            yield json.loads(line)

In [None]:
# Look only at the first training match and show the keys of its record.
train_matches_path = os.path.join(PATH_TO_RAW_DATA, 'train_matches.jsonl')
match = next(read_matches(train_matches_path), None)
if match is not None:
    print(match.keys())

In [None]:
# Collect the distinct values of objective['type'] found in the raw training data.
obj_type = set()

for match in read_matches(os.path.join(PATH_TO_RAW_DATA, 'train_matches.jsonl')):
    # Only the set of types is of interest; printing every single objective
    # would flood the cell output and bury the summary printed below.
    obj_type.update(obj['type'] for obj in match['objectives'])

print(obj_type)

In [None]:
# Survey which objective types are missing the keys 'team', 'player_slot' and
# 'slot'. lack_type[0] holds types seen without 'team', lack_type[1] types seen
# without 'team' and 'player_slot', lack_type[2] types seen without all three.
lack_type = [set(), set(), set()]
count_barracks_kill = 0
count_lack_type2 = 0
for match in read_matches('../data/train_matches.jsonl'):
    for objective in match['objectives']:
        # Each check extends the previous one, so they are nested.
        if 'team' not in objective:
            lack_type[0].add(objective['type'])
            if 'player_slot' not in objective:
                lack_type[1].add(objective['type'])
                if 'slot' not in objective:
                    count_lack_type2 += 1
                    lack_type[2].add(objective['type'])
        # Counted independently of which keys are present.
        if objective['type'] == 'CHAT_MESSAGE_BARRACKS_KILL':
            count_barracks_kill += 1

print(f'Barracks kill count: {count_barracks_kill}')
print(f'Lack type 2 count: {count_lack_type2}')

print(f'{lack_type[0]} has no team')
print(f'{lack_type[1]} has no team and player_slot')
print(f'{lack_type[2]} has no team, player_slot and slot')

In [None]:
# Collect the distinct 'team' values of the objectives and print a warning for
# combinations of 'team' and 'player_slot' that the checks below flag.
team_type = set()
for match in read_matches('../data/train_matches.jsonl'):
    for objective in match['objectives']:
        if 'team' not in objective:
            continue

        team = objective['team']
        has_slot = 'player_slot' in objective

        # team 100 or 60 without a player_slot
        if team in (100, 60) and not has_slot:
            print("Error occurs!")
        # team 2 together with a player_slot in 128..132
        if team == 2 and has_slot and 128 <= objective['player_slot'] <= 132:
            print("Error occurs!")
        # team 3 together with a player_slot in 0..4
        if team == 3 and has_slot and 0 <= objective['player_slot'] <= 4:
            print("Error occurs!")

        team_type.add(team)

print(team_type)

In [None]:
# Print the 'pings' entry of every player of every training match.
# NOTE(review): ping_key is created but nothing is ever added to it, and the
# loop prints one line per player, which makes for a very long output.
ping_key = set()
for match in read_matches('../data/train_matches.jsonl'):
    for player in match['players']:
        print(player['pings'])

In [None]:
# Count how many player records, over all training matches, have an 'assists' key.
train_matches = read_matches('../data/train_matches.jsonl')
assists_num = sum(
    1
    for match in train_matches
    for player in match['players']
    if 'assists' in player
)

print(assists_num)

In [None]:
from utils.extractdata import extract_main

# Run extract_main over the raw training matches and save its result
# (presumably a DataFrame) as main_table.csv, without the index column.
main_table_csv = os.path.join(PATH_TO_PROCESSED_DATA, 'main_table.csv')
df_main_table = extract_main('../data/train_matches.jsonl')
df_main_table.to_csv(main_table_csv, index=False)

In [None]:
from utils.extractdata import extract_objectives

df_objectives = extract_objectives('../data/train_matches.jsonl')
df_objectives.to_csv(os.path.join(PATH_TO_PROCESSED_DATA, 'objectives.csv'), index=False)

!sed -i 's/\.0//g' ./data/objectives.csv

In [None]:
from utils.extractdata import extract_targets

# Run extract_targets over the raw training matches and save its result
# (presumably a DataFrame) as targets.csv, without the index column.
targets_csv = os.path.join(PATH_TO_PROCESSED_DATA, 'targets.csv')
df_targets = extract_targets('../data/train_matches.jsonl')
df_targets.to_csv(targets_csv, index=False)

In [None]:
from utils.extractdata import extract_teamfights

df_teamfights = extract_teamfights('../data/train_matches.jsonl')
df_teamfights.to_csv(os.path.join(PATH_TO_PROCESSED_DATA, 'teamfights.csv'), index=False)

!sed -i 's/\.0,/,/g' ./data/teamfights.csv

In [None]:
from utils.extractdata import extract_players

# Run extract_players over the raw training matches and save its result
# (presumably a DataFrame) as players.csv, without the index column.
players_csv = os.path.join(PATH_TO_PROCESSED_DATA, 'players.csv')
df_players = extract_players('../data/train_matches.jsonl')
df_players.to_csv(players_csv, index=False)

In [11]:
# Edit ./data/players.csv in place, replacing every ".0," with ",".
# NOTE(review): presumably this trims the float suffix from whole-number
# fields; a ".0" in the last field of a row is not followed by a comma and is
# therefore left unchanged - confirm that is intended.
!sed -i 's/\.0,/,/g' ./data/players.csv