In [1]:
import os
import copy
import ast
import time
import json
import random
import glob
import ast
import numpy as np
from functools import partial
from collections import Counter
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
#from torchsummary import summary

In [2]:
# Sub-folders of the raw-log directory that are scanned for game files.
game_folders = ['MO/', 'PLAY/', 'LIU/']
folder_name = '../data/output2017/'
all_game_files = []

for game_folder in game_folders:
    game_dir = os.path.join(folder_name, game_folder)
    # os.listdir returns entries in arbitrary order; sorting makes the order of
    # processed games (and therefore of the generated samples) reproducible.
    for file_name in sorted(os.listdir(game_dir)):
        all_game_files.append(os.path.join(game_dir, file_name))

## Updated Approach

In [12]:
def detect_wind(round_info, players_init_hands):
    '''
    Work out the prevailing wind of the round and the seat wind of every player.

    Args:
    - round_info: sequence whose first element is one of '东', '南', '西', '北'
    - players_init_hands (dict): player id -> initial hand; ids are used as
      indices into [0, 1, 2, 3], so they must be ints

    Returns (in this order):
    - player_winds (dict): player id -> seat wind code. The player holding the
      most tiles is treated as the dealer and gets 'F1'; the ids that follow
      (wrapping around after 3) get 'F2', 'F3', 'F4'
    - round_wind (str): wind code ('F1'..'F4') looked up from round_info[0]
    '''
    wind_code_by_char = {'东':'F1', '南':'F2', '西':'F3', '北':'F4'}
    round_wind = wind_code_by_char[round_info[0]]

    def hand_size(player):
        return len(players_init_hands[player])

    # The dealer is the only player who starts with an extra tile.
    dealer = max(players_init_hands, key=hand_size)

    seats = [0, 1, 2, 3]
    seating_order = seats[dealer:] + seats[:dealer]  # rotate so the dealer comes first

    player_winds = {}
    for position, player in enumerate(seating_order, start=1):
        player_winds[player] = 'F' + str(position)
    return player_winds, round_wind


def extract_target_data(file, history_len=4):
    '''
    Turn one game log into discard-prediction samples.

    File layout as consumed here (0-based line numbers): line 1 is the
    tab-separated round info, lines 2-5 are the four players' initial hands
    (`player_num<TAB>hand<TAB>extra`), and every later line is one play
    (`player<TAB>action<TAB>tiles[<TAB>...]`).

    Args:
    - file (str): path of the game log
    - history_len (int): max length of history of a single player to use

    Returns:
    - target_data_write (list): one JSON string per '打牌' (discard) turn. Each
      string encodes a list whose first item is the state right before that
      discard and whose remaining items are up to `history_len` earlier
      discard-turn states of the same player, most recent first. A state holds
      the keys turn_id, turn_player, hand (tiles whose code starts with 'H'
      are left out), last_discard, discard, label (the tile discarded),
      open_meld, round_wind and own_wind.
    '''

    def custom_eval(x):
        # Header fields are assumed to be plain literals (the play lines below are
        # already parsed that way), so use ast.literal_eval instead of eval: eval
        # would execute arbitrary expressions found in a log file. Anything that is
        # not a literal is still handed back unchanged.
        try:
            return ast.literal_eval(x.strip())
        except (ValueError, SyntaxError):
            return x

    # The logs contain Chinese action names; decode explicitly rather than
    # depending on the platform's default encoding.
    with open(file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    target_data_write = []

    # Record of all discarded tiles in sequential order
    players_discard_tiles = {}

    # Record of all stolen tiles (exposed melds) in sequential order
    players_open_meld = {}

    # The histories for respective players
    players_history = {}

    # Latest hand tiles for respective players
    players_latest_hands = {}

    for line in lines[2:6]:
        player_num, hands, _ = map(custom_eval, line.split('\t'))
        players_latest_hands[player_num] = hands
        players_history[player_num] = []
        players_discard_tiles[player_num] = []
        players_open_meld[player_num] = []

    round_info = lines[1].split('\t')

    player_winds, round_wind = detect_wind(round_info, players_latest_hands)

    # Play records
    for turn_i, line in enumerate(lines[6:]):
        if not line.strip():
            continue  # tolerate blank lines, e.g. an empty last line

        turn_info = line.split('\t')  # e.g. ['3', '打牌', "['F2']", '\n']
        turn_player = int(turn_info[0])
        action = turn_info[1]
        tiles = ast.literal_eval(turn_info[2])

        if action == '和牌':
            continue

        hand = players_latest_hands[turn_player]

        if action == '补花':
            hand.remove(tiles[0])

        elif action in ('摸牌', '补花后摸牌', '杠后摸牌'):  # ['3', '摸牌', ['W1'], '\n']
            hand.append(tiles[0])

        elif action == '打牌':
            discard = tiles[0]
            # Snapshot the shared records: the per-player lists keep growing as the
            # game goes on, and this dict is kept as history and serialised again on
            # later turns. Without the copies, history entries would show discards
            # and melds that happened after the turn they describe.
            turn_data = {
                'turn_id': turn_i,
                'turn_player': turn_player,
                'hand': [tile for tile in hand if not tile.startswith('H')],
                'last_discard': {player: (discarded[-1] if discarded else '')
                                 for player, discarded in players_discard_tiles.items()},
                'discard': {player: list(discarded)
                            for player, discarded in players_discard_tiles.items()},
                'label': discard,
                'open_meld': {player: list(melds)
                              for player, melds in players_open_meld.items()},
                'round_wind': round_wind,
                'own_wind': player_winds[turn_player],
            }

            history = players_history[turn_player]
            target_data_write.append(json.dumps([turn_data] + history[::-1]))

            players_discard_tiles[turn_player].append(discard)
            hand.remove(discard)

            # History consists of the player's own earlier discard-turn states;
            # keep only the `history_len` most recent ones.
            history.append(turn_data)
            if len(history) > history_len:
                history.pop(0)  # remove the oldest

        elif action in ('碰', '明杠'):
            # ['3', '碰', ['W1','W1','W1'], 'W2', '2\n']
            # ['3', '明杠', ['F2','F2','F2','F2'], 'F2', '2\n']
            # One tile of the meld is not taken from the hand, so all but one are removed.
            for tile in tiles[:-1]:
                hand.remove(tile)
            players_open_meld[turn_player] += tiles

        elif action == '暗杠':
            # NOTE(review): only all-but-one of the listed tiles are removed, same as
            # for '明杠'; confirm against the log format whether this action should
            # take every listed tile out of the hand. Not recorded as an open meld.
            for tile in tiles[:-1]:
                hand.remove(tile)

        elif action == '吃':  # ['3', '吃', ['W1','W2','W3'], 'W2', '2\n']
            stolen_tile = turn_info[3]
            for tile in tiles:
                if tile != stolen_tile:  # exclude steal tile
                    hand.remove(tile)
            players_open_meld[turn_player] += tiles

    return target_data_write

In [146]:
# Extract the discard samples of every game and store them one JSON string per line.
data_vol = 0

# The output file is opened once for the whole run instead of once per game file.
# NOTE: it is opened in append mode, so re-running this cell adds the samples again
# after whatever the file already contains; remove the file first for a clean run.
with open('processed_data_jason/discard_tile_data.nosync.txt', 'a') as f:
    for file in tqdm(all_game_files):
        discard_tile_data = extract_target_data(file, history_len=4)
        for line in discard_tile_data:
            f.write(line+'\n')
            data_vol += 1

print(f'Total number of data: {data_vol}')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=530458.0), HTML(value='')))


Total number of data: 26930469


In [147]:
# Show the sample count again; `data_vol` is the counter filled in by the extraction loop cell above.
print(f'Total number of data: {data_vol}')

Total number of data: 26930469
