In [1]:
import json
import pandas as pd
from pathlib import Path
from jass.game.rule_schieber import RuleSchieber
from jass.game.game_state import GameState
from jass.game.game_state_util import state_from_complete_game
from jass.game.game_util import convert_one_hot_encoded_cards_to_int_encoded_list
from jass.game.const import team, next_player

In [2]:
# Shared rule object; get_data_for_state() uses it to obtain the valid
# actions for a given state.
rule = RuleSchieber()

In [3]:
# Location of the JSON file holding the aggregated games that this notebook
# converts into training rows (path is relative to the working directory).
final_game_aggregation = Path("./data/games.json")

In [4]:
# Load all recorded games. The context manager closes the file handle as soon
# as parsing is done instead of leaving it open until garbage collection.
with open(final_game_aggregation, "r") as games_file:
    games = json.load(games_file)

In [5]:
def get_new_dataframe() -> pd.DataFrame:
    """Create a fresh, empty DataFrame with the dataset's column layout.

    Returns:
        A DataFrame without rows whose columns are, in order, the feature
        columns followed by the ``card_played`` label column.
    """
    column_names = [
        "trump",
        "declared_trump",
        "forehand",
        "player",
        "hand",
        "possible_actions",
        "cards_trick",
        "cards_player",
        "current_points",
        "total_points",
        "card_played",
    ]
    return pd.DataFrame(columns=column_names)

In [6]:
def get_played_cards_trick(s_: GameState) -> list[int]:
    """Encode, for every card, the trick in which it was played.

    Walks ``s_.tricks`` in order and stops at the first ``-1`` entry, which
    is treated as the first slot that has not been filled yet.

    Args:
        s_: State whose ``tricks`` attribute is iterated trick by trick,
            each trick yielding card indices.

    Returns:
        A list of 36 entries indexed by card. A played card holds the
        1-based number of its trick divided by 9 (so the values are floats,
        despite the ``list[int]`` annotation); all other entries stay ``0``.
    """
    encoding = [0] * 36
    trick_number = 0
    for trick_cards in s_.tricks:
        trick_number += 1
        for played in trick_cards:
            if played == -1:
                # First empty slot reached: nothing after it has been played.
                return encoding
            encoding[played] = trick_number / 9
    return encoding

def get_played_cards_player(s_: GameState) -> list[int]:
    """Encode, for every card, which player played it.

    For each trick the seat starts at ``s_.trick_first_player[trick]`` and is
    advanced through the ``next_player`` lookup after every card. A first
    player of ``-1`` ends the scan; a card of ``-1`` ends the current trick.

    Args:
        s_: State providing ``tricks`` and ``trick_first_player``.

    Returns:
        A list of 36 entries indexed by card. A played card holds the id of
        the player who played it plus one; all other entries stay ``0``.
    """
    owners = [0] * 36
    for trick_index in range(len(s_.tricks)):
        seat = s_.trick_first_player[trick_index]
        if seat == -1:
            # This trick has no first player recorded, so stop scanning.
            break
        for played in s_.tricks[trick_index]:
            if played == -1:
                break
            owners[played] = seat + 1
            seat = next_player[seat]
    return owners

def get_last_played_card(s_: GameState) -> int:
    """Return the most recently played card of a state.

    Flattens ``s_.tricks``, drops every ``-1`` entry and returns the last
    remaining value. Indexing fails with ``IndexError`` if no entry differs
    from ``-1``, so the state must contain at least one played card.

    Args:
        s_: State whose ``tricks`` attribute supports ``flatten()`` and
            boolean-mask indexing (e.g. a NumPy array).

    Returns:
        The last entry of the flattened tricks that is not ``-1``.
    """
    flat_cards = s_.tricks.flatten()
    is_played = flat_cards != -1
    played_cards = flat_cards[is_played]
    return played_cards[-1]
    

In [7]:
def get_data_for_state(s: GameState, ls: GameState, fs: GameState) -> dict:
    """Build one dataset row describing a single card decision.

    Args:
        s: State whose most recently played card becomes the ``card_played``
            label.
        ls: State from which all input features are derived; the caller in
            this notebook passes the state directly preceding ``s``.
        fs: State supplying the ``total_points`` value; the caller passes
            the complete game.

    Returns:
        A dict keyed by column name with the features taken from ``ls``, the
        points of the acting player's team in ``ls`` and ``fs`` (each divided
        by 157) and the label card taken from ``s``.
    """
    acting_player = ls.player
    acting_team = team[acting_player]
    hand_cards = convert_one_hot_encoded_cards_to_int_encoded_list(ls.hands[acting_player])
    legal_cards = convert_one_hot_encoded_cards_to_int_encoded_list(
        rule.get_valid_actions_from_state(ls)
    )

    # Keys are inserted in the same order as before so the resulting row is
    # laid out identically.
    row = {}
    row["trump"] = ls.trump + 1
    # NOTE(review): this subtracts 4, whereas "declared_trump" below divides
    # by 4 -- possibly "/ 4" was intended here. Behaviour kept as is; confirm
    # before changing, since it alters the generated dataset.
    row["player"] = (acting_player + 1) - 4
    row["declared_trump"] = (ls.declared_trump + 1) / 4
    row["forehand"] = ls.forehand
    row["hand"] = hand_cards
    row["possible_actions"] = legal_cards
    row["cards_trick"] = get_played_cards_trick(ls)
    row["cards_player"] = get_played_cards_player(ls)
    # 157 is presumably the total number of points in a round -- TODO confirm.
    row["current_points"] = ls.points[acting_team] / 157
    row["total_points"] = fs.points[acting_team] / 157
    row["card_played"] = get_last_played_card(s)
    return row

In [8]:
# Convert every recorded game into one training row per played card and write
# the rows out as chunked parquet files.
games_per_file = 8000
output_dir = Path("./data/parquet")
# The target directory must exist before the first write; create it up front
# so a long conversion run cannot fail when the first chunk is flushed.
output_dir.mkdir(parents=True, exist_ok=True)
column_order = get_new_dataframe().columns

# Rows are collected in a plain list and turned into a DataFrame once per
# chunk. Enlarging a DataFrame row by row through .loc copies the whole frame
# on every insertion, which is quadratic in the chunk size.
rows = []
index = 0  # stays 0 when `games` is empty, keeping the code below well-defined
for index, game in enumerate(games, 1):
    # The flush happens *before* the game with this index is processed, so
    # file k ends with game k * games_per_file - 1 (same boundaries and file
    # names as the previous row-by-row implementation).
    if index % games_per_file == 0:
        print(f"Progress: {index}/{len(games)}")
        pd.DataFrame(rows, columns=column_order).to_parquet(
            output_dir / f"data_{index // games_per_file}.parquet", engine="pyarrow"
        )
        rows = []
    fs = GameState.from_json(game)
    ls = None
    # Walk through the intermediate states of the game (presumably the state
    # after i played cards -- confirm against state_from_complete_game) and
    # pair each state with its predecessor: the predecessor provides the
    # features, the successor provides the card that was played.
    for i in range(36):
        s = state_from_complete_game(fs, i)
        if ls is not None:
            rows.append(get_data_for_state(s, ls, fs))
        ls = s
    # The final transition leads from the last intermediate state to the
    # complete game itself.
    rows.append(get_data_for_state(fs, ls, fs))
print(f"Progress: {len(games)}/{len(games)}")
# `df` is still bound at module level so later cells can inspect the last chunk.
df = pd.DataFrame(rows, columns=column_order)
if not df.empty:
    df.to_parquet(
        output_dir / f"data_{(index // games_per_file) + 1}.parquet", engine="pyarrow"
    )