In [1]:
import os
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
log_dir = './logs/'
files = [os.path.join(log_dir, v) for v in os.listdir('./logs/') if v.startswith('log')]

In [8]:
def parse_game_log(file_path):
    with open(file_path) as f:
        content = f.readlines()

    # Game Started
    lines_game_started = list(filter(lambda x: x.startswith('Game Started'), content))
    assert len(lines_game_started) > 0, 'no game started line found'
    assert len(lines_game_started) == 1, 'multiple game started lines found'
    _, start_datetime, python_version = lines_game_started[0].split(' - ')
    start_datetime = pd.to_datetime(start_datetime)
    content.remove(lines_game_started[0])

    # Environment
    lines_environment = list(filter(lambda x: x.startswith('[') and x.endswith('\n'), content))
    assert len(lines_environment) > 0, 'no environment line found'
    assert len(lines_environment) == 1, 'multiple environment lines found'
    environment = lines_environment[0].strip('[\n')
    content.remove(lines_environment[0])

    # Gameplay
    df = pd.Series(content).str.split(' - ', 1, expand=True)
    df.columns = ['datetime', 'message']
    df['message'] = df['message'].fillna(df['datetime'])
    df['message_count'] = df['datetime'].str.match('20\d{2}').cumsum()
    df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
    df['datetime'] = df['datetime'].fillna(method='ffill')
    df = df.groupby('message_count').agg({
        'message': 'sum', 
        'datetime': 'first', 
    })
    df['message'] = df['message'].apply(lambda x: x.replace('\n', '').lower())
    df['is_board_state'] = df['message'].str.match('board state - ')
    df['board_count'] = (~df['is_board_state']).cumsum()
    df = df.groupby('board_count').agg({
        'datetime': [['datetime', 'min']], 
        'message': [['message', lambda x: [v for v in x if not v.startswith('board state')][0]], 
                    ['board_state', lambda x: [v for v in x if v.startswith('board state')][0].split(' - ')[1].strip('[]').split(', ')]], 
    })
    df.columns = df.columns.droplevel(0)
    df[['player_turn', 'dice']] = df['message'].str.extract('next turn - (\w+) to play : (.+)')
    df['turn'] = (~df['player_turn'].isnull()).cumsum()
    df['player_turn'] = df['player_turn'].fillna(method='ffill')
    df['dice'] = df['dice'].fillna(method='ffill')
    df['move'] = df['message'].str.extract('player [a-z]+ made the move \(([\d, ]+)\)$')
    df['move'] = df['move'].apply(lambda x: None if pd.isna(x) else x.split(', '))
    winner_values = df['message'].str.extract('game won by ([a-z]+) player').dropna().values
    assert len(winner_values) <= 1, 'there are more than one winner lines'
    winner = winner_values[0][0] if len(winner_values) > 0 else 'Unknown'
    df = df.set_index('datetime')
    
    # Arrange and publish
    result = {'game_start': start_datetime, 'environment': environment, 'python_version': python_version, 'winner': winner, 'game_df': df}
    return result

In [9]:
# df.iloc[0].values
# pd.read_csv(files[0], delimiter=' - ')
games_log_data_list = list()
for file in tqdm(files):
    games_log_data_list.append(parse_game_log(file))
games_log_data_df = pd.DataFrame(games_log_data_list)

HBox(children=(FloatProgress(value=0.0, max=85.0), HTML(value='')))




In [10]:
print(games_log_data_df.shape[0], 'games played from', 
      games_log_data_df['game_start'].dt.date.min(), 'to', games_log_data_df['game_start'].dt.date.max(), '\b.')
print('Winning stats: '), 
display(pd.DataFrame(games_log_data_df['winner'].value_counts(normalize=True)*100))


85 games played from 2020-09-10 to 2020-09-12.
Winning stats: 


Unnamed: 0,winner
black,62.352941
red,29.411765
Unknown,8.235294


In [19]:
games_log_data_df.sort_values('game_start').tail(5)

Unnamed: 0,game_start,environment,python_version,winner,game_df
5,2020-09-12 21:51:52.951782,Clang 4.0.1 (tags/RELEASE_401/final)] on Amits...,"3.7.6 (default, Jan 8 2020, 13:42:34) \n",black,...
23,2020-09-12 21:56:31.474012,Clang 4.0.1 (tags/RELEASE_401/final)] on Amits...,"3.7.6 (default, Jan 8 2020, 13:42:34) \n",black,...
57,2020-09-12 21:59:05.390822,Clang 4.0.1 (tags/RELEASE_401/final)] on Amits...,"3.7.6 (default, Jan 8 2020, 13:42:34) \n",black,...
24,2020-09-12 22:05:25.011168,Clang 4.0.1 (tags/RELEASE_401/final)] on Amits...,"3.7.6 (default, Jan 8 2020, 13:42:34) \n",red,...
41,2020-09-12 22:08:55.064548,Clang 4.0.1 (tags/RELEASE_401/final)] on Amits...,"3.7.6 (default, Jan 8 2020, 13:42:34) \n",black,...
