In [2]:
import itertools
from typing import Dict, List, Tuple, Union
from pprint import pprint

import copy
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from tqdm import tqdm

In [13]:
# Read the line-delimited JSON event log and convert every column to the
# best-fitting pandas dtype, in one chained expression.
data = pd.read_json("../../data/epilog_data.json", lines=True).convert_dtypes()

In [62]:
# Download the exported movement events (CSV hosted on GitHub) and convert
# every column to the best-fitting pandas dtype.
# NOTE(review): this rebinds `data`, replacing the frame read from
# epilog_data.json. Cells further down that read "x"/"y"/"z"/"time"/"event"
# appear to need the JSON frame instead -- confirm the intended run order.
data = pd.read_csv(
    "https://raw.githubusercontent.com/austinconnor/MinecraftCollab/Data-Plugin/worldData_move.csv"
).convert_dtypes()

In [63]:
# Show the loaded frame (the repr is abbreviated to the first and last rows).
data

Unnamed: 0,_id,event,location.x,location.y,location.z,player,worldName,worldTime
0,61671d6b0bec634729e689ba,PlayerMoveEvent,-83,67,2,1d12324d5be14676989de8ac05a3ea9c,world,328605
1,61671d6b0bec634729e689bd,PlayerMoveEvent,-85,67,3,1d12324d5be14676989de8ac05a3ea9c,world,328616
2,61671d6b0bec634729e689c1,PlayerMoveEvent,-88,67,4,1d12324d5be14676989de8ac05a3ea9c,world,328629
3,61671d6b0bec634729e689c4,PlayerMoveEvent,-90,67,5,1d12324d5be14676989de8ac05a3ea9c,world,328639
4,61671d6b0bec634729e689b8,PlayerMoveEvent,-81,67,2,1d12324d5be14676989de8ac05a3ea9c,world,328597
...,...,...,...,...,...,...,...,...
249995,6168f514deff395077010ed2,PlayerMoveEvent,-129,74,-94,6e973a24dca653c6017e821e2702894d,world,2750005
249996,6168f514deff395077010ed7,PlayerMoveEvent,1207,72,35,770b6ee2bf3fca8e3d3c5ccfe93f835a,world,2750007
249997,6168f514deff395077010eda,PlayerMoveEvent,1207,71,35,770b6ee2bf3fca8e3d3c5ccfe93f835a,world,2750009
249998,6168f514deff395077010edf,PlayerMoveEvent,22,64,11,134bce0ff5a216d08ff6a074da019a42,world,2750011


In [None]:
# Write a reduced copy of the data (player plus x/y/z, rows containing missing
# values dropped) to ../../data/small_epilog.json.
# NOTE(review): selects "x"/"y"/"z", while the CSV frame displayed above has
# "location.x" etc. -- presumably meant for the epilog_data.json frame;
# confirm which `data` is bound before running.
data[["player", "x", "y", "z"]].dropna().to_json("../../data/small_epilog.json")

In [None]:
# TODO: make three columns (x, y, z) per player and rename them to include the player name, e.g. x_<playername>.

In [85]:
# Keep only the player id and the three coordinate columns. The explicit
# `.copy()` makes `new_data` an independent frame, so the in-place column
# assignment done on it later cannot write to (or warn about) `data`.
new_data = data.loc[:, ["player", "location.x", "location.y", "location.z"]].copy()

In [88]:
# Distinct values of the "player" column of `new_data`, as a plain Python list.
list_data = list(new_data["player"].unique())

In [89]:
def list_to_dic(lst):
    """Map each element of ``lst`` to its position in the sequence.

    Args:
        lst: Iterable of hashable items.

    Returns:
        A dict ``{item: index}``. When an item occurs more than once, the
        index of its last occurrence is kept.
    """
    # enumerate() pairs items with their positions directly, replacing the
    # separate index list, the zip and the positional re-indexing.
    return {item: position for position, item in enumerate(lst)}

In [90]:
# Lookup table from each distinct player value to an integer code.
data_mapping = list_to_dic(list_data)

In [91]:
# Replace the player values with their integer codes from `data_mapping`.
# NOTE(review): the column is overwritten in place, so re-running this cell
# maps the already-encoded values through `data_mapping` again -- presumably
# producing missing values; rebuild `new_data` before re-executing.
new_data['player'] = new_data['player'].map(data_mapping)

In [92]:
# Show the frame after the player column has been mapped to integer codes.
new_data

Unnamed: 0,player,location.x,location.y,location.z
0,0,-83,67,2
1,0,-85,67,3
2,0,-88,67,4
3,0,-90,67,5
4,0,-81,67,2
...,...,...,...,...
249995,17,-129,74,-94
249996,13,1207,72,35
249997,13,1207,71,35
249998,12,22,64,11


In [93]:
# Distinct values of the (now mapped) "player" column, as a plain Python list.
new_list_data = list(new_data["player"].unique())

In [None]:
# The loop header previously had neither a colon nor a body (a SyntaxError).
# TODO: fill in the per-player processing; until then the loop is a no-op.
for i in new_list_data:
    pass

In [14]:
def chunk_play_sequences(
    df: pd.DataFrame,
    play_seqn: List[Tuple],
    cols: List = ["time", "x", "y", "z"]
) -> List[pd.DataFrame]:
    """Cut ``df`` into one frame per play sequence, discarding empty ones.

    Args:
        df: Frame to cut up.
        play_seqn: Row selectors, each handed to ``df.loc`` in turn.
        cols: Column labels kept in every chunk.

    Returns:
        The selections that contain at least one row, in the order of
        ``play_seqn``.
    """
    chunks = []
    for selector in play_seqn:
        chunk = df.loc[selector, cols]
        # A selector that matches no rows contributes nothing to the result.
        if chunk.shape[0] > 0:
            chunks.append(chunk)
    return chunks

In [15]:
def get_player(
    df: pd.DataFrame,
    index: int,
    cols: List = ["time", "x", "y", "z"]
) -> List[pd.DataFrame]:
    """Return one player's rows split into per-session frames.

    Args:
        df: Event frame with at least ``"player"``, ``"x"`` and the columns
            named in ``cols`` (plus whatever ``get_play_sequence`` reads).
        index: Value of the ``"player"`` column to select.
        cols: Columns kept (and cast to ``int``) in each session frame.

    Returns:
        The non-empty session frames produced by ``chunk_play_sequences``.
    """
    rows = df.loc[df["player"] == index]

    # Session boundaries have to be derived while the login/quit rows are
    # still present: those events carry NaN coordinates and are removed by
    # the filter on "x" just below.
    sessions = get_play_sequence(rows)

    located = rows.loc[rows["x"].notna()]
    located = located.loc[:, cols].astype(int)

    return chunk_play_sequences(located, sessions, cols)

In [16]:
def get_play_sequence(df: pd.DataFrame) -> List[Tuple]:
    """Pair up login and quit events into index ranges, one per play session.

    Args:
        df: Event rows with an ``"event"`` column containing
            ``"PlayerLoginEvent"`` / ``"PlayerQuitEvent"`` markers.

    Returns:
        A list of ``slice(login_label, quit_label, 1)`` objects built from the
        index labels of the paired events. The list is empty when there is no
        complete login/quit pair (previously this raised ``IndexError``).
    """
    login_event = "PlayerLoginEvent"
    quit_event = "PlayerQuitEvent"  # renamed local: no longer shadows the builtin `quit`

    logins = df.loc[df["event"] == login_event].index.tolist()
    logoff = df.loc[df["event"] == quit_event].index.tolist()

    # A quit that precedes the first login has no matching login: drop it.
    if logins and logoff and logins[0] > logoff[0]:
        logoff = logoff[1:]
    # A login after the last quit has no matching quit: drop it.
    # The emptiness guards matter here because the step above may have
    # consumed the only quit event.
    if logins and logoff and logins[-1] > logoff[-1]:
        logins = logins[:-1]

    # zip() stops at the shorter list, so an empty side yields no sessions.
    return [slice(start, stop, 1) for start, stop in zip(logins, logoff)]

In [17]:
# Distinct, non-missing player ids. Dropping NA on the Series before calling
# unique() works for every column dtype; unique() on its own returns a plain
# NumPy array for NumPy-backed columns, and ndarray has no .dropna().
player_ls = data["player"].dropna().unique()

# One entry per player: the list of session frames returned by get_player.
players = {player: get_player(data, player) for player in player_ls}

In [23]:
# Inspect the per-player mapping of session frames.
players

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [24]:
def find_overlapping_play(ps: Dict[int, List[pd.DataFrame]]) -> pd.DataFrame:
    """Find session pairs of two different players that share a timestamp.

    Args:
        ps: Mapping from player id to that player's list of session frames;
            every frame must provide a ``"time"`` column.

    Returns:
        A DataFrame with one row per overlapping pair of sessions and the
        columns ``p1``, ``p2`` (player ids, ``p1 < p2``) and ``p1_idx``,
        ``p2_idx`` (positions of the sessions inside ``ps[p1]`` / ``ps[p2]``).
        The columns are present even when no overlap is found.
    """
    columns = ["p1", "p2", "p1_idx", "p2_idx"]
    overlaps = []

    # Each unordered pair of players exactly once. The `a < b` filter already
    # excludes self-pairs and mirrored pairs, so no further check is needed
    # inside the loop.
    pairs = [(a, b) for a, b in itertools.product(ps.keys(), repeat=2) if a < b]
    for p1, p2 in tqdm(pairs, total=len(pairs), desc="Overlapping Play"):
        p1_playtime = [p["time"] for p in ps[p1]]
        p2_playtime = [p["time"] for p in ps[p2]]

        session_pairs = itertools.product(enumerate(p1_playtime), enumerate(p2_playtime))
        for (p1_idx, p1_time), (p2_idx, p2_time) in session_pairs:
            # Two sessions overlap when at least one timestamp occurs in both.
            if p1_time.isin(p2_time).any():
                overlaps.append({"p1": p1, "p2": p2, "p1_idx": p1_idx, "p2_idx": p2_idx})

        # TODO find overlap in play times
        # Ideally, you know who intersects and when ~ (player1, player2) @ (time12, time4)

    # Passing `columns` keeps the schema stable for an empty result.
    return pd.DataFrame(overlaps, columns=columns)

In [25]:
# Compare every pair of players; the recorded run of this cell took roughly
# three minutes for 946 pairs.
overlapping_play = find_overlapping_play(players)

Overlapping Play: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 946/946 [02:56<00:00,  5.36it/s]


In [26]:
# Render the overlap table with the notebook's rich display.
display(overlapping_play)

Unnamed: 0,p1,p2,p1_idx,p2_idx
0,1,3,1,2
1,1,3,2,2
2,1,3,3,3
3,1,3,4,9
4,1,3,4,10
...,...,...,...,...
1244,2050,2127,1,3
1245,3793,3818,1,1
1246,3793,3818,2,1
1247,3818,3942,1,0


In [27]:
# Show the first eleven rows of the overlap table, selected by position.
display(overlapping_play.iloc[:11])

Unnamed: 0,p1,p2,p1_idx,p2_idx
0,1,3,1,2
1,1,3,2,2
2,1,3,3,3
3,1,3,4,9
4,1,3,4,10
5,1,3,22,12
6,1,3,22,13
7,1,3,23,12
8,1,3,23,13
9,1,3,34,28
