# EGN 4060 - Spring 2021
# Final Project: Data Analysis over HeapCraft Epilog Plugin

Import all libraries in the cell below.

In [None]:
import itertools
from typing import Dict, List, Tuple, Union
from pprint import pprint

import copy
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from tqdm import tqdm

Load the JSON file into a pandas DataFrame, then convert each column to its best-suited dtype.

If you are using this notebook with the newly collected data, you will need to change this cell to read CSV instead of JSON and rename the x, y, z, and time columns.

In [None]:
# The export is read with one JSON record per line (`lines=True`); the dtype
# conversion is chained straight onto the freshly loaded frame.
data = pd.read_json("data/epilog_data.json", lines=True).convert_dtypes()

In [None]:
def chunk_play_sequences(
    df: pd.DataFrame,
    play_seqn: List[slice],
    cols: Union[List[str], None] = None
) -> List[pd.DataFrame]:
    """Split ``df`` into one DataFrame per play session.

    Args:
        df: Event rows to cut up.
        play_seqn: Row selectors, one per session; each is handed to
            ``df.loc`` as the row indexer.
        cols: Columns kept in every chunk. Defaults to
            ``["time", "x", "y", "z"]``.

    Returns:
        The chunks that contain at least one row, in ``play_seqn`` order.
    """
    if cols is None:
        # Built per call so no list object is shared between invocations.
        cols = ["time", "x", "y", "z"]

    chunks = (df.loc[sequence, cols] for sequence in play_seqn)
    # Sessions that selected no rows are dropped.
    return [chunk for chunk in chunks if chunk.shape[0] > 0]

In [None]:
def get_player(
    df: pd.DataFrame,
    index: int,
    cols: Union[List[str], None] = None
) -> List[pd.DataFrame]:
    """Return one DataFrame of integer-cast columns per play session of a player.

    Args:
        df: Full event log; must contain "player", "event" and "x" columns
            plus every column named in ``cols``.
        index: Value of the "player" column identifying the player.
        cols: Columns to keep. Defaults to ``["time", "x", "y", "z"]``.

    Returns:
        The non-empty per-session chunks produced by
        ``chunk_play_sequences``.
    """
    if cols is None:
        # Built per call so no list object is shared between invocations.
        cols = ["time", "x", "y", "z"]

    player = df.loc[df["player"] == index]  # Rows belonging to this player

    # Find [LoginEvent, QuitEvent] ranges for player
    play_sequence = get_play_sequence(player)

    # Drop NaN Coordinates __after__ determining `play_sequences`
    #   b/c PlayerLoginEvent and PlayerQuitEvent have NaN Coordinates
    player = player.loc[player["x"].notna()]
    player = player.loc[:, cols].astype(int)

    return chunk_play_sequences(player, play_sequence, cols)

In [None]:
def get_play_sequence(df: pd.DataFrame) -> List[slice]:
    """Pair a player's login and quit events into row-label slices.

    Args:
        df: Event rows with an "event" column.

    Returns:
        One ``slice(login_label, quit_label, 1)`` per paired login/quit,
        in order of appearance. An empty list is returned when there are
        no login events, no quit events, or nothing left to pair.
    """
    login_event = "PlayerLoginEvent"
    quit_event = "PlayerQuitEvent"

    logins = df.loc[df["event"] == login_event].index.tolist()
    logoffs = df.loc[df["event"] == quit_event].index.tolist()

    # A quit that precedes the first login has no login to pair with.
    if logins and logoffs and logins[0] > logoffs[0]:
        logoffs = logoffs[1:]
    # A login that follows the last quit has no quit to pair with.
    # Re-check emptiness: the trim above may have removed the only quit.
    if logins and logoffs and logins[-1] > logoffs[-1]:
        logins = logins[:-1]

    # zip() stops at the shorter list, so an empty side yields no slices.
    return [slice(start, stop, 1) for start, stop in zip(logins, logoffs)]

In [None]:
def get_playtime(players: Dict) -> pd.DataFrame:
    """Summarise every play session as one row.

    Args:
        players: Mapping of player id to that player's list of session
            DataFrames; each session needs a "time" column, which is
            interpreted as milliseconds.

    Returns:
        DataFrame with columns "player", "n_events" (rows in the session)
        and "walltime" (latest minus earliest timestamp). Rows whose
        walltime is missing are dropped. The three columns are present
        even when there are no sessions at all.
    """
    columns = ["player", "n_events", "walltime"]
    rows = []

    for player, instances in players.items():
        for instance in instances:
            walltime = pd.to_datetime(instance["time"], unit="ms")
            rows.append({
                "player": player,
                "n_events": len(instance),
                "walltime": walltime.max() - walltime.min(),
            })

    # Naming the columns keeps "walltime" present for an empty `rows`, so the
    # dropna below cannot fail on a missing column.
    df = pd.DataFrame(rows, columns=columns)
    return df.dropna(subset=["walltime"])

In [None]:
# Distinct values of the "player" column with the missing entry removed.
player_ls = data["player"].unique().dropna()

# Player id -> list of that player's per-session DataFrames.
players = dict(
    (player, get_player(data, player))
    for player in player_ls
)

Compute how long each player played in every session, then analyze the results.

In [None]:
# One row per play session: player id, number of events, and wall-clock duration.
playtime_df = get_playtime(players)

In [None]:
# Per-player summary statistics of session durations.
(
    playtime_df
    .groupby("player")["walltime"]
    .describe()
)

After analyzing the data, remove the most inactive players from the DataFrame by dropping those who logged in only once and therefore have no defined standard deviation of playtime.

In [None]:
# Keep only players whose session-duration summary has a defined "std".
active_players = (
    playtime_df
    .groupby("player")["walltime"]
    .describe()
    .dropna(subset=["std"])
)
active_players

In [None]:
def find_overlapping_play(ps: Dict[int, List[pd.DataFrame]]) -> pd.DataFrame:
    """List the session pairs of different players that share timestamps.

    Two sessions count as overlapping when at least one value in the first
    session's "time" column also occurs in the second session's "time"
    column (exact-value match, not an interval test).

    Args:
        ps: Mapping of player id to that player's list of session
            DataFrames, each with a "time" column.

    Returns:
        DataFrame with columns "p1", "p2" (player ids, ``p1 < p2``) and
        "p1_idx", "p2_idx" (positions of the sessions in each player's
        list). The columns are present even when no overlap is found.
    """
    columns = ["p1", "p2", "p1_idx", "p2_idx"]
    overlaps = []

    # Every unordered pair of players exactly once, smaller id first.
    pairs = [(p1, p2) for p1 in ps for p2 in ps if p1 < p2]
    for p1, p2 in tqdm(pairs, total=len(pairs), desc="Overlapping Play"):
        # Looked up once per pair rather than once per p1 session.
        p2_playtime = [session["time"] for session in ps[p2]]

        for p1_idx, p1_session in enumerate(ps[p1]):
            p1_time = p1_session["time"]
            for p2_idx, p2_time in enumerate(p2_playtime):
                if p1_time.isin(p2_time).any():
                    overlaps.append(
                        {"p1": p1, "p2": p2, "p1_idx": p1_idx, "p2_idx": p2_idx}
                    )

        # TODO find overlap in play times
        # Ideally, you know who intersects and when ~ (player1, player2) @ (time12, time4)

    return pd.DataFrame(overlaps, columns=columns)

In [None]:
# Session pairs, across different players, whose "time" values intersect.
overlapping_play = find_overlapping_play(players)

In [None]:
# NOTE(review): `display` is not imported in this notebook; presumably the
# IPython/Jupyter built-in — confirm when running outside a notebook.
display(overlapping_play)

In [None]:
# `dfna` is not defined anywhere in the visible notebook, so this cell raised
# NameError; it now queries the loaded `data` frame instead.
# NOTE(review): confirm `data` is the frame this exploratory cell was meant for.
data.loc[data["event"] == "SheepDyeWoolEvent"]

In [None]:
# `dfn` is not defined anywhere in the visible notebook, so this cell raised
# NameError; it now lists the distinct player ids of the loaded `data` frame.
# NOTE(review): confirm `data` is the frame this exploratory cell was meant for.
data["player"].unique()

In [None]:
# Raise pandas' display precision to 20 so rendered floating-point values are
# not rounded to the default number of places.
pd.set_option("display.precision", 20)

In [None]:
# display(overlapping_play[0:11])

In [None]:
def make_scatter3d(
    dfs: Dict[str, pd.DataFrame],
    x: str = "x",
    y: str = "y",
    z: str = "z",
    color: str = "time"
) -> go.Figure:
    """Build a 3D scatter figure with one trace per DataFrame.

    Args:
        dfs: Mapping of label to DataFrame; the label becomes the text of
            every point in that frame's trace.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        z: Column plotted on the z axis.
        color: Column whose values colour the markers.

    Returns:
        A 1600x1200 figure. It is returned without being shown.

    Raises:
        TypeError: If ``dfs`` is not a dict.
    """
    # An explicit raise survives `python -O`, and isinstance also accepts
    # dict subclasses.
    if not isinstance(dfs, dict):
        raise TypeError(
            f"dfs must be a dict of DataFrames, got {type(dfs).__name__}"
        )

    # Named `traces` so the module-level `data` DataFrame is not shadowed.
    traces = []
    for key, df in dfs.items():
        scatter = go.Scatter3d(
            x=df[x], y=df[y], z=df[z],
            marker={
                "size": 3,
                "color": df[color],
                "colorscale": "Rainbow",
                "opacity": 0.8
            },
            line=dict(
                color='darkblue',
                width=2
            ),
            # Same label repeated once per point.
            text=[key] * len(df)
        )
        traces.append(scatter)

    fig = go.Figure(data=traces)
    fig.update_layout(width=1600, height=1200)
    return fig

In [None]:
# CREATES FIGURE 2

# scatterplayers1 = {
#     "p1-l1": players[1][6],
# }
# fig = make_scatter3d(scatterplayers1, "x", "y", "z", "time")
# fig.update_layout(title="Figure 2: Player 1, instance 6")
# fig.write_html("figure2.html")

In [None]:
# CREATES FIGURE 3

# scatterplayers1 = {
#     "p1-l2": players[1][1],
#     "p1-l3": players[1][2],
#     "p1-l4": players[1][3],
#     "p1-l5": players[1][4],
#     "p1-l23": players[1][22],
#     "p1-l24": players[1][23],
#     "p1-l35": players[1][34],
#     "p1-l36": players[1][35],
# }
# fig = make_scatter3d(scatterplayers1, "x", "y", "z", "time")
# fig.update_layout(title="Figure 3: All instances Player 1 overlapped with Player 3")
# fig.write_html("figure3.html")

In [None]:
# CREATES FIGURE 4

# scatterplayers3 = {
#     "p3-l3": players[3][2],
#     "p3-l4": players[3][3],
#     "p3-l10": players[3][9],
#     "p3-l11": players[3][10],
#     "p3-l13": players[3][12],
#     "p3-l29": players[3][28],
# }
# fig = make_scatter3d(scatterplayers3, "x", "y", "z", "time")
# fig.update_layout(title="Figure 4: All instances Player 3 overlapped with Player 1")
# fig.write_html("figure4.html")

In [None]:
# CREATES FIGURE 5

# scatterplayers1_3 = {
#     "p1-l2": players[1][1],
#     "p1-l3": players[1][2],
#     "p1-l4": players[1][3],
#     "p1-l5": players[1][4],
#     "p1-l23": players[1][22],
#     "p1-l24": players[1][23],
#     "p1-l35": players[1][34],
#     "p1-l36": players[1][35],
#     "p3-l3": players[3][2],
#     "p3-l4": players[3][3],
#     "p3-l10": players[3][9],
#     "p3-l11": players[3][10],
#     "p3-l13": players[3][12],
#     "p3-l29": players[3][28]
# }
# fig = make_scatter3d(scatterplayers1_3, "x", "y", "z", "time")
# fig.update_layout(title="Figure 5: All instances with both Players 1 and 3, together")
# fig.write_html("figure5.html")