In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import fnmatch
from IPython.display import Markdown as md
import plotly.graph_objects as go
import plotly.express as px
import math
import random
from dask import dataframe as dd
import pickle

In [2]:
# Display up to 200 columns / 200 rows and never truncate the text inside a cell.
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_colwidth", None)

In [3]:
# Locations used throughout the notebook (relative to the notebook's working directory).
DATAPATH = Path("../../data/")            # input CSVs; event files live in one sub-folder per world
DOCSPATH = Path("../../docs/data/")       # output: per-player CSV chunks
PLOTSPATH3D = Path("../../docs/plots/3d/")  # output: 3D plot HTML
PLOTSPATH2D = Path("../../docs/plots/2d/")  # output: 2D plot HTML

In [4]:
# Using Dask's DataFrames to glob the overworld event CSVs under `DATAPATH` and
# collect every distinct value of their `player` column.
# NOTE(review): only `overworld/*.csv` is scanned; a player that occurs solely in
# another world's files would be missing from `playerindex` - confirm this cannot happen.
unique_players = dd.read_csv(f"{DATAPATH}/overworld/*.csv")["player"].unique().compute()

# One row per player. `pindex` is numbered 0..n-1 explicitly (in the order the
# players were returned) rather than read off the computed Series' index, because
# it is later used as a position into `colors`.
ddf = pd.DataFrame({
    "player": unique_players.values,
    "pindex": np.arange(len(unique_players)),
})

# Lookup with entries like:
#   "<hash>": <index>,
playerindex = dict(zip(ddf["player"], ddf["pindex"]))

# NOTE then you can do something like...
#     player2index = dict(pd.read_csv(datapath / "playerindex.csv"))
#   and you'll have a dictionary that does the correct remapping

# Fixed seed so every player keeps the same colour across kernel restarts and
# across the separately generated plots.
COLOR_SEED = 0


def get_colors(n, seed=COLOR_SEED):
    """Return `n` pseudo-random colours as "#rrggbb" hex strings.

    Args:
        n: Number of colours to generate.
        seed: Seed for a private `random.Random`; the global `random` state is
            left untouched. The same seed always yields the same colours.

    Returns:
        A list of `n` strings such as "#1a2b3c".
    """
    rng = random.Random(seed)
    return ["#%06x" % rng.randint(0, 0xFFFFFF) for _ in range(n)]


# colors[pindex] is the marker colour of that player.
colors = get_colors(len(ddf))

In [5]:
def get_every(df, n):
    """Down-sample `df` to the rows whose index label is a multiple of `n`.

    Selection is by index label (not by position), so the result is a regular
    every-n-th-row sample only when the index is the default 0..len-1 range.
    """
    remainder = df.index % n
    on_stride = remainder == 0
    return df.loc[on_stride]

In [6]:
def stringify_attrs(row, cols: dict) -> str:
    """Format one event record as hover text, one "<title>: <value>" line per column.

    Args:
        row: Record indexable by column name (e.g. a DataFrame row).
        cols: Maps source column name -> display title; its iteration order is
            the order of the output lines.

    Returns:
        The formatted lines joined with newlines.
    """
    return "\n".join([f"{title}: {row[col]}" for col, title in cols.items()])


# Events whose hover text is just "Day <n>".
_DAY_ONLY_EVENTS = frozenset({
    "PlayerChangedWorldEvent", "PlayerDeathEvent", "PlayerRespawnEvent",
    "PlayerEggThrowEvent", "PlayerQuitEvent", "PlayerJoinEvent",
    "PlayerShearEntityEvent",
})

# Event name -> {source column: display title} for events with richer hover text.
_BLOCK_COLS = {"day": "Day", "block": "Block"}
_BUCKET_COLS = {"day": "Day", "contents": "Bucket Contents"}
_LEASH_COLS = {"day": "Day", "entity": "Entity"}
_EVENT_HOVER_COLUMNS = {
    "BlockPlaceEvent": _BLOCK_COLS,
    "BlockBreakEvent": _BLOCK_COLS,
    "PlayerBucketFillEvent": _BUCKET_COLS,
    "PlayerBucketEmptyEvent": _BUCKET_COLS,
    "PlayerLeashEntityEvent": _LEASH_COLS,
    "PlayerUnleashEntityEvent": _LEASH_COLS,
    "EntityDamageByEntityEvent": {"day": "Day", "damaged": "Damaged", "cause": "Cause", "weapon": "Weapon Used"},
    "PlayerDropItemEvent": {"day": "Day", "itemDrop": "Item Dropped"},
    "PlayerInteractEvent": {"day": "Day", "itemHeld": "Item Held", "action": "Action"},
    "ProjectileHitEvent": {"day": "Day", "weapon": "Weapon Used", "target": "Target"},
}


def make_metadata(group, event_name):
    """Build the per-point hover text for one player's rows of one event type.

    Args:
        group: DataFrame of events; must contain a "day" column plus whichever
            columns the event type displays.
        event_name: Event type name, e.g. "BlockBreakEvent".

    Returns:
        - a Series of "Day <n>" strings for the day-only events,
        - a list of multi-line strings (one per row) for events with extra
          attributes,
        - None for any other event name (the trace then carries no hover text).
    """
    if event_name in _DAY_ONLY_EVENTS:
        return group["day"].apply(lambda day: f"Day {day}")

    cols = _EVENT_HOVER_COLUMNS.get(event_name)
    if cols is None:
        return None

    # Select the columns with a list of names: indexing a DataFrame with the
    # dict itself raises TypeError on pandas >= 2.0.
    return group[list(cols)].apply(lambda row: stringify_attrs(row, cols), axis=1).values.tolist()

In [7]:
# %% Convert a Player into a Scatter3D trace if dim == True, else make 2D plot
def player2trace(player, group, event_name, dim):
    """Turn one player's events into a plotly marker trace.

    Args:
        player: The player's integer index; also used to look up the marker
            colour in the module-level `colors` list.
        group: DataFrame with that player's rows ("x", "z" and, for 3D, "y").
        event_name: Event type, forwarded to `make_metadata` for the hover text.
        dim: `True` for a `go.Scatter3d` trace, anything else for a 2D `go.Scatter`.

    Returns:
        The constructed trace.
    """
    # Everything except the coordinates is the same for both trace kinds.
    shared = {
        "mode": "markers",
        "marker": {"size": 3, "color": colors[player], "opacity": 0.8},
        "name": f"Player {player}",
        "text": make_metadata(group, event_name),
    }

    if dim == True:
        # The data's "z" column goes on the plot's y axis and its "y" column on
        # the plot's z axis.
        return go.Scatter3d(x=group["x"], y=group["z"], z=group["y"], **shared)

    return go.Scatter(x=group["x"], y=group["z"], **shared)

In [8]:
# %% Convert a DataFrame into a Plot
def df2plot(df, name, dim):
    """Build a 600x600 figure titled `name` with one trace per player.

    Args:
        df: Event DataFrame containing a "pindex" column.
        name: Event type; used as the figure title and for the hover text.
        dim: Forwarded to `player2trace` (True -> 3D traces, otherwise 2D).

    Returns:
        The plotly `go.Figure`.
    """
    # For every player in the dataframe, convert them into a plotly trace
    per_player_traces = [
        player2trace(pindex, rows, name, dim)
        for pindex, rows in df.groupby("pindex")
    ]

    figure = go.Figure(data=per_player_traces)
    figure.update_layout(title={"text": name}, width=600, height=600)
    return figure

In [9]:
# %% Preprocess the DataFrame
def preprocess_df(file):
    """Load one event CSV and add the columns the plotting code relies on.

    Adds "pindex" (player hash translated through the module-level `playerindex`
    dict; values without an entry are left as they are), shortens the location /
    time column names, and derives an integer "day" column from "time".

    Args:
        file: Path of the CSV to read.

    Returns:
        The prepared DataFrame.
    """
    raw = pd.read_csv(file)

    # Use the Player Index created by Dask (above) to do the mappings
    raw["pindex"] = raw["player"].replace(playerindex)

    short_names = {
        "location.x": "x",
        "location.y": "y",
        "location.z": "z",
        "worldTime": "time",
    }
    events = raw.rename(columns=short_names)

    # 1728000 == 20 * 86400 - presumably 20 ticks per second times the seconds
    # in a day; TODO confirm the unit of `worldTime`.
    elapsed_days = events["time"] / 1728000
    events["day"] = np.floor(elapsed_days).astype(int)

    return events

In [10]:
# %% Chunk events by Players
def chunk_by_player(event_path, df, name):
    """Write one CSV per player into `event_path` and hand `df` back unchanged.

    Each file is named "<pindex>_<name>.csv" and holds that player's rows, with
    the original row labels kept as a leading "index" column.

    Args:
        event_path: Existing directory (a `Path`) to write into.
        df: Event DataFrame containing a "pindex" column.
        name: Event type, used in the file names.

    Returns:
        The same `df` object that was passed in.
    """
    for pindex, player_rows in df.groupby("pindex"):
        target = event_path.joinpath(f"{pindex}_{name}.csv")

        # Comment line below if you don't want to save files to CSV
        player_rows.reset_index().to_csv(target, index=False)

    return df

In [13]:
# %% Preprocess & Plot the Data
def preprocess_and_plot_data(world, dim, save=False, show=False):
    """Preprocess, chunk and plot every event CSV of one world.

    For each `*.csv` in `DATAPATH / world` (processed in sorted order so runs
    are reproducible) this:
      1. displays a markdown heading with the event name,
      2. loads the file with `preprocess_df`,
      3. writes the per-player CSV chunks to `DOCSPATH / world / <event>`,
      4. thins out the densest event types, and
      5. builds the figure with `df2plot`.

    Args:
        world: Sub-folder name under `DATAPATH`, e.g. "overworld".
        dim: `True` for 3D plots, anything else for 2D plots.
        save: When true, write each figure to
            `<PLOTSPATH3D or PLOTSPATH2D>/<world>/<event>.html` (the folder is
            created if needed). Defaults to False: nothing is written.
        show: When true, display each figure inline. Defaults to False.

    Returns:
        None.
    """
    # Keep only every n-th row of the densest event types so the figures stay
    # manageable; event types not listed are plotted in full.
    stride_3d = {"PlayerMoveEvent": 30, "EntityDamageByEntityEvent": 40}
    stride_2d = {"PlayerMoveEvent": 60, "EntityDamageByEntityEvent": 60}

    # Same equality test that player2trace applies to `dim`, so the output
    # folder and the trace kind always agree.
    is_3d = dim == True
    strides = stride_3d if is_3d else stride_2d
    plots_root = PLOTSPATH3D if is_3d else PLOTSPATH2D

    # NOTE this works iff you're only looking for CSVs.
    for file in sorted((DATAPATH / world).glob("*.csv")):
        name = file.stem
        display(md(f"## {name}"))
        df = preprocess_df(file)

        event_path = DOCSPATH / world / name
        event_path.mkdir(parents=True, exist_ok=True)
        df = chunk_by_player(event_path, df, name)

        if name in strides:
            df = get_every(df, strides[name])

        fig = df2plot(df, name, dim)

        if save:
            plot_dir = plots_root / world
            plot_dir.mkdir(parents=True, exist_ok=True)
            fname = str(plot_dir / name) + ".html"
            fig.write_html(fname, full_html=False, include_plotlyjs="cdn")

        if show:
            fig.show()

# 3D Plots

To generate the 3D plots for a world, call:
`preprocess_and_plot_data(world_name, True)`

## Overworld

In [14]:
preprocess_and_plot_data("overworld", dim=True)

## ProjectileHitEvent

## PlayerQuitEvent

## PlayerBedLeaveEvent

## PlayerBedEnterEvent

## PlayerBucketEmptyEvent

## PlayerDeathEvent

## PlayerMoveEvent

## PlayerBucketFillEvent

## PlayerDropItemEvent

## PlayerInteractEvent

## PlayerLeashEntityEvent

## PlayerChangedWorldEvent

## PlayerRespawnEvent

## EntityDamageByEntityEvent

## PlayerEggThrowEvent

## PlayerUnleashEntityEvent

## BlockPlaceEvent

## BlockBreakEvent

## PlayerJoinEvent

## PlayerShearEntityEvent

## EnchantItemEvent

## Nether

In [15]:
preprocess_and_plot_data("nether", dim=True)

## ProjectileHitEvent

## PlayerQuitEvent

## PlayerBedLeaveEvent

## PlayerBedEnterEvent

## PlayerBucketEmptyEvent

## PlayerDeathEvent

## PlayerMoveEvent

## PlayerBucketFillEvent

## PlayerDropItemEvent

## PlayerInteractEvent

## PlayerLeashEntityEvent

## PlayerChangedWorldEvent

## PlayerRespawnEvent

## EntityDamageByEntityEvent

## PlayerEggThrowEvent

## PlayerUnleashEntityEvent

## BlockPlaceEvent

## BlockBreakEvent

## PlayerJoinEvent

## PlayerShearEntityEvent

## EnchantItemEvent

## End

In [16]:
preprocess_and_plot_data("end", dim=True)

## ProjectileHitEvent

## PlayerQuitEvent

## PlayerBedLeaveEvent

## PlayerBedEnterEvent

## PlayerBucketEmptyEvent

## PlayerDeathEvent

## PlayerMoveEvent

## PlayerBucketFillEvent

## PlayerDropItemEvent

## PlayerInteractEvent

## PlayerLeashEntityEvent

## PlayerChangedWorldEvent

## PlayerRespawnEvent

## EntityDamageByEntityEvent

## PlayerEggThrowEvent

## PlayerUnleashEntityEvent

## BlockPlaceEvent

## BlockBreakEvent

## PlayerJoinEvent

## PlayerShearEntityEvent

## EnchantItemEvent

# 2D Plots

To generate the 2D plots for a world, call:
`preprocess_and_plot_data(world_name, False)`

## Overworld

In [17]:
preprocess_and_plot_data("overworld", dim=False)

## ProjectileHitEvent

## PlayerQuitEvent

## PlayerBedLeaveEvent

## PlayerBedEnterEvent

## PlayerBucketEmptyEvent

## PlayerDeathEvent

## PlayerMoveEvent

## PlayerBucketFillEvent

## PlayerDropItemEvent

## PlayerInteractEvent

## PlayerLeashEntityEvent

## PlayerChangedWorldEvent

## PlayerRespawnEvent

## EntityDamageByEntityEvent

## PlayerEggThrowEvent

## PlayerUnleashEntityEvent

## BlockPlaceEvent

## BlockBreakEvent

## PlayerJoinEvent

## PlayerShearEntityEvent

## EnchantItemEvent

## Nether

In [18]:
preprocess_and_plot_data("nether", dim=False)

## ProjectileHitEvent

## PlayerQuitEvent

## PlayerBedLeaveEvent

## PlayerBedEnterEvent

## PlayerBucketEmptyEvent

## PlayerDeathEvent

## PlayerMoveEvent

## PlayerBucketFillEvent

## PlayerDropItemEvent

## PlayerInteractEvent

## PlayerLeashEntityEvent

## PlayerChangedWorldEvent

## PlayerRespawnEvent

## EntityDamageByEntityEvent

## PlayerEggThrowEvent

## PlayerUnleashEntityEvent

## BlockPlaceEvent

## BlockBreakEvent

## PlayerJoinEvent

## PlayerShearEntityEvent

## EnchantItemEvent

## End

In [19]:
preprocess_and_plot_data("end", dim=False)

## ProjectileHitEvent

## PlayerQuitEvent

## PlayerBedLeaveEvent

## PlayerBedEnterEvent

## PlayerBucketEmptyEvent

## PlayerDeathEvent

## PlayerMoveEvent

## PlayerBucketFillEvent

## PlayerDropItemEvent

## PlayerInteractEvent

## PlayerLeashEntityEvent

## PlayerChangedWorldEvent

## PlayerRespawnEvent

## EntityDamageByEntityEvent

## PlayerEggThrowEvent

## PlayerUnleashEntityEvent

## BlockPlaceEvent

## BlockBreakEvent

## PlayerJoinEvent

## PlayerShearEntityEvent

## EnchantItemEvent

# Qualtrics Pre-Survey

In [None]:
# Load the raw Qualtrics export and discard the columns that are not used in
# the analysis below (timestamps, status/progress, IP address, recipient
# name/email, location, distribution info and one free-text field).
_qualtrics_unused_columns = [
    'StartDate', 'EndDate', 'Status', 'IPAddress', 'Progress',
    'Duration (in seconds)', 'Finished', 'RecordedDate', 'ResponseId',
    'RecipientLastName', 'RecipientFirstName', 'RecipientEmail',
    'ExternalReference', 'LocationLatitude', 'LocationLongitude',
    'DistributionChannel', 'UserLanguage', 'mc_dim_4_TEXT',
]
df_q = pd.read_csv("../../data/01-qualtrics.csv").drop(columns=_qualtrics_unused_columns)

In [None]:
# Row labels to remove from the export before saving the cleaned copy.
# NOTE(review): why these particular rows are excluded is not visible here
# (presumably header / test / incomplete responses) - confirm.
# WARNING: the index is reset afterwards, so re-running this cell on its own
# would drop a different set of rows; re-run from the load cell instead.
_excluded_rows = [0, 1, 2, 3, *range(47, 56), 58, 59, 60, *range(62, 72), 90, 106]
df_q = df_q.drop(index=_excluded_rows).reset_index(drop=True)
df_q.to_csv("../../data/02-qualtrics.csv", index=False)

In [None]:
# Remove the username / screening / consent columns, then give the age and
# expertise items the names used by the charts below.
df_q = (
    df_q
    .drop(columns=["username", "age_check", "agree_rules", "consent"])
    .reset_index(drop=True)
    .rename(columns={
        "age_1": "Age",
        "expertise_1": "overall_exp",
        "expertise_2": "single_exp",
        "expertise_3": "multi_exp",
    })
)

In [None]:
# `mc_act` holds comma-separated selections; count in how many rows each option occurs.
df_act = df_q["mc_act"].str.get_dummies(sep=",").sum(axis=0).to_frame(name="count")
df_act

In [None]:
# `mc_mode` holds comma-separated selections; count in how many rows each option occurs.
df_mode = df_q["mc_mode"].str.get_dummies(sep=",").sum(axis=0).to_frame(name="count")
df_mode

In [None]:
# `mc_level` holds comma-separated selections; count in how many rows each option occurs.
df_lvl = df_q["mc_level"].str.get_dummies(sep=",").sum(axis=0).to_frame(name="count")
df_lvl

In [None]:
# `study_ref` holds comma-separated selections; count in how many rows each option occurs.
df_ref = df_q["study_ref"].str.get_dummies(sep=",").sum(axis=0).to_frame(name="count")
df_ref

In [None]:
# Bar chart of the activity counts (ascending); written next to the other 2D
# plots with full_html=False and plotly.js referenced from the CDN, then shown inline.
fig = px.bar(df_act.sort_values("count"), x="count", title="Minecraft Activities")
fig.write_html(f"{PLOTSPATH2D}/mc_act.html", full_html=False, include_plotlyjs="cdn")
fig.show()

In [None]:
# Bar chart of the player-mode counts (ascending); written next to the other 2D
# plots with full_html=False and plotly.js referenced from the CDN, then shown inline.
fig = px.bar(df_mode.sort_values("count"), x="count", title="Minecraft Player Mode")
fig.write_html(f"{PLOTSPATH2D}/mc_mode.html", full_html=False, include_plotlyjs="cdn")
fig.show()

In [None]:
# Bar chart of the player-level counts (ascending); written next to the other 2D
# plots with full_html=False and plotly.js referenced from the CDN, then shown inline.
fig = px.bar(df_lvl.sort_values("count"), x="count", title="Minecraft Player Level")
fig.write_html(f"{PLOTSPATH2D}/mc_level.html", full_html=False, include_plotlyjs="cdn")
fig.show()

In [None]:
# Bar chart of the study-reference counts (ascending); written next to the other 2D
# plots with full_html=False and plotly.js referenced from the CDN, then shown inline.
fig = px.bar(df_ref.sort_values("count"), x="count", title="Minecraft Study Reference")
fig.write_html(f"{PLOTSPATH2D}/mc_study_ref.html", full_html=False, include_plotlyjs="cdn")
fig.show()

In [None]:
# Bar chart over the "Age" column of the survey frame; written next to the other
# 2D plots with full_html=False and plotly.js referenced from the CDN, then shown inline.
fig = px.bar(data_frame=df_q, x="Age", title="Player Age")
fig.write_html(f"{PLOTSPATH2D}/player_age.html", full_html=False, include_plotlyjs="cdn")
fig.show()

In [None]:
# Reshape the three expertise columns to long form (one row per answer, with
# the originating column in "type" and the answer in "level"), then swap the
# column names for display labels.
_exp_labels = {"overall_exp": "Overall", "single_exp": "Singleplayer", "multi_exp": "Multiplayer"}
df_exp = (
    df_q[["overall_exp", "single_exp", "multi_exp"]]
    .melt(var_name="type", value_name="level")
    .replace(_exp_labels)
)

# Grouped bars per level, coloured by experience type; saved with the other 2D plots.
fig = px.bar(
    df_exp.sort_values("level"),
    x="level",
    barmode="group",
    color="type",
    title="Player Experience",
)
fig.write_html(f"{PLOTSPATH2D}/player_exp.html", full_html=False, include_plotlyjs="cdn")
fig.show()

In [None]:
# Export the current survey frame (without its index) for the public site.
# NOTE(review): this writes under ../../public/data/, while the other outputs in
# this notebook go under ../../docs/ - confirm the folder is intended and exists.
# NOTE(review): assumes the cells above ran in order, so the "username" and
# consent columns have already been dropped - verify before publishing.
df_q.to_csv("../../public/data/01-qualtrics.csv", index=False)