In [None]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import fnmatch
from IPython.display import Markdown as md
import plotly.graph_objects as go
import plotly.express as px
import math
import random
from dask import dataframe as dd
import pickle

In [None]:
# Widen pandas' display limits so wide event frames are not truncated:
# up to 200 columns / 200 rows, and no limit on the width of a single cell.
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_colwidth", None)

In [None]:
# All locations are relative to the notebook's working directory.
_REPO_ROOT = Path("../..")
_DOCS_ROOT = _REPO_ROOT / "docs"

DATAPATH = _REPO_ROOT / "data"            # raw event CSVs, one sub-folder per world
DOCSPATH = _DOCS_ROOT / "data"            # processed CSVs
PLOTSPATH3D = _DOCS_ROOT / "plots" / "3d"
PLOTSPATH2D = _DOCS_ROOT / "plots" / "2d"

In [None]:
# Build a global "player hash -> integer index" lookup.
# Dask globs every event CSV below DATAPATH so the distinct `player` values can be
# collected without loading each file by hand.
all_events = dd.read_csv(f"{DATAPATH}/**/*.csv")
ddf = all_events["player"].unique().compute()

# The computed Series holds the hashes as values; its index is used as the numeric id.
# Put both into a two-column frame so the lookup reads "<hash>": <index>.
ddf = pd.DataFrame({"player": ddf.values, "pindex": ddf.index.values})
# ddf.to_csv(DATAPATH / "playerindex.csv", index=False)

playerindex = dict(zip(ddf["player"], ddf["pindex"]))

# NOTE if the frame above is saved to CSV, the same lookup can be rebuilt later with:
#     saved = pd.read_csv(DATAPATH / "playerindex.csv")
#     player2index = dict(zip(saved["player"], saved["pindex"]))

In [None]:
# def list_to_dic(lst):
#     index = [i for i in range(len(lst))]
#     zipped = list(zip(lst,index))
#     ret = {zipped[i][0]: zipped[i][1] for i in range(len(zipped))}
#     return ret

In [None]:
# def process_data(world):
#     basepath = '../../data/'
#     datapath = '../../docs/data/'
    
#     with os.scandir(basepath + world) as entries:
#         for entry in entries:
#             if entry.is_file():
#                 event_file = entry.name
#                 event_name = (os.path.splitext(entry.name)[0])

#                 event_df = pd.read_csv(basepath + world + event_file)
#                 event_df.drop(columns=['event'], inplace=True)
                
#                 unique_players = list(event_df['player'].unique())
#                 player_mapping = list_to_dic(unique_players)
#                 event_df['player'] = event_df['player'].map(player_mapping)
#                 unique_players = list(event_df['player'].unique())

#                 event_df.rename(columns={'location.x': 'x', 'location.y': 'y', 'location.z': 'z', 'worldTime': 'time'}, inplace=True)
#                 event_df['mc_day'] = (event_df['time'] / 24000).astype(int)
#                 display(md(f'## {event_name}'))
#                 # display(event_df)

#                 path = Path(datapath + world + event_name + '/')
#                 path.mkdir(exist_ok=True)
                

#                 if fnmatch.fnmatch(event_file, 'PlayerMoveEvent.csv') or fnmatch.fnmatch(event_file, 'EntityDamageByEntityEvent.csv'):
#                     make_csv_large(event_df, unique_players, datapath + world + event_name + '/', event_name)
#                 else:
#                     make_csv_small(event_df, unique_players, datapath + world + event_name + '/', event_name)
#                     # players_ls = make_ls_players(event_df, unique_players)
#                     # plot = make_player_plots(players_ls, color)
#                     # plot.show()
                

In [None]:
# def make_csv_large(event_df, players, path, event_name):
#     for player in players:
#         df = event_df[event_df['player'] == player]

#         df = df[df.index % 20 == 0]

#         filename = path + str(player) + '_' + event_name + '.csv'
#         df.to_csv(filename, index=False)

In [None]:
# def make_csv_small(event_df, players, path, event_name):
#     for player in players:
#         df = event_df[event_df['player'] == player]

#         filename = path + str(player) + '_' + event_name + '.csv'
#         df.to_csv(filename, index=False)

In [None]:
# def make_ls_players(event_df, players):
#     players_ls = []

#     for player in players:
#         df = event_df[event_df['player'] == player]
#         players_ls.append(df)
        
#     return players_ls

In [None]:
# def make_player_plots(players, random_color):
#     data = []
    
#     for df in players:
#         scatter = go.Scatter3d(
#             mode='markers',
#             x=df["x"], y=df["z"], z=df["y"],
#             name = 'Player' + df,
#             marker={
#                 "size": 3,
#                 "color": random_color, 
#                 # "colorscale": "Rainbow",
#                 "opacity": 0.8
#             },
#             # line=dict(
#             #     color='darkblue',
#             #     width=2
#             # ),
#             # text = [df for _ in range(len(df))]
#         )
#         data.append(scatter)
#     fig = go.Figure(data=data)
#     fig.update_layout(width=600, height=400)
# #     fig.show()
#     return fig

In [None]:
# def make_random_color():
#     random_color = []
    
#     for i in range(0, 40): 
#         r = str(math.floor(random.randrange(0, 256)))
#         g = str(math.floor(random.randrange(0, 256)))
#         b = str(math.floor(random.randrange(0, 256)))
#         random_color.append('rgb(' + r + ',' + g + ',' + b +')')
        
#     return random_color

In [None]:
def get_every_40(df):
    """Return the rows of ``df`` whose index label is a multiple of 40."""
    is_multiple_of_40 = (df.index % 40) == 0
    return df.loc[is_multiple_of_40]

In [None]:
def get_every_60(df):
    """Return the rows of ``df`` whose index label is a multiple of 60."""
    is_multiple_of_60 = (df.index % 60) == 0
    return df.loc[is_multiple_of_60]

In [None]:
# %% Convert a Player into a Scatter3D trace
def player2trace(player, group):
    """Build a 3D marker trace for the rows belonging to one player.

    Args:
        player: Identifier shown in the trace name ("Player <player>").
        group: Frame with ``x``, ``y``, ``z`` and ``mc_day`` columns.

    Returns:
        A ``go.Scatter3d`` trace whose markers are coloured by ``mc_day``.
    """
    marker_style = {"size": 3, "color": group["mc_day"], "opacity": 0.8}
    # The frame's `y` column goes on the plot's z axis and `z` on the plot's y axis
    # (presumably because `y` is the vertical coordinate in the game — TODO confirm).
    return go.Scatter3d(
        mode="markers",
        x=group["x"],
        y=group["z"],
        z=group["y"],
        marker=marker_style,
        name=f"Player {player}",
    )

In [None]:
# %% Convert a DataFrame into a Plot
def df2plot(df, name):
    """Turn a preprocessed event frame into a 900x900 figure with one trace per player.

    Args:
        df: Frame containing a ``pindex`` column plus the columns ``player2trace`` reads.
        name: Text used as the figure title.

    Returns:
        The assembled ``go.Figure``.
    """
    # One plotly trace per distinct player index.
    traces = [
        player2trace(pindex, rows)
        for pindex, rows in df.groupby("pindex")
    ]

    layout = {
        "title": {"text": name},
        "width": 900, "height": 900,
    }
    fig = go.Figure(data=traces)
    fig.update_layout(layout)
    return fig

In [None]:
# %% Preprocess the DataFrame
def preprocess_df(file):
    """Load one event CSV and add the derived ``pindex`` and ``mc_day`` columns.

    Args:
        file: Path of the CSV to read; it must contain ``player`` and ``worldTime``.

    Returns:
        The loaded frame with ``location.x/y/z`` renamed to ``x/y/z``, ``worldTime``
        renamed to ``time``, plus ``pindex`` and ``mc_day``.
    """
    short_names = {
        "location.x": "x", "location.y": "y", "location.z": "z",
        "worldTime": "time",
    }

    events = pd.read_csv(file)
    # Translate player hashes through the module-level `playerindex` lookup built
    # with Dask earlier in the notebook; values missing from it are left as-is.
    # player2index = dict(pd.read_csv(DATAPATH / "playerindex.csv"))
    events["pindex"] = events["player"].replace(playerindex)
    events = events.rename(columns=short_names)

    # Whole number of 24,000-unit blocks of world time
    # (presumably 24,000 ticks per in-game day — TODO confirm).
    events["mc_day"] = (events["time"] / 24_000).astype(int)
    # events.to_csv(file.parent / f"{file.stem}-preprocessed.csv", index=False)
    return events

In [None]:
# %% Chunk events by Players
def chunk_by_player(event_path, df, name):
    """Write one ``<pindex>_<name>.csv`` file per player into ``event_path``.

    Args:
        event_path: Existing directory (a ``Path``) that receives the files.
        df: Frame with a ``pindex`` column to split on.
        name: Event name used as the file-name suffix.

    Returns:
        ``df`` itself, unchanged.
    """
    per_player = df.groupby("pindex")
    for pindex, rows in per_player:
        target = event_path / f"{pindex}_{name}.csv"
        # reset_index() keeps the original row labels as an "index" column in the file.
        rows.reset_index().to_csv(target, index=False)

    return df

In [None]:
# %% Preprocess & Plot the Data
def preprocess_and_plot_data(world):
    """Preprocess every event CSV of one world and export the results.

    For each ``*.csv`` directly inside ``DATAPATH / world`` this:

    1. renders a Markdown heading with the event name,
    2. loads and preprocesses the file with ``preprocess_df``,
    3. writes one CSV per player into ``DOCSPATH / world / <event>/``,
    4. thins ``PlayerMoveEvent`` (every 40th row label) and
       ``EntityDamageByEntityEvent`` (every 60th row label),
    5. writes the (possibly thinned) frame to ``DOCSPATH / world / <event>.csv``.

    Args:
        world: Name of the sub-directory of ``DATAPATH`` to process.

    Returns:
        None; the results are the files written below ``DOCSPATH``.
    """
    # Events that are down-sampled before the combined export, and how.
    downsamplers = {
        "PlayerMoveEvent": get_every_40,
        "EntityDamageByEntityEvent": get_every_60,
    }

    # NOTE this works iff you're only looking for CSVs.
    for file in (DATAPATH / world).glob("*.csv"):
        name = file.stem
        display(md(f"## {name}"))
        df = preprocess_df(file)

        event_path = DOCSPATH / world / name
        event_path.mkdir(parents=True, exist_ok=True)
        df = chunk_by_player(event_path, df, name)

        downsample = downsamplers.get(name)
        if downsample is not None:
            df = downsample(df)

        # The payload is CSV, so it gets a .csv suffix (it was previously saved
        # with a misleading ".html" extension). The row index is still written.
        df.to_csv(DOCSPATH / world / f"{name}.csv")
        # fig = df2plot(df, name)
        # fig.write_html(str(PLOTSPATH3D / world / name) + ".html")
        # fig.show()

# Overworld

In [None]:
# Process every event CSV found in DATAPATH / "overworld"; output goes below DOCSPATH / "overworld".
preprocess_and_plot_data("overworld")

# Nether

In [None]:
# Process every event CSV found in DATAPATH / "nether"; output goes below DOCSPATH / "nether".
preprocess_and_plot_data("nether")

# End

In [None]:
# random_color = make_random_color()
# Process every event CSV found in DATAPATH / "end"; output goes below DOCSPATH / "end".
preprocess_and_plot_data("end")

# Plots

In [None]:
# import plotly.graph_objects as go
# import plotly.express as px

In [None]:
def get_data(world, event):
    """Load one event CSV and replace the player hashes with small integer ids.

    Args:
        world: Sub-directory of ``../../data/`` that holds the event file.
        event: Event name, i.e. the CSV file name without the ``.csv`` suffix.

    Returns:
        A tuple ``(event_df, unique_players)``:
        ``event_df`` has the ``_id`` and ``event`` columns removed, ``player``
        replaced by an integer id (0, 1, ... in order of first appearance) and
        ``location.x/y/z`` / ``worldTime`` renamed to ``x/y/z`` / ``time``;
        ``unique_players`` is the list of distinct integer ids.

    Raises:
        KeyError: if the CSV lacks the ``_id``, ``event`` or ``player`` column.
    """
    event_file = '../../data/' + world + '/' + event + '.csv'
    event_df = pd.read_csv(event_file)
    # Preview only: event files can be very large.
    display(event_df.head())

    # drop unnecessary columns
    event_df = event_df.drop(columns=['_id', 'event'])
    display(event_df.head())

    player_hashes = list(event_df['player'].unique())
    print(player_hashes)

    # Number the players in order of first appearance. The mapping is built
    # inline so this function does not depend on the `list_to_dic` helper,
    # which only exists as commented-out code in this notebook.
    player_mapping = {player: number for number, player in enumerate(player_hashes)}
    event_df['player'] = event_df['player'].map(player_mapping)

    unique_players = list(event_df['player'].unique())
    print(unique_players)

    event_df = event_df.rename(columns={
        'location.x': 'x', 'location.y': 'y', 'location.z': 'z',
        'worldTime': 'time',
    })

    return (event_df, unique_players)

def make_csv_plots(move, players, world, folder):
    """Write a down-sampled copy of ``move`` for plotting.

    Keeps the rows whose index label is a multiple of 20 and saves them to
    ``../../docs/data/<world>/<folder>/plot_<folder>.csv`` (without the index).

    Args:
        move: Event frame to thin out.
        players: Accepted for backward compatibility but not used; the row
            selection does not depend on the player, so it may be empty.
        world: World sub-directory below ``../../docs/data/``.
        folder: Output sub-directory; also used in the file name.

    Returns:
        None.
    """
    # Computed once: the selection is identical for every player, and doing it
    # outside a per-player loop also keeps `df` defined when `players` is empty.
    df = move[move.index % 20 == 0]

    filepath = '../../docs/data/' + world + "/" + folder + "/"
    # Create the target directory on first use instead of failing in to_csv.
    os.makedirs(filepath, exist_ok=True)
    filename = filepath + 'plot' + '_' + folder + '.csv'
    df.to_csv(filename, index=False)
    

In [None]:
# Load the overworld movement events, then export the thinned copy used for plotting.
df, unique_players = get_data('overworld', 'PlayerMoveEvent')
make_csv_plots(df, unique_players, 'overworld', 'move')

# Qualtrics Pre-Survey

In [None]:
# Columns removed right after loading the raw survey export.
QUALTRICS_DROPPED_COLUMNS = [
    'StartDate',
    'EndDate',
    'Status',
    'IPAddress',
    'Progress',
    'Duration (in seconds)',
    'Finished',
    'RecordedDate',
    'ResponseId',
    'RecipientLastName',
    'RecipientFirstName',
    'RecipientEmail',
    'ExternalReference',
    'LocationLatitude',
    'LocationLongitude',
    'DistributionChannel',
    'UserLanguage',
    'mc_dim_4_TEXT',
]

df_q = pd.read_csv("../../data/01-qualtrics.csv").drop(columns=QUALTRICS_DROPPED_COLUMNS)

In [None]:
# NOTE(review): this statement does not remove any rows. `dropna()` returns a shorter
# Series, but assigning it back to the column re-aligns it on the frame's index, so the
# dropped positions become NaN again and `consent` ends up unchanged.
# If respondents without consent should be discarded, `df_q.dropna(subset=["consent"])`
# would do that — the hard-coded row labels dropped in the next cell would then need
# to be re-checked, since they refer to the current row numbering.
df_q["consent"] = df_q["consent"].dropna()

In [None]:
# Row labels excluded by hand: 0-3, 47-55, 58-60, 62-71 and 90.
excluded_rows = [0, 1, 2, 3, *range(47, 56), 58, 59, 60, *range(62, 72), 90]

df_q = df_q.drop(index=excluded_rows)
df_q = df_q.reset_index(drop=True)
# Intermediate copy; it still contains the `username` column.
df_q.to_csv("../../data/02-qualtrics.csv", index=False)

In [None]:
# Strip the username column before the frame is exported to the public data folder.
df_q = df_q.drop(columns="username")

In [None]:
# Export the cleaned survey frame (without the row index) to the public data folder.
# The file name matches the raw input in ../../data/, but this is a different directory.
df_q.to_csv("../../public/data/01-qualtrics.csv", index=False)