In [1]:
import numpy as np
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from metrics import get_summary, get_posterior_mean, get_neighbors, rotate_factors
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)

In [2]:
import os

# Load every pickled experiment artifact found in ./experiments into a dict.
# The key is the filename up to its first "." (e.g. "X.pickle" -> "X").
# The listing is sorted so that, should two files collapse to the same key,
# the one that wins is deterministic rather than dependent on filesystem order.
# NOTE(review): pickle.load can execute arbitrary code; only load files from
# a trusted source.
experiment_result = {}
for file in sorted(os.listdir("experiments")):
    if file.endswith(".pickle"):
        # The context manager closes the file; no explicit close() is needed.
        with open(f"experiments/{file}","rb") as f:
            data = pickle.load(f)
        experiment_result[file.split(".")[0]] = data

In [None]:
# Value interpolated into the `age == ...` filter of the nearest-neighbour
# queries further down. (It was previously misspelled `ag`, which left `age`
# undefined on a fresh kernel.)
age = 25

# Posterior samples stored by the experiment run under the keys "X" and "F".
X_samples = experiment_result["X"]
F_samples = experiment_result["F"]

# X_samples is indexed as a 4-D array here: the first two axes are flattened
# into one before calling rotate_factors (presumably chains x draws - TODO
# confirm), and the first element of its return value is reshaped back to the
# original 4-D shape.
rotated_X_samples = rotate_factors(X_samples.reshape(-1, X_samples.shape[2], X_samples.shape[3]))[0].reshape(X_samples.shape)

posterior_X_mean = get_posterior_mean(rotated_X_samples)






    




In [7]:
# A notebook cell renders only its final expression, so the first summary
# table used to be computed and then thrown away. display() (provided by the
# IPython kernel) renders both tables.
display(pd.DataFrame(get_summary(rotated_X_samples)))
display(pd.DataFrame(get_summary(F_samples)))


In [8]:
# Player table, ordered by player id and then season.
df = pd.read_csv("datasets/player_data.csv").sort_values(by=["id","year"])

### nearest neighbors
# Same filter for every lookup: rows at the configured age with playing time.
neighbor_query = f"age == {age} and minutes > 0"
for player_name in ("Stephen Curry", "Tim Duncan", "Kevin Durant", "Klay Thompson"):
    print(get_neighbors(df, player_name, 6, posterior_X_mean, query = neighbor_query))


In [15]:
import plotly.express as px
from scipy.cluster.hierarchy import linkage, leaves_list

def plot_scatter(dataframe, rotated_means_dict, metric, model, offset = "minutes", mixed_metric_index = 0, mixed_metric_model = "binomial", mixed_metric_offset = "minutes", chain=1, age=25):
    """Show an interactive 2-D scatter of latent positions coloured by a metric.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Player table. The code reads the columns "id", "name", `offset`,
        `metric` and, for the mixed model, "age" and "minutes".
    rotated_means_dict : dict
        Looked up with the key ``f"{model}_{metric}_{chain}_rflvm"``. For the
        single-metric models the value must be convertible to a two-column
        DataFrame; for ``model == "mixed"`` it must be a mapping whose ``"X"``
        entry is indexed with ``[0]`` and averaged over axis 0.
    metric : str
        Column name, or for the mixed model a comma-separated list of names.
    model : {"binomial", "poisson", "gaussian", "mixed"}
        Selects how the colour value is computed.
    offset : str
        Exposure column used for marker size (and as denominator for the
        binomial / poisson ratios).
    mixed_metric_index : int
        Position, within the comma-separated lists, of the metric to plot when
        ``model == "mixed"``.
    mixed_metric_model : {"binomial", "poisson", "gaussian"}
        Colour rule applied to the selected metric when ``model == "mixed"``.
    mixed_metric_offset : str
        Comma-separated exposure columns, parallel to `metric`, for the mixed
        model.
    chain : int
        Chain identifier that is part of the dictionary key.
    age : int, optional
        Value matched against the "age" column when ``model == "mixed"``.
        Defaults to 25, the value that used to be hard-coded.

    Raises
    ------
    ValueError
        If `model` (or, for the mixed model, `mixed_metric_model`) is not one
        of the supported names. Previously this surfaced as an
        UnboundLocalError on ``fig`` after all the work had been done.
    """
    single_models = ("binomial", "poisson", "gaussian")
    if model not in single_models + ("mixed",):
        raise ValueError(
            f"unknown model {model!r}; expected one of {single_models + ('mixed',)}"
        )
    if model == "mixed" and mixed_metric_model not in single_models:
        raise ValueError(
            f"unknown mixed_metric_model {mixed_metric_model!r}; expected one of {single_models}"
        )

    # Built from the *full* metric string, before `metric` is narrowed below.
    key_name = f"{model}_{metric}_{chain}_rflvm"

    if model != "mixed":
        # Rows are matched positionally: groupby("id") orders by id, so this
        # assumes `dataframe` is already sorted by id and that the latent
        # means follow the same order - TODO confirm against the caller.
        df_size_vals = dataframe[[offset,"id"]].groupby("id").mean().reset_index()
        df_names = dataframe[["id","name"]].drop_duplicates()["name"].values
        data = pd.DataFrame(rotated_means_dict[key_name], columns=["x","y"])
        data["names"] = df_names
        data[offset] = df_size_vals[offset]
    else:
        # Compute the age / playing-time filter once and reuse it.
        eligible = dataframe[(dataframe["age"] == age) & (dataframe["minutes"] > 0)]
        df_names = eligible[["id","name"]].drop_duplicates()["name"].values
        metric_list = metric.split(",")
        exposure_list = mixed_metric_offset.split(",")
        player_id_df = eligible[["id"]].drop_duplicates()
        data = pd.merge(eligible[metric_list + ["id"]], player_id_df, on ="id", how = "right")[metric_list].fillna(0)
        offset_data = pd.merge(eligible[exposure_list + ["id"]], player_id_df, on ="id", how = "right").iloc[:, 0:len(exposure_list)].fillna(0)

    if model == "binomial":
        # Colour = total successes / total exposure per player.
        df_color_vals = dataframe[[metric,offset, "id"]].groupby("id").sum().reset_index()
        data[f"{metric}_pct"] = df_color_vals[metric]/df_color_vals[offset]
        fig = px.scatter(data_frame= data,
                   x = "x", y = "y", hover_data = [f"{metric}_pct",offset,"names"], title = metric, size = offset,
                         color = f"{metric}_pct", range_color = [.1,.7])
    elif model == "poisson":
        # Colour = total count / total exposure per player.
        df_color_vals = dataframe[[metric,offset,"id"]].groupby("id").sum().reset_index()
        data[f"{metric}_rate"] = df_color_vals[metric]/df_color_vals[offset]
        fig = px.scatter(data_frame = data, x = "x", y = "y", hover_data = [f"{metric}_rate",offset,"names"],
                         title = metric, size = offset,
                         color = f"{metric}_rate", range_color = [0,.1])
    elif model == "gaussian":
        # Colour = per-player mean of the metric.
        df_color_vals = dataframe[[metric, "id"]].groupby("id").mean().reset_index()
        data["hover"] = df_color_vals[metric]
        fig = px.scatter(data_frame = data, x = "x", y = "y", color = "hover",
                    size = offset, hover_data = ["hover",offset,"names"], title = metric )
    else:  # model == "mixed" (validated above)
        metric = metric_list[mixed_metric_index]
        offset = exposure_list[mixed_metric_index]
        new_df = pd.DataFrame(rotated_means_dict[key_name]["X"][0].mean(axis=0).T, columns=["x","y"])
        new_df["names"] = df_names
        if mixed_metric_model == "binomial":
            new_df[f"{metric}_pct"] = data.iloc[:, mixed_metric_index]/offset_data.iloc[:, mixed_metric_index]
            new_df[offset] = offset_data.iloc[:, mixed_metric_index]
            fig = px.scatter(data_frame= new_df,
                    x = "x", y = "y", hover_data = [f"{metric}_pct",offset,"names"], title = metric, size = offset,
                            color = f"{metric}_pct", range_color = [.1,.7])
        elif mixed_metric_model == "poisson":
            new_df[f"{metric}_rate"] = data.iloc[:, mixed_metric_index]/offset_data.iloc[:, mixed_metric_index]
            new_df[offset] = offset_data.iloc[:, mixed_metric_index]
            fig = px.scatter(data_frame = new_df, x = "x", y = "y", hover_data = [f"{metric}_rate",offset,"names"],
                            title = metric, size = offset,
                            color = f"{metric}_rate", range_color = [0,.1])
        else:  # mixed_metric_model == "gaussian" (validated above)
            new_df[f"{metric}"] = data.iloc[:, mixed_metric_index]
            # Marker size uses the square root of the exposure here.
            new_df[offset] = np.sqrt(offset_data.iloc[:, mixed_metric_index])
            fig = px.scatter(data_frame = new_df, x = "x", y = "y", color = metric,
                        size = offset, hover_data = [metric,offset,"names"], title = metric, range_color = [0,10] )

    fig.show()
    

def plot_heatmap(K, labels):
    """Show a heatmap of ``|K|`` with rows/columns ordered by hierarchical clustering.

    Parameters
    ----------
    K : array-like, shape (n, n)
        Square similarity matrix; ``1 - |K|`` is used as the pairwise
        dissimilarity (presumably K is a correlation matrix, per the plot
        title - TODO confirm).
    labels : sequence of length n
        Axis labels, in the same order as the rows of `K`. Any sequence is
        accepted; it is converted to a NumPy array so it can be reordered.
    """
    # Function-scope import keeps this fix self-contained.
    from scipy.spatial.distance import squareform

    labels = np.asarray(labels)
    # Rounding removes floating-point noise (e.g. a diagonal of 1e-17).
    dissimilarity = np.around(1 - np.abs(K), decimals = 10)
    # linkage() treats a 2-D input as raw observation vectors, not as pairwise
    # distances. A precomputed distance matrix has to be passed in condensed
    # (upper-triangle vector) form. checks=False avoids raising on tiny
    # asymmetries or a not-exactly-zero diagonal.
    condensed = squareform(np.asarray(dissimilarity), checks=False)
    hierarchy = linkage(condensed, method='complete')
    order = leaves_list(hierarchy)
    reordered_matrix = dissimilarity[:, order]
    reordered_matrix = reordered_matrix[order, :]
    # Plot similarity (1 - dissimilarity) in the clustered order.
    fig = px.imshow(1 - reordered_matrix, x=labels[order], y=labels[order], labels=dict(x='X-axis', y='Y-axis'))
    # Configure the layout
    fig.update_layout(title='Correlation of Latent Space')

    # Enable hover information
    fig.update_traces(hovertemplate='x: %{x}<br>y: %{y}')

    # Configure hover behaviour
    fig.update_layout(
    hovermode='x unified',
    hoverdistance=1,
    )

    # Show the interactive heatmap
    fig.show()
       


In [None]:

from scipy.spatial import KDTree
def plot_trajectories(data_frame, player, num_k, metric, latent_space):
    """Plot `metric` against age for a player and their nearest latent-space neighbours.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must contain the columns "id", "name", "age" and `metric`.
    player : str
        Value of the "name" column identifying the query player.
    num_k : int
        Number of points requested from the KD-tree query. The query point is
        itself part of the tree, so this normally yields the player plus
        ``num_k - 1`` neighbours.
    metric : str
        Column plotted on the y axis.
    latent_space : array-like, shape (n_players, n_dims)
        One row per distinct (id, name) pair. Rows are assumed to follow the
        order in which those pairs first appear in `data_frame` - TODO confirm
        against the caller.

    Raises
    ------
    IndexError
        If `player` does not occur in ``data_frame["name"]``.
    """
    name_map = data_frame[["id","name"]].drop_duplicates().reset_index(drop=True)
    matches = name_map.index[name_map["name"] == player]
    if len(matches) == 0:
        raise IndexError(f"player {player!r} not found in data_frame['name']")
    player_index = matches[0]

    latent_space = np.asarray(latent_space)
    point = latent_space[player_index,:]
    _, indices = KDTree(latent_space).query(point, k = num_k)
    # query() returns a scalar for k=1, and pads with index == n when k exceeds
    # the number of points; normalise to a 1-D array of valid row positions.
    indices = np.atleast_1d(indices)
    indices = indices[indices < len(latent_space)]

    # Select by id, not by name, so that unrelated players who merely share a
    # name with a neighbour are not pulled into the plot.
    neighbor_ids = name_map.loc[indices, "id"]
    trajectory_df = data_frame[data_frame["id"].isin(neighbor_ids)][["name",metric,"age"]]
    fig = px.line(trajectory_df, x='age', y=metric, color='name', labels={metric: metric, 'name': 'Player'})
    fig.update_layout(title=f'{metric} over time for {len(indices) - 1} neighbors of {player}')
    fig.show()

