In [1]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)

In [2]:
import os
experiment_result = {}
for file in os.listdir("experiments"):
    if file.endswith(".pickle"):
        with open(f"experiments/{file}","rb") as f:
            data = pickle.load(f)
        f.close()
        experiment_result[file.split(".")[0]] = data

In [10]:
experiment_result["binomial_ftm_rflvm"]["X"].shape

(1000, 2332, 2)

In [40]:
import numpy as np
from scipy.linalg import svd 
from typing import Tuple

def varimax(Phi, gamma = 1.0, q = 20, tol = 1e-6):

    p,k = Phi.shape
    R = np.eye(k)
    d=0
    for i in range(q):
        d_old = d
        Lambda = np.dot(Phi, R)
        u,s,vh = svd(np.dot(Phi.T,np.asarray(Lambda)**3 - (gamma/p) * np.dot(Lambda, np.diag(np.diag(np.dot(Lambda.T,Lambda))))))
        R = np.dot(u,vh)
        d = np.sum(s)
        if d_old!=0 and d/d_old < 1 + tol: break
    return np.dot(Phi, R)


def rotate_factors(player_factor_tensor:np.ndarray, use_varimax:bool = True)->Tuple[np.ndarray, np.ndarray]:
    """

    Args:
        player_factor_tensor (np.ndarray): sample x num factors x num players 
        varimax (bool): whether to apply varimax rotation or not

    Returns:
        np.ndarray: sample x num factors x num players  rotated tensor
    """

    n_samples, n_factors, _ = player_factor_tensor.shape
    output_tensor = np.zeros_like(player_factor_tensor)
    rotations = [np.eye(n_factors)]
    output_tensor[0,:,:] = player_factor_tensor[0,:,:] if not use_varimax else varimax(player_factor_tensor[0,:,:])
    for i in range(1,n_samples):
        U, _, V =  svd(output_tensor[0,:,:].dot(player_factor_tensor[i,:,:].T), full_matrices=False)
        rotation = U.dot(V)
        rotations.append(rotation)
        output_tensor[i,:,:] = rotation.dot(player_factor_tensor[i,:,:])
    print(len(rotations))
    return output_tensor, np.stack(rotations,axis = 0)




In [41]:
rotated_experiment_results = {}
# for model in experiment_result:
#     print(model, experiment_result[model]["X"])
#     rotated_experiment_results[model] = rotate_factors(np.swapaxes(experiment_result[model]["X"], axis1 = 1, axis2=2))

rotated_experiment_results["poisson_ast_rflvm"] = rotate_factors(np.swapaxes(experiment_result["poisson_ast_rflvm"]["X"],1,2))
rotated_experiment_results["poisson_stl_rflvm"] = rotate_factors(np.swapaxes(experiment_result["poisson_stl_rflvm"]["X"],1,2))
rotated_experiment_results["poisson_blk_rflvm"] = rotate_factors(np.swapaxes(experiment_result["poisson_blk_rflvm"]["X"],1,2))
rotated_experiment_results["poisson_dreb_rflvm"] = rotate_factors(np.swapaxes(experiment_result["poisson_dreb_rflvm"]["X"],1,2))
rotated_experiment_results["poisson_oreb_rflvm"] = rotate_factors(np.swapaxes(experiment_result["poisson_oreb_rflvm"]["X"],1,2))
rotated_experiment_results["gaussian_bpm_rflvm"] = rotate_factors(np.swapaxes(experiment_result["gaussian_bpm_rflvm"]["X"],1,2))
rotated_experiment_results["gaussian_dbpm_rflvm"] = rotate_factors(np.swapaxes(experiment_result["gaussian_dbpm_rflvm"]["X"],1,2))
rotated_experiment_results["gaussian_obpm_rflvm"] = rotate_factors(np.swapaxes(experiment_result["gaussian_obpm_rflvm"]["X"],1,2))
rotated_experiment_results["binomial_ftm_rflvm"] = rotate_factors(np.swapaxes(experiment_result["binomial_ftm_rflvm"]["X"],1,2))
rotated_experiment_results["binomial_fg2m_rflvm"] = rotate_factors(np.swapaxes(experiment_result["binomial_fg2m_rflvm"]["X"],1,2))
rotated_experiment_results["binomial_fg3m_rflvm"] = rotate_factors(np.swapaxes(experiment_result["binomial_fg3m_rflvm"]["X"],1,2))

1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000


In [42]:
rotated_means_results = {model : rotated_experiment_results[model][0].mean(axis=0).T for model in  rotated_experiment_results}

In [43]:
rotated_means_results

{'poisson_ast_rflvm': array([[-0.77955127, -0.31252417],
        [ 3.03080943, -0.66348468],
        [-0.21793832,  0.41636184],
        ...,
        [-0.51433442,  0.46664878],
        [ 0.12029459, -0.12547487],
        [-0.97031239,  0.58311113]]),
 'poisson_stl_rflvm': array([[-0.75296253,  0.2707299 ],
        [-1.17499283,  0.33398007],
        [-0.48201936,  0.11116952],
        ...,
        [-0.22202047, -0.11665657],
        [-0.04909354, -0.03588178],
        [-0.50164334, -0.02004732]]),
 'poisson_blk_rflvm': array([[-0.23714523, -0.46303191],
        [ 0.06777094, -0.54193865],
        [-1.04252833, -0.70617365],
        ...,
        [-0.67819896,  0.76689656],
        [-0.43935093,  1.00095136],
        [ 0.50773111, -0.09605224]]),
 'poisson_dreb_rflvm': array([[-0.32803275, -0.04177471],
        [ 0.6788238 , -0.15288084],
        [ 0.58459001,  1.07821281],
        ...,
        [ 0.07606217,  0.2658391 ],
        [ 0.65828453,  0.61112861],
        [ 0.03153441,  0.2054

In [12]:
import pandas as pd
df = pd.read_csv("datasets/player_data.csv")

In [13]:
df = df.sort_values(by=["id","year"])


In [14]:
import plotly.express as px


def plot_scatter(dataframe, rotated_means_dict, metric, model, offset = "minutes"):
    key_name = f"{model}_{metric}_rflvm"
    df_size_vals = dataframe[[offset,"id"]].groupby("id").mean().reset_index()
    df_names = dataframe[["id","name"]].drop_duplicates()["name"].values
    data = pd.DataFrame(rotated_means_dict[key_name], columns=["x","y"])
    data["names"] = df_names
    data[offset] = df_size_vals[offset]
    if model == "binomial":
        df_color_vals = dataframe[[metric,offset, "id"]].groupby("id").sum().reset_index()
        data[f"{metric}_pct"] = df_color_vals[metric]/df_color_vals[offset]
        fig = px.scatter(data_frame= data,   
                   x = "x", y = "y", hover_data = [f"{metric}_pct",offset,"names"], title = metric, size = offset,
                         color = f"{metric}_pct", range_color = [.1,.7])
    elif model == "poisson":
        df_color_vals = dataframe[[metric,offset,"id"]].groupby("id").sum().reset_index()
        data[f"{metric}_rate"] = df_color_vals[metric]/df_color_vals[offset]
        fig = px.scatter(data_frame = data, x = "x", y = "y", hover_data = [f"{metric}_rate",offset,"names"], 
                         title = metric, size = offset,
                         color = f"{metric}_rate", range_color = [0,.1])
    elif model == "gaussian":
        df_color_vals = dataframe[[metric, "id"]].groupby("id").mean().reset_index()
        data["hover"] = df_color_vals[metric]
        fig = px.scatter(data_frame = data, x = "x", y = "y", color = "hover",
                    size = offset, hover_data = ["hover",offset,"names"], title = metric )
    
    fig.show()
    

    


In [18]:
plot_scatter(df, rotated_means_results, "fg3m", "binomial", "fg3a")

In [35]:

from scipy.spatial import KDTree
def plot_trajectories(data_frame, player, num_k, metric, latent_space):
    name_map = data_frame[["id","name"]].drop_duplicates().reset_index()[["id","name"]]
    player_index = name_map.index[name_map["name"] == player][0]
    point = latent_space[player_index,:]
    _, indices = KDTree(latent_space).query(point, k = num_k)
    nearest_neighbors_names = name_map.loc[indices]["name"]
    trajectory_df = data_frame[data_frame["name"].isin(nearest_neighbors_names)][["name",metric,"age"]]
    fig = px.line(trajectory_df, x='age', y=metric, color='name', labels={metric: metric, 'name': 'Player'})
    fig.update_layout(title=f'{metric} over time for {num_k -1} neighbors of {player}')
    fig.show()



In [36]:
plot_trajectories(df, "Michael Jordan", 6, "bpm", rotated_means_results["gaussian_bpm_rflvm"])

In [37]:
experiment_model_result = {}
for file in os.listdir("experiments"):
    if file.endswith("model_rflvm.pickle"):
        with open(f"experiments/{file}","rb") as f:
            data = pickle.load(f)
        f.close()
        experiment_model_result[file.split(".")[0]] = data

In [54]:
bpm_preds = experiment_model_result["gaussian_bpm_model_rflvm"].predict(experiment_result["gaussian_bpm_rflvm"]["X"][-1,:,:])

In [56]:
bpm_preds.shape

(2332, 27)

In [58]:
bpm_preds_df = pd.DataFrame(bpm_preds, columns=list(range(18,45)))

In [70]:
bpm_preds_df["name"] = df[["name","id"]].drop_duplicates()["name"].values
bpm_preds_df["id"] = df[["name","id"]].drop_duplicates()["id"].values

In [71]:
bpm_preds_df_long = bpm_preds_df.melt(id_vars=["name","id"], var_name="age", value_name="bpm")


In [73]:
plot_trajectories(bpm_preds_df_long, "Michael Jordan", 6, "bpm", experiment_result["gaussian_bpm_rflvm"]["X"][-1,:,:],)

In [72]:
bpm_preds_df_long

Unnamed: 0,name,id,age,bpm
0,Byron Scott,2,18,-4.810454
1,Grant Long,3,18,-5.224247
2,Dan Schayes,7,18,-5.348480
3,Sedale Threatt,9,18,-5.272296
4,Chris King,12,18,-5.359851
...,...,...,...,...
62959,Facundo Campazzo,1630267,44,-8.478789
62960,Nate Darling,1630268,44,-8.507473
62961,Brodric Thomas,1630271,44,-8.526915
62962,Freddie Gillespie,1630273,44,-8.535514


In [77]:
blk_preds = experiment_model_result["poisson_blk_model_rflvm"].predict(experiment_result["poisson_blk_rflvm"]["X"][-1,:,:])

In [78]:
blk_preds_df = pd.DataFrame(blk_preds, columns=list(range(18,45)))
blk_preds_df["name"] = df[["name","id"]].drop_duplicates()["name"].values
blk_preds_df["id"] = df[["name","id"]].drop_duplicates()["id"].values

In [79]:
blk_preds_df_long = blk_preds_df.melt(id_vars=["name","id"], var_name="age", value_name="blk")


In [81]:
plot_trajectories(blk_preds_df_long, "Tim Duncan", 6, "blk", experiment_result["poisson_blk_rflvm"]["X"][-1,:,:],)

In [82]:
plot_trajectories(df, "Tim Duncan", 6, "blk", experiment_result["poisson_blk_rflvm"]["X"][-1,:,:],)