# Apply Mapper algorithm to each league

## Data Preprocessing

In [169]:
# Data wrangling
import numpy as np
import pandas as pd  # Not a requirement of giotto-tda, but is compatible with the gtda.mapper module

# Data viz
from gtda.plotting import plot_point_cloud

# TDA magic
from gtda.mapper import (
    CubicalCover,
    make_mapper_pipeline,
    Projection,
    plot_static_mapper_graph,
    plot_interactive_mapper_graph,
    MapperInteractivePlotter
)
import matplotlib.pyplot as plt
# ML tools
from sklearn import datasets
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [170]:
# One averages CSV per league, read in the order UAAP, PBA, Japan, Korea.
league_csv_paths = (
    'Data/UAAP_2024_averages.csv',
    'Data/PBA--Commissioners-Cup_2024_averages.csv',
    'Data/Japanese-BLeague_2024_averages.csv',
    'Data/South-Korean-KBL_2024_averages.csv',
)
uaap_df, pba_df, japan_df, korea_df = map(pd.read_csv, league_csv_paths)

In [171]:
# Preprocessing, applied identically to every league table:
#   1. drop the '#' ID column
#   2. keep only the numeric columns
#   3. standardise the remaining columns with StandardScaler
# Optional feature selection (currently disabled): restrict each table to the
# Alagappan features ['RPG','APG','TOV','SPG','BPG','PF','PPG'] before step 2.

def _scale_numeric_columns(league_df):
    """Drop '#', keep the numeric columns of `league_df` and return them standardised."""
    numeric_part = league_df.drop(columns='#').select_dtypes(include=[np.number])
    return StandardScaler().fit_transform(numeric_part)

dfs = [uaap_df, pba_df, japan_df, korea_df]
dfs_numeric_scaled = [_scale_numeric_columns(league_df) for league_df in dfs]

# Number of rows (players) per league after preprocessing
print([len(scaled) for scaled in dfs_numeric_scaled])

[115, 132, 315, 157]


## PCA 2 dims, no feature selection

In [172]:
pca = PCA(n_components=2)

# The same estimator is re-fitted on each league in turn, so once the loop
# finishes `pca` only holds the fit for the last league in the list.
for i in range(len(dfs_numeric_scaled)):
    pca.fit(dfs_numeric_scaled[i])
    print(f"Explained variance for league {i}: {pca.explained_variance_ratio_}")

Explained variance for league 0: [0.56220285 0.15547608]
Explained variance for league 1: [0.54648221 0.1302977 ]
Explained variance for league 2: [0.55819243 0.15811781]
Explained variance for league 3: [0.55597043 0.13414257]


### UAAP point cloud

In [173]:
# UAAP
# Fit a fresh 2-component PCA on the UAAP matrix itself. The shared `pca`
# object is left fitted on the last league of the explained-variance loop,
# so reusing it here would project UAAP onto another league's axes.

plot_point_cloud(PCA(n_components=2).fit_transform(dfs_numeric_scaled[0]))

### PBA point cloud

In [174]:
# PBA
# Fit a fresh 2-component PCA on the PBA matrix itself. The shared `pca`
# object is left fitted on the last league of the explained-variance loop,
# so reusing it here would project PBA onto another league's axes.

plot_point_cloud(PCA(n_components=2).fit_transform(dfs_numeric_scaled[1]))

### Japan point cloud

In [175]:
# Japan B.League
# Fit a fresh 2-component PCA on the Japan matrix itself. The shared `pca`
# object is left fitted on the last league of the explained-variance loop,
# so reusing it here would project Japan onto another league's axes.

plot_point_cloud(PCA(n_components=2).fit_transform(dfs_numeric_scaled[2]))

### Korea point cloud

In [176]:
# South Korean KBL
# Fit the 2-component PCA on the Korea matrix explicitly instead of relying on
# the shared `pca` object happening to be fitted on this league last.

plot_point_cloud(PCA(n_components=2).fit_transform(dfs_numeric_scaled[3]))

## Mapper with 2-dim PCA no feature selection

In [198]:
# --- Mapper ingredients ---
# Filter function: any scikit-learn transformer works; here a 2-component PCA.
filter_func = PCA(n_components=2)
# Cover: cubical cover with n_intervals=10 and overlap_frac=0.5.
cover = CubicalCover(n_intervals=10, overlap_frac=0.5)
# Clustering algorithm: DBSCAN (the default choice) with default parameters.
clusterer = DBSCAN()
# Parallelism of the clustering step.
n_jobs = 1

# --- Assemble the pipeline ---
mapper_kwargs = dict(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer,
    verbose=False,
    n_jobs=n_jobs,
)
pipe = make_mapper_pipeline(**mapper_kwargs)

In [178]:
# Give each league's scaled matrix its own name (same order as `dfs`).
(
    uaap_mapper,
    pba_mapper,
    japan_mapper,
    korea_mapper,
) = dfs_numeric_scaled

### UAAP Mapper

In [199]:
# Mapper on the scaled UAAP matrix; nodes can be coloured by any one-hot position column.
UAAP_MIP = MapperInteractivePlotter(pipe, uaap_mapper)
uaap_pos_dummies = pd.get_dummies(uaap_df['Position'])

# Generate interactive widget
UAAP_MIP.plot(color_data=uaap_pos_dummies)

VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Cover parameters</b>'), Text(value='uniform', cont…

In [180]:
# hole 1: [4,28,29,50,41,0,72,5]
# hole 2: [31,30,33,34,35,61,62,68,45,51,49]

#### Define functions

In [181]:
def find_nodes(mip, nodes):
    """Return the sorted, de-duplicated row indices contained in the given Mapper nodes.

    Parameters
    ----------
    mip : object
        Fitted plotter exposing ``graph_.vs[node]['node_elements']``.
    nodes : iterable of int
        Node ids of the Mapper graph to collect.

    Returns
    -------
    numpy.ndarray
        Unique row indices found in any of the nodes, in ascending order.
        Empty (integer dtype) when ``nodes`` is empty.
    """
    node_players = [mip.graph_.vs[node]['node_elements'] for node in nodes]

    # np.concatenate raises ValueError on an empty list; return an empty
    # integer index array instead so the result can still be fed to `.iloc`.
    if not node_players:
        return np.array([], dtype=int)

    # np.unique both removes players shared by several nodes and sorts them.
    return np.unique(np.concatenate(node_players))

def compare_means(node_players, df):
    """Compare the numeric column means of selected rows with those of the whole frame.

    Parameters
    ----------
    node_players : array-like of int
        Positional row indices (used with ``df.iloc``) of the rows to compare.
    df : pandas.DataFrame
        Full table; only its numeric columns are averaged.

    Returns
    -------
    node_means : pandas.Series
        Unrounded numeric column means of the selected rows.
    mean_df : pandas.DataFrame
        One row per numeric column with 'Feature Mean' (selected rows),
        'Dataset Mean' (all rows) and 'Difference' (percent difference of the
        former relative to the latter), each rounded to 2 decimals.
    """
    df_means = df.mean(numeric_only=True)
    node_means = df.iloc[node_players].mean(numeric_only=True)

    relative_gap = (node_means - df_means) / df_means * 100

    mean_df = pd.concat(
        [node_means.round(2), df_means.round(2), relative_gap.round(2)],
        axis=1,
        keys=['Feature Mean', 'Dataset Mean', 'Difference'],
    )

    return node_means, mean_df

#### Outlier analysis

In [204]:
# Outlier 1 is the single node 57: collect its rows and show them transposed
# (one column per row) so every stat is visible.
outlier_1_nodes = [57]
outlier_1 = find_nodes(UAAP_MIP, outlier_1_nodes)

uaap_df.iloc[outlier_1].transpose()

Unnamed: 0,37
Player,"Blanco, Isaiah"
Team,DLSU
GP,1
Position,G
MPG,1.49
PPG,3.0
FGM,1.0
FGA,2.0
FG%,50.0
3PM,1.0


In [183]:
# Outlier 2 is made up of nodes 2, 47 and 48.
outlier_2_nodes = [2, 47, 48]
outlier_2 = find_nodes(UAAP_MIP, outlier_2_nodes)

uaap_df.iloc[outlier_2]

Unnamed: 0,Player,Team,GP,Position,MPG,PPG,FGM,FGA,FG%,3PM,...,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF,#
1,"Kouame, Ange",ADMU,14,C,27.3,11.6,4.642857,8.428571,54.8,0.5,...,53.85,5.428571,6.0,11.3,1.7,0.5,2.5,1.4,2.4,2
12,"Faye, Adama",UST,13,C,30.57,11.4,4.615385,10.153846,45.1,0.0,...,38.36,2.0,10.230769,12.2,1.0,0.4,2.2,3.8,3.1,13
29,"Phillips, Michael",DLSU,10,C,25.17,9.4,3.6,7.6,47.4,0.0,...,55.0,3.6,6.7,10.3,1.7,1.9,1.6,1.9,2.0,44
102,"Diouf, Malick",UP,14,C,22.51,11.2,3.857143,7.571429,49.2,0.214286,...,67.8,3.142857,7.714286,11.5,2.8,1.4,1.7,1.7,2.7,117


In [184]:
# Average stats of the outlier-2 players next to the averages of the full UAAP table.
# Only the comparison table is needed here; the raw means are discarded.
_, comp = compare_means(node_players=outlier_2, df=uaap_df)

comp

Unnamed: 0,Feature Mean,Dataset Mean,Difference
GP,12.75,11.17,14.11
MPG,26.39,14.07,87.53
PPG,10.9,4.8,127.17
FGM,4.18,1.73,142.05
FGA,8.44,4.64,81.93
FG%,49.12,35.6,38.01
3PM,0.18,0.49,-63.52
3PA,0.66,1.83,-63.97
3P%,17.86,17.35,2.93
FTM,2.3,0.86,168.48


#### Hole 1 analysis

In [205]:
# Show all columns (global pandas display option; it stays in effect for later cells).
pd.set_option('display.max_columns', None)

# hole 1: [4,28,29,50,41,0,72,5]
hole_1_nodes = [4, 28, 29, 50, 41, 0, 72, 5]
hole_1 = find_nodes(UAAP_MIP, hole_1_nodes)

uaap_df.iloc[hole_1]

Unnamed: 0,Player,Team,GP,Position,MPG,PPG,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF,#,League
0,"Ildefonso, Dave",ADMU,14,G,27.44,12.0,4.071429,13.071429,30.9,1.857143,7.428571,25.0,2.071429,3.214286,64.44,2.214286,6.071429,8.3,2.7,1.0,0.1,2.8,1.7,1,UAAP
2,"Andrade, Bryan",ADMU,14,G,25.5,9.5,3.214286,8.857143,36.2,1.785714,5.285714,33.78,1.642857,1.785714,92.0,0.928571,1.857143,2.7,2.1,0.9,0.2,1.5,1.5,3,UAAP
13,"Pangilinan, Miguel",UST,14,G,25.46,6.6,2.142857,7.428571,28.8,1.214286,4.357143,27.87,1.071429,1.571429,68.18,1.857143,2.785714,4.6,1.4,0.9,0.1,1.6,2.2,15,UAAP
15,"Manalang, Paul",UST,13,G,25.08,4.5,1.307692,6.769231,19.3,0.846154,4.769231,17.74,1.076923,1.384615,77.78,0.384615,2.307692,2.7,2.8,0.6,0.0,2.0,2.5,19,UAAP
26,"Winston, Schonny",DLSU,10,G,22.4,14.9,5.4,12.6,42.9,1.0,3.1,32.26,3.1,3.9,79.49,1.5,2.8,4.3,2.5,2.1,0.1,2.4,1.3,41,UAAP
27,"Nelle, Evan",DLSU,11,G,28.06,12.0,3.818182,10.272727,34.1,1.454545,5.454545,26.67,2.0,2.090909,95.65,1.272727,3.636364,4.8,6.0,1.7,0.6,2.8,1.9,42,UAAP
30,"Austria, Cyrus",DLSU,13,F,23.25,8.5,3.0,7.076923,42.9,0.538462,1.846154,29.17,1.769231,3.461538,51.11,1.461538,2.384615,3.9,2.2,1.2,0.1,1.6,1.5,45,UAAP
31,"Nonoy, Mark",DLSU,11,G,19.43,7.6,2.181818,9.272727,25.2,1.363636,6.0,22.73,1.181818,1.454545,81.25,0.363636,2.090909,2.5,2.7,2.3,0.0,1.6,1.8,46,UAAP
42,"Sajonia, Bryan",FEU,12,G,23.28,11.2,4.0,11.75,34.0,1.166667,5.666667,20.59,2.0,2.5,80.0,1.5,2.666667,4.2,1.7,1.6,0.3,1.3,3.1,57,UAAP
43,"Torres, Xyrus",FEU,14,F,26.38,10.6,3.642857,13.357143,27.3,2.571429,9.714286,26.47,0.785714,1.142857,68.75,0.642857,1.928571,2.6,0.6,1.2,0.1,0.6,1.5,58,UAAP


In [201]:
# Hole-1 players vs. the full UAAP table; `h1_means` keeps the unrounded means for later use.
h1_means, comp = compare_means(node_players=hole_1, df=uaap_df)

comp

Unnamed: 0,Feature Mean,Dataset Mean,Difference
GP,12.57,11.17,12.45
MPG,23.05,14.07,63.83
PPG,9.27,4.8,93.28
FGM,3.17,1.73,83.46
FGA,9.1,4.64,96.09
FG%,34.47,35.6,-3.18
3PM,1.4,0.49,186.42
3PA,4.84,1.83,164.14
3P%,29.33,17.35,69.06
FTM,1.51,0.86,76.49


#### Hole 2 analysis

In [187]:
# hole 2: [31,30,33,34,35,61,62,68,45,51,49]
hole_2_nodes = [31, 30, 33, 34, 35, 61, 62, 68, 45, 51, 49]
hole_2 = find_nodes(UAAP_MIP, hole_2_nodes)

uaap_df.iloc[hole_2]

Unnamed: 0,Player,Team,GP,Position,MPG,PPG,FGM,FGA,FG%,3PM,...,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF,#
14,"Lazarte, Ivan",UST,11,G,15.18,5.5,1.727273,6.090909,28.4,0.545455,...,60.71,1.181818,1.636364,2.8,0.8,0.8,0.0,0.5,1.3,17
16,"Manaytay, Christian",UST,14,F,19.08,3.9,1.571429,4.214286,37.3,0.0,...,45.83,2.0,2.928571,4.9,1.4,0.9,0.3,1.9,2.6,21
17,"Garing, JC",UST,13,G,15.43,3.5,1.384615,3.923077,34.7,0.384615,...,60.0,1.692308,2.461538,3.9,1.3,0.9,0.1,1.0,1.8,23
18,"Calimag, Richi",UST,13,G,14.33,3.5,1.153846,3.846154,30.0,0.769231,...,55.56,1.076923,1.846154,2.9,0.8,0.3,0.1,1.0,1.4,25
28,"Quiambao, Kevin",DLSU,13,F,25.26,11.2,4.307692,12.538462,34.4,0.692308,...,71.43,1.846154,4.692308,6.5,3.2,1.1,0.4,3.1,1.0,43
30,"Austria, Cyrus",DLSU,13,F,23.25,8.5,3.0,7.076923,42.9,0.538462,...,51.11,1.461538,2.384615,3.9,2.2,1.2,0.1,1.6,1.5,45
34,"Phillips, Benjamin",DLSU,13,C,14.17,4.6,1.615385,4.076923,41.0,0.538462,...,55.56,1.538462,2.153846,3.6,1.0,0.4,0.3,0.6,1.6,49
44,"Sleat, Patrick",FEU,14,G,20.11,8.8,3.5,7.214286,48.5,0.714286,...,71.43,1.642857,2.142857,3.8,1.8,0.9,0.1,1.7,2.4,59
45,"Anonuevo, Cholo",FEU,14,F,25.41,6.2,2.214286,6.785714,32.6,0.357143,...,48.78,2.5,3.785714,6.3,2.1,1.6,0.9,2.2,2.4,60
49,"Alforque, Royce",FEU,14,G,19.58,2.6,1.142857,4.5,25.4,0.142857,...,50.0,1.285714,2.857143,4.1,2.9,1.6,0.2,1.5,1.6,64


In [188]:
# Hole-2 players vs. the full UAAP table; `h2_means` keeps the unrounded means for later use.
h2_means, comp = compare_means(node_players=hole_2, df=uaap_df)

comp

Unnamed: 0,Feature Mean,Dataset Mean,Difference
GP,13.58,11.17,21.56
MPG,19.19,14.07,36.41
PPG,6.19,4.8,28.95
FGM,2.29,1.73,32.61
FGA,5.78,4.64,24.54
FG%,38.74,35.6,8.84
3PM,0.43,0.49,-12.33
3PA,1.76,1.83,-3.91
3P%,20.64,17.35,18.99
FTM,1.16,0.86,35.01


#### Hole filling

In [203]:
# Build uaap_augmented: uaap_df plus one synthetic row holding the hole-1 means,
# then run Mapper on the augmented table.

# The synthetic row gets its own name. Rebinding `h1_means` to the transposed
# frame would make this cell non-idempotent: a second run would transpose the
# already-converted frame and corrupt the augmented table.
h1_row = h1_means.to_frame().T.assign(Position='Hole filling')

# ignore_index avoids a duplicate index label (the synthetic row would otherwise be labelled 0).
uaap_augmented = pd.concat([uaap_df, h1_row], axis=0, ignore_index=True)

# Preprocessing (drop the ID column, keep numeric columns, standardise)

uaap_augmented = uaap_augmented.drop('#', axis=1)
uaap_augmented_numeric = uaap_augmented.select_dtypes(include=[np.number])
uaap_augmented_mapper = StandardScaler().fit_transform(uaap_augmented_numeric)

# Mapper

uaap_augmented_pos_dummies = pd.get_dummies(uaap_augmented['Position'])
uaap_augmented_MIP = MapperInteractivePlotter(pipe, uaap_augmented_mapper)

# Generate interactive widget
uaap_augmented_MIP.plot(color_data=uaap_augmented_pos_dummies)




VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Cover parameters</b>'), Text(value='uniform', cont…

In [189]:
# Build uaap_augmented: uaap_df plus one synthetic row holding the hole-2 means,
# then run Mapper on the augmented table.

# The synthetic row gets its own name. Rebinding `h2_means` to the transposed
# frame would make this cell non-idempotent: a second run would transpose the
# already-converted frame and corrupt the augmented table.
h2_row = h2_means.to_frame().T.assign(Position='Hole filling')

# ignore_index avoids a duplicate index label (the synthetic row would otherwise be labelled 0).
uaap_augmented = pd.concat([uaap_df, h2_row], axis=0, ignore_index=True)

# Preprocessing (drop the ID column, keep numeric columns, standardise)

uaap_augmented = uaap_augmented.drop('#', axis=1)
uaap_augmented_numeric = uaap_augmented.select_dtypes(include=[np.number])
uaap_augmented_mapper = StandardScaler().fit_transform(uaap_augmented_numeric)

# Mapper

uaap_augmented_pos_dummies = pd.get_dummies(uaap_augmented['Position'])
uaap_augmented_MIP = MapperInteractivePlotter(pipe, uaap_augmented_mapper)

# Generate interactive widget
uaap_augmented_MIP.plot(color_data=uaap_augmented_pos_dummies)




VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Cover parameters</b>'), Text(value='uniform', cont…

### PBA Mapper

In [190]:
# Mapper on the scaled PBA matrix; nodes can be coloured by any one-hot position column.
PBA_MIP = MapperInteractivePlotter(pipe, pba_mapper)
pba_pos_dummies = pd.get_dummies(pba_df['Position'])

# Generate interactive widget
PBA_MIP.plot(color_data=pba_pos_dummies)

VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Cover parameters</b>'), Text(value='uniform', cont…

### Japan Mapper

In [191]:
# Mapper on the scaled Japan matrix; nodes can be coloured by any one-hot position column.
J_MIP = MapperInteractivePlotter(pipe, japan_mapper)
japan_pos_dummies = pd.get_dummies(japan_df['Position'])

# Generate interactive widget
J_MIP.plot(color_data=japan_pos_dummies)

VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Cover parameters</b>'), Text(value='uniform', cont…

### Korea Mapper

In [192]:
# Mapper on the scaled Korea matrix; nodes can be coloured by any one-hot position column.
K_MIP = MapperInteractivePlotter(pipe, korea_mapper)
korea_pos_dummies = pd.get_dummies(korea_df['Position'])

# Generate interactive widget
K_MIP.plot(color_data=korea_pos_dummies)

VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Cover parameters</b>'), Text(value='uniform', cont…

# Tests

### Cross-league mapper

In [193]:
# Tag every league table with its league name, then stack them.
# NOTE: this writes a 'League' column onto the original per-league frames,
# so any cell displaying them afterwards will show that extra column.
league_tables = [
    ('UAAP', uaap_df),
    ('PBA', pba_df),
    ('Japan', japan_df),
    ('Korea', korea_df),
]
for league_name, league_df in league_tables:
    league_df['League'] = league_name

all_df = pd.concat([league_df for _name, league_df in league_tables])

# Stack the per-league scaled matrices in the same league order as all_df.
all_mapper = np.concatenate([uaap_mapper, pba_mapper, japan_mapper, korea_mapper])

# One-hot colourings: by position and by league.
all_league_dummies = pd.get_dummies(all_df['League'])
all_pos_dummies = pd.get_dummies(all_df['Position'])

MIP = MapperInteractivePlotter(pipe, all_mapper)

# Generate interactive widget (coloured by position)
MIP.plot(color_data=all_pos_dummies)

VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Cover parameters</b>'), Text(value='uniform', cont…

In [194]:
# Generate the interactive widget for the cross-league Mapper again,
# this time coloured by the one-hot league columns

MIP.plot(color_data=all_league_dummies)

VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Cover parameters</b>'), Text(value='uniform', cont…