# Variance Threshold

In [18]:
import pandas as pd
from sklearn.feature_selection import VarianceThreshold

# Read the per-season data files.
season_22 = pd.read_csv('2022-23_data.csv')
season_23 = pd.read_csv('2023-24_data.csv')

# Keep only numeric columns; the selector is fitted on these frames.
numeric_22 = season_22.select_dtypes(include='number')
numeric_23 = season_23.select_dtypes(include='number')

# Variance cut-off shared by both seasons.
threshold = 0.1


def _select_high_variance(numeric_frame, threshold):
    """Fit a VarianceThreshold on ``numeric_frame`` and subset that same frame.

    Fitting and subsetting go through one function so the column indices
    are always applied to the frame they were computed from (the original
    cell applied the 2023-24 indices to the 2022-23 frame).

    Parameters
    ----------
    numeric_frame : pandas.DataFrame
        Numeric-only feature frame.
    threshold : float
        Cut-off passed to ``VarianceThreshold``.

    Returns
    -------
    tuple
        ``(selector, kept_indices, selected_frame)``: the fitted selector,
        the integer positions of the retained columns, and
        ``numeric_frame`` restricted to those columns.
    """
    selector = VarianceThreshold(threshold=threshold)
    selector.fit(numeric_frame)
    kept_indices = selector.get_support(indices=True)
    return selector, kept_indices, numeric_frame.iloc[:, kept_indices]


# 2022-23 season
selector_22, high_variance_indices_22, selected_22 = _select_high_variance(
    numeric_22, threshold
)

# 2023-24 season (subset numeric_23, not numeric_22)
selector_23, high_variance_indices_23, selected_23 = _select_high_variance(
    numeric_23, threshold
)


## Consolidating Spearman with Variance

In [33]:
# Feature names from the Spearman step, read from the 'Feature' column.
spearman_corr = pd.read_csv('spearman_corr_features.csv')
spearman_features = set(spearman_corr['Feature'])

# Column names that passed the variance filter in each season ...
variance_features_22 = set(selected_22.columns)
variance_features_23 = set(selected_23.columns)

# ... and those that passed in both seasons.
variance_combined = variance_features_22 & variance_features_23
print(len(variance_combined))

# Final set: features present in both the variance and Spearman selections.
final_features = variance_combined & spearman_features

print(f"Number of features after combining methods: {len(final_features)}")
print(final_features)

220
Number of features after combining methods: 23
{'uncontestedFieldGoalsMade_my_player_2', 'uncontestedFieldGoalsMade_my_player_1', 'reboundChancesTotal_my_player_5', 'reboundChancesOffensive_my_player_1', 'uncontestedFieldGoalsMade_my_player_6', 'reboundChancesTotal_opposing_player_7', 'assists_opposing_player_1', 'secondaryAssists_opposing_player_7', 'uncontestedFieldGoalsMade_my_player_3', 'REST_DAYS', 'uncontestedFieldGoalsMade_my_player_4', 'secondaryAssists_opposing_player_3', 'assists_my_player_1', 'reboundChancesTotal_opposing_player_4', 'reboundChancesTotal_opposing_player_1', 'reboundChancesOffensive_my_player_2', 'reboundChancesTotal_opposing_player_2', 'reboundChancesTotal_opposing_player_5', 'reboundChancesTotal_opposing_player_3', 'secondaryAssists_opposing_player_2', 'REF_BIAS', 'reboundChancesTotal_opposing_player_6', 'reboundChancesOffensive_my_player_5'}
