In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import StandardScaler

# Load the dimension-45 feature matrix and its labels.
# NOTE: the values are used exactly as stored on disk; no rounding is applied.
X = np.load('Datasets/kryptonite-45-X.npy')
y = np.load('Datasets/kryptonite-45-y.npy')

# Wrap the arrays in a DataFrame: one named column per feature plus 'Target'
feature_names = [f'Feature_{idx}' for idx in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=feature_names)
df['Target'] = y

# Standardize only the feature columns (the target is left untouched) so
# that all features share a common scale when plotted against each other
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[feature_names])
df_scaled = pd.DataFrame(X_scaled, columns=feature_names)
df_scaled['Target'] = df['Target']

# Pairwise scatter plot for a manageable subset of the features.
# A pairplot draws one panel per pair of columns, so passing every column of
# df_scaled at once yields a grid with thousands of axes that is impractically
# slow to render. Cap both the number of features and the number of rows drawn.
PAIRPLOT_MAX_FEATURES = 6   # leading feature columns to include; tune as needed
PAIRPLOT_MAX_ROWS = 2000    # upper bound on the rows passed to the plot

pairplot_features = [col for col in df_scaled.columns if col != 'Target']
pairplot_cols = pairplot_features[:PAIRPLOT_MAX_FEATURES] + ['Target']
pairplot_data = df_scaled[pairplot_cols]
if len(pairplot_data) > PAIRPLOT_MAX_ROWS:
    # Fixed seed so the sampled figure is reproducible between runs
    pairplot_data = pairplot_data.sample(n=PAIRPLOT_MAX_ROWS, random_state=0)

sns.pairplot(pairplot_data, plot_kws={'alpha': 0.3, 's': 10})
plt.suptitle('Pairwise Scatter Plot of Features (Dim 45)', y=1.02)
plt.show()

# Compute mutual information between each feature and the target.
# X is passed exactly as loaded (it is not rounded anywhere above), so its
# columns are presumably continuous floats -- confirm against the dataset.
# Declaring such features as discrete would make the estimator treat every
# distinct value as its own category and produce meaningless scores, so the
# continuous (nearest-neighbour based) estimator is requested instead.
# random_state pins the small noise that estimator adds, for reproducibility.
mi_scores = mutual_info_classif(X, y, discrete_features=False, random_state=0)

# One row per feature, ordered from most to least informative
mi_df = pd.DataFrame({'Feature': df.columns[:-1], 'Mutual Information': mi_scores})
mi_df = mi_df.sort_values(by='Mutual Information', ascending=False)

# Horizontal bar chart of the per-feature mutual information scores,
# drawn in the row order of mi_df
mi_fig, mi_ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=mi_df,
    x='Mutual Information',
    y='Feature',
    palette='viridis',
    ax=mi_ax,
)
mi_ax.set_title('Mutual Information between Each Feature and Target')
mi_ax.set_xlabel('Mutual Information Score')
mi_ax.set_ylabel('Features')
plt.show()


KeyboardInterrupt: 