<a href="https://colab.research.google.com/github/appliedcode/mthree-c422/blob/mthree-c422-Likhitha/Filter_methods_Fisher_Score_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, chi2, f_classif
from sklearn.preprocessing import KBinsDiscretizer

# Load the Iris dataset that ships with scikit-learn.
data = load_iris()
# Feature matrix: one DataFrame column per measurement name.
X = pd.DataFrame(data=data['data'], columns=data['feature_names'])
# Class labels, kept as a Series named 'target'.
y = pd.Series(data=data['target'], name='target')

# 1. Fisher Score calculation
def _class_scatter(values, labels, classes, grand_mean):
    """Return the (between-class, within-class) scatter of one feature.

    values: Series holding a single feature.
    labels: Series of class labels aligned with ``values``.
    classes: iterable of the distinct class labels to accumulate over.
    grand_mean: mean of ``values`` over all samples.
    """
    between = within = 0.0
    for cls in classes:
        group = values[labels == cls]
        size = len(group)
        # Class-size-weighted squared distance of the class mean from the
        # overall mean.
        between += size * (group.mean() - grand_mean) ** 2
        # Series.var() is the sample variance (pandas default ddof=1).
        within += size * group.var()
    return between, within


class_labels = np.unique(y)
mean_overall = X.mean()
_scatter = [
    _class_scatter(X[name], y, class_labels, mean_overall[name])
    for name in X.columns
]
between_var = [between for between, _ in _scatter]
within_var = [within for _, within in _scatter]
# Fisher score = between-class scatter / within-class scatter, per feature.
fisher_scores = np.array(between_var) / np.array(within_var)

# 2. ANOVA F-score calculation
# k is set to the number of columns so no feature is dropped; only the
# per-feature scores are needed here.
anova_selector = SelectKBest(score_func=f_classif, k=X.shape[1]).fit(X, y)
f_scores = anova_selector.scores_

# 3. Correlation with target
# Series.corr defaults to the Pearson coefficient; the result is one signed
# value per feature, indexed by feature name.
corr_with_target = pd.Series(
    {name: X[name].corr(y) for name in X.columns}
)

# 4. Chi-Square scores (requires discretization)
# Each feature is cut into 3 equal-width bins, encoded as ordinal bin ids,
# before being handed to the chi2 scorer.
kb = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
_binned_values = kb.fit_transform(X)
X_binned = pd.DataFrame(_binned_values, columns=X.columns)
# k equals the number of columns, so every feature is kept and scored.
chi2_selector = SelectKBest(score_func=chi2, k=X.shape[1]).fit(X_binned, y)
chi2_scores = chi2_selector.scores_

# 5. Combine into DataFrame
# One row per feature, one column per scoring method.
_score_table = {
    'feature': X.columns,
    'Fisher_score': fisher_scores,
    'F_score': f_scores,
    'Corr_with_target': corr_with_target.to_numpy(),
    'Chi2_score': chi2_scores,
}
df = pd.DataFrame(_score_table)

# 6. Compute rankings (1 = strongest score)
# Each '<score>_rank' column holds the feature's position under that score;
# ties share the smallest rank (method='min').
for col in ['Fisher_score', 'F_score', 'Corr_with_target', 'Chi2_score']:
    # A correlation's strength is its magnitude: a strongly negative
    # correlation is as informative as a strongly positive one, so rank it
    # by absolute value. The other scores are ranked as-is.
    ranking_values = df[col].abs() if col == 'Corr_with_target' else df[col]
    # rank features: highest value gets rank 1
    df[f'{col}_rank'] = ranking_values.rank(ascending=False, method='min')

# 7. Compute average rank
# Collect every per-method rank column created so far and average them
# row-wise, giving one consensus rank per feature.
rank_cols = [name for name in df if name.endswith('_rank')]
df['Average_rank'] = df.loc[:, rank_cols].mean(axis='columns')

# 8. Sort by average rank
# Best (lowest) average rank first, with a fresh 0..n-1 index.
# NOTE: sort_values uses its default sort kind, which is not guaranteed to be
# stable, so rows with equal Average_rank may appear in either order.
df_sorted = df.sort_values(by='Average_rank').reset_index(drop=True)

# 9. Round numeric columns for readability
# Only the score columns and the average rank are rounded (6 decimals);
# the per-method rank columns and the feature names are left untouched.
round_cols = ['Fisher_score', 'F_score', 'Corr_with_target', 'Chi2_score', 'Average_rank']
df_sorted = df_sorted.round(dict.fromkeys(round_cols, 6))

# 10. Display final table
# Each score is printed next to its rank, with the consensus rank last.
_display_cols = ['feature']
for _score in ['Fisher_score', 'F_score', 'Corr_with_target', 'Chi2_score']:
    _display_cols += [_score, f'{_score}_rank']
_display_cols.append('Average_rank')
print(df_sorted[_display_cols].to_string(index=False))

          feature  Fisher_score  Fisher_score_rank     F_score  F_score_rank  Corr_with_target  Corr_with_target_rank  Chi2_score  Chi2_score_rank  Average_rank
 petal width (cm)     12.800095                2.0  960.007147           2.0          0.956547                    1.0   93.621622              1.0           1.5
petal length (cm)     15.735482                1.0 1180.161182           1.0          0.949035                    2.0   91.123288              2.0           1.5
sepal length (cm)      1.590193                3.0  119.264502           3.0          0.782561                    3.0   54.142857              3.0           3.0
 sepal width (cm)      0.655467                4.0   49.160040           4.0         -0.426658                    4.0   20.864407              4.0           4.0
