<a href="https://colab.research.google.com/github/aryanmhalsank19/HeartPrediction/blob/master/chisquare.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Cleveland heart-disease data (UCI "processed.cleveland.data").
# Column labels are supplied explicitly via `names=`, so the first line of the
# file is read as data; any '?' entry is parsed as NaN.
column_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target',
]
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"

# Read the file and discard every row that contains at least one missing value.
heart_data = pd.read_csv(data_url, names=column_names, na_values='?').dropna()

# Split the frame: `y` is the 'target' column, `X` is every remaining column.
y = heart_data['target']
X = heart_data.drop(columns='target')

# Number of top-scoring features SelectKBest should keep (here: the top 9).
# Adjust this value to keep more or fewer features.
k = 9

# NOTE(review): chi2 is documented for non-negative features such as booleans
# or counts, and its statistic depends on the magnitude of the feature values.
# Several columns here (e.g. 'thalach', 'chol', 'age') look like continuous
# measurements — confirm that ranking them with chi2 is intended, or consider
# binning/scaling them first.
#
# Build the selector with the chi2 scoring function and fit it on the data;
# fit() returns the fitted selector itself.
selector = SelectKBest(score_func=chi2, k=k).fit(X, y)

# Per-feature chi2 statistic and p-value, in the same order as X's columns.
scores = selector.scores_
p_values = selector.pvalues_

# Assemble one row per feature (name, chi2 statistic, p-value) and rank the
# rows from the highest chi2 score to the lowest.
feature_scores_df = pd.DataFrame({
    'Feature': X.columns,
    'Chi2 Score': scores,
    'P-value': p_values,
}).sort_values(by='Chi2 Score', ascending=False)

# Show the ranked table under its heading (heading and table on separate lines).
print("Feature Scores Table:", feature_scores_df, sep="\n")


Feature Scores Table:
     Feature  Chi2 Score       P-value
7    thalach  219.883557  1.986038e-46
11        ca  107.213553  2.858376e-22
9    oldpeak   98.505523  2.046244e-20
12      thal   70.520283  1.762563e-14
8      exang   39.119182  6.582518e-08
4       chol   38.167452  1.034874e-07
0        age   27.975233  1.261793e-05
3   trestbps   19.348090  6.713433e-04
2         cp   16.383180  2.545823e-03
6    restecg   13.026434  1.114735e-02
10     slope   10.178421  3.752710e-02
1        sex    7.511646  1.111968e-01
5        fbs    6.778461  1.480690e-01
