# Filter Method: Correlation & Chi-Squared

In [None]:
!pip install pandas numpy scikit-learn

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import chi2, SelectKBest

# Toy data: three integer-valued features plus a 0/1 label, 100 rows each.
# Every feature range starts at 0, so no column contains negative values.
df = pd.DataFrame(
    {
        'feature1': np.random.randint(0, 100, 100),
        'feature2': np.random.randint(0, 50, 100),
        'feature3': np.random.randint(0, 10, 100),
        'target': np.random.randint(0, 2, 100),  # binary classification
    }
)

# Separate the label column from the predictors.
y = df['target']
X = df.drop(columns='target')

# Step 1: correlation of every column with the label. The label's own
# entry is part of the printed column as well.
print("Correlation with target:")
correlation_matrix = df.corr()
print(correlation_matrix['target'])

# Step 2: score each predictor with the chi-squared statistic and keep the
# two best-scoring columns.
selector = SelectKBest(score_func=chi2, k=2)
X_chi2 = selector.fit(X, y).transform(X)

# The kept columns are reported by their position in X, not by name.
print("\nChi-square selected features:", selector.get_support(indices=True))

# Wrapper Method: Recursive Feature Elimination (RFE)

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE

# Demonstration data: the three integer features used in the filter-method
# example plus an extra continuous column, 'feature4'. All values are drawn
# afresh at random, so this is not the same frame as the earlier cell.
df = pd.DataFrame({
    'feature1': np.random.randint(0,100,100),
    'feature2': np.random.randint(0,50,100),
    'feature3': np.random.randint(0,10,100),
    'feature4': np.random.rand(100)*10,
    'target': np.random.randint(0,2,100)
})

X = df.drop('target', axis=1)
y = df['target']

# RFE eliminates features according to the size of the fitted coefficients,
# and a linear model's coefficients depend on each feature's units. The
# columns here span very different ranges (0-100 down to 0-10), so bring
# them to zero mean / unit variance first; otherwise the ranking mostly
# reflects scale rather than relevance.
X_scaled = (X - X.mean()) / X.std()

# Raise the iteration cap so the solver has room to converge on every
# refit that RFE performs.
model = LogisticRegression(max_iter=1000)
rfe = RFE(model, n_features_to_select=2)
rfe.fit(X_scaled, y)

# ranking_ and support_ are ordered like the columns of X.
print("Feature Ranking:", rfe.ranking_)
print("Selected Features (True=selected):", rfe.support_)

# Embedded Method: Lasso Regularization

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LassoCV

# Synthetic regression data: three random features multiplied by 1, 2 and 3
# respectively, and a random target multiplied by 5. 100 rows each.
df = pd.DataFrame(
    {
        'feature1': np.random.rand(100),
        'feature2': 2 * np.random.rand(100),
        'feature3': 3 * np.random.rand(100),
        'target': 5 * np.random.rand(100),
    }
)

# Split predictors from the regression target.
y = df['target']
X = df.drop(columns=['target'])

# Fit a Lasso model with 5-fold cross-validation.
lasso = LassoCV(cv=5)
lasso.fit(X, y)

# Label each fitted coefficient with its column name, then keep the names
# whose coefficient was not driven to zero.
coef = pd.Series(data=lasso.coef_, index=X.columns)
nonzero_mask = coef.ne(0)
selected_features = coef.loc[nonzero_mask].index.tolist()

print("Lasso Coefficients:\n", coef)
print("\nSelected Features:", selected_features)

# Tree-Based Importance

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Synthetic classification data, 200 rows: three features drawn with
# np.random.randn, one 0/1 feature, and a 0/1 label.
df = pd.DataFrame(
    {
        'feature1': np.random.randn(200),
        'feature2': np.random.randn(200),
        'feature3': np.random.randn(200),
        'feature4': np.random.randint(0, 2, 200),
        'target': np.random.randint(0, 2, 200),
    }
)

# Split predictors from the class label.
y = df['target']
X = df.drop(columns='target')

# Fit a 100-tree forest and read the per-feature importances it exposes.
model = RandomForestClassifier(n_estimators=100)
model.fit(X, y)
importances = model.feature_importances_

# Pair each importance with its column name and print one line per feature.
labelled_importances = pd.Series(importances, index=X.columns)
for name, score in labelled_importances.items():
    print(f"{name}: {score:.3f}")

# Permutation Importance

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

# One seed drives the data, the split, the forest and the permutation
# shuffles, so re-running the cell prints the same numbers. Seeding only
# permutation_importance does not achieve that, because the data and the
# fitted forest would still change from run to run.
SEED = 42
rng = np.random.default_rng(SEED)

# Example regression dataset: three random features and a random target
# multiplied by 10, 300 rows each.
df = pd.DataFrame({
    'feature1': rng.random(300),
    'feature2': rng.random(300),
    'feature3': rng.random(300),
    'target': rng.random(300) * 10
})

X = df.drop('target', axis=1)
y = df['target']

# Hold out a quarter of the rows. Scoring permutation importance on the
# rows the forest was fitted on rewards whatever it memorised, which makes
# uninformative features look useful; held-out rows measure what actually
# generalises.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=SEED
)

model = RandomForestRegressor(n_estimators=100, random_state=SEED)
model.fit(X_train, y_train)

# Mean drop in the model's score over 10 shuffles of each column,
# evaluated on the held-out rows.
result = permutation_importance(
    model, X_test, y_test, n_repeats=10, random_state=SEED
)
for col, imp in zip(X.columns, result.importances_mean):
    print(f"{col}: {imp:.4f}")