## Cross Validation, Ensemble Methods, Regularization, Feature Selection

In [None]:
from sklearn.model_selection import cross_val_score
# DecisionTreeClassifier is used below; import it here so this cell does not
# depend on a later cell having been executed first.
from sklearn.tree import DecisionTreeClassifier

# Model to evaluate (default hyper-parameters).
model = DecisionTreeClassifier()

# 5-fold cross-validation on the training data; `cv` is the number of folds
# and the result holds one score per fold.
scores = cross_val_score(model, X_train, y_train, cv=5)

# Report the per-fold scores and their average.
print("Cross-validation scores:", scores)
# The score array's own mean() gives the same value as np.mean(scores)
# without requiring `np` to be in scope.
print("Mean cross-validation score:", scores.mean())


In [None]:
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

# Base learner shared by the bagging and boosting ensembles below.
base_classifier = DecisionTreeClassifier()

# Bagging: 10 copies of the base learner combined into one ensemble.
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the positional form works on both old and new releases.
bagging_model = BaggingClassifier(base_classifier, n_estimators=10)
bagging_model.fit(X_train, y_train)

# Boosting (AdaBoost): up to 50 boosting rounds with learning rate 1.0.
# NOTE(review): the base learner here is an unconstrained decision tree;
# AdaBoost is normally used with a shallow one (e.g. max_depth=1) - confirm
# this is intended.
adaboost_model = AdaBoostClassifier(base_classifier, n_estimators=50, learning_rate=1.0)
adaboost_model.fit(X_train, y_train)

# Stacking: a logistic regression combines the predictions of the two
# ensembles above.
stacking_model = StackingClassifier(
    estimators=[('bagging', bagging_model), ('adaboost', adaboost_model)],
    final_estimator=LogisticRegression(),
)
stacking_model.fit(X_train, y_train)


In [None]:
from sklearn.linear_model import Ridge, Lasso, ElasticNet

# Three linear regressors that differ only in their penalty term. Each one is
# built and fitted on the training data in a single step (fit() returns the
# estimator itself). In all three, `alpha` is the regularization strength.

# Ridge regression (L2 penalty).
ridge_model = Ridge(alpha=0.5).fit(X_train, y_train)

# Lasso regression (L1 penalty).
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)

# ElasticNet regression: `l1_ratio` controls the balance between the L1 and
# L2 penalties (0.5 weights them equally).
elasticnet_model = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X_train, y_train)


In [None]:
from sklearn.feature_selection import SelectKBest, chi2, f_classif, RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# --- Univariate selection ---------------------------------------------------
# Keep the 5 features with the highest chi-square score against the labels.
# NOTE(review): chi2 requires non-negative feature values - confirm X_train
# has not been standardized or otherwise shifted below zero.
selector = SelectKBest(score_func=chi2, k=5)
X_selected = selector.fit_transform(X_train, y_train)

# Translate the boolean support mask into column names.
# NOTE(review): names are taken from `X`, presumably the frame X_train was
# split from - verify the column order matches X_train.
kbest_mask = selector.get_support()
selected_features = np.array(X.columns)[kbest_mask]
print("Selected features using SelectKBest:", selected_features)

# --- Recursive feature elimination -------------------------------------------
# A small random forest ranks the features; RFE removes one feature per
# iteration (step=1) until 5 remain.
estimator = RandomForestClassifier(n_estimators=10)
selector_rfe = RFE(estimator, n_features_to_select=5, step=1)
selector_rfe.fit(X_train, y_train)

# Same mask-to-name lookup for the RFE result.
rfe_mask = selector_rfe.support_
selected_features_rfe = np.array(X.columns)[rfe_mask]
print("Selected features using RFE:", selected_features_rfe)

# --- Downstream model --------------------------------------------------------
# Example: fit a logistic regression on the SelectKBest-reduced training data.
logistic_regression = LogisticRegression()
logistic_regression.fit(X_selected, y_train)


## Feature Importance

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest with default settings on the training data
# (fit() returns the fitted estimator).
clf = RandomForestClassifier().fit(X_train, y_train)

# One importance value per input feature.
feature_importances = clf.feature_importances_

# Report the importances using 1-based feature numbers.
for position, weight in enumerate(feature_importances, start=1):
    print("Feature {}: {}".format(position, weight))


In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression with default settings on the training data
# (fit() returns the fitted estimator).
clf = LogisticRegression().fit(X_train, y_train)

# Take the first row of the coefficient matrix.
# NOTE(review): with more than two classes `coef_` has one row per class, so
# only the first class's coefficients would be shown - confirm the target is
# binary.
feature_coefficients = clf.coef_[0]

# Report the coefficients using 1-based feature numbers.
for position, weight in enumerate(feature_coefficients, start=1):
    print("Feature {}: {}".format(position, weight))


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit a gradient boosting classifier with default settings on the training
# data (fit() returns the fitted estimator).
clf = GradientBoostingClassifier().fit(X_train, y_train)

# One importance value per input feature.
feature_importances = clf.feature_importances_

# Report the importances using 1-based feature numbers.
for position, weight in enumerate(feature_importances, start=1):
    print("Feature {}: {}".format(position, weight))


In [None]:
# Permutation importance
from sklearn.inspection import permutation_importance
# SVC is used below; import it here so the cell runs on its own.
from sklearn.svm import SVC

# Train an SVM classifier with default settings on the training data.
clf = SVC()
clf.fit(X_train, y_train)

# Permutation importance measures how much the model's score drops when a
# single feature's values are shuffled; it is evaluated here on
# X_test / y_test rather than on the training data.
perm_importance = permutation_importance(clf, X_test, y_test)

# Print the mean importance of each feature (averaged over the shuffles),
# using 1-based feature numbers.
for i, importance in enumerate(perm_importance.importances_mean):
    print("Feature {}: {}".format(i+1, importance))
