In [9]:
import xgboost as xgb
import shap
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error as rmse
from shap_analysis import SHAPAnalyzer, function_map
import optuna

In [2]:
# Load the tab-separated histone dataset; the log-transformed expression
# column is the regression target and every other column is a feature.
df = pd.read_table("thesis_cleaned_transformed_histone_dataset_categorical(H3K23me1).tsv")

y = df['Gene Expression (FPKM)_log']
X = df.drop(columns=['Gene Expression (FPKM)_log'])

# Fixed seed keeps the 80/20 split identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
def evaluate_model(X_train, X_test, y_train, y_test, params=None, optuna_study=None):
    """
    Train an XGBoost model and analyse its SHAP values with `SHAPAnalyzer`.

    The model is trained either with the provided parameters or with the best
    trial of an Optuna study. SHAP values are computed on the whole of `X_test`.

    Parameters:
    - X_train (pd.DataFrame): Training feature data.
    - X_test (pd.DataFrame): Test feature data. It is also used as the
      early-stopping validation set during training.
    - y_train (pd.Series): Training target data.
    - y_test (pd.Series): Test target data.
    - params (dict): Parameters for the XGBoost model. Takes precedence over
      `optuna_study` when both are given. If both are None, `params` is handed
      to `xgb.train` unchanged (i.e. as None).
    - optuna_study (optuna.Study): Optuna study object containing the best trial.
      Only consulted when `params` is None.

    Returns:
    - model: Trained XGBoost model.
    - total_score (float): The evaluation score based on SHAP analysis.
    - result_summary (dict): Summary of matches and mismatches.
    - results_df (pd.DataFrame): Detailed SHAP results for features.
    """
    # Determine parameters: use the best trial from Optuna if no parameters are provided
    if optuna_study is not None and params is None:
        # Work on a copy so the fixed settings below are never written into
        # the dict owned by the study's trial object.
        params = dict(optuna_study.best_trial.params)
        params.update({
            'booster': 'gbtree',
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'tree_method': 'hist',
            'device': 'cuda'
        })

    # Prepare the data
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    # Train the model
    model = xgb.train(
        params,
        dtrain,
        num_boost_round=100,
        evals=[(dtest, 'validation')],
        early_stopping_rounds=10,
        verbose_eval=False
    )

    # SHAP analysis over every test row; no sub-sampling is involved, so the
    # function also works on splits of any size.
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # Use the SHAPAnalyzer to calculate metrics
    analyzer = SHAPAnalyzer(X_test, shap_values, function_map)
    analyzer.calculate_high_value_shap_means()

    results_df, total_score, result_summary = analyzer.get_results()

    print(f"Results Summary: {result_summary}")

    return model, total_score, result_summary, results_df


In [4]:
def feature_selection(X_train, X_test, y_train, y_test):
    """
    Iteratively drop "Mismatch" features until none are reported.

    Each round calls `evaluate_model` on the surviving columns, then removes
    the mismatching feature with the largest "Mean SHAP Value (High)".

    Parameters:
    - X_train (pd.DataFrame): Training feature data.
    - X_test (pd.DataFrame): Test feature data.
    - y_train (pd.Series): Training target data.
    - y_test (pd.Series): Test target data.

    Returns:
    - list: Features left once a round reports no mismatch, or an empty list
      if every feature ended up being removed.
    """
    remaining_features = X_train.columns.tolist()

    while remaining_features:
        print(f"Remaining Features: {remaining_features}")

        # Only the per-feature results table is needed here.
        _, _, _, results_df = evaluate_model(
            X_train[remaining_features],
            X_test[remaining_features],
            y_train,
            y_test,
        )

        is_mismatch = results_df["Result"] == "Mismatch"
        mismatch_features = results_df[is_mismatch]

        # Guard clause: a clean round means the current subset is final.
        if mismatch_features.empty:
            return remaining_features

        ranked = mismatch_features.sort_values(
            by="Mean SHAP Value (High)", ascending=False
        )
        mismatch_to_remove = ranked.iloc[0]['Features']

        print(f"Removing mismatch feature: {mismatch_to_remove}")

        remaining_features.remove(mismatch_to_remove)

    # Every feature was removed without ever reaching a clean round.
    return []

In [6]:
def feature_selection_with_optuna(X_train, X_test, y_train, y_test, function_map, n_trials=40):
    """
    Iteratively drop "Mismatch" features, re-tuning XGBoost with Optuna each round.

    Every round runs a fresh Optuna study on the surviving columns, retrains
    via `evaluate_model` with that study's best trial, and removes the
    mismatching feature with the largest "Mean SHAP Value (High)". The loop
    stops when a round reports no mismatch or no feature is left.

    Parameters:
    - X_train (pd.DataFrame): Training feature data.
    - X_test (pd.DataFrame): Test feature data (also used as validation set).
    - y_train (pd.Series): Training target data.
    - y_test (pd.Series): Test target data.
    - function_map: Passed straight to `SHAPAnalyzer` inside the objective.
    - n_trials (int): Number of Optuna trials per round. Defaults to 40.

    Returns:
    - selected_features (list): Features kept after the last round; empty if
      every feature was removed or `X_train` has no columns.
    - study (optuna.Study | None): Study from the last round, or None when
      `X_train` has no columns and therefore no study was ever created.
    """
    remaining_features = X_train.columns.tolist()
    selected_features = []
    # Bound up front so the final `return` is valid even if the loop never runs.
    study = None

    def optuna_objective(trial, X_train, X_test, y_train, y_test):
        """Score one trial as SHAP match accuracy minus a weighted RMSE penalty."""
        params = {
            'booster': 'gbtree',
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'tree_method': 'hist',
            'device': 'cuda',
            'learning_rate': trial.suggest_float("learning_rate", 0.001, 0.05),
            'max_depth': trial.suggest_int("max_depth", 10, 30),
            'subsample': trial.suggest_float("subsample", 0.4, 1.0),
            'gamma': trial.suggest_float("gamma", 0.0, 1.0),
            'colsample_bytree': trial.suggest_float("colsample_bytree", 0.5, 1.0),
            'lambda': trial.suggest_float("lambda", 1e-3, 15),
            'alpha': trial.suggest_float("alpha", 1e-3, 15),
        }

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dvalid = xgb.DMatrix(X_test, label=y_test)

        model = xgb.train(params, dtrain, num_boost_round=100,
                          evals=[(dvalid, 'validation')],
                          early_stopping_rounds=10,
                          verbose_eval=False)

        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_test)

        analyzer = SHAPAnalyzer(X_test, shap_values, function_map)
        analyzer.calculate_high_value_shap_means()

        results_df, _, result_summary = analyzer.get_results()
        total_mismatches = result_summary.get("Mismatch", 0)
        total_matches = result_summary.get("Match", 0)
        # Only explicit matches/mismatches enter the ratio; any other
        # category in the summary is ignored.
        evaluated = total_matches + total_mismatches
        accuracy = total_matches / evaluated if evaluated > 0 else 0

        preds = model.predict(dvalid)
        rmse_score = rmse(y_test, preds)

        # Define the objective score
        alpha = 1.0  # Weight for accuracy
        beta = 0.1   # Weight for rmse_score to penalize
        objective_score = alpha * accuracy - beta * rmse_score

        trial.set_user_attr("result_summary", result_summary)
        trial.set_user_attr("results_df", results_df)

        return objective_score

    while remaining_features:
        print(f"Remaining Features: {remaining_features}")

        reduced_X_train = X_train[remaining_features]
        reduced_X_test = X_test[remaining_features]

        # Run Optuna for the current subset of features
        study = optuna.create_study(direction='maximize')
        study.optimize(
            lambda trial: optuna_objective(
                trial, reduced_X_train, reduced_X_test, y_train, y_test
            ),
            n_trials=n_trials,
        )

        best_params = study.best_params
        print(f"Best Parameters: {best_params}")

        # Evaluate model with the current subset, using the study's best trial
        _, _, _, results_df = evaluate_model(
            reduced_X_train, reduced_X_test, y_train, y_test, optuna_study=study
        )

        mismatch_features = results_df[results_df["Result"] == "Mismatch"]

        if mismatch_features.empty:
            selected_features = remaining_features
            break

        # Sort mismatching features and remove the one with the largest
        # "Mean SHAP Value (High)"
        mismatch_to_remove = mismatch_features.sort_values(
            by="Mean SHAP Value (High)", ascending=False
        ).iloc[0]["Features"]

        print(f"Removing mismatch feature: {mismatch_to_remove}")
        remaining_features.remove(mismatch_to_remove)

    return selected_features, study


In [11]:
# Leave-one-out (LOO) analysis: for each feature listed in `function_map`,
# run the Optuna-driven feature selection without it, add it back to the
# selected subset, retrain, and record whether its SHAP result is a match.
optuna.logging.set_verbosity(optuna.logging.WARNING)

import warnings
warnings.filterwarnings("ignore", message="Saving into deprecated binary model format")


total_matches = 0
total_mismatches = 0

features = function_map["Features"]
known_functions = function_map["Known Function"]

feature_scores = {}

# Walk both columns in lockstep so each feature is paired with its known
# function by position, however `function_map` happens to be indexed.
for feature_to_leave_out, known_function in zip(features, known_functions):
    print(f"\n--- Performing LOO for feature {feature_to_leave_out} ---\n")

    reduced_X_train = X_train.drop(columns=[feature_to_leave_out])
    reduced_X_test = X_test.drop(columns=[feature_to_leave_out])

    selected_features, study = feature_selection_with_optuna(reduced_X_train, reduced_X_test, y_train, y_test, function_map)

    final_columns = selected_features + [feature_to_leave_out]
    final_X_train = X_train[final_columns]
    final_X_test = X_test[final_columns]

    print(f"\n--- Evaluating model with feature {feature_to_leave_out} added back ---\n")

    # NOTE: `study` was tuned on the subset *without* the left-out feature;
    # its best parameters are reused here rather than re-tuned.
    model, total_score, result_summary, results_df = evaluate_model(
        final_X_train, final_X_test, y_train, y_test, optuna_study=study
    )

    feature_result = results_df[results_df["Features"] == feature_to_leave_out]

    # Nothing to record if the analyzer produced no row for this feature.
    if feature_result.empty:
        continue

    match_result = feature_result["Result"].values[0]
    print(f"Feature: {feature_to_leave_out}, Known Function: {known_function}, Result: {match_result}")

    # Only explicit outcomes count towards accuracy. Any other label (the
    # result summaries also report a 'No Evaluation' category) is recorded
    # below but excluded from both counters, mirroring how the Optuna
    # objective computes its accuracy.
    if match_result == "Match":
        total_matches += 1
    elif match_result == "Mismatch":
        total_mismatches += 1

    feature_scores[feature_to_leave_out] = {
        "Total Score": total_score,
        "Known Function": known_function,
        "Result": match_result
    }

total_features = total_matches + total_mismatches
accuracy = total_matches / total_features if total_features > 0 else 0

print("\n--- Feature Analysis Results ---")

for feature, score in feature_scores.items():
    print(f"Feature: {feature}, Total Score: {score['Total Score']}, Known Function: {score['Known Function']}, Result: {score['Result']}")

print("\n--- Overall Accuracy ---")
print(f"Total Matches: {total_matches}")
print(f"Total Mismatches: {total_mismatches}")
print(f"Accuracy: {accuracy:.2f}")


--- Performing LOO for feature H3K4me1 ---

Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.012236880496338871, 'max_depth': 28, 'subsample': 0.4745770184811099, 'gamma': 0.29292571020236186, 'colsample_bytree': 0.9377761977062267, 'lambda': 6.4842569520924815, 'alpha': 13.953882512056131}




Results Summary: {'Match': 14, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02378682645167061, 'max_depth': 15, 'subsample': 0.8991973845415585, 'gamma': 0.4956642003559679, 'colsample_bytree': 0.6977598642682462, 'lambda': 4.3821912400049845, 'alpha': 8.300556448230695}




Results Summary: {'Match': 14, 'No Evaluation': 1}

--- Evaluating model with feature H3K4me1 added back ---





Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H3K4me1, Known Function: Activating, Result: Mismatch

--- Performing LOO for feature H3K9me2_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log']




Best Parameters: {'learning_rate': 0.03506808561131743, 'max_depth': 16, 'subsample': 0.6498636467607665, 'gamma': 0.8856085290139792, 'colsample_bytree': 0.5516198700160942, 'lambda': 8.173211007257514, 'alpha': 9.921816287807793}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log']




Best Parameters: {'learning_rate': 0.028826189076504145, 'max_depth': 15, 'subsample': 0.8004197006566673, 'gamma': 0.06371364015713464, 'colsample_bytree': 0.7798558178415739, 'lambda': 6.864495372364853, 'alpha': 12.825748720675508}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log']




Best Parameters: {'learning_rate': 0.03407728395635277, 'max_depth': 25, 'subsample': 0.5520255680435977, 'gamma': 0.18832274197567922, 'colsample_bytree': 0.5783125834217426, 'lambda': 2.9475570698953373, 'alpha': 1.465327088524008}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K9me2_log added back ---





Results Summary: {'Match': 11, 'Mismatch': 3, 'No Evaluation': 1}
Feature: H3K9me2_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3K4me3 ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.03319774133418583, 'max_depth': 22, 'subsample': 0.45530688421979926, 'gamma': 0.8924647781684371, 'colsample_bytree': 0.6100116220736771, 'lambda': 1.4878834264859935, 'alpha': 11.297285095391624}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.035513740517169316, 'max_depth': 21, 'subsample': 0.6687029376171939, 'gamma': 0.09047544937508789, 'colsample_bytree': 0.6504656636936443, 'lambda': 9.722422342765297, 'alpha': 3.8114938705679062}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.026421082358822567, 'max_depth': 25, 'subsample': 0.8137525649631564, 'gamma': 0.06469893622021176, 'colsample_bytree': 0.6668301925266502, 'lambda': 1.3993089756803478, 'alpha': 1.5642770929120475}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K4me3 added back ---





Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Feature: H3K4me3, Known Function: Activating, Result: Match

--- Performing LOO for feature H3K36me3 ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.03909859317995872, 'max_depth': 25, 'subsample': 0.8120930926480786, 'gamma': 0.7972314199726304, 'colsample_bytree': 0.6413879047317694, 'lambda': 14.903957152200991, 'alpha': 4.348552116886552}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.025077276989491255, 'max_depth': 30, 'subsample': 0.9397802550145807, 'gamma': 0.3159765483958795, 'colsample_bytree': 0.6013363000619116, 'lambda': 6.011703985673551, 'alpha': 9.285325108291739}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.017963460182627667, 'max_depth': 21, 'subsample': 0.40064740631003654, 'gamma': 0.33652056983432493, 'colsample_bytree': 0.7065351549296386, 'lambda': 10.692091115522373, 'alpha': 7.844295295534352}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K36me3 added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H3K36me3, Known Function: Activating, Result: Match

--- Performing LOO for feature H4K5Ac ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.03833182141987084, 'max_depth': 24, 'subsample': 0.5307431000221692, 'gamma': 0.8080943165548986, 'colsample_bytree': 0.6627076018711607, 'lambda': 8.04682125822817, 'alpha': 1.5422448256897452}




Results Summary: {'Match': 14, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.023362153187267608, 'max_depth': 19, 'subsample': 0.8901826120882665, 'gamma': 0.07348361013691362, 'colsample_bytree': 0.733646955077894, 'lambda': 4.702859702074418, 'alpha': 9.867953009605387}




Results Summary: {'Match': 14, 'No Evaluation': 1}

--- Evaluating model with feature H4K5Ac added back ---





Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H4K5Ac, Known Function: Activating, Result: Mismatch

--- Performing LOO for feature H3K27me3_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02892580730578519, 'max_depth': 26, 'subsample': 0.948223320794361, 'gamma': 0.09630993470483151, 'colsample_bytree': 0.744815244730518, 'lambda': 6.35014806156165, 'alpha': 8.148416197333045}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.027380111612647497, 'max_depth': 26, 'subsample': 0.4151017502275016, 'gamma': 0.10125921442978898, 'colsample_bytree': 0.6607200309243811, 'lambda': 12.304839132891793, 'alpha': 3.4406436054744534}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.021552300435809066, 'max_depth': 25, 'subsample': 0.7583003568159378, 'gamma': 0.22094252724341534, 'colsample_bytree': 0.7527423922495206, 'lambda': 11.271884614926863, 'alpha': 12.908499581394944}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K27me3_log added back ---





Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Feature: H3K27me3_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3K9Ac_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.03820381115501854, 'max_depth': 19, 'subsample': 0.4997140168214311, 'gamma': 0.8803355910510404, 'colsample_bytree': 0.5363941320972655, 'lambda': 5.155815882051618, 'alpha': 14.371017421288151}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02512750153298072, 'max_depth': 16, 'subsample': 0.6010346744834041, 'gamma': 0.12918787025291245, 'colsample_bytree': 0.6659726372520971, 'lambda': 7.855236537083449, 'alpha': 3.3370685504201396}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.01565941732150602, 'max_depth': 24, 'subsample': 0.7295696205552885, 'gamma': 0.41751833737041577, 'colsample_bytree': 0.8423233840719611, 'lambda': 13.986716618124442, 'alpha': 14.269211095450725}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K9Ac_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H3K9Ac_log, Known Function: Activating, Result: Match

--- Performing LOO for feature cpg_percentage_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.022895632273654654, 'max_depth': 14, 'subsample': 0.4320101079520008, 'gamma': 0.4350124023237894, 'colsample_bytree': 0.7618628244713155, 'lambda': 6.365473621584548, 'alpha': 13.271408453951556}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.021241173552824774, 'max_depth': 16, 'subsample': 0.47110192351406754, 'gamma': 0.5567059000877569, 'colsample_bytree': 0.8045418479771533, 'lambda': 10.585189506044632, 'alpha': 12.758445552942318}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.025625058361390476, 'max_depth': 27, 'subsample': 0.485170288437871, 'gamma': 0.2018482695718919, 'colsample_bytree': 0.6899410370115047, 'lambda': 5.230377680250164, 'alpha': 9.033197921351805}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature cpg_percentage_log added back ---





Results Summary: {'Match': 12, 'Mismatch': 2, 'No Evaluation': 1}
Feature: cpg_percentage_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature chh_percentage_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.021670756240576638, 'max_depth': 17, 'subsample': 0.7571623111308438, 'gamma': 0.8645651530774588, 'colsample_bytree': 0.7391163564891179, 'lambda': 14.770451307247878, 'alpha': 9.484681352063305}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.04693632920828629, 'max_depth': 22, 'subsample': 0.9953604879266148, 'gamma': 0.19273499000679897, 'colsample_bytree': 0.5727166756291426, 'lambda': 11.596130276709172, 'alpha': 2.179985132509331}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.025302746430510198, 'max_depth': 23, 'subsample': 0.42065282303089135, 'gamma': 0.8120919140512447, 'colsample_bytree': 0.6213486468767021, 'lambda': 6.590291654299541, 'alpha': 11.67541536435165}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature chh_percentage_log added back ---





Results Summary: {'Match': 13, 'No Evaluation': 1, 'Mismatch': 1}
Feature: chh_percentage_log, Known Function: Repressive, Result: Mismatch

--- Performing LOO for feature H4K20me1_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.017791395333486913, 'max_depth': 30, 'subsample': 0.41193825743699153, 'gamma': 0.0032658299332038454, 'colsample_bytree': 0.9043233440434528, 'lambda': 14.836493164256936, 'alpha': 8.922556973436357}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02521192179061783, 'max_depth': 29, 'subsample': 0.7812640644284516, 'gamma': 0.557075712517553, 'colsample_bytree': 0.6920942827580203, 'lambda': 6.926462998516765, 'alpha': 14.912261422326296}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.020422594946501886, 'max_depth': 25, 'subsample': 0.860744863837128, 'gamma': 0.4839890615636381, 'colsample_bytree': 0.8831067329287758, 'lambda': 11.604186905962827, 'alpha': 11.097587864332137}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H4K20me1_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H4K20me1_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature chg_percentage_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.026467596790864193, 'max_depth': 12, 'subsample': 0.5595996546251156, 'gamma': 0.6676514271643041, 'colsample_bytree': 0.5416002981286856, 'lambda': 8.359356120439385, 'alpha': 14.531717507005515}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02695857775062374, 'max_depth': 18, 'subsample': 0.7792430701549422, 'gamma': 0.9678417794479801, 'colsample_bytree': 0.6103295066148161, 'lambda': 5.7054891636700145, 'alpha': 8.899923630079032}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.020098820660664533, 'max_depth': 25, 'subsample': 0.6184120609676036, 'gamma': 0.553743890027711, 'colsample_bytree': 0.6876013419899516, 'lambda': 5.910807748681368, 'alpha': 9.306192157429383}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature chg_percentage_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: chg_percentage_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H2A.W.7_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.03446282198453966, 'max_depth': 24, 'subsample': 0.5767262079007651, 'gamma': 0.5378776108825678, 'colsample_bytree': 0.72437406440675, 'lambda': 7.795029544093098, 'alpha': 11.7204139505197}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.04447096626933065, 'max_depth': 24, 'subsample': 0.6928953674216588, 'gamma': 0.13507997605102556, 'colsample_bytree': 0.73000589165361, 'lambda': 10.979079284565902, 'alpha': 8.870099382391945}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.018794924211617797, 'max_depth': 30, 'subsample': 0.860457145028726, 'gamma': 0.5214717292483121, 'colsample_bytree': 0.8446838073018941, 'lambda': 11.492512521935442, 'alpha': 5.592959282779213}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H2A.W.7_log added back ---





Results Summary: {'Match': 13, 'No Evaluation': 1, 'Mismatch': 1}
Feature: H2A.W.7_log, Known Function: Repressive, Result: Mismatch

--- Performing LOO for feature H3K9me1_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.023823922000247362, 'max_depth': 10, 'subsample': 0.5518313251598889, 'gamma': 0.1275241094502939, 'colsample_bytree': 0.6649378688312451, 'lambda': 9.446818456431526, 'alpha': 3.215845030523759}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.038187044301904845, 'max_depth': 25, 'subsample': 0.4441228171775978, 'gamma': 0.12214272099789968, 'colsample_bytree': 0.5807306109597331, 'lambda': 10.654491460068755, 'alpha': 8.484512853954914}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.031515986263533746, 'max_depth': 12, 'subsample': 0.5549816350961032, 'gamma': 0.9977734522367939, 'colsample_bytree': 0.5763109099431711, 'lambda': 10.374016867325142, 'alpha': 0.006927369146859482}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K9me1_log added back ---





Results Summary: {'Match': 11, 'Mismatch': 3, 'No Evaluation': 1}
Feature: H3K9me1_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H2A.W.6_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.03498043025700154, 'max_depth': 21, 'subsample': 0.7101102169410771, 'gamma': 0.5744830420111485, 'colsample_bytree': 0.6573838350311343, 'lambda': 11.181717131461852, 'alpha': 8.535165754711738}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.023663689676673077, 'max_depth': 27, 'subsample': 0.6594577562326289, 'gamma': 0.18241089398533633, 'colsample_bytree': 0.8518940954610197, 'lambda': 8.19303548748091, 'alpha': 14.90529202811986}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.031271423474450895, 'max_depth': 25, 'subsample': 0.5215754282242485, 'gamma': 0.5363418516206945, 'colsample_bytree': 0.7021961634413949, 'lambda': 0.03253583896495371, 'alpha': 8.866176435122803}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H2A.W.6_log added back ---





Results Summary: {'Match': 11, 'Mismatch': 3, 'No Evaluation': 1}
Feature: H2A.W.6_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3Ac_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.032392415573410036, 'max_depth': 23, 'subsample': 0.5867866018887475, 'gamma': 0.03928518428446763, 'colsample_bytree': 0.5017811853717092, 'lambda': 6.83644877157766, 'alpha': 11.695836715367829}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02793812517467547, 'max_depth': 18, 'subsample': 0.46007491536435163, 'gamma': 0.4685406379347352, 'colsample_bytree': 0.5721076577543084, 'lambda': 11.509564312426376, 'alpha': 8.863922044745104}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.01872008859748947, 'max_depth': 16, 'subsample': 0.43726738652430247, 'gamma': 0.6203867433275583, 'colsample_bytree': 0.8096403117526134, 'lambda': 9.836705754600214, 'alpha': 14.207054189991684}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3Ac_log added back ---





Results Summary: {'Match': 12, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H3Ac_log, Known Function: Activating, Result: Mismatch

--- Performing LOO for feature H3K9K14Ac_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.03580752262227231, 'max_depth': 14, 'subsample': 0.511439549112598, 'gamma': 0.47066491907611163, 'colsample_bytree': 0.7457710132762743, 'lambda': 5.60730614085211, 'alpha': 14.952767232444216}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.034112434608522715, 'max_depth': 26, 'subsample': 0.43921921272114633, 'gamma': 0.11424931144358115, 'colsample_bytree': 0.8006946126694321, 'lambda': 4.996985827832639, 'alpha': 6.790347691426793}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.022378018350126656, 'max_depth': 20, 'subsample': 0.40874511914821254, 'gamma': 0.9022330738219806, 'colsample_bytree': 0.9939588562629744, 'lambda': 12.598259724278067, 'alpha': 14.88285011245708}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K9K14Ac_log added back ---





Results Summary: {'Match': 12, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H3K9K14Ac_log, Known Function: Activating, Result: Match

--- Performing LOO for feature H3K23me1_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.027956044721640937, 'max_depth': 22, 'subsample': 0.4732944998132392, 'gamma': 0.7616674917870077, 'colsample_bytree': 0.5940524543075539, 'lambda': 3.7962635940176606, 'alpha': 7.599770101029521}




Results Summary: {'Match': 14, 'Mismatch': 2}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.030712552403156082, 'max_depth': 13, 'subsample': 0.7449144560727653, 'gamma': 0.4133437191434687, 'colsample_bytree': 0.5667151731230875, 'lambda': 5.165869593272847, 'alpha': 11.118268449222144}




Results Summary: {'Match': 14, 'Mismatch': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.018064971807926218, 'max_depth': 20, 'subsample': 0.5534823057473445, 'gamma': 0.9191159761298259, 'colsample_bytree': 0.6750563081482891, 'lambda': 6.403741492196459, 'alpha': 8.107782194086186}




Results Summary: {'Match': 14}

--- Evaluating model with feature H3K23me1_log added back ---





Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Feature: H3K23me1_log, Known Function: Unknown, Result: No Evaluation

--- Feature Analysis Results ---
Feature: H3K4me1, Total Score: 1.3042715121991932, Known Function: Activating, Result: Mismatch
Feature: H3K9me2_log, Total Score: 1.8885597929474898, Known Function: Repressive, Result: Match
Feature: H3K4me3, Total Score: 1.8975745285861194, Known Function: Activating, Result: Match
Feature: H3K36me3, Total Score: 1.6590399200795218, Known Function: Activating, Result: Match
Feature: H4K5Ac, Total Score: 1.583871609531343, Known Function: Activating, Result: Mismatch
Feature: H3K27me3_log, Total Score: 1.6270086739677936, Known Function: Repressive, Result: Match
Feature: H3K9Ac_log, Total Score: 1.4494410338811576, Known Function: Activating, Result: Match
Feature: cpg_percentage_log, Total Score: 1.7572835902683437, Known Function: Repressive, Result: Match
Feature: chh_percentage_log, Total Score: 1.74529638929379