In [1]:
import xgboost as xgb
import shap
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from shap_analysis import SHAPAnalyzer, function_map
import optuna

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [None]:
df = pd.read_csv("thesis_cleaned_transformed_histone_dataset_categorical(H3K23me1).tsv", sep="\t")

X = df.drop(columns = ['Gene Expression (FPKM)_log'])
y = df['Gene Expression (FPKM)_log']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
def evaluate_model(X_train, X_test, y_train, y_test, params=None, optuna_study=None):
    """
    Evaluate the model either using the provided parameters or the Optuna study's best trial.
    
    Parameters:
    - X_train (pd.DataFrame): Training feature data.
    - X_test (pd.DataFrame): Test feature data.
    - y_train (pd.Series): Training target data.
    - y_test (pd.Series): Test target data.
    - params (dict): Parameters for the XGBoost model. If None, use `optuna_study`.
    - optuna_study (optuna.Study): Optuna study object containing the best trial.
    
    Returns:
    - model: Trained XGBoost model.
    - total_score (float): The evaluation score based on SHAP analysis.
    - result_summary (dict): Summary of matches and mismatches.
    - results_df (pd.DataFrame): Detailed SHAP results for features.
    """
    # Determine parameters: Use the best trial from Optuna if no parameters are provided
    if optuna_study is not None and params is None:
        params = optuna_study.best_trial.params
        params.update({
            'booster': 'gbtree',
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'tree_method': 'hist',
            'device': 'cuda'
        })

    # Prepare the data
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    # Train the model
    model = xgb.train(
        params, 
        dtrain, 
        num_boost_round=100, 
        evals=[(dtest, 'validation')],
        early_stopping_rounds=10, 
        verbose_eval=False
    )

    # SHAP analysis
    background_indices = np.random.choice(X_train.shape[0], size=200, replace=False)
    background_sample = X_train.iloc[background_indices]

    sample_indices = np.random.choice(X_test.shape[0], size=200, replace=False)
    sample = X_test.iloc[sample_indices]

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # Use the SHAPAnalyzer to calculate metrics
    analyzer = SHAPAnalyzer(X_test, shap_values, function_map)
    analyzer.calculate_high_value_shap_means()

    results_df, total_score, result_summary = analyzer.get_results()

    print(f"Results Summary: {result_summary}")

    return model, total_score, result_summary, results_df


In [5]:
model, total_score, result_summary, resdults_df = evaluate_model(X_train, X_test, y_train, y_test)
print(total_score)
print(result_summary)



Results Summary: {'Match': 9, 'Mismatch': 7, 'No Evaluation': 1}
0.6137400286097545
{'Match': 9, 'Mismatch': 7, 'No Evaluation': 1}


In [6]:
def feature_selection(X_train, X_test, y_train, y_test):
    remaining_features = X_train.columns.tolist()
    selected_features = []

    while remaining_features:
        print(f"Remaining Features: {remaining_features}")
        reduced_X_train = X_train[remaining_features]
        reduced_X_test = X_test[remaining_features]

        model, total_score, result_summary, results_df = evaluate_model(
            reduced_X_train, reduced_X_test, y_train, y_test
            )

        mismatch_features = results_df[results_df["Result"] == "Mismatch"]
        
        if mismatch_features.empty:
            selected_features = remaining_features
            break

        mismatch_to_remove = mismatch_features.sort_values(
            by = "Mean SHAP Value (High)", ascending = False
        ).iloc[0]['Features']

        print(f"Removing mismatch feature: {mismatch_to_remove}")

        remaining_features.remove(mismatch_to_remove)

    return selected_features

In [7]:
def feature_selection_with_optuna(X_train, X_test, y_train, y_test, function_map):
    remaining_features = X_train.columns.tolist()
    selected_features = []

    def optuna_objective(trial, X_train, X_test, y_train, y_test):
        params = {
            'booster': 'gbtree',
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'tree_method': 'hist',
            'device': 'cuda',
            'learning_rate': trial.suggest_float("learning_rate", 0.001, 0.05),
            'max_depth': trial.suggest_int("max_depth", 10, 30),
            'subsample': trial.suggest_float("subsample", 0.4, 1.0),
            'gamma': trial.suggest_float("gamma", 0.0, 1.0),
            'colsample_bytree': trial.suggest_float("colsample_bytree", 0.5, 1.0),
            'lambda': trial.suggest_float("lambda", 1e-3, 15),
            'alpha': trial.suggest_float("alpha", 1e-3, 15),
    }

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dvalid = xgb.DMatrix(X_test, label=y_test)

        model = xgb.train(params, dtrain, num_boost_round=100,
                          evals=[(dvalid, 'validation')],
                          early_stopping_rounds=10,
                          verbose_eval=False)

        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_test)

        analyzer = SHAPAnalyzer(X_test, shap_values, function_map)
        analyzer.calculate_high_value_shap_means()

        results_df, total_score, result_summary = analyzer.get_results()
        total_mismatches = result_summary.get("Mismatch", 0)
        total_matches = result_summary.get("Match", 0)
        accuracy = total_matches / (total_matches + total_mismatches) if (total_matches + total_mismatches) > 0 else 0

        # Define the objective score
        alpha = 1.0  # Weight for accuracy
        beta = 1.0   # Weight for mismatch count
        objective_score = alpha * accuracy
        
        trial.set_user_attr("result_summary", result_summary)
        trial.set_user_attr("results_df", results_df)
        
        return objective_score

    while remaining_features:
        print(f"Remaining Features: {remaining_features}")
        
        # Run Optuna for the current subset of features
        study = optuna.create_study(direction='maximize')
        study.optimize(lambda trial: optuna_objective(trial, 
                                                      X_train[remaining_features], 
                                                      X_test[remaining_features], 
                                                      y_train, y_test), 
                       n_trials=40)
        
        best_params = study.best_params
        print(f"Best Parameters: {best_params}")

        # Evaluate model with the current subset
        reduced_X_train = X_train[remaining_features]
        reduced_X_test = X_test[remaining_features]

        model, total_score, result_summary, results_df = evaluate_model(
            reduced_X_train, reduced_X_test, y_train, y_test, optuna_study=study
)

        mismatch_features = results_df[results_df["Result"] == "Mismatch"]

        if mismatch_features.empty:
            selected_features = remaining_features
            break

        # Sort mismatching features and remove the highest impact one
        mismatch_to_remove = mismatch_features.sort_values(
            by="Mean SHAP Value (High)", ascending=False
        ).iloc[0]["Features"]

        print(f"Removing mismatch feature: {mismatch_to_remove}")
        remaining_features.remove(mismatch_to_remove)

    return selected_features, study


In [8]:
def feature_selection_remove_all_mismatches(X_train, X_test, y_train, y_test):
    remaining_features = X_train.columns.tolist()
    selected_features = []

    while True:
        print(f"Remaining features: {remaining_features}")
        
        # Reduce the dataset to remaining features
        reduced_X_train = X_train[remaining_features]
        reduced_X_test = X_test[remaining_features]

        # Train the model and perform SHAP analysis
        model, total_score, result_summary, results_df = evaluate_model(
            reduced_X_train, reduced_X_test, y_train, y_test
        )

        # Identify mismatched features
        mismatch_features = results_df[results_df["Result"] == "Mismatch"]["Features"].tolist()
        
        if not mismatch_features:  # No mismatches left
            selected_features = remaining_features
            break

        print(f"Removing mismatched features: {mismatch_features}")

        # Remove all mismatched features
        remaining_features = [feature for feature in remaining_features if feature not in mismatch_features]

    return selected_features


In [9]:
optuna.logging.set_verbosity(optuna.logging.WARNING)

total_matches = 0
total_mismatches = 0

features = function_map["Features"]
known_functions = function_map["Known Function"]

feature_scores = {}

for i, feature_to_leave_out in enumerate(features):
    print(f"\n--- Performing LOO for feature {feature_to_leave_out} ---\n")

    reduced_X_train = X_train.drop(columns = [feature_to_leave_out])
    reduced_X_test = X_test.drop(columns = [feature_to_leave_out])

    selected_features, study = feature_selection_with_optuna(reduced_X_train, reduced_X_test, y_train, y_test, function_map)

    final_X_train = X_train[selected_features + [feature_to_leave_out]]
    final_X_test = X_test[selected_features + [feature_to_leave_out]]

    print(f"\n--- Evaluating model with feature {feature_to_leave_out} added back ---\n")

    model, total_score, result_summary, results_df = evaluate_model(
        final_X_train, final_X_test, y_train, y_test, optuna_study=study
        )

    known_function = known_functions[i]
    feature_result = results_df[results_df["Features"] == feature_to_leave_out]

    if not feature_result.empty:
        match_result = feature_result["Result"].values[0]
        print(f"Feature: {feature_to_leave_out}, Known Function: {known_function}, Result: {match_result}")

        if match_result == "Match":
            total_matches += 1
        else:
            total_mismatches += 1

        feature_scores[feature_to_leave_out] = {
            "Total Score": total_score,
            "Known Function": known_function,
            "Result": match_result
        }

total_features = total_matches + total_mismatches
accuracy = total_matches / total_features if total_features > 0 else 0

print("\n--- Feature Analysis Results ---")

for feature, score in feature_scores.items():
    print(f"Feature: {feature}, Total Score: {score['Total Score']}, Known Function: {score['Known Function']}, Result: {score['Result']}")

print("\n--- Overall Accuracy ---")
print(f"Total Matches: {total_matches}")
print(f"Total Mismatches: {total_mismatches}")
print(f"Accuracy: {accuracy:.2f}")


--- Performing LOO for feature H3K4me1 ---

Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.013193333635654558, 'max_depth': 28, 'subsample': 0.7866046408546474, 'gamma': 0.7244040171307976, 'colsample_bytree': 0.7094159048005868, 'lambda': 1.3408855949018241, 'alpha': 1.3259860683055658}




Results Summary: {'Match': 14, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.008190211321538209, 'max_depth': 29, 'subsample': 0.840345698687513, 'gamma': 0.15623992559093391, 'colsample_bytree': 0.665539612251044, 'lambda': 1.6408751908282269, 'alpha': 13.456841582647867}




Results Summary: {'Match': 14, 'No Evaluation': 1}

--- Evaluating model with feature H3K4me1 added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1, 'Mismatch': 1}
Feature: H3K4me1, Known Function: Activating, Result: Mismatch

--- Performing LOO for feature H3K9me2_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log']




Best Parameters: {'learning_rate': 0.02682473867457864, 'max_depth': 21, 'subsample': 0.6582896152785989, 'gamma': 0.9717778961802406, 'colsample_bytree': 0.5007748008945803, 'lambda': 13.458835682459116, 'alpha': 9.591647259300473}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log']




Best Parameters: {'learning_rate': 0.012362349140144346, 'max_depth': 16, 'subsample': 0.4478236413559799, 'gamma': 0.1919159213463929, 'colsample_bytree': 0.6550184891130018, 'lambda': 11.070675844601832, 'alpha': 5.442887257716517}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log']




Best Parameters: {'learning_rate': 0.0015415943092424883, 'max_depth': 29, 'subsample': 0.8025671589236549, 'gamma': 0.4076822656559983, 'colsample_bytree': 0.9370191898926122, 'lambda': 6.81597243471593, 'alpha': 0.9585939166326292}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K9me2_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H3K9me2_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3K4me3 ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.006104499008627262, 'max_depth': 22, 'subsample': 0.4662068370877305, 'gamma': 0.01532875568021519, 'colsample_bytree': 0.9648819074302355, 'lambda': 13.637161195914679, 'alpha': 1.200547508979462}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.01997228618324515, 'max_depth': 12, 'subsample': 0.9526202152583666, 'gamma': 0.22893848454104693, 'colsample_bytree': 0.6549783427423936, 'lambda': 9.956996783814878, 'alpha': 3.7149583681046505}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.010437216074846732, 'max_depth': 15, 'subsample': 0.6856688087994136, 'gamma': 0.22905893189509652, 'colsample_bytree': 0.7891584281415415, 'lambda': 5.857085373508898, 'alpha': 10.391414004860392}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K4me3 added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H3K4me3, Known Function: Activating, Result: Match

--- Performing LOO for feature H3K36me3 ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02778164450143416, 'max_depth': 24, 'subsample': 0.7667060417614066, 'gamma': 0.8316890701657148, 'colsample_bytree': 0.6117696348167534, 'lambda': 11.220918451982827, 'alpha': 14.506157492463698}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.025757954789926985, 'max_depth': 17, 'subsample': 0.8355136108199366, 'gamma': 0.5296128290626767, 'colsample_bytree': 0.5753972158672191, 'lambda': 14.703947059022303, 'alpha': 0.05501394634267126}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.001450213817173085, 'max_depth': 25, 'subsample': 0.5503132887195465, 'gamma': 0.8470881766776036, 'colsample_bytree': 0.8277601624774655, 'lambda': 9.481753919818559, 'alpha': 12.395782152640088}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K36me3 added back ---





Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Feature: H3K36me3, Known Function: Activating, Result: Match

--- Performing LOO for feature H4K5Ac ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.01977106486510879, 'max_depth': 15, 'subsample': 0.4370242912797001, 'gamma': 0.6062078176822686, 'colsample_bytree': 0.5708472599414179, 'lambda': 14.050984958534332, 'alpha': 9.642692785673354}




Results Summary: {'Match': 14, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.009266792542540623, 'max_depth': 30, 'subsample': 0.9575803937208366, 'gamma': 0.028274522071819574, 'colsample_bytree': 0.9118303487756306, 'lambda': 7.606599213855828, 'alpha': 10.773649640536242}




Results Summary: {'Match': 14, 'No Evaluation': 1}

--- Evaluating model with feature H4K5Ac added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1, 'Mismatch': 1}
Feature: H4K5Ac, Known Function: Activating, Result: Mismatch

--- Performing LOO for feature H3K27me3_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.0350197939268146, 'max_depth': 29, 'subsample': 0.9755489077745545, 'gamma': 0.08158679465761054, 'colsample_bytree': 0.6090701291847292, 'lambda': 8.578718300626887, 'alpha': 0.8011705313650517}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.018485878409947788, 'max_depth': 27, 'subsample': 0.5087245048454486, 'gamma': 0.4742117497394641, 'colsample_bytree': 0.6497151613146562, 'lambda': 9.406994496800005, 'alpha': 8.70218183856258}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.0033165955684341573, 'max_depth': 28, 'subsample': 0.4672886913701919, 'gamma': 0.8305618036142056, 'colsample_bytree': 0.974071601013303, 'lambda': 10.869294660390967, 'alpha': 2.3677713989505182}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K27me3_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H3K27me3_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3K9Ac_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.017374482288978085, 'max_depth': 13, 'subsample': 0.9325252044356351, 'gamma': 0.6788482690354948, 'colsample_bytree': 0.7844810057911664, 'lambda': 12.655618057236449, 'alpha': 12.651052293296278}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.00522280034995229, 'max_depth': 25, 'subsample': 0.5548290347709015, 'gamma': 0.6766283397867153, 'colsample_bytree': 0.8838213977356291, 'lambda': 8.974382344001034, 'alpha': 3.22223851633208}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.006968917981971534, 'max_depth': 24, 'subsample': 0.5441779504148119, 'gamma': 0.33493019943235536, 'colsample_bytree': 0.7384142046443463, 'lambda': 3.12041466456647, 'alpha': 13.177563490300326}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K9Ac_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H3K9Ac_log, Known Function: Activating, Result: Match

--- Performing LOO for feature cpg_percentage_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.014422298132851244, 'max_depth': 19, 'subsample': 0.45589969836293026, 'gamma': 0.005538175562315395, 'colsample_bytree': 0.5409650197658695, 'lambda': 5.488908453144522, 'alpha': 14.495673469086082}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.015832327831786532, 'max_depth': 24, 'subsample': 0.4952605743518418, 'gamma': 0.09094360705049154, 'colsample_bytree': 0.5223016687035149, 'lambda': 11.947939662345298, 'alpha': 6.492209673925233}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.00491473796550694, 'max_depth': 24, 'subsample': 0.4934443411054228, 'gamma': 0.6119493455620594, 'colsample_bytree': 0.7518648605047868, 'lambda': 11.931849653302057, 'alpha': 5.638212153419083}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature cpg_percentage_log added back ---





Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Feature: cpg_percentage_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature chh_percentage_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.004355134390883639, 'max_depth': 20, 'subsample': 0.9180599291128927, 'gamma': 0.6313365900402695, 'colsample_bytree': 0.8440734968983061, 'lambda': 10.413676535443328, 'alpha': 13.577787934773376}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.005730269330957153, 'max_depth': 23, 'subsample': 0.8835686388669746, 'gamma': 0.007664810911364395, 'colsample_bytree': 0.8852940724022929, 'lambda': 11.132447785662743, 'alpha': 3.0396886001119845}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.016532856707191183, 'max_depth': 18, 'subsample': 0.8775695555602059, 'gamma': 0.5875977485821051, 'colsample_bytree': 0.7288658957648213, 'lambda': 11.503521097330797, 'alpha': 12.305226196644291}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature chh_percentage_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: chh_percentage_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H4K20me1_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.008047743821586387, 'max_depth': 15, 'subsample': 0.8223367095842969, 'gamma': 0.10821218601110172, 'colsample_bytree': 0.6672314375051952, 'lambda': 14.074287150323354, 'alpha': 10.931270751760849}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.00921574632725251, 'max_depth': 19, 'subsample': 0.936040407135273, 'gamma': 0.05611768643128623, 'colsample_bytree': 0.9715445012720413, 'lambda': 10.869096919925473, 'alpha': 6.348770468541596}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.019703346548041782, 'max_depth': 26, 'subsample': 0.5092963482543972, 'gamma': 0.3767012530628192, 'colsample_bytree': 0.7826753311546508, 'lambda': 8.230624296939448, 'alpha': 0.45604913312070045}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H4K20me1_log added back ---





Results Summary: {'Match': 12, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H4K20me1_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature chg_percentage_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.004546913757620737, 'max_depth': 11, 'subsample': 0.8388716873496331, 'gamma': 0.8427821042555799, 'colsample_bytree': 0.7229252472274119, 'lambda': 0.34379395360504333, 'alpha': 0.2534799796173568}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02910773762039566, 'max_depth': 27, 'subsample': 0.867094146523595, 'gamma': 0.24168369385386057, 'colsample_bytree': 0.5353156325693936, 'lambda': 2.7801584498801017, 'alpha': 7.055141993181848}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.008620979795032435, 'max_depth': 25, 'subsample': 0.5387012690096679, 'gamma': 0.8354058808167735, 'colsample_bytree': 0.807090122404061, 'lambda': 3.8729692923356556, 'alpha': 9.507711702544011}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature chg_percentage_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: chg_percentage_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H2A.W.7_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.0036787951458960673, 'max_depth': 11, 'subsample': 0.7050854394993142, 'gamma': 0.9860138758984699, 'colsample_bytree': 0.905436360984037, 'lambda': 14.252254634087942, 'alpha': 4.55596727815868}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.03431508148964933, 'max_depth': 21, 'subsample': 0.6351600616331295, 'gamma': 0.6755553143940797, 'colsample_bytree': 0.5593098454317327, 'lambda': 5.726807382932456, 'alpha': 2.6319841629168077}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.0043622545189073905, 'max_depth': 21, 'subsample': 0.6043916457985751, 'gamma': 0.6177098447979533, 'colsample_bytree': 0.7673575486441035, 'lambda': 10.942323004530827, 'alpha': 10.356314632217215}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H2A.W.7_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H2A.W.7_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3K9me1_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.01473517834093924, 'max_depth': 28, 'subsample': 0.6155476880341818, 'gamma': 0.48034649347806, 'colsample_bytree': 0.8502894894445949, 'lambda': 10.999420471902454, 'alpha': 13.951731071645561}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.015317026072386313, 'max_depth': 17, 'subsample': 0.70083331591885, 'gamma': 0.7380333945910359, 'colsample_bytree': 0.5896131387593007, 'lambda': 1.1227637951019829, 'alpha': 9.452431919446864}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.009605757064106862, 'max_depth': 26, 'subsample': 0.6483734336280316, 'gamma': 0.18928663453614913, 'colsample_bytree': 0.6320824706975081, 'lambda': 3.050953831574722, 'alpha': 8.643111641185321}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K9me1_log added back ---





Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Feature: H3K9me1_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H2A.W.6_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.02826167250233346, 'max_depth': 14, 'subsample': 0.43313605118627607, 'gamma': 0.9240049029250957, 'colsample_bytree': 0.5311485470256802, 'lambda': 4.57244413270468, 'alpha': 3.90289120935308}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.029715271626673312, 'max_depth': 15, 'subsample': 0.44937639617199954, 'gamma': 0.2526732324315857, 'colsample_bytree': 0.565511562698406, 'lambda': 9.602434018880869, 'alpha': 2.249989635480772}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.016578189555356858, 'max_depth': 16, 'subsample': 0.9978530462745743, 'gamma': 0.47153775653593066, 'colsample_bytree': 0.8009434242000023, 'lambda': 4.16501167919456, 'alpha': 6.099261451697363}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H2A.W.6_log added back ---





Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Feature: H2A.W.6_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3Ac_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.013773495633806612, 'max_depth': 26, 'subsample': 0.6565473239441302, 'gamma': 0.20716111262745673, 'colsample_bytree': 0.7636184372049748, 'lambda': 9.795477463317999, 'alpha': 2.5382488473461335}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.006201459370940833, 'max_depth': 22, 'subsample': 0.6256054783822916, 'gamma': 0.5602398544564285, 'colsample_bytree': 0.8210293014615733, 'lambda': 5.7791436354000805, 'alpha': 9.566506412949723}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.009228058523705014, 'max_depth': 10, 'subsample': 0.7899990689405783, 'gamma': 0.04113887720686871, 'colsample_bytree': 0.7166121069766851, 'lambda': 11.920858014718773, 'alpha': 13.487573317151517}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3Ac_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H3Ac_log, Known Function: Activating, Result: Match

--- Performing LOO for feature H3K9K14Ac_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.012217771617209154, 'max_depth': 18, 'subsample': 0.5283251807912954, 'gamma': 0.7782741701508655, 'colsample_bytree': 0.9423016211536723, 'lambda': 10.337416973002377, 'alpha': 10.972394079909833}




Results Summary: {'Match': 13, 'Mismatch': 2, 'No Evaluation': 1}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.004405307179722134, 'max_depth': 26, 'subsample': 0.5144291521845419, 'gamma': 0.2691434574330549, 'colsample_bytree': 0.8979273539133238, 'lambda': 6.060433058942152, 'alpha': 2.6119156313112897}




Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K23me1_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.004201213199912471, 'max_depth': 10, 'subsample': 0.5998489237770401, 'gamma': 0.5944429090609664, 'colsample_bytree': 0.6491029456636089, 'lambda': 5.469888944838971, 'alpha': 7.360481500641143}




Results Summary: {'Match': 13, 'No Evaluation': 1}

--- Evaluating model with feature H3K9K14Ac_log added back ---





Results Summary: {'Match': 13, 'Mismatch': 1, 'No Evaluation': 1}
Feature: H3K9K14Ac_log, Known Function: Activating, Result: Match

--- Performing LOO for feature H3K23me1_log ---

Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H4K5Ac', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.0045119583248001815, 'max_depth': 21, 'subsample': 0.7950858013576159, 'gamma': 0.9060137727122294, 'colsample_bytree': 0.8853414153636321, 'lambda': 6.946860495074167, 'alpha': 10.104917263253482}




Results Summary: {'Match': 14, 'Mismatch': 2}
Removing mismatch feature: H4K5Ac
Remaining Features: ['H2A.2', 'H3K4me1', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.01897084965416945, 'max_depth': 12, 'subsample': 0.8500996598966771, 'gamma': 0.4236533300711526, 'colsample_bytree': 0.6636634209076087, 'lambda': 0.5815948237097431, 'alpha': 9.08582841511768}




Results Summary: {'Match': 14, 'Mismatch': 1}
Removing mismatch feature: H3K4me1
Remaining Features: ['H2A.2', 'H2A.13', 'H3K4me3', 'H3K36me3', 'H2A.Z.11', 'expression_category', 'H3K27me3_log', 'H3K9Ac_log', 'cpg_percentage_log', 'H3K9K14Ac_log', 'H3Ac_log', 'chh_percentage_log', 'H4K20me1_log', 'chg_percentage_log', 'H2A.W.7_log', 'H3K9me1_log', 'H2A.W.6_log', 'H2A.Z.9_log', 'H3K9me2_log']




Best Parameters: {'learning_rate': 0.004006929054713544, 'max_depth': 12, 'subsample': 0.9070544386431157, 'gamma': 0.284668975235714, 'colsample_bytree': 0.8425872249755906, 'lambda': 9.942318123335548, 'alpha': 4.5872018009898685}




Results Summary: {'Match': 14}

--- Evaluating model with feature H3K23me1_log added back ---





Results Summary: {'Match': 14, 'No Evaluation': 1}
Feature: H3K23me1_log, Known Function: Unknown, Result: No Evaluation

--- Feature Analysis Results ---
Feature: H3K4me1, Total Score: 0.9504962702048942, Known Function: Activating, Result: Mismatch
Feature: H3K9me2_log, Total Score: 0.25640461000148207, Known Function: Repressive, Result: Match
Feature: H3K4me3, Total Score: 1.2852750404272228, Known Function: Activating, Result: Match
Feature: H3K36me3, Total Score: 0.2637489909720898, Known Function: Activating, Result: Match
Feature: H4K5Ac, Total Score: 1.0305808932462242, Known Function: Activating, Result: Mismatch
Feature: H3K27me3_log, Total Score: 0.4979779444402084, Known Function: Repressive, Result: Match
Feature: H3K9Ac_log, Total Score: 0.9557464671379421, Known Function: Activating, Result: Match
Feature: cpg_percentage_log, Total Score: 0.7295486981747672, Known Function: Repressive, Result: Match
Feature: chh_percentage_log, Total Score: 1.5150523303309456, Known Fun

## Accuracy as a metric

In [10]:
#No Feature Selection
optuna.logging.set_verbosity(optuna.logging.INFO)

total_matches = 0
total_mismatches = 0

features = function_map["Features"]
known_functions = function_map["Known Function"]

feature_scores = {}

for i, feature_to_leave_out in enumerate(features):
    print(f"\n--- Performing LOO for feature {feature_to_leave_out} ---\n")

    #Features for optuna optimization
    reduced_X_train = X_train.drop(columns = [feature_to_leave_out])
    reduced_X_test = X_test.drop(columns = [feature_to_leave_out])

    #Optuna Optimization
    def optuna_objective(trial, reduced_X_train, reduced_X_test, y_train, y_test):
        params = {
            'booster': 'gbtree',
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'tree_method': 'hist',
            'device': 'cuda',
            'learning_rate': trial.suggest_float("learning_rate", 0.001, 0.05),
            'max_depth': trial.suggest_int("max_depth", 10, 30),
            'subsample': trial.suggest_float("subsample", 0.4, 1.0),
            'gamma': trial.suggest_float("gamma", 0.0, 1.0),
            'colsample_bytree': trial.suggest_float("colsample_bytree", 0.5, 1.0),
            'lambda': trial.suggest_float("lambda", 1e-3, 15),
            'alpha': trial.suggest_float("alpha", 1e-3, 15),
            }

        dtrain = xgb.DMatrix(reduced_X_train, label=y_train)
        dvalid = xgb.DMatrix(reduced_X_test, label=y_test)

        model = xgb.train(params, dtrain, num_boost_round=100,
                          evals=[(dvalid, 'validation')],
                          early_stopping_rounds=10,
                          verbose_eval=False)

        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(reduced_X_test)

        analyzer = SHAPAnalyzer(reduced_X_test, shap_values, function_map)
        analyzer.calculate_high_value_shap_means()

        results_df, total_score, result_summary = analyzer.get_results()
        total_mismatches = result_summary.get("Mismatch", 0)
        total_matches = result_summary.get("Match", 0)
        accuracy = total_matches / (total_matches + total_mismatches) if (total_matches + total_mismatches) > 0 else 0

        # Define the objective score
        alpha = 1.0  # Weight for accuracy
        beta = 1.0   # Weight for mismatch count
        objective_score = alpha * accuracy
        
        trial.set_user_attr("result_summary", result_summary)
        trial.set_user_attr("results_df", results_df)
        
        return objective_score
    
    # Create a study object and optimize
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: optuna_objective(trial, 
                                                    reduced_X_train, 
                                                    reduced_X_test, 
                                                    y_train, y_test), 
                    n_trials=40)

    # final_X_train = X_train
    # final_X_test = X_test

    print(f"\n--- Evaluating model with feature {feature_to_leave_out} added back ---\n")

    model, total_score, result_summary, results_df = evaluate_model(
        X_train, X_test, y_train, y_test, optuna_study=study
        )

    known_function = known_functions[i]
    feature_result = results_df[results_df["Features"] == feature_to_leave_out]

    if not feature_result.empty:
        match_result = feature_result["Result"].values[0]
        print(f"Feature: {feature_to_leave_out}, Known Function: {known_function}, Result: {match_result}")

        if match_result == "Match":
            total_matches += 1
        else:
            total_mismatches += 1

        feature_scores[feature_to_leave_out] = {
            "Total Score": total_score,
            "Known Function": known_function,
            "Result": match_result
        }

total_features = total_matches + total_mismatches
accuracy = total_matches / total_features if total_features > 0 else 0

print("\n--- Feature Analysis Results ---")

for feature, score in feature_scores.items():
    print(f"Feature: {feature}, Total Score: {score['Total Score']}, Known Function: {score['Known Function']}, Result: {score['Result']}")

print("\n--- Overall Accuracy ---")
print(f"Total Matches: {total_matches}")
print(f"Total Mismatches: {total_mismatches}")
print(f"Accuracy: {accuracy:.2f}")

[I 2025-03-13 03:55:27,841] A new study created in memory with name: no-name-5f28b0df-9e3b-4281-b39a-ab20bd304afc



--- Performing LOO for feature H3K4me1 ---



[I 2025-03-13 03:55:32,227] Trial 0 finished with value: 0.8 and parameters: {'learning_rate': 0.0176062152167535, 'max_depth': 13, 'subsample': 0.6238892554975046, 'gamma': 0.12369703872956894, 'colsample_bytree': 0.9974056698378566, 'lambda': 4.4712578598390245, 'alpha': 3.242548504160648}. Best is trial 0 with value: 0.8.
[I 2025-03-13 03:55:35,895] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.039478376291899986, 'max_depth': 29, 'subsample': 0.6699114355613465, 'gamma': 0.8454780574155977, 'colsample_bytree': 0.8000415679278192, 'lambda': 10.72858170140561, 'alpha': 7.567890782831754}. Best is trial 1 with value: 0.8666666666666667.
[I 2025-03-13 03:55:40,154] Trial 2 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.036575980591157554, 'max_depth': 24, 'subsample': 0.8412525139920434, 'gamma': 0.7033055485595445, 'colsample_bytree': 0.8374689685234845, 'lambda': 1.705169580481921, 'alpha': 12.96892227241463}. Best is


--- Evaluating model with feature H3K4me1 added back ---



[I 2025-03-13 03:58:33,788] A new study created in memory with name: no-name-67d0a4a9-1bf8-4bb9-bba4-ce9b765d8c12


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H3K4me1, Known Function: Activating, Result: Mismatch

--- Performing LOO for feature H3K9me2_log ---



[I 2025-03-13 03:58:36,980] Trial 0 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.03493944900070799, 'max_depth': 30, 'subsample': 0.6555108724247987, 'gamma': 0.33482021145950425, 'colsample_bytree': 0.6502536951717756, 'lambda': 4.231709178040089, 'alpha': 14.802998501038987}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 03:58:45,163] Trial 1 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.029949550324957202, 'max_depth': 16, 'subsample': 0.7608321702535168, 'gamma': 0.19579763551272222, 'colsample_bytree': 0.8263042221498889, 'lambda': 5.420896912017756, 'alpha': 1.0454051924127163}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 03:58:48,306] Trial 2 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.010246349500354052, 'max_depth': 14, 'subsample': 0.9314346842848439, 'gamma': 0.04029465423363787, 'colsample_bytree': 0.6020244820015135, 'lambda': 4.87967806417353, 'al


--- Evaluating model with feature H3K9me2_log added back ---



[I 2025-03-13 04:01:22,043] A new study created in memory with name: no-name-8712bd5f-9c8d-46b2-a414-3f955d301d0b


Results Summary: {'Match': 12, 'Mismatch': 4, 'No Evaluation': 1}
Feature: H3K9me2_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3K4me3 ---



[I 2025-03-13 04:01:23,831] Trial 0 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.0419906808211979, 'max_depth': 10, 'subsample': 0.864309366953081, 'gamma': 0.39797882763843484, 'colsample_bytree': 0.7719393880706126, 'lambda': 5.590702993493193, 'alpha': 8.201729886427222}. Best is trial 0 with value: 0.7333333333333333.
[I 2025-03-13 04:01:30,158] Trial 1 finished with value: 0.6666666666666666 and parameters: {'learning_rate': 0.046003543002314955, 'max_depth': 28, 'subsample': 0.9777903866281757, 'gamma': 0.8415904830494915, 'colsample_bytree': 0.934337051622707, 'lambda': 11.445469349503261, 'alpha': 1.1021801767813135}. Best is trial 0 with value: 0.7333333333333333.
[I 2025-03-13 04:01:33,588] Trial 2 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.023486613365379355, 'max_depth': 18, 'subsample': 0.7752981587411658, 'gamma': 0.951621008441955, 'colsample_bytree': 0.8533300694331629, 'lambda': 2.0785756950214154, 'alpha'


--- Evaluating model with feature H3K4me3 added back ---



[I 2025-03-13 04:03:48,444] A new study created in memory with name: no-name-91428ea5-6460-4191-8a49-ff6344e641e0


Results Summary: {'Match': 13, 'Mismatch': 3, 'No Evaluation': 1}
Feature: H3K4me3, Known Function: Activating, Result: Match

--- Performing LOO for feature H3K36me3 ---



[I 2025-03-13 04:03:53,161] Trial 0 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.0012858640677478313, 'max_depth': 23, 'subsample': 0.8040306847302765, 'gamma': 0.7394002930132677, 'colsample_bytree': 0.9920363482607437, 'lambda': 1.707623740917444, 'alpha': 11.80061629352293}. Best is trial 0 with value: 0.7333333333333333.
[I 2025-03-13 04:04:02,570] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.02475510869509776, 'max_depth': 24, 'subsample': 0.8418541364240433, 'gamma': 0.027260307645996962, 'colsample_bytree': 0.7047849240816224, 'lambda': 9.002877432528026, 'alpha': 4.967618659227747}. Best is trial 1 with value: 0.8666666666666667.
[I 2025-03-13 04:04:04,305] Trial 2 finished with value: 0.6 and parameters: {'learning_rate': 0.03717529255678335, 'max_depth': 10, 'subsample': 0.6116628371255471, 'gamma': 0.94104858629566, 'colsample_bytree': 0.9402342264162753, 'lambda': 12.50446995479624, 'alpha': 3.75634410328


--- Evaluating model with feature H3K36me3 added back ---



[I 2025-03-13 04:10:53,241] A new study created in memory with name: no-name-a49337da-2967-4225-b577-ce20cd7b4b71


Results Summary: {'Match': 13, 'Mismatch': 3, 'No Evaluation': 1}
Feature: H3K36me3, Known Function: Activating, Result: Match

--- Performing LOO for feature H4K5Ac ---



[I 2025-03-13 04:10:56,960] Trial 0 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.03844006949355115, 'max_depth': 12, 'subsample': 0.6357908961694291, 'gamma': 0.8070952663969504, 'colsample_bytree': 0.6617718973430983, 'lambda': 2.8865064738047055, 'alpha': 8.29357902384511}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:11:11,989] Trial 1 finished with value: 0.8 and parameters: {'learning_rate': 0.026463443218914324, 'max_depth': 26, 'subsample': 0.8441557094250585, 'gamma': 0.23873915496565157, 'colsample_bytree': 0.9963291086574464, 'lambda': 8.259024998484525, 'alpha': 7.119105526861598}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:11:19,874] Trial 2 finished with value: 0.9333333333333333 and parameters: {'learning_rate': 0.0144403288610955, 'max_depth': 18, 'subsample': 0.8565830489180195, 'gamma': 0.7916556426947101, 'colsample_bytree': 0.868230402170396, 'lambda': 10.606536243397949, 'alpha': 10.73398436481


--- Evaluating model with feature H4K5Ac added back ---



[I 2025-03-13 04:16:02,321] A new study created in memory with name: no-name-b8d445e7-b622-424c-8871-8883b16de612


Results Summary: {'Match': 13, 'Mismatch': 3, 'No Evaluation': 1}
Feature: H4K5Ac, Known Function: Activating, Result: Mismatch

--- Performing LOO for feature H3K27me3_log ---



[I 2025-03-13 04:16:13,128] Trial 0 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.034728232179323755, 'max_depth': 28, 'subsample': 0.7038753390332919, 'gamma': 0.1301483428583906, 'colsample_bytree': 0.9368533218327216, 'lambda': 13.374276483544234, 'alpha': 5.843272064547408}. Best is trial 0 with value: 0.7333333333333333.
[I 2025-03-13 04:16:25,948] Trial 1 finished with value: 0.6666666666666666 and parameters: {'learning_rate': 0.033517854388665926, 'max_depth': 28, 'subsample': 0.9020855979649495, 'gamma': 0.9629707073515447, 'colsample_bytree': 0.9874166487314763, 'lambda': 7.011683151747595, 'alpha': 1.9035190350752456}. Best is trial 0 with value: 0.7333333333333333.
[I 2025-03-13 04:16:36,962] Trial 2 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.009274772708718751, 'max_depth': 24, 'subsample': 0.826686520834605, 'gamma': 0.11266935735869688, 'colsample_bytree': 0.6723781693493636, 'lambda': 11.799127671449217, 'al


--- Evaluating model with feature H3K27me3_log added back ---



[I 2025-03-13 04:22:16,784] A new study created in memory with name: no-name-34b5e55f-a800-423f-8853-2519ba800c8d


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H3K27me3_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3K9Ac_log ---



[I 2025-03-13 04:22:21,509] Trial 0 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.017506080493106663, 'max_depth': 23, 'subsample': 0.548754664366694, 'gamma': 0.6643472573115452, 'colsample_bytree': 0.744647149758086, 'lambda': 2.7913535867113173, 'alpha': 11.882536972146742}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:22:26,467] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.006250232578333364, 'max_depth': 18, 'subsample': 0.6460375546251381, 'gamma': 0.8967393607237335, 'colsample_bytree': 0.9981818675407789, 'lambda': 1.62368681445404, 'alpha': 11.04554406035705}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:22:30,528] Trial 2 finished with value: 0.8 and parameters: {'learning_rate': 0.014998264987951076, 'max_depth': 13, 'subsample': 0.7482760516083802, 'gamma': 0.34098955060481684, 'colsample_bytree': 0.8542791284905421, 'lambda': 5.068424299577798, 'alpha': 10.345962154


--- Evaluating model with feature H3K9Ac_log added back ---



[I 2025-03-13 04:25:46,423] A new study created in memory with name: no-name-d374e397-ee37-4f6d-a969-a0155b60cb6f


Results Summary: {'Match': 13, 'Mismatch': 3, 'No Evaluation': 1}
Feature: H3K9Ac_log, Known Function: Activating, Result: Match

--- Performing LOO for feature cpg_percentage_log ---



[I 2025-03-13 04:25:50,557] Trial 0 finished with value: 0.8 and parameters: {'learning_rate': 0.03804249682196989, 'max_depth': 15, 'subsample': 0.857878881240469, 'gamma': 0.36976848409226504, 'colsample_bytree': 0.5735629427165863, 'lambda': 9.370388701087741, 'alpha': 14.204586411049164}. Best is trial 0 with value: 0.8.
[I 2025-03-13 04:25:55,913] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.0178925122963486, 'max_depth': 16, 'subsample': 0.9826134823667807, 'gamma': 0.3779289166106663, 'colsample_bytree': 0.8254361641304366, 'lambda': 13.614877881663292, 'alpha': 11.95579840081737}. Best is trial 1 with value: 0.8666666666666667.
[I 2025-03-13 04:26:00,432] Trial 2 finished with value: 0.8 and parameters: {'learning_rate': 0.03421708378918642, 'max_depth': 27, 'subsample': 0.5967755337364756, 'gamma': 0.4943290403727105, 'colsample_bytree': 0.770136105966103, 'lambda': 10.870371190337732, 'alpha': 13.450503373373602}. Best is trial 1 with va


--- Evaluating model with feature cpg_percentage_log added back ---



[I 2025-03-13 04:29:39,613] A new study created in memory with name: no-name-4ca74280-444d-4be2-9353-6510fe2dfaa3


Results Summary: {'Match': 13, 'Mismatch': 3, 'No Evaluation': 1}
Feature: cpg_percentage_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature chh_percentage_log ---



[I 2025-03-13 04:29:46,897] Trial 0 finished with value: 0.8 and parameters: {'learning_rate': 0.0070130940042765055, 'max_depth': 20, 'subsample': 0.7835039545809377, 'gamma': 0.7584858095335608, 'colsample_bytree': 0.9785587620390925, 'lambda': 14.670591913888293, 'alpha': 1.1956620277331678}. Best is trial 0 with value: 0.8.
[I 2025-03-13 04:29:51,014] Trial 1 finished with value: 0.8 and parameters: {'learning_rate': 0.010402075146211026, 'max_depth': 19, 'subsample': 0.5260329891661542, 'gamma': 0.4638042618433177, 'colsample_bytree': 0.5556179149372825, 'lambda': 11.562708602017256, 'alpha': 7.720102142578109}. Best is trial 0 with value: 0.8.
[I 2025-03-13 04:29:56,275] Trial 2 finished with value: 0.6666666666666666 and parameters: {'learning_rate': 0.04375186096720396, 'max_depth': 28, 'subsample': 0.7882168199668249, 'gamma': 0.7022776987750384, 'colsample_bytree': 0.8154207509178737, 'lambda': 4.714413348197921, 'alpha': 11.640551998217086}. Best is trial 0 with value: 0.8.



--- Evaluating model with feature chh_percentage_log added back ---



[I 2025-03-13 04:33:53,484] A new study created in memory with name: no-name-7ab60a06-0c09-46e1-8464-27cf5e4d1958


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: chh_percentage_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H4K20me1_log ---



[I 2025-03-13 04:33:59,278] Trial 0 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.00270155228525425, 'max_depth': 29, 'subsample': 0.7087070144208427, 'gamma': 0.40114255934358956, 'colsample_bytree': 0.8890540040069662, 'lambda': 13.987465608189964, 'alpha': 7.026247630130014}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:34:08,745] Trial 1 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.029533096531386836, 'max_depth': 20, 'subsample': 0.6069617826790843, 'gamma': 0.39825692242829847, 'colsample_bytree': 0.9115775741550975, 'lambda': 0.861953590175155, 'alpha': 4.412863211804877}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:34:14,404] Trial 2 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.03880702516897036, 'max_depth': 21, 'subsample': 0.43673927938673396, 'gamma': 0.5408966146060177, 'colsample_bytree': 0.8702344089265154, 'lambda': 0.7758931805421632, 'al


--- Evaluating model with feature H4K20me1_log added back ---



[I 2025-03-13 04:38:56,028] A new study created in memory with name: no-name-c3af5356-8271-4c69-85db-88db3b36f89b


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H4K20me1_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature chg_percentage_log ---



[I 2025-03-13 04:39:12,263] Trial 0 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.011849661686092666, 'max_depth': 17, 'subsample': 0.8278435434521147, 'gamma': 0.422938756228589, 'colsample_bytree': 0.5972583226644917, 'lambda': 0.8565582354082683, 'alpha': 0.6563596834369774}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:39:21,263] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.011522173717621115, 'max_depth': 17, 'subsample': 0.9131410300160807, 'gamma': 0.4143240469399522, 'colsample_bytree': 0.5714355296765512, 'lambda': 13.462268872658539, 'alpha': 1.120593293337675}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:39:28,496] Trial 2 finished with value: 0.8 and parameters: {'learning_rate': 0.008167423281668383, 'max_depth': 25, 'subsample': 0.4868770762362251, 'gamma': 0.16392267274921746, 'colsample_bytree': 0.5378173626056438, 'lambda': 10.687409149312726, 'alpha': 0.757724


--- Evaluating model with feature chg_percentage_log added back ---



[I 2025-03-13 04:44:22,882] A new study created in memory with name: no-name-e8a33564-c807-401d-9e41-6129f00363e8


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: chg_percentage_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H2A.W.7_log ---



[I 2025-03-13 04:44:25,562] Trial 0 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.04427353473862213, 'max_depth': 10, 'subsample': 0.7215047098906474, 'gamma': 0.4227285247975149, 'colsample_bytree': 0.9849733820884943, 'lambda': 8.518122178898746, 'alpha': 3.8750053912273277}. Best is trial 0 with value: 0.7333333333333333.
[I 2025-03-13 04:44:28,560] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.007257503021358336, 'max_depth': 11, 'subsample': 0.783626785579701, 'gamma': 0.7564124850257063, 'colsample_bytree': 0.7323948067013037, 'lambda': 14.68824535028891, 'alpha': 10.382449105394866}. Best is trial 1 with value: 0.8666666666666667.
[I 2025-03-13 04:44:33,171] Trial 2 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.03839163822086764, 'max_depth': 24, 'subsample': 0.5017490471644661, 'gamma': 0.2429566231977045, 'colsample_bytree': 0.5579347941878781, 'lambda': 2.2258183664931583, 'alpha


--- Evaluating model with feature H2A.W.7_log added back ---



[I 2025-03-13 04:48:05,816] A new study created in memory with name: no-name-ca0abfe1-9de9-4ce3-9c57-7fd6581c7bf0


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H2A.W.7_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3K9me1_log ---



[I 2025-03-13 04:48:09,363] Trial 0 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.023470071486168066, 'max_depth': 21, 'subsample': 0.6850174472737456, 'gamma': 0.9195731564934759, 'colsample_bytree': 0.5614918640599811, 'lambda': 8.188783056662755, 'alpha': 14.596065308917522}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:48:17,164] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.04692017848759534, 'max_depth': 20, 'subsample': 0.9082417714029822, 'gamma': 0.6296231628108809, 'colsample_bytree': 0.524123008055112, 'lambda': 0.7936746018985679, 'alpha': 4.180982582293577}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 04:48:25,673] Trial 2 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.009304778711517282, 'max_depth': 29, 'subsample': 0.5628127213418888, 'gamma': 0.355136261447784, 'colsample_bytree': 0.9607196621803246, 'lambda': 2.16252516307343, 'alpha':


--- Evaluating model with feature H3K9me1_log added back ---



[I 2025-03-13 04:52:40,606] A new study created in memory with name: no-name-78da3b4d-8426-4a87-b2bc-f42d8b2ab4f0


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H3K9me1_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H2A.W.6_log ---



[I 2025-03-13 04:52:45,771] Trial 0 finished with value: 0.8 and parameters: {'learning_rate': 0.03849824371088967, 'max_depth': 24, 'subsample': 0.649260976537567, 'gamma': 0.361592075708414, 'colsample_bytree': 0.7923180924226483, 'lambda': 6.864286558726828, 'alpha': 14.235470057948818}. Best is trial 0 with value: 0.8.
[I 2025-03-13 04:52:48,496] Trial 1 finished with value: 0.8 and parameters: {'learning_rate': 0.014407639015083299, 'max_depth': 11, 'subsample': 0.6729095657724261, 'gamma': 0.8403002026308922, 'colsample_bytree': 0.9651432392767416, 'lambda': 12.018813257090416, 'alpha': 12.653385124521272}. Best is trial 0 with value: 0.8.
[I 2025-03-13 04:52:58,793] Trial 2 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.029745336211084668, 'max_depth': 25, 'subsample': 0.5908547826155621, 'gamma': 0.6089668296470772, 'colsample_bytree': 0.9704680984825799, 'lambda': 5.429848427453166, 'alpha': 1.5159695265368995}. Best is trial 0 with value: 0.8.
[I 


--- Evaluating model with feature H2A.W.6_log added back ---



[I 2025-03-13 04:57:07,085] A new study created in memory with name: no-name-2d2867a1-0da0-42b2-9db9-775f803c67a7


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H2A.W.6_log, Known Function: Repressive, Result: Match

--- Performing LOO for feature H3Ac_log ---



[I 2025-03-13 04:57:12,352] Trial 0 finished with value: 0.8 and parameters: {'learning_rate': 0.0325852247841365, 'max_depth': 18, 'subsample': 0.43911542894420164, 'gamma': 0.04337379531477825, 'colsample_bytree': 0.9258162954510679, 'lambda': 2.1493623894047644, 'alpha': 13.334855566120348}. Best is trial 0 with value: 0.8.
[I 2025-03-13 04:57:31,278] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.003235162990452931, 'max_depth': 20, 'subsample': 0.8704425063176423, 'gamma': 0.06621558768174463, 'colsample_bytree': 0.6904954903478915, 'lambda': 0.9373912101198404, 'alpha': 1.682521095403877}. Best is trial 1 with value: 0.8666666666666667.
[I 2025-03-13 04:57:38,727] Trial 2 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.03904993137179325, 'max_depth': 30, 'subsample': 0.9022986147631918, 'gamma': 0.8350999052124265, 'colsample_bytree': 0.6653922461882549, 'lambda': 14.826455511261512, 'alpha': 2.9217044301304154}. Be


--- Evaluating model with feature H3Ac_log added back ---



[I 2025-03-13 05:02:24,142] A new study created in memory with name: no-name-eb6fd24b-e6be-4a72-967f-c86467cd61ba


Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H3Ac_log, Known Function: Activating, Result: Match

--- Performing LOO for feature H3K9K14Ac_log ---



[I 2025-03-13 05:02:31,906] Trial 0 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.01762553057573311, 'max_depth': 25, 'subsample': 0.8932077061309556, 'gamma': 0.017181320199561978, 'colsample_bytree': 0.7420273527026836, 'lambda': 9.144335155204729, 'alpha': 12.21030059802792}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 05:02:36,394] Trial 1 finished with value: 0.8666666666666667 and parameters: {'learning_rate': 0.04414653401419054, 'max_depth': 16, 'subsample': 0.9103893298810384, 'gamma': 0.7083493668337324, 'colsample_bytree': 0.617270579772413, 'lambda': 1.64501834615724, 'alpha': 13.975125313436964}. Best is trial 0 with value: 0.8666666666666667.
[I 2025-03-13 05:02:40,789] Trial 2 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 0.03383154413679577, 'max_depth': 16, 'subsample': 0.5615455631064362, 'gamma': 0.7498508513651759, 'colsample_bytree': 0.9892223351941425, 'lambda': 6.399866400998154, 'alpha':


--- Evaluating model with feature H3K9K14Ac_log added back ---



[I 2025-03-13 05:06:58,467] A new study created in memory with name: no-name-cc11ee86-0ff9-4cbb-a68e-4c13e1a4be6c


Results Summary: {'Match': 13, 'Mismatch': 3, 'No Evaluation': 1}
Feature: H3K9K14Ac_log, Known Function: Activating, Result: Match

--- Performing LOO for feature H3K23me1_log ---



[I 2025-03-13 05:07:02,708] Trial 0 finished with value: 0.8125 and parameters: {'learning_rate': 0.017442171455456238, 'max_depth': 21, 'subsample': 0.5616273806561785, 'gamma': 0.48014708926641025, 'colsample_bytree': 0.8790536736076778, 'lambda': 12.652378191379597, 'alpha': 5.760391101658183}. Best is trial 0 with value: 0.8125.
[I 2025-03-13 05:07:06,827] Trial 1 finished with value: 0.6875 and parameters: {'learning_rate': 0.04568654770233403, 'max_depth': 15, 'subsample': 0.48132483112141533, 'gamma': 0.3404708576197504, 'colsample_bytree': 0.8486278526932878, 'lambda': 0.36312461500664706, 'alpha': 5.91284846899942}. Best is trial 0 with value: 0.8125.
[I 2025-03-13 05:07:12,260] Trial 2 finished with value: 0.875 and parameters: {'learning_rate': 0.00249994236986698, 'max_depth': 27, 'subsample': 0.7943922061450512, 'gamma': 0.18531555186303572, 'colsample_bytree': 0.7419451386482423, 'lambda': 7.946677853929178, 'alpha': 6.618798584364372}. Best is trial 2 with value: 0.875.



--- Evaluating model with feature H3K23me1_log added back ---





Results Summary: {'Match': 14, 'Mismatch': 2, 'No Evaluation': 1}
Feature: H3K23me1_log, Known Function: Unknown, Result: No Evaluation

--- Feature Analysis Results ---
Feature: H3K4me1, Total Score: 0.7987461101729423, Known Function: Activating, Result: Mismatch
Feature: H3K9me2_log, Total Score: 1.1884491329547018, Known Function: Repressive, Result: Match
Feature: H3K4me3, Total Score: 1.059755397029221, Known Function: Activating, Result: Match
Feature: H3K36me3, Total Score: 1.1901368650142103, Known Function: Activating, Result: Match
Feature: H4K5Ac, Total Score: 0.946912654035259, Known Function: Activating, Result: Mismatch
Feature: H3K27me3_log, Total Score: 0.9107447473797947, Known Function: Repressive, Result: Match
Feature: H3K9Ac_log, Total Score: 1.0977196507155895, Known Function: Activating, Result: Match
Feature: cpg_percentage_log, Total Score: 1.0289717165287584, Known Function: Repressive, Result: Match
Feature: chh_percentage_log, Total Score: 1.347812474705278

## Score as a metric