In [30]:
from string import punctuation
from nltk import word_tokenize
from nltk.corpus import stopwords
import sent2vec


expression_unified_ds = [line.strip().split("\t") for line in open("../unified_dataset/expression.tsv")][1:]
kinaseact_unified_ds = [line.strip().split("\t") for line in open("../unified_dataset/kinaseact.tsv")][1:]

stop_words = set(stopwords.words('english'))
def preprocess_sentence(text):
    text = text.replace('/', ' / ')
    text = text.replace('.-', ' .- ')
    text = text.replace('.', ' . ')
    text = text.replace('\'', ' \' ')
    text = text.lower()

    tokens = [token for token in word_tokenize(text) if token not in punctuation and token not in stop_words]

    return ' '.join(tokens)

sentences_only_expression_data = [preprocess_sentence(row[1]) for row in expression_unified_ds]
sentences_only_kinaseact_data = [preprocess_sentence(row[1]) for row in kinaseact_unified_ds]
bio_sent_vec_model_location = "../biosentvec/model.bin"

In [31]:
model_path = bio_sent_vec_model_location
biosentvec_model = sent2vec.Sent2vecModel()
try:
    biosentvec_model.load_model(model_path)
except Exception as e:
    print(e)
print('model successfully loaded')

model successfully loaded


In [32]:
corpus_expr = biosentvec_model.embed_sentences(sentences_only_expression_data)
corpus_kinaseact = biosentvec_model.embed_sentences(sentences_only_kinaseact_data)

In [33]:
from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score
from sklearn.model_selection import RandomizedSearchCV, cross_validate, StratifiedKFold
from models import POSSIBLE_CLASSIFIERS


def train_classifier(X, y):

    best_score = 0
    best_classifier = None
    best_params = None
    best_classifier_name = ""

    stratified_k_folds = StratifiedKFold(n_splits=5)
    
    scoring = {'precision': make_scorer(precision_score),
           'recall': make_scorer(recall_score),
           'f1': make_scorer(f1_score)}

    best_results = {}
    best_index = 0
    # Iterate over classifiers and perform grid search
    for classifier_name, classifier_info in POSSIBLE_CLASSIFIERS.items():
        random_search = RandomizedSearchCV(estimator=classifier_info['model'], n_iter=100,
                                           param_distributions=classifier_info['params'], cv=stratified_k_folds,
                                           scoring=scoring, refit='f1',
                                           verbose=1, n_jobs=-1)
        random_search.fit(X, y)

        print(f"Finished training model and fitting best hyperparameters for {classifier_name}. F1 score: "
              f"{str(random_search.best_score_)}")

        if random_search.best_score_ > best_score:
            best_score = random_search.best_score_
            best_classifier = random_search.best_estimator_
            best_params = random_search.best_params_
            best_classifier_name = classifier_name
            best_results = random_search.cv_results_
            best_index = random_search.best_index_

    # Retrieve the average precision, recall, and F1 score
    average_precision = best_results['mean_test_precision'][best_index]
    std_precision = best_results['std_test_precision'][best_index]
    average_recall = best_results['mean_test_recall'][best_index]
    std_recall = best_results['std_test_recall'][best_index]
    average_f1 = best_results['mean_test_f1'][best_index]
    std_f1 = best_results['std_test_f1'][best_index]

    # Return the trained model and performance metrics
    return best_classifier, average_precision, average_recall, average_f1, best_classifier_name, best_params, std_precision, std_recall, std_f1


## Task 1

### Expression

In [34]:
import joblib

y = [int(row[2]) for row in expression_unified_ds]
classifier, precision, recall, fscore, classifier_name, params, std_precision, std_recall, std_f1 = train_classifier(corpus_expr, y)
joblib.dump(classifier, '../classifiers/all_info_expression.joblib')
print(f'precision: {str(precision)} ± {str(std_precision)}')
print(f'recall: {str(recall)} ± {str(std_recall)}')
print(f'fscore: {str(fscore)} ± {str(std_f1)}')
print(f'selected model: {classifier_name}')
print(f'selected params {str(params)}')

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for LogisticRegression. F1 score: 0.768103218955152
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for RandomForestClassifier. F1 score: 0.37648681208886836
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for GradientBoostingClassifier. F1 score: 0.6614182736283357
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for XGBClassifier. F1 score: 0.6857888216161057
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Note: You have installed the 'manylinux2014' variant of XGBoost. 

Finished training model and fitting best hyperparameters for MLPClassifier. F1 score: 0.7618424633844216
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(aver

Finished training model and fitting best hyperparameters for SVC. F1 score: 0.790664300587726
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for DecisionTreeClassifier. F1 score: 0.5050993820669383
Fitting 10 folds for each of 64 candidates, totalling 640 fits




Finished training model and fitting best hyperparameters for KNeighborsClassifier. F1 score: 0.727557811641403
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for SGDClassifier. F1 score: 0.6771260297270549
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for Perceptron. F1 score: 0.6292955152654781
precision: 0.7646926711472338 ± 0.23953791950506634
recall: 0.8744959677419354 ± 0.04683162231575246
fscore: 0.790664300587726 ± 0.17318666807433064
selected model: SVC
selected params {'C': 28.568582885611278, 'class_weight': 'balanced', 'coef0': 4.2397761108428265, 'decision_function_shape': 'ovr', 'degree': 7, 'gamma': 0.0033310859629487252, 'kernel': 'rbf', 'random_state': 15, 'shrinking': False, 'tol': 0.0018441845456788398}


### Kinaseact

In [35]:
y = [int(row[2]) for row in kinaseact_unified_ds]
classifier, precision, recall, fscore, classifier_name, params, std_precision, std_recall, std_f1 = train_classifier(corpus_kinaseact, y)
joblib.dump(classifier, '../classifiers/all_info_kinase.joblib')
print(f'precision: {str(precision)} ± {str(std_precision)}')
print(f'recall: {str(recall)} ± {str(std_recall)}')
print(f'fscore: {str(fscore)} ± {str(std_f1)}')
print(f'selected model: {classifier_name}')
print(f'selected params {str(params)}')

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for LogisticRegression. F1 score: 0.7510095386685894
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for RandomForestClassifier. F1 score: 0.47028554527434696
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for GradientBoostingClassifier. F1 score: 0.6392249084621666
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for XGBClassifier. F1 score: 0.6614915843059804
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for MLPClassifier. F1 score: 0.7352327754765865
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for SVC. F1 score: 0.7578243799397761
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for DecisionTreeClassifier. F1 score: 0.4947176178209123
Fitting 10 folds for each of 64 candidates, totalling 640 fits




Finished training model and fitting best hyperparameters for KNeighborsClassifier. F1 score: 0.6291448617319627
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for SGDClassifier. F1 score: 0.6748231776505351
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for Perceptron. F1 score: 0.6523332468102717
precision: 0.705168655690192 ± 0.2342302410634475
recall: 0.9052873563218391 ± 0.08224489337617212
fscore: 0.7578243799397761 ± 0.16929877993922662
selected model: SVC
selected params {'C': 22.589375026627735, 'class_weight': 'balanced', 'coef0': 3.705718587956639, 'decision_function_shape': 'ovr', 'degree': 2, 'gamma': 0.0007716740275421502, 'kernel': 'rbf', 'random_state': 60, 'shrinking': False, 'tol': 0.005474983900251931}


## Task 2

### Expression

In [36]:
y = [int(row[3]) for row in expression_unified_ds]
classifier, precision, recall, fscore, classifier_name, params, std_precision, std_recall, std_f1 = train_classifier(corpus_expr, y)
joblib.dump(classifier, '../classifiers/curatable_expression.joblib')
print(f'precision: {str(precision)} ± {str(std_precision)}')
print(f'recall: {str(recall)} ± {str(std_recall)}')
print(f'fscore: {str(fscore)} ± {str(std_f1)}')
print(f'selected model: {classifier_name}')
print(f'selected params {str(params)}')

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for LogisticRegression. F1 score: 0.8205281713961847
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for RandomForestClassifier. F1 score: 0.6584769960004091
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for GradientBoostingClassifier. F1 score: 0.7832196539745143
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for XGBClassifier. F1 score: 0.7841873946056348
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for MLPClassifier. F1 score: 0.824470494635426
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for SVC. F1 score: 0.8273243334683682
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for DecisionTreeClassifier. F1 score: 0.59400556288168
Fitting 10 folds for each of 64 candidates, totalling 640 fits




Finished training model and fitting best hyperparameters for KNeighborsClassifier. F1 score: 0.7936468773976294
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for SGDClassifier. F1 score: 0.8076907893877996
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for Perceptron. F1 score: 0.7883397861457777
precision: 0.8015016691534613 ± 0.19228148944571122
recall: 0.8981501057082453 ± 0.1021860480206834
fscore: 0.8273243334683682 ± 0.14037381626918236
selected model: SVC
selected params {'C': 29.081232384223867, 'class_weight': 'balanced', 'coef0': 0.576935629929638, 'decision_function_shape': 'ovo', 'degree': 5, 'gamma': 0.0014991694922035514, 'kernel': 'rbf', 'random_state': 37, 'shrinking': True, 'tol': 0.0035692775915775617}


### Kinaseact

In [37]:
y = [int(row[3]) for row in kinaseact_unified_ds]
classifier, precision, recall, fscore, classifier_name, params, std_precision, std_recall, std_f1 = train_classifier(corpus_kinaseact, y)
joblib.dump(classifier, '../classifiers/curatable_kinase.joblib')
print(f'precision: {str(precision)} ± {str(std_precision)}')
print(f'recall: {str(recall)} ± {str(std_recall)}')
print(f'fscore: {str(fscore)} ± {str(std_f1)}')
print(f'selected model: {classifier_name}')
print(f'selected params {str(params)}')

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for LogisticRegression. F1 score: 0.7902444544357572
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for RandomForestClassifier. F1 score: 0.6134891568475498
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for GradientBoostingClassifier. F1 score: 0.7219375736442472
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for XGBClassifier. F1 score: 0.7107770779656004
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for MLPClassifier. F1 score: 0.7732155391285055
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for SVC. F1 score: 0.7973157344707094
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for DecisionTreeClassifier. F1 score: 0.5657084363870298
Fitting 10 folds for each of 64 candidates, totalling 640 fits




Finished training model and fitting best hyperparameters for KNeighborsClassifier. F1 score: 0.6914677110413523
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for SGDClassifier. F1 score: 0.7359439986841185
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for Perceptron. F1 score: 0.7168357601127779
precision: 0.7299360555539045 ± 0.2205095992865638
recall: 0.9396396396396396 ± 0.0588352974230052
fscore: 0.7973157344707094 ± 0.1709387603663735
selected model: SVC
selected params {'C': 3.6251422683698595, 'class_weight': 'balanced', 'coef0': 4.756391212482545, 'decision_function_shape': 'ovo', 'degree': 9, 'gamma': 0.0019365763496707199, 'kernel': 'rbf', 'random_state': 25, 'shrinking': True, 'tol': 0.002293237126256834}


## Task 3

### Expression

In [38]:
y = [int(row[4]) for row in expression_unified_ds]
classifier, precision, recall, fscore, classifier_name, params, std_precision, std_recall, std_f1 = train_classifier(corpus_expr, y)
joblib.dump(classifier, '../classifiers/language_expression.joblib')
print(f'precision: {str(precision)} ± {str(std_precision)}')
print(f'recall: {str(recall)} ± {str(std_recall)}')
print(f'fscore: {str(fscore)} ± {str(std_f1)}')
print(f'selected model: {classifier_name}')
print(f'selected params {str(params)}')

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for LogisticRegression. F1 score: 0.8964935475503399
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for RandomForestClassifier. F1 score: 0.8493586068046366
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for GradientBoostingClassifier. F1 score: 0.8760906165900707
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for XGBClassifier. F1 score: 0.8763060267705509
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for MLPClassifier. F1 score: 0.9017475298926667
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for SVC. F1 score: 0.8987123512065534
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
Finished training model and fitting best hyperparameters for DecisionTreeClassifier. F1 score: 0.720753362416306
Fitting 10 folds for each of 64 candidates, totalling 640 fits




Finished training model and fitting best hyperparameters for KNeighborsClassifier. F1 score: 0.838617335018283
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for SGDClassifier. F1 score: 0.9000404714079938
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for Perceptron. F1 score: 0.8856825497585372
precision: 0.9359785034228476 ± 0.04131086707334136
recall: 0.8846478416098668 ± 0.13102070589207435
fscore: 0.9017475298926667 ± 0.06685296819486246
selected model: MLPClassifier
selected params {'activation': 'relu', 'alpha': 0.0033939379529127015, 'beta_1': 0.5136442767343844, 'beta_2': 0.06373848694765605, 'epsilon': 0.053053924908449734, 'hidden_layer_sizes': (100, 100), 'learning_rate': 'adaptive', 'learning_rate_init': 0.0004381520210155821, 'solver': 'sgd'}


### Kinaseact

In [39]:
y = [int(row[4]) for row in kinaseact_unified_ds]
classifier, precision, recall, fscore, classifier_name, params, std_precision, std_recall, std_f1 = train_classifier(corpus_kinaseact, y)
joblib.dump(classifier, '../classifiers/language_kinase.joblib')
print(f'precision: {str(precision)} ± {str(std_precision)}')
print(f'recall: {str(recall)} ± {str(std_recall)}')
print(f'fscore: {str(fscore)} ± {str(std_f1)}')
print(f'selected model: {classifier_name}')
print(f'selected params {str(params)}')

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for LogisticRegression. F1 score: 0.9521847223627574
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Finished training model and fitting best hyperparameters for RandomForestClassifier. F1 score: 0.8884345242018805
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


KeyboardInterrupt: 