In [190]:
import sent2vec

expression_unified_ds = [line.strip().split("\t") for line in open("../../unified_dataset/expression.tsv")][1:]
kinaseact_unified_ds = [line.strip().split("\t") for line in open("../../unified_dataset/kinaseact.tsv")][1:]

sentences_only_expression_data = [row[0] for row in expression_unified_ds]
sentences_only_kinaseact_data = [row[0] for row in kinaseact_unified_ds]
bio_sent_vec_model_location = "../../biosentvec/model.bin"

In [191]:
model_path = bio_sent_vec_model_location
biosentvec_model = sent2vec.Sent2vecModel()
try:
    biosentvec_model.load_model(model_path)
except Exception as e:
    print(e)
print('model successfully loaded')

model successfully loaded


In [192]:
corpus_expr = biosentvec_model.embed_sentences(sentences_only_expression_data)
corpus_kinaseact = biosentvec_model.embed_sentences(sentences_only_kinaseact_data)

In [193]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_recall_fscore_support as score
import joblib

base_clf = MLPClassifier(hidden_layer_sizes=(10, 10), alpha=0.1)
training_set_expression = [corpus_expr[idx] for idx, row in enumerate(expression_unified_ds) if row[5] == "TRAINING"]
testing_set_expression = [corpus_expr[idx] for idx, row in enumerate(expression_unified_ds) if row[5] == "TESTING"]

training_set_kinaseact = [corpus_kinaseact[idx] for idx, row in enumerate(kinaseact_unified_ds) if row[5] == "TRAINING"]
testing_set_kinaseact = [corpus_kinaseact[idx] for idx, row in enumerate(kinaseact_unified_ds) if row[5] == "TESTING"]

## Task 1

### Expression

In [194]:
training_set_classes_expr = [int(row[1]) for row in expression_unified_ds if row[5] == "TRAINING"]
test_set_classes_expr = [int(row[1]) for row in expression_unified_ds if row[5] == "TESTING"]
clf = base_clf.fit(training_set_expression, training_set_classes_expr)
test_predictions = list(clf.predict(testing_set_expression))
precision, recall, fscore, support = score(test_set_classes_expr, test_predictions)
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: [0.95264624 0.69369369]
recall: [0.95264624 0.69369369]
fscore: [0.95264624 0.69369369]
support: [718 111]




In [195]:
precision, recall, fscore, support = score(test_set_classes_expr, test_predictions, average='weighted')
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: 0.9179734620024126
recall: 0.9179734620024126
fscore: 0.9179734620024126
support: None


### Kinaseact

In [196]:
training_set_classes_kinaseact = [int(row[1]) for row in kinaseact_unified_ds if row[5] == "TRAINING"]
test_set_classes_kinaseact = [int(row[1]) for row in kinaseact_unified_ds if row[5] == "TESTING"]
clf = base_clf.fit(training_set_kinaseact, training_set_classes_kinaseact)
test_predictions = list(clf.predict(testing_set_kinaseact))
precision, recall, fscore, support = score(test_set_classes_kinaseact, test_predictions)
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: [0.93304221 0.74774775]
recall: [0.95814649 0.64341085]
fscore: [0.94542773 0.69166667]
support: [669 129]




In [197]:
precision, recall, fscore, support = score(test_set_classes_kinaseact, test_predictions, average='weighted')
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: 0.9030885960327469
recall: 0.9072681704260651
fscore: 0.9044062035620024
support: None


## Task 2

### Expression

In [198]:
training_set_classes_expr = [int(row[2]) for row in expression_unified_ds if row[5] == "TRAINING"]
test_set_classes_expr = [int(row[2]) for row in expression_unified_ds if row[5] == "TESTING"]
clf = base_clf.fit(training_set_expression, training_set_classes_expr)
test_predictions = list(clf.predict(testing_set_expression))
precision, recall, fscore, support = score(test_set_classes_expr, test_predictions)
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: [0.95266272 0.86928105]
recall: [0.96987952 0.80606061]
fscore: [0.96119403 0.83647799]
support: [664 165]


In [199]:
precision, recall, fscore, support = score(test_set_classes_expr, test_predictions, average='weighted')
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: 0.9360668514913122
recall: 0.9372738238841978
fscore: 0.9363711746024412
support: None


### Kinaseact

In [200]:
training_set_classes_kinaseact = [int(row[2]) for row in kinaseact_unified_ds if row[5] == "TRAINING"]
test_set_classes_kinaseact = [int(row[2]) for row in kinaseact_unified_ds if row[5] == "TESTING"]
clf = base_clf.fit(training_set_kinaseact, training_set_classes_kinaseact)
test_predictions = list(clf.predict(testing_set_kinaseact))
precision, recall, fscore, support = score(test_set_classes_kinaseact, test_predictions)
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: [0.92921687 0.81343284]
recall: [0.96105919 0.69871795]
fscore: [0.94486983 0.75172414]
support: [642 156]




In [201]:
precision, recall, fscore, support = score(test_set_classes_kinaseact, test_predictions, average='weighted')
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: 0.9065823951174465
recall: 0.9097744360902256
fscore: 0.9071120267797338
support: None


## Task 3

### Expression

In [202]:
training_set_classes_expr = [int(row[3]) for row in expression_unified_ds if row[5] == "TRAINING"]
test_set_classes_expr = [int(row[3]) for row in expression_unified_ds if row[5] == "TESTING"]
clf = base_clf.fit(training_set_expression, training_set_classes_expr)
test_predictions = list(clf.predict(testing_set_expression))
precision, recall, fscore, support = score(test_set_classes_expr, test_predictions)
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: [0.93632959 0.91864407]
recall: [0.95419847 0.88852459]
fscore: [0.94517958 0.90333333]
support: [524 305]




In [203]:
precision, recall, fscore, support = score(test_set_classes_expr, test_predictions, average='weighted')
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: 0.9298228525908518
recall: 0.9300361881785284
fscore: 0.9297837982461541
support: None


### Kinaseact

In [204]:
training_set_classes_kinaseact = [int(row[3]) for row in kinaseact_unified_ds if row[5] == "TRAINING"]
test_set_classes_kinaseact = [int(row[3]) for row in kinaseact_unified_ds if row[5] == "TESTING"]
clf = base_clf.fit(training_set_kinaseact, training_set_classes_kinaseact)
test_predictions = list(clf.predict(testing_set_kinaseact))
precision, recall, fscore, support = score(test_set_classes_kinaseact, test_predictions)
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: [0.97383178 0.92775665]
recall: [0.96481481 0.94573643]
fscore: [0.96930233 0.93666027]
support: [540 258]




In [205]:
precision, recall, fscore, support = score(test_set_classes_kinaseact, test_predictions, average='weighted')
print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: 0.9589353077801287
recall: 0.9586466165413534
fscore: 0.9587488786242712
support: None
