In [4]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score

In [5]:
compressed_df = pd.read_csv("drive/MyDrive/PPI/compressed_df.csv")
y_df = pd.read_csv("drive/MyDrive/PPI/not_shuffled_targets.csv")
compressed_df = compressed_df.astype('float32')

print(compressed_df.shape)

(104738, 256)


In [6]:
folds = StratifiedKFold(n_splits=3, shuffle=True)
svc_roc_auc_scores = []
svc_average_precision_score = []
svc_f1_score = []

for train_index, test_index in folds.split(np.array(compressed_df), np.array(y_df)):
    X_train, X_test, y_train, y_test = np.array(compressed_df)[train_index], np.array(compressed_df)[test_index], np.array(y_df)[train_index], np.array(y_df)[test_index]
    y_train = y_train.ravel()
    y_test = y_test.ravel()
    model = KNeighborsClassifier(n_neighbors=3)
    print("model:", model)
    print("X_train legth:", len(X_train))
    print("X_test legth:", len(X_test))
    model.fit(X_train, y_train)
    test_predictions_np = np.array(model.predict(X_test))
    svc_roc_auc_scores.append(roc_auc_score(y_test, test_predictions_np.ravel()))
    svc_average_precision_score.append(average_precision_score(y_test, test_predictions_np))
    svc_f1_score.append(f1_score(y_test, test_predictions_np))

print("svc_roc_auc_scores:", svc_roc_auc_scores)
print("Average roc_auc_score:", sum(svc_roc_auc_scores) / len(svc_roc_auc_scores))
print("svc_average_precision_score:", svc_average_precision_score)
print("Average average_precision_score:", sum(svc_average_precision_score) / len(svc_average_precision_score))
print("svc_f1_score:", svc_f1_score)
print("Average f1_score:", sum(svc_f1_score) / len(svc_f1_score))

model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')
X_train legth: 69825
X_test legth: 34913
model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')
X_train legth: 69825
X_test legth: 34913
model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')
X_train legth: 69826
X_test legth: 34912
svc_roc_auc_scores: [0.7817419889709885, 0.780512954260743, 0.7799896883593034]
Average roc_auc_score: 0.7807482105303448
svc_average_precision_score: [0.7141178054481945, 0.7119394254978164, 0.7109394033509978]
Average average_precision_score: 0.7123322114323362
svc_f1_score: [0.79053273956787

In [7]:
folds = StratifiedKFold(n_splits=3, shuffle=True)
knn_roc_auc_scores = []
knn_average_precision_score = []
knn_f1_score = []

for train_index, test_index in folds.split(np.array(compressed_df), np.array(y_df)):
    X_train, X_test, y_train, y_test = np.array(compressed_df)[train_index], np.array(compressed_df)[test_index], np.array(y_df)[train_index], np.array(y_df)[test_index]
    y_train = y_train.ravel()
    y_test = y_test.ravel()
    model = KNeighborsClassifier(n_neighbors=5)
    print("model:", model)
    print("X_train legth:", len(X_train))
    print("X_test legth:", len(X_test))
    model.fit(X_train, y_train)
    test_predictions_np = np.array(model.predict(X_test))
    knn_roc_auc_scores.append(roc_auc_score(y_test, test_predictions_np.ravel()))
    knn_average_precision_score.append(average_precision_score(y_test, test_predictions_np))
    knn_f1_score.append(f1_score(y_test, test_predictions_np))
    print("roc_auc_score :", roc_auc_score(y_test, test_predictions_np.ravel()))
    print("average_precision_score: ", average_precision_score(y_test, test_predictions_np))
    print("f1_score: ", f1_score(y_test, test_predictions_np))

print("knn_roc_auc_scores:", knn_roc_auc_scores)
print("Average roc_auc_score:", sum(knn_roc_auc_scores) / len(knn_roc_auc_scores))
print("knn_average_precision_score:", knn_average_precision_score)
print("Average average_precision_score:", sum(knn_average_precision_score) / len(knn_average_precision_score))
print("knn_f1_score:", knn_f1_score)
print("Average f1_score:", sum(knn_f1_score) / len(knn_f1_score))

model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')
X_train legth: 69825
X_test legth: 34913
roc_auc_score : 0.7916518535238637
average_precision_score:  0.7219420052992843
f1_score:  0.8032352304695954
model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')
X_train legth: 69825
X_test legth: 34913
roc_auc_score : 0.7959514010384663
average_precision_score:  0.7274069939591284
f1_score:  0.8058961364503298
model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')
X_train legth: 69826
X_test legth: 34912
roc_auc_score : 0.7967174610449129
average_precision_score:  0.727175494945

In [8]:
folds = StratifiedKFold(n_splits=3, shuffle=True)
knn_roc_auc_scores = []
knn_average_precision_score = []
knn_f1_score = []

for train_index, test_index in folds.split(np.array(compressed_df), np.array(y_df)):
    X_train, X_test, y_train, y_test = np.array(compressed_df)[train_index], np.array(compressed_df)[test_index], np.array(y_df)[train_index], np.array(y_df)[test_index]
    y_train = y_train.ravel()
    y_test = y_test.ravel()
    model = KNeighborsClassifier(n_neighbors=15)
    print("model:", model)
    print("X_train legth:", len(X_train))
    print("X_test legth:", len(X_test))
    model.fit(X_train, y_train)
    test_predictions_np = np.array(model.predict(X_test))
    knn_roc_auc_scores.append(roc_auc_score(y_test, test_predictions_np.ravel()))
    knn_average_precision_score.append(average_precision_score(y_test, test_predictions_np))
    knn_f1_score.append(f1_score(y_test, test_predictions_np))
    print("roc_auc_score :", roc_auc_score(y_test, test_predictions_np.ravel()))
    print("average_precision_score: ", average_precision_score(y_test, test_predictions_np))
    print("f1_score: ", f1_score(y_test, test_predictions_np))

print("knn_roc_auc_scores:", knn_roc_auc_scores)
print("Average roc_auc_score:", sum(knn_roc_auc_scores) / len(knn_roc_auc_scores))
print("knn_average_precision_score:", knn_average_precision_score)
print("Average average_precision_score:", sum(knn_average_precision_score) / len(knn_average_precision_score))
print("knn_f1_score:", knn_f1_score)
print("Average f1_score:", sum(knn_f1_score) / len(knn_f1_score))

model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=15, p=2,
                     weights='uniform')
X_train legth: 69825
X_test legth: 34913
roc_auc_score : 0.8082078032695973
average_precision_score:  0.7419130670726619
f1_score:  0.815760510675765
model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=15, p=2,
                     weights='uniform')
X_train legth: 69825
X_test legth: 34913
roc_auc_score : 0.8082962112168032
average_precision_score:  0.7412369228748934
f1_score:  0.8166552527051089
model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=15, p=2,
                     weights='uniform')
X_train legth: 69826
X_test legth: 34912
roc_auc_score : 0.8059692942254812
average_precision_score:  0.7395408971

In [10]:
all_df = pd.read_csv("drive/MyDrive/PPI/processed_protein_pairs_df.csv")
all_df = all_df.astype('float32')

folds = StratifiedKFold(n_splits=3, shuffle=True)
knn_roc_auc_scores = []
knn_average_precision_score = []
knn_f1_score = []

for train_index, test_index in folds.split(np.array(compressed_df), np.array(y_df)):
    X_train, X_test, y_train, y_test = np.array(compressed_df)[train_index], np.array(compressed_df)[test_index], np.array(y_df)[train_index], np.array(y_df)[test_index]
    y_train = y_train.ravel()
    y_test = y_test.ravel()
    model = KNeighborsClassifier(n_neighbors=15)
    print("model:", model)
    print("X_train legth:", len(X_train))
    print("X_test legth:", len(X_test))
    model.fit(X_train, y_train)
    test_predictions_np = np.array(model.predict(X_test))
    knn_roc_auc_scores.append(roc_auc_score(y_test, test_predictions_np.ravel()))
    knn_average_precision_score.append(average_precision_score(y_test, test_predictions_np))
    knn_f1_score.append(f1_score(y_test, test_predictions_np))
    print("roc_auc_score :", roc_auc_score(y_test, test_predictions_np.ravel()))
    print("average_precision_score: ", average_precision_score(y_test, test_predictions_np))
    print("f1_score: ", f1_score(y_test, test_predictions_np))

print("knn_roc_auc_scores:", knn_roc_auc_scores)
print("Average roc_auc_score:", sum(knn_roc_auc_scores) / len(knn_roc_auc_scores))
print("knn_average_precision_score:", knn_average_precision_score)
print("Average average_precision_score:", sum(knn_average_precision_score) / len(knn_average_precision_score))
print("knn_f1_score:", knn_f1_score)
print("Average f1_score:", sum(knn_f1_score) / len(knn_f1_score))

model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=15, p=2,
                     weights='uniform')
X_train legth: 69825
X_test legth: 34913
roc_auc_score : 0.8062886349341714
average_precision_score:  0.7392391147215778
f1_score:  0.8146158274170117
model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=15, p=2,
                     weights='uniform')
X_train legth: 69825
X_test legth: 34913
roc_auc_score : 0.8125924229849151
average_precision_score:  0.7468854751417195
f1_score:  0.8196676129316759
model: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=15, p=2,
                     weights='uniform')
X_train legth: 69826
X_test legth: 34912
roc_auc_score : 0.8058260769935839
average_precision_score:  0.738837595