In [72]:
# hosts for which a classifier should be built; each entry must match the
# value in the first column of the feature file read further below
hosts = ["A", "B"]

In [73]:
# create an untrained classifier for each host
from classification.ElkanotoSVCClassifier import ElkanotoSVCClassifier

# map every configured host to its own fresh classifier instance
classifiers = {hostName: ElkanotoSVCClassifier() for hostName in hosts}

In [74]:
# load datasets
import numpy as np

# read in file
featureFilePath = "mock_features.csv"

# one (initially empty) list of feature rows per configured host
featuresPerHost = {}
for host in hosts:
    featuresPerHost[host] = []

# read line by line; each row is laid out as: host,virus,feature_1,...,feature_n
with open(featureFilePath, "r") as featureFile:
    for lineNumber, line in enumerate(featureFile, start=1):
        line = line.strip()
        if not line:
            # tolerate blank lines (e.g. an empty line at the end of the file)
            continue

        parts = line.split(",")
        host = parts[0].strip()
        # the second column (the virus field) is not needed for the features
        if host not in featuresPerHost:
            # same exception type as the plain dict lookup, but pointing at the row
            raise KeyError(
                f"{featureFilePath}:{lineNumber}: host {host!r} is not listed in hosts"
            )

        # every remaining column is a numeric feature value
        featuresPerHost[host].append([float(feature) for feature in parts[2:]])

# turn the collected rows of each host into a numpy array
for host in hosts:
    featuresPerHost[host] = np.array(featuresPerHost[host])

In [75]:
# build the input matrix by joining the per-host feature arrays, in `hosts` order
X = np.concatenate(tuple(featuresPerHost[hostName] for hostName in hosts))

def getYForHost(h):
    """Build the binary label vector for host `h`.

    Walks over the module-level `hosts` in order and emits one label per
    feature row stored in `featuresPerHost` for that host: 1 when the row's
    host equals `h`, otherwise 0.

    Returns:
        A numpy array with one label per feature row across all hosts.
    """
    labels = [
        1 if candidate == h else 0
        for candidate in hosts
        for _ in range(len(featuresPerHost[candidate]))
    ]
    return np.array(labels)

In [98]:
# run k-fold cross validation for each host
from sklearn.model_selection import KFold, cross_validate
import numpy as np

# One shared, seeded splitter: the fixed random_state makes the shuffled folds
# reproducible across runs, and every host is evaluated on the same folds.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

scores = {}
for host in hosts:
    # labels for this host: 1 for rows of `host`, 0 for rows of any other host
    y = getYForHost(host)

    # run k-fold cross validation on the full input matrix X
    scores[host] = cross_validate(classifiers[host], X, y, cv=cv, scoring=['balanced_accuracy', 'recall', 'precision'])

In [99]:
# display the raw cross-validation results collected for each host
# (fit/score times plus the per-fold test metrics requested via `scoring`)
scores

{'A': {'fit_time': array([0.00281906, 0.0013957 , 0.00198793, 0.00200129, 0.00100255]),
  'score_time': array([0.00508285, 0.0051415 , 0.00584149, 0.00300026, 0.00403333]),
  'test_balanced_accuracy': array([1., 1., 1., 1., 1.]),
  'test_recall': array([1., 1., 1., 1., 1.]),
  'test_precision': array([1., 1., 1., 1., 1.])},
 'B': {'fit_time': array([0.00099826, 0.00202584, 0.00099373, 0.00101185, 0.00200987]),
  'score_time': array([0.00545621, 0.00300026, 0.00445247, 0.00414681, 0.00300479]),
  'test_balanced_accuracy': array([1., 1., 1., 1., 1.]),
  'test_recall': array([1., 1., 1., 1., 1.]),
  'test_precision': array([1., 1., 1., 1., 1.])}}