In [1]:
import pandas as pd
import os, re
import numpy as np
import math

from mhystic.preprocessing import log_meas, select_by_allele, meas2binary, unify_alleles

In [2]:
path = "./models_local/cnn.128.64_bal/"
hists = os.listdir(path)[1:-3]
lists = []
for i in hists:
    lists.append(i.split("."))
    
alleles = np.array(lists)[:, 2]

f1s = {}
for i, j in zip(hists, alleles):
    cur_f1 = np.genfromtxt(path+i)[-1]
    f1s[j.split(",")[0]] = cur_f1

In [3]:
bdata = pd.read_csv("./data/bdata.2009.tsv", sep="\t")
blind = pd.read_csv("./data/blind.tsv", sep="\t")

bdata.mhc = bdata.mhc.apply(unify_alleles)
blind.mhc = blind.mhc.apply(unify_alleles)

bdata.meas = bdata.meas.apply(log_meas)
blind.meas = blind.meas.apply(log_meas)

In [4]:
f1_df = pd.DataFrame(columns=["mhc", "f1", "bind_train", "nonbind_train", "bind_test", "nonbind_test"])

for cur_allele in list(f1s.keys()):
    
    cur_data_train = select_by_allele(bdata, cur_allele)
    cur_data_test = select_by_allele(blind, cur_allele)
    
    binary_meas_train = np.array(meas2binary(cur_data_train.meas))
    binary_meas_test = np.array(meas2binary(cur_data_test.meas))
    
    zeros_train, ones_train = len(np.where(binary_meas_train == False)[0]), len(np.where(binary_meas_train == True)[0])
    zeros_test, ones_test = len(np.where(binary_meas_test == False)[0]), len(np.where(binary_meas_test == True)[0])

    cur_row = [cur_allele, f1s[cur_allele], ones_train, zeros_train, ones_test, zeros_test]
    f1_df.loc[len(f1_df)] = cur_row

In [5]:
f1_df

Unnamed: 0,mhc,f1,bind_train,nonbind_train,bind_test,nonbind_test
0,HLAA0101,0.666667,512.0,3213.0,99.0,597.0
1,HLAA0201,0.890739,3150.0,6415.0,1148.0,978.0
2,HLAA0202,0.76087,1758.0,2161.0,44.0,82.0
3,HLAA0203,0.93254,1953.0,3589.0,426.0,225.0
4,HLAA0206,0.817248,1797.0,3030.0,419.0,263.0
5,HLAA0301,0.870482,1533.0,4608.0,486.0,325.0
6,HLAA1101,0.861985,1758.0,3641.0,341.0,382.0
7,HLAA2301,0.75,398.0,1623.0,172.0,219.0
8,HLAA2402,0.701031,420.0,2113.0,232.0,341.0
9,HLAA2501,0.5,66.0,453.0,5.0,411.0


In [6]:
f1_df.to_csv("./data/local_models_f1s.csv", index=False)