This file is used to read the output of CHTC and save the result as readable files.

In [1]:
import json
import numpy as np

In [2]:
# Collect the cross-validation (CV) results into one JSON file.
# Requires the direct output files of CHTC and generates a file named "res.txt".
nlabel = 4
supervised_res = "./submit_CHTC/supervised/MOFA{}.txt" # from 1 to 75
unsupervised_res = "./submit_CHTC/unsupervised/MOFA_un{}.txt" # from 1 to 75
save_to = "./CV_res/res.txt"


def _load_cv_folds(path_template, k0, n_folds=5):
    """Load the per-fold result files that belong to one value of k0.

    The files are numbered consecutively: k0 owns files
    (k0 - 1) * n_folds + 1 ... k0 * n_folds, one file per fold.

    Returns a dict mapping fold number (0-based) to the parsed JSON content.
    Raises whatever `open` / `json.load` raise for a missing or malformed file.
    """
    first_file = (k0 - 1) * n_folds + 1
    folds = {}
    for fold in range(n_folds):
        with open(path_template.format(str(first_file + fold))) as f:
            folds[fold] = json.load(f)
    return folds


supervised = {}
unsupervised = {}

for k0 in range(1, 16):
    supervised[k0] = _load_cv_folds(supervised_res, k0)
    unsupervised[k0] = _load_cv_folds(unsupervised_res, k0)

# NOTE: the integer keys (k0 and fold number) are written as strings by json,
# so anything reading res.txt back has to index with str(k0) / str(fold).
res = {"supervised": supervised, "unsupervised": unsupervised}

with open(save_to, "w") as f:
    json.dump(res, f)

In [3]:
# Export the CV predictions as a flat table.
# Each record contains: sample index, k0, true label, predicted label, method.
# Requires res.txt, the file that contains all CV results.

# indexes[fold] lists the sample indices associated with that fold; predictions
# stored for a fold are matched to these indices by position.
indexes = [
    [1, 15, 13, 12, 8, 25, 28, 50, 41, 35, 34, 54],
    [16, 3, 18, 19, 21, 30, 24, 51, 47, 46, 33, 55],
    [17, 0, 11, 6, 29, 23, 44, 39, 43, 40, 53],
    [10, 4, 9, 2, 26, 27, 38, 36, 32, 42, 52],
    [5, 22, 20, 14, 7, 31, 45, 37, 48, 49, 56],
]
nlabel = 4

res_file = "./CV_res/res.txt"
data_file = "../data/cleaned_data.csv"
save_to = "./CV_res/prediction.csv"

with open(res_file, "r") as f:
    res = json.loads(f.read())

with open(data_file, "r") as f:
    x = f.readlines()

# Skip the header line. In every data row the first column is ignored, the
# last column is the integer label and everything in between is a feature.
rows = [line.strip().split(",") for line in x[1:]]
X = np.array([[float(value) for value in row[1:-1]] for row in rows])
Y = np.array([int(row[-1]) for row in rows])

header_label = "index, k0, true, prediction, method\n"
records = []
for meth in ["supervised", "unsupervised"]:
    for k0 in range(1, 16):
        for no_fold, fold_samples in enumerate(indexes):
            # where the prediction is saved
            predicted = res[meth][str(k0)][str(no_fold)]["prediction"]["Y"]
            for position, ind_sample in enumerate(fold_samples):
                fields = (ind_sample, k0, Y[ind_sample], predicted[position], meth)
                records.append(",".join(map(str, fields)) + "\n")
header_label += "".join(records)

with open(save_to, "w") as f:
    f.write(header_label)





In [4]:
# Collect the results of the fits on the whole data set into one JSON file.
# These results are mainly used to analyze the memberships.

nlabel = 4
supervised_res = "./submit_CHTC/supervised/MOFA_whole{}.txt" # from 1 to 15
unsupervised_res = "./submit_CHTC/unsupervised/MOFA_un_whole{}.txt" # from 1 to 15
save_to = "./wholeset_res/res.txt"

supervised = {}
unsupervised = {}

# One result file per k0 and per method; supervised is read before unsupervised.
sources = ((supervised_res, supervised), (unsupervised_res, unsupervised))
for k0 in range(1, 16):
    for path_template, collected in sources:
        with open(path_template.format(str(k0))) as f:
            collected[k0] = json.load(f)

res = {"supervised": supervised, "unsupervised": unsupervised}

with open(save_to, "w") as f:
    json.dump(res, f)


In [6]:
# Export the models fitted on the whole set as plain-text matrices.
# For the supervised model the exported parameters are: Mu, S, G, U.
# For the unsupervised model the exported parameters are: Mu, S, U.

res_file = "./wholeset_res/res.txt"
data_file = "../data/cleaned_data.csv"
save_to = "./wholeset_res/{}_{}_{}.txt" # method, k0, parameter name

# read result
with open(res_file, "r") as f:
    res = json.loads(f.read())

for k0 in range(1, 16):
    key = str(k0)  # res.txt was produced by json, so k0 is stored as a string key

    # save topics
    for meth in ["supervised", "unsupervised"]:
        estimate = res[meth][key]["estimation"]
        # The three Mu / Lambda blocks are joined column-wise into one matrix each.
        Mu = np.concatenate(
            [np.array(estimate[name]) for name in ("Mu1", "Mu2", "Mu3")], axis=1
        )
        Lambda = np.concatenate(
            [np.array(estimate[name]) for name in ("Lambda1", "Lambda2", "Lambda3")],
            axis=1,
        )
        S = 1/Lambda  # S is the element-wise reciprocal of Lambda
        for param_name, matrix in (("Mu", Mu), ("S", S)):
            np.savetxt(save_to.format(meth, key, param_name), matrix,
                       delimiter=',', header='', fmt="%.5f")

    # save membership
    # supervised: G comes from the estimation, U from the "test" entry
    meth = "supervised"
    fitted = res[meth][key]
    G = np.array(fitted["estimation"]["G"])
    np.savetxt(save_to.format(meth, key, "G"), G, delimiter=',', header='', fmt="%.5f")
    U = np.array(fitted["test"]["U"])
    np.savetxt(save_to.format(meth, key, "U"), U, delimiter=',', header='', fmt="%.5f")
    # unsupervised: U is part of the estimation itself
    meth = "unsupervised"
    U = np.array(res[meth][key]["estimation"]["U"])
    np.savetxt(save_to.format(meth, key, "U"), U, delimiter=',', header='', fmt="%.5f")




        


In [9]:
# Inspect the layout of the loaded result: print the keys found at each of the
# nested locations below (each tuple is a path of keys starting from `res`).
key_paths = [
    (),
    ('supervised',),
    ('unsupervised',),
    ('supervised', "1"),
    ('unsupervised', "1"),
    ('supervised', "1", "estimation"),
    ('supervised', "1", "test"),
    ('unsupervised', "1", "estimation"),
]
for key_path in key_paths:
    node = res
    for key in key_path:
        node = node[key]
    print(node.keys())

dict_keys(['supervised', 'unsupervised'])
dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'])
dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'])
dict_keys(['estimation', 'prediction', 'test'])
dict_keys(['estimation', 'prediction'])
dict_keys(['a', 'rho', 'G', 'Mu1', 'Mu2', 'Mu3', 'Lambda1', 'Lambda2', 'Lambda3'])
dict_keys(['U'])
dict_keys(['a', 'rho', 'U', 'Mu1', 'Mu2', 'Mu3', 'Lambda1', 'Lambda2', 'Lambda3'])


In [8]:
# Simulate x from the models learned on the whole set.
# For every method and every k0 one CSV file is written; it contains sim_N
# simulated rows for each of the N samples.

sim_N = 30
nlabel = 4
res_file = "./wholeset_res/res.txt"
N = 57
save_to = "./simulated/{}_{}.csv" # method k0
N_FEATURES = 240  # simulated columns; each row gets one extra trailing column


def _label_selectors(k0, k1, nlabel):
    """Build one (k0*nlabel + k1) x (k0 + k1) 0/1 matrix per label.

    The matrix for label i copies the first k0 entries of a vector into the
    i-th block of size k0 and the last k1 entries into the final k1 positions;
    all other positions are zero.
    """
    selectors = []
    for i in range(nlabel):
        selectors.append(np.block([
            [np.zeros((k0*i, k0+k1))],
            [np.eye(k0), np.zeros((k0, k1))],
            [np.zeros((k0*(nlabel-i-1), k0+k1))],
            [np.zeros((k1, k0)), np.eye(k1)]
        ]))
    return selectors


def _stack_topics(estimate):
    """Return (Lambda, Tau) with the three blocks joined column-wise.

    Tau is the element-wise product Mu * Lambda.
    """
    Mu = np.concatenate(
        [np.array(estimate[name]) for name in ("Mu1", "Mu2", "Mu3")], axis=1
    )
    Lambda = np.concatenate(
        [np.array(estimate[name]) for name in ("Lambda1", "Lambda2", "Lambda3")],
        axis=1,
    )
    return Lambda, Mu * Lambda


def _simulate(U, Lambda, Tau, n_samples, n_draws, n_features=N_FEATURES):
    """Draw `n_draws` simulated rows for each of the first `n_samples` rows of U.

    For sample j and column k the draws are normal with
    variance 1 / (U[j] . Lambda[:, k]) and
    mean (U[j] . Tau[:, k]) / (U[j] . Lambda[:, k]).
    The last column (index `n_features`) of every row holds the sample index j.

    Returns an array of shape (n_samples * n_draws, n_features + 1).
    """
    blocks = []
    for j in range(n_samples):
        u = U[j]
        # Start from a block filled with j so the trailing column keeps the index.
        tem = np.zeros((n_draws, n_features + 1)) + j
        for k in range(n_features):
            lambdax = np.dot(u, Lambda[:, k])
            taux = np.dot(u, Tau[:, k])
            sx = 1/lambdax
            mux = sx*taux
            tem[:, k] = np.random.normal(mux, np.sqrt(sx), n_draws)
        blocks.append(tem)
    if not blocks:
        return np.zeros((0, n_features + 1))
    # Concatenate once instead of re-copying the growing array for every sample.
    return np.concatenate(blocks, axis=0)


with open(res_file, "r") as f:
    res = json.load(f)


for k0 in range(1, 16):
    k1 = k0

    # supervised: the membership U of each sample is obtained by applying the
    # selector matrix of its predicted label to its G vector.
    T = _label_selectors(k0, k1, nlabel)
    fitted = res["supervised"][str(k0)]
    Lambda, Tau = _stack_topics(fitted["estimation"])
    G = fitted["prediction"]["G"]
    Ybar = fitted["prediction"]["Y"]
    U = np.array([np.dot(T[Ybar[i]], G[i]) for i in range(len(Ybar))])
    sim_x = _simulate(U, Lambda, Tau, N, sim_N)
    np.savetxt(save_to.format("supervised", str(k0)), sim_x, delimiter=",", fmt="%.2f")

    # unsupervised: the membership U is read directly from the prediction.
    fitted = res["unsupervised"][str(k0)]
    Lambda, Tau = _stack_topics(fitted["estimation"])
    U = np.array(fitted["prediction"]["U"])
    sim_x = _simulate(U, Lambda, Tau, N, sim_N)
    np.savetxt(save_to.format("unsupervised", str(k0)), sim_x, delimiter=",", fmt="%.2f")