-
Notifications
You must be signed in to change notification settings - Fork 1
/
functions.py
100 lines (86 loc) · 2.62 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
import random
def quan_detector(dataset, label):
    """Measure how often the all-zero promoter occurs in ``dataset``.

    Args:
        dataset: Sequence of numeric rows (a list of lists or a 2-D numpy
            array). Each row is one promoter.
        label: Per-sample labels, read as ``label[i][0]``. A value of 1 in
            that slot counts the sample as positive; any other value counts
            it as negative.

    Returns:
        A tuple ``(fraction, pos_count, neg_count)`` where ``fraction`` is
        the share of rows that are entirely zero, and ``pos_count`` /
        ``neg_count`` are how many of those all-zero rows are labelled
        positive / negative. All three are floats. An empty ``dataset``
        yields ``(0.0, 0.0, 0.0)``.
    """
    N = len(dataset)
    if N == 0:
        # Nothing to count; also avoids dividing by zero below.
        return 0., 0., 0.

    count = 0.
    pos_count = 0.
    neg_count = 0.
    for i, row in enumerate(dataset):
        # Compare element-wise against zero so that plain lists and numpy
        # rows are treated the same way.
        if not np.all(np.asarray(row) == 0):
            continue
        count += 1.
        if label[i][0] == 1:
            pos_count += 1.
        else:
            neg_count += 1.
    return count / N, pos_count, neg_count
#quan_detector([[0,0,0],[0,1,0],[0,2,0],[1,0,1]])
def most_repeared_promoter(dataset, label):
    """Find the most frequent promoter, ignoring the all-zero promoter.

    Promoters are identified by the comma-joined string form of their
    elements, so two rows are the same promoter when they print identically.

    Args:
        dataset: Sequence of numeric rows (a list of lists or a 2-D numpy
            array). Each row is one promoter.
        label: Per-sample labels, read as ``label[i][0]``. A value of 1 in
            that slot counts the sample as positive; any other value counts
            it as negative.

    Returns:
        A tuple ``(idx, max_count, counts)``:
        ``idx`` is the winning promoter as a list of strings (its key split
        on commas), ``max_count`` is how many times it occurs, and ``counts``
        is ``[total, positive, negative]`` for that promoter. When several
        promoters share the highest count, the one that first appeared
        latest in ``dataset`` is returned.

    Raises:
        ValueError: If ``dataset`` holds no promoter other than all zeros.
    """
    dict_count = {}
    for i, row in enumerate(dataset):
        # Test for the all-zero promoter numerically: a string key built
        # from '0' would not match float rows, which print as '0.0'.
        if not np.any(np.asarray(row) != 0):
            continue
        str_prom = ','.join(map(str, row))
        tally = dict_count.setdefault(str_prom, [0, 0, 0])
        tally[0] += 1
        if label[i][0] == 1:
            tally[1] += 1
        else:
            tally[2] += 1

    if not dict_count:
        raise ValueError("dataset contains no non-zero promoter")

    idx_temp = None
    max_count = 0
    for key, tally in dict_count.items():
        # '>=' keeps the last key among ties.
        if tally[0] >= max_count:
            max_count = tally[0]
            idx_temp = key

    idx = idx_temp.split(',')
    return idx, max_count, dict_count[idx_temp]
###############################################################
######### Dataset generation ##############
###############################################################
def indx(lab):
    """Split sample positions into two lists according to their class label.

    Args:
        lab: Iterable of class labels, one per sample. The labels are used
            as given (class indices, not one-hot rows).

    Returns:
        A tuple ``(p, n)``: ``p`` lists the positions whose label equals 0
        (the positive / ALS samples) and ``n`` lists every other position
        (the negative / Non-ALS samples). Both lists are in ascending order.
    """
    p = []  # positive samples index -- ALS
    n = []  # negative samples index -- Non-ALS
    # enumerate() walks the labels once and works for any iterable.
    for position, value in enumerate(lab):
        if value == 0:
            p.append(position)
        else:
            n.append(position)
    return p, n
def dataset(X, Y, test_ratio):
    """Split ``X`` and ``Y`` into shuffled training and test subsets.

    The test subset draws the same number of samples from each of the two
    groups returned by ``indx`` (class 0 versus everything else), chosen at
    random; all remaining samples form the training subset.

    Args:
        X: Samples. Must support indexing with a list of integer positions
            (e.g. a numpy array).
        Y: Labels with one row per sample; the class of a sample is the
            ``argmax`` of its row. Must support the same indexing as ``X``.
        test_ratio: Fraction of the samples to reserve for testing.

    Returns:
        A tuple ``(tr_X, tr_Y, te_X, te_Y)``.

    Notes:
        The requested test size is rounded down to a multiple of 5 and then
        halved per group, so an odd size yields one sample fewer. A group
        with fewer members than its share contributes all of them.
    """
    lab = np.argmax(Y, axis=1)
    pos_s, neg_s = indx(lab)
    N = len(lab)

    # Floor division is required here: with true division the rounding to a
    # multiple of 5 is lost and the size turns into a float.
    N_te = int(N * test_ratio) // 5 * 5  # number of test samples
    per_class = int(N_te * 0.5)

    random.shuffle(pos_s)
    random.shuffle(neg_s)
    te_idx = pos_s[:per_class] + neg_s[:per_class]
    # Everything not picked for testing is used for training.
    tr_idx = list(set(range(N)) - set(te_idx))

    random.shuffle(te_idx)
    random.shuffle(tr_idx)
    return X[tr_idx], Y[tr_idx], X[te_idx], Y[te_idx]