-
Notifications
You must be signed in to change notification settings - Fork 2
/
pCRE_criteria_rank.py
executable file
·53 lines (41 loc) · 1.26 KB
/
pCRE_criteria_rank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import sys
import numpy as np
import pandas as pd
# Shown when the script is run without arguments or with incomplete arguments.
USAGE = """Rank the rows of a pCRE criteria table, one rank column per criterion.

Usage:
    pCRE_criteria_rank.py -df <criteria.tsv> -key <key.tsv>

Arguments:
    -df   Tab-delimited dataframe of pCRE criteria (first column is the index,
          first row is the header).
    -key  Tab-delimited file with one line per column to rank:
          <column name><TAB><high|low|binary>
          'low' means the lowest value gets rank 1; 'high' and 'binary' mean
          the highest value gets rank 1.

Columns whose name contains both "GO_" and "_%" are always ranked as 'high'.
The ranks are written to <criteria.tsv>_Rank (tab-delimited, missing = 'na')."""

# Maps each accepted key-file direction to the `ascending` flag given to rank().
RANK_ASCENDING = {'low': True, 'high': False, 'binary': False}


def parse_args(argv):
    """Extract the dataframe and key file paths from a command line.

    Flags are read from the odd positions of *argv* (1, 3, 5, ...), each
    followed by its value. Flags other than -df and -key are ignored.

    Args:
        argv: Full argument list, including the program name at index 0.

    Returns:
        Tuple ``(df_path, key_path)``.

    Raises:
        ValueError: If -df or -key is absent or has no value after it.
    """
    paths = {'-df': None, '-key': None}
    for pos in range(1, len(argv), 2):
        flag = argv[pos]
        if flag not in paths:
            continue
        if pos + 1 >= len(argv):
            raise ValueError("missing value after " + flag)
        paths[flag] = argv[pos + 1]

    missing = [flag for flag in ('-df', '-key') if paths[flag] is None]
    if missing:
        raise ValueError("missing required argument(s): " + ", ".join(missing))
    return paths['-df'], paths['-key']


def load_key(key_path):
    """Read the key file into a ``{column name: direction}`` dict.

    Each non-blank line must hold exactly two tab-separated fields. Blank
    lines (for example a trailing newline at the end of the file) are skipped.

    Args:
        key_path: Path to the tab-delimited key file.

    Returns:
        Dict mapping column name to the direction string given in the file,
        in file order.

    Raises:
        ValueError: If a non-blank line does not have exactly two fields.
    """
    key = {}
    with open(key_path, 'r') as handle:
        for line_no, raw in enumerate(handle, 1):
            line = raw.strip()
            if not line:
                continue
            fields = line.split('\t')
            if len(fields) != 2:
                raise ValueError(
                    "{}: line {}: expected 'column<TAB>direction', got {!r}"
                    .format(key_path, line_no, line))
            column, direction = fields
            key[column] = direction
    return key


def rank_criteria(df, key):
    """Rank every requested column of *df*.

    Args:
        df: Criteria dataframe.
        key: Dict mapping column name to 'low', 'high' or 'binary'. It is not
            modified.

    Returns:
        DataFrame indexed by the index values of *df*, with one column per
        entry of *key* followed by any GO term columns that were added.
        A column with an unrecognised direction is reported on stdout and
        left entirely missing.
    """
    directions = dict(key)
    # Adds any GO term columns since those will have different names
    # depending on the cluster. This overrides a direction given in the key.
    for column in df.columns:
        if "GO_" in column and "_%" in column:
            directions[column] = 'high'

    ranks = pd.DataFrame(index=df.index.values, columns=list(directions))
    for column, direction in directions.items():
        if direction not in RANK_ASCENDING:
            print("error with: " + column)
            continue
        ranks[column] = df[column].rank(ascending=RANK_ASCENDING[direction])
    return ranks


def main(argv=None):
    """Run the ranking from the command line.

    Loads the criteria dataframe and the key, prints the first rows of the
    rank table and writes the full table to ``<df path>_Rank``.

    Args:
        argv: Argument list including the program name; defaults to sys.argv.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) <= 1:
        print(USAGE)
        sys.exit()
    try:
        df_path, key_path = parse_args(argv)
    except ValueError as err:
        # A string argument makes sys.exit print to stderr and exit non-zero.
        sys.exit("{}\n\n{}".format(err, USAGE))

    # Load Criteria Dataframe
    df = pd.read_csv(df_path, sep='\t', header=0, index_col=0)

    # Get all the column names you want to rank
    key = load_key(key_path)

    ranks = rank_criteria(df, key)
    print(ranks.head())

    ranks.to_csv(df_path + '_Rank', sep='\t', na_rep='na')


if __name__ == "__main__":
    main()