-
Notifications
You must be signed in to change notification settings - Fork 5
/
rnaseqSQLreport
executable file
·102 lines (85 loc) · 2.29 KB
/
rnaseqSQLreport
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env ruby
$VERBOSE=nil
require 'optimist'
require 'sqlite3'
require 'csv'
$:.unshift File.dirname(File.expand_path($0))
require 'rnaseq_lib'
ARGV.push("--help") if ARGV.empty?
opts = Optimist::options do
banner File.basename($0)
opt :exp, "experiment name", :required=>:true, :type=>:string
opt :db, "sqlite database name", :required=>true, :type=>:string
end
if File.exists?(opts.db)
db = SQLite3::Database.new(opts.db)
else
db = nil
STDERR << opts.db << " not found!\n"
exit(1)
end
org = db.get_first_value("SELECT org FROM experiments WHERE name = ?", opts.exp)
if !org
STDERR << opts.exp << " not found!\n"
STDERR << "Existing Samples:\n"
db.query("SELECT name FROM experiments").each do |row|
STDERR << row[0] << "\n"
end
exit(1)
end
samples = []
counts = Hash.new
rpkms = Hash.new
db.query("SELECT sample, gene, counts, rpkm FROM gene_counts WHERE experiment = ?", opts.exp).each do |row|
sample, gene, count, rpkm = row
gene = gene.to_i
samples.push(sample) if !samples.include?(sample)
counts[gene] = Hash.new if !counts[gene]
rpkms[gene] = Hash.new if !rpkms[gene]
counts[gene][sample] = count
rpkms[gene][sample] = rpkm
end
samples.sort!
sortsamp = []
db.query("SELECT sample FROM sample_order WHERE experiment = ? ORDER BY order_num", opts.exp).each do |row|
sortsamp.push(row.first)
end
samples = sortsamp if !sortsamp.empty?
avrpkm = Hash.new
rpkms.keys.each do |gene|
total = 0
rpkms[gene].keys.each do |sample|
total += rpkms[gene][sample].to_f
end
avrpkm[gene] = total/samples.size
end
ann = Hash.new
ann_fields = []
if org == "pt3"
ann = Hash.new
db.query("SELECT * FROM pt2pt3").each do |row|
pt3 = row[1].to_i
ann_fields = row.fields if ann_fields.empty?
row.delete_at(1)
ann[pt3] = row
end
ann_fields.delete_at(1)
end
header = ["gene_id"] + ["Avg_RPKM"] + samples.collect{|x| x + "_RPKM"} + samples.collect{|x| x + "_Count"} + ann_fields
print header.to_csv
counts.keys.sort {|x,y| avrpkm[y] <=>avrpkm[x]}.each do |gene|
row = [gene]
row.push(sprintf("%8.2f", avrpkm[gene]))
samples.each do |sample|
row.push(rpkms[gene][sample].to_f)
end
samples.each do |sample|
row.push(counts[gene][sample].to_i)
end
begin
row += ann[gene]
rescue
STDERR << gene << "\n"
end
print row.to_csv
end