Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100755 168 lines (131 sloc) 4.611 kb
22066236 »
2011-09-09 Page splitter: this takes a stream of packets and splits it into pages.
1 #!/usr/bin/env python
2
3 import sys, os
4 import numpy as np
5
6 from util import mrag, page
117fc201 »
2011-09-20 Allow page splitter to use multiple finders.
7 from finders import *
0d228cfb »
2011-09-09 Remove printit use from pagesplit.py
8 from printer import do_print
22066236 »
2011-09-09 Page splitter: this takes a stream of packets and splits it into pages.
9
5dc57b1b »
2012-02-08 Fix missing config import
10 import config
0c9e866f »
2012-02-20 Fix pagesplitter for new finder code.
11 import finders
22066236 »
2011-09-09 Page splitter: this takes a stream of packets and splits it into pages.
12
class PageWriter(object):
    """Append pages to files laid out as ``./<outdir>/<m>/<p as 2 hex digits>``.

    Attributes:
        outdir: directory (joined onto ``.``) under which pages are stored.
        count:  starts at 1 and is incremented after every page written.
        bad:    number of rejected pages; also incremented directly by
                callers that discard a page before it reaches write_page().
    """

    def __init__(self, outdir):
        self.outdir = outdir
        self.count = 1
        self.bad = 0

    def write_page(self, ps):
        """Append every packet of one page to its file, or count it as bad.

        Args:
            ps: non-empty sequence of packet objects.  ``ps[0]`` supplies
                ``me``, ``pe`` (presumably magazine/page error flags -
                confirm against the finder code), ``m`` and ``p``; the
                ``tt`` payload of every element is written in order.

        A page whose first packet has ``me`` or ``pe`` set is not written
        and only bumps ``self.bad``.
        """
        header = ps[0]
        if header.me or header.pe:
            self.bad += 1
            return

        path = os.path.join('.', self.outdir, str(header.m))
        if not os.path.isdir(path):
            os.makedirs(path)
        f = os.path.join(path, '%02x' % header.p)

        # 'ab': repeated transmissions of a page accumulate in one file.
        # The context manager closes the handle even if a write fails.
        with open(f, 'ab') as of:
            for packet in ps:
                of.write(packet.tt)

        if self.count % 50 == 0:
            # Progress line every 50th page: file name, rendered header and
            # the percentage of good pages so far.  The format reproduces
            # the spacing of the former chained print statements.
            # frombuffer() gives a read-only view; copy() keeps the writable
            # array that the deprecated binary-mode fromstring() returned.
            rendered = do_print(np.frombuffer(header.tt, dtype=np.uint8).copy())
            good_pct = "%4.1f" % (100.0*self.count/(self.count+self.bad))
            sys.stdout.write("%s -  %s %s\n" % (f, rendered, good_pct))
        self.count += 1
37
c62e5129 »
2011-09-12 Remove junk from page splitter.
38
22066236 »
2011-09-09 Page splitter: this takes a stream of packets and splits it into pages.
39
class PacketHolder(object):
    """One raw packet plus the addressing fields decoded from it.

    Attributes set on every instance:
        sequence: running number of this packet (class-wide counter).
        m, r:     values returned by mrag() for the first two bytes
                  (presumably magazine and row - confirm in util.mrag);
                  both become -1 for a row-0 packet that no header finder
                  recognised.
        tt:       the raw packet data as passed in.
        used:     initialised to False.

    Attributes set only when a header finder matched:
        me, pe:   always False here (the finder's own flags are ignored).
        p:        page number reported by the finder.
    """

    # Class-wide counter; each new instance takes the current value.
    sequence = 0

    def __init__(self, tt):
        self.sequence = PacketHolder.sequence
        PacketHolder.sequence += 1

        # frombuffer() replaces the deprecated binary-mode fromstring();
        # copy() keeps the array writable, as fromstring()'s result was.
        address = np.frombuffer(tt[:2], dtype=np.uint8).copy()
        (self.m, self.r), _err = mrag(address)

        F = finders.test(finders.all_headers, tt)
        if F:
            # A recognised header is forced to row 0 regardless of mrag().
            self.r = 0
            F.check_page_info()
            self.me = False  # finder's F.me deliberately not used
            self.pe = False  # finder's F.pe deliberately not used
            self.p = F.p
        elif self.r == 0:
            # Claims to be a header but no finder accepted it: mark invalid
            # so the main loop's packet_order filter drops it.
            self.r = -1
            self.m = -1

        self.tt = tt
        self.used = False
65
66
22066236 »
2011-09-09 Page splitter: this takes a stream of packets and splits it into pages.
67
class NullHandler(object):
    """Stand-in handler that silently discards every packet it is given.

    Used in place of a MagHandler for magazines that are not enabled.
    """

    def __init__(self):
        self.highest_packet = 100000000

    def add_packet(self, p):
        """Ignore packet ``p``."""
        pass
74
75
22066236 »
2011-09-09 Page splitter: this takes a stream of packets and splits it into pages.
76
class MagHandler(object):
    """Collect the packets of one magazine and cut them into pages.

    Packets are appended until the next row-0 packet arrives; the packets
    gathered so far are then validated and either handed to the page writer
    or counted as a bad page.
    """

    # Expected order of rows within a page, as the code compares against it.
    packet_order = [0, 27, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                    10, 11, 12, 13, 14, 15, 16, 17,
                    18, 19, 20, 21, 22, 23, 24]
    pol = len(packet_order)

    # Length of the all-zero placeholder packet used for missing rows.
    _PACKET_SIZE = 42
    # After this many rejected packets every remaining middle packet of the
    # page is rejected as well.
    _MAX_BAD = 20

    def __init__(self, m, pagewriter):
        self.m = m
        self.packets = []
        self.seen_header = False
        self.pagewriter = pagewriter

    def good_page(self):
        """Hand the collected packets to the writer and start a new page."""
        self.pagewriter.write_page(self.packets)
        self.packets = []

    def bad_page(self):
        """Discard the collected packets and count one bad page."""
        self.pagewriter.bad += 1
        self.packets = []

    def check_page1(self):
        """Older strict check: accept only a nearly in-order full page.

        The page is trimmed to ``pol`` packets and accepted when at most two
        positions differ from ``packet_order``; anything else is bad.
        """
        if len(self.packets) >= MagHandler.pol:
            self.packets = self.packets[:MagHandler.pol]
            rows = [p.r for p in self.packets]
            matches = sum(1 for got, want in zip(rows, MagHandler.packet_order)
                          if got == want)
            if matches >= (MagHandler.pol - 2):
                self.good_page()
                return

        self.bad_page()

    def fill_missing(self):
        """Rebuild ``self.packets`` as exactly one packet per expected row.

        For each row in ``packet_order`` the first collected packet with
        that row is used; a missing row gets an all-zero placeholder packet.
        """
        rows = [p.r for p in self.packets]
        ans = []
        for n in MagHandler.packet_order:
            try:
                ans.append(self.packets[rows.index(n)])
            except ValueError:
                ans.append(PacketHolder("\x00"*MagHandler._PACKET_SIZE))
        self.packets = ans

    def check_page(self):
        """Filter out implausible packets, then write or reject the page.

        ``ro`` is a packet's index in ``packet_order`` (set by add_packet).
        A middle packet is kept when fewer than ``_MAX_BAD`` packets have
        been rejected, its ``ro`` exceeds the highest ``ro`` kept so far,
        and it is plausibly in sequence: it is the final expected row, or
        directly follows its predecessor, or directly precedes its
        successor, or lies strictly between both neighbours.
        The page is written if more than half of ``pol`` packets survive.
        """
        packets = self.packets
        packets[0].good = True
        highgood = 0
        badcount = 0
        for n in range(1, len(packets)-1):
            cur = packets[n]
            a = cur.ro - packets[n-1].ro
            b = packets[n+1].ro - cur.ro
            c = (cur.r == MagHandler.packet_order[-1])
            in_sequence = c or a == 1 or b == 1 or (a > 0 and b > 0)
            if badcount < MagHandler._MAX_BAD and cur.ro > highgood and in_sequence:
                cur.good = True
                highgood = cur.ro
            else:
                cur.good = False
                badcount += 1
        # The last packet has no successor, so only monotonic order is
        # checked.  With a single packet this is the header itself and the
        # test fails, which sends the page to bad_page() below.
        packets[-1].good = packets[-1].ro > highgood

        self.packets = [p for p in packets if p.good]
        if len(self.packets) > (MagHandler.pol*0.5):
            self.fill_missing()
            self.good_page()
        else:
            self.bad_page()

    def add_packet(self, p):
        """Add packet ``p``; a row-0 packet first closes the pending page.

        ``p.r`` must be in ``packet_order``, otherwise ``list.index`` raises
        ValueError; the main loop filters packets accordingly.
        """
        if p.r == 0:
            if self.seen_header:
                self.check_page()
            else:
                # Packets seen before the first header belong to no page.
                self.bad_page()
            self.seen_header = True
        p.ro = MagHandler.packet_order.index(p.r)
        self.packets.append(p)
150
151
152
if __name__ == '__main__':
    # Usage: <script> OUTDIR < packet-stream
    # Reads fixed-size 42-byte packets from stdin and routes each one to
    # the handler of its magazine; pages end up under OUTDIR.

    w = PageWriter(sys.argv[1])

    # One handler per magazine 0-7; magazines not listed in the config get a
    # handler that drops everything.
    mags = [MagHandler(n, w) if n in config.magazines else NullHandler() for n in range(8)]

    while True:
        tt = sys.stdin.read(42)
        if len(tt) < 42:
            # End of input (or a truncated trailing packet): stop.  Falling
            # off the end of the script exits with status 0, as before.
            break

        p = PacketHolder(tt)
        # Rows outside packet_order (including the -1 given to invalid
        # headers) are ignored.
        if p.r in MagHandler.packet_order:
            mags[p.m].add_packet(p)
c62e5129 »
2011-09-12 Remove junk from page splitter.
169
Something went wrong with that request. Please try again.