Skip to content

Commit

Permalink
Revert "Update to use pytesseract rather than do the subprocess stuff…
Browse files Browse the repository at this point in the history
… ourselves"

This reverts commit f82757a.
  • Loading branch information
gb119 committed Aug 1, 2018
1 parent f82757a commit cc4e3b9
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 4 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ install:
- source activate test-environment
# This will ensure we collect all the necessary dependencies so long as master doesn't change them!
- conda install --yes stoner
- conda install --yes pytesseract
# Now we uninstall the stable stoner
- conda remove --yes stoner
- pip install opencv_python
Expand Down
27 changes: 24 additions & 3 deletions Stoner/Image/kerr.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
# Full span of 16-bit grey levels: 0 up to 2^16 - 1.
GRAY_RANGE = (0, 2 ** 16 - 1)
# Standard Kerr image size.
IM_SIZE = (512, 672)
# Size of a Kerr image whose annotation has not been cropped off.
AN_IM_SIZE = (554, 672)
# "kerr_patterns.txt" located in the same directory as this module.
pattern_file = os.path.join(
    os.path.dirname(__file__),
    "kerr_patterns.txt",
)

class KerrArray(ImageArray):

Expand Down Expand Up @@ -155,18 +156,38 @@ def _tesseract_image(self, im, key):
key is the metadata key we're trying to find, it may give a
hint for parsing the text generated.
"""
#first set up temp files to work with
tmpdir=tempfile.mkdtemp()
textfile=os.path.join(tmpdir,'tmpfile.txt')
stdoutfile=os.path.join(tmpdir,'logfile.txt')
imagefile=os.path.join(tmpdir,'tmpim.tif')
with open(textfile,'w') as tf:#open a text file to export metadata to temporarily
pass

#process image to make it easier to read
i=1.0*im / np.max(im) #change to float and normalise
i=exposure.rescale_intensity(i,in_range=(0.49,0.5)) #saturate black and white pixels
i=exposure.rescale_intensity(i) #make sure they're black and white
i=transform.rescale(i, 5.0,mode="constant",multichannel=False, anti_aliasing=True) #rescale to get more pixels on text
data=pytesseract.image_to_string(i)
i=transform.rescale(i, 5.0,mode="constant") #rescale to get more pixels on text
io.imsave(imagefile,(255.0*i).astype("uint8"),plugin='pil') #python imaging library will save according to file extension

#call tesseract
if self.tesseractable:
with open(stdoutfile,"w") as stdout:
subprocess.call(['tesseract', imagefile, textfile[:-4]],stdout=stdout,stderr=subprocess.STDOUT) #adds '.txt' extension itself
os.unlink(stdoutfile)
with open(textfile,'r') as tf:
data=tf.readline()

#delete the temp files
os.remove(textfile)
os.remove(imagefile)
os.rmdir(tmpdir)

#parse the reading
if len(data)==0:
print('No data read for {}'.format(key))
data=self._parse_text(data, key=key)
print(key,data)
return data

def _get_scalebar(self):
Expand Down
Binary file modified tests/Stoner/Image/coretestdata/testsave.tiff
Binary file not shown.

0 comments on commit cc4e3b9

Please sign in to comment.