Using new config file and format, cleanup and optimizations

bishoph · Dec 2, 2017 · 6739f9c
1 parent 8507ae6
commit 6739f9c
Show file tree

Hide file tree

Showing 21 changed files with 258 additions and 301 deletions.
diff --git a/readme.md b/readme.md
@@ -115,6 +115,8 @@ Usage:
  -d --delete [word]  : delete [word] from dictionary and exits.
                        '*' deletes everything!
 
+ -i --ini    [file]  : use alternative configuration file
+
  -a --analysis       : show dictionary analysis and exits.
 
  -u --unit           : run unit tests

diff --git a/sopare.py b/sopare.py
@@ -19,9 +19,9 @@
 
 import sys
 import getopt
+import sopare.config as config
 import sopare.util as util
 import sopare.recorder as recorder
-import sopare.hatch as hatch
 import sopare.log as log
 import test.unit_tests as tests
 from sopare.version import __version__
@@ -35,14 +35,18 @@ def main(argv):
     plot = False
     wave = False
     error = False
+    cfg_ini = None
+
+    recreate = False
+    unit = False
 
     print ("sopare "+__version__)
 
     if (len(argv) > 0):
         try:
-            opts, args = getopt.getopt(argv, "ahelpv~cous:w:r:t:d:",
+            opts, args = getopt.getopt(argv, "ahelpv~cous:w:r:t:d:i:",
              ["analysis", "help", "error", "loop", "plot", "verbose", "wave", "create", "overview", "unit",
-              "show=", "write=", "read=", "train=", "delete="
+              "show=", "write=", "read=", "train=", "delete=", "ini="
              ])
         except getopt.GetoptError:
             usage()
@@ -66,8 +70,7 @@ def main(argv):
             if (opt in ("-~", "--wave")):
                 wave = True
             if opt in ("-c", "--create"):
-                recreate_dict(debug)
-                sys.exit(0)
+                recreate = True
             if opt in ("-o", "--overview"):
                 show_dict_ids(debug)
                 sys.exit(0)
@@ -86,56 +89,77 @@ def main(argv):
             if opt in ("-d", "--delete"):
                 delete_word(arg, debug)
                 sys.exit(0)
+            if opt in ("-i", "--ini"):
+                cfg_ini = arg
             if opt in ("-u", "--unit"):
-                unit_tests(debug)
-                sys.exit(0)
+                unit = True
+
+    cfg = create_config(cfg_ini, endless_loop, debug, plot, wave, outfile, infile, dict, error)
 
+    if (recreate == True):
+        recreate_dict(debug, cfg)
+        sys.exit(0)
 
-    hatched = hatch.hatch()
-    hatched.add("endless_loop", endless_loop)
-    hatched.add("debug", debug)
-    hatched.add("plot", plot)
-    hatched.add("wave", wave)
-    hatched.add("outfile", outfile)
-    hatched.add("infile",infile )
-    hatched.add("dict", dict)
-    logger = log.log(debug, error)
-    hatched.add("logger", logger)
-    recorder.recorder(hatched)
+    if (unit == True):
+        unit_tests(debug, cfg)
+        sys.exit(0)
 
-def recreate_dict(debug):
+
+    recorder.recorder(cfg)
+
+def create_config(cfg_ini, endless_loop, debug, plot, wave, outfile, infile, dict, error):
+    if (cfg_ini == None):
+        cfg = config.config()
+    else:
+        cfg = config.config(cfg_ini)
+    logger = log.log(debug, error, cfg)
+    cfg.addsection('cmdlopt')
+    cfg.setoption('cmdlopt', 'endless_loop', str(endless_loop))
+    cfg.setoption('cmdlopt', 'debug', str(debug))
+    cfg.setoption('cmdlopt', 'plot', str(plot))
+    cfg.setoption('cmdlopt', 'wave', str(wave))
+    cfg.setoption('cmdlopt', 'outfile', outfile)
+    cfg.setoption('cmdlopt', 'infile', infile)
+    cfg.setoption('cmdlopt', 'dict', dict)
+    cfg.addlogger(logger)
+    return cfg
+
+def recreate_dict(debug, cfg):
     print ("recreating dictionary from raw input files...")
-    utilities = util.util(debug)
+    utilities = util.util(debug, cfg.getfloatoption('characteristic', 'PEAK_FACTOR'))
     utilities.recreate_dict_from_raw_files()
 
 def delete_word(dict, debug):
     if (dict != "*"):
         print ("deleting "+dict+" from dictionary")
     else:
         print ("deleting all enttries from dictionary")
-    utilities = util.util(debug)
+    utilities = util.util(debug, None)
     utilities.deletefromdict(dict)
 
 def show_word_entries(dict, debug):
     print (dict+" entries in dictionary:")
     print
-    utilities = util.util(debug)
+    utilities = util.util(debug, None)
     utilities.showdictentry(dict)
 
 def show_dict_ids(debug):
     print ("current entries in dictionary:")
-    utilities = util.util(debug)
+    utilities = util.util(debug, None)
     utilities.showdictentriesbyid()
 
 def show_dict_analysis(debug):
     print ("dictionary analysis:")
-    utilities = util.util(debug)
-    print (utilities.compile_analysis(utilities.getDICT()))
-
-def unit_tests(debug):
+    utilities = util.util(debug, None)
+    analysis = utilities.compile_analysis(utilities.getDICT())
+    for id in analysis:
+        print (id)
+        for k, v in analysis[id].iteritems():
+            print (' ' + str(k) + ' ' + str(v))
+
+def unit_tests(debug, cfg):
     print ("starting unit tests...")
-    utilities = util.util(debug)
-    tests.unit_tests(debug)
+    tests.unit_tests(debug, cfg)
     print ("done.")
 
 def usage():
@@ -156,6 +180,7 @@ def usage():
     print (" -t --train  [word]  : add raw data to raw dictionary file\n")
     print (" -d --delete [word]  : delete [word] from dictionary and exits.")
     print ("                       '*' deletes everything!\n")
+    print (" -i --ini    [file]  : use alternative configuration file\n")
     print (" -a --analysis       : show dictionary analysis and exits.\n")
     print (" -u --unit           : run unit tests\n")
 

diff --git a/sopare/analyze.py b/sopare/analyze.py
@@ -19,7 +19,6 @@
 
 from operator import itemgetter
 import sopare.characteristics
-import sopare.config
 import sopare.stm
 import sopare.path
 import sopare.util
@@ -29,13 +28,13 @@
 
 class analyze():
 
-    def __init__(self, debug):
-        self.debug = debug
-        self.characteristic = sopare.characteristics.characteristic(debug)
-        self.util = sopare.util.util(debug)
+    def __init__(self, cfg):
+        self.cfg = cfg
+        self.debug = self.cfg.getbool('cmdlopt', 'debug')
+        self.util = sopare.util.util(self.debug, self.cfg.getfloatoption('characteristic', 'PEAK_FACTOR'))
         self.learned_dict = self.util.getDICT()
         self.dict_analysis = self.util.compile_analysis(self.learned_dict)
-        self.stm = sopare.stm.short_term_memory(debug)
+        self.stm = sopare.stm.short_term_memory(self.cfg)
         self.plugins = [ ]
         self.load_plugins()
         self.last_results = None
@@ -67,18 +66,18 @@ def framing(self, results, data_length):
             for i, row in enumerate(results[id]):
                 row = self.row_validation(row, id)
                 row_result = sum(row[0:len(row)]) / self.dict_analysis[id]['min_tokens']
-                if (row_result >= sopare.config.MARGINAL_VALUE):
+                if (row_result >= self.cfg.getfloatoption('compare', 'MARGINAL_VALUE')):
                     arr.append([row_result, i, id])
                 else:
                     logging.debug('removing '+id + ' from potential start position '+str(i) + ' bc MARGINAL_VALUE > ' +str(row_result))
         sorted_arr = sorted(arr, key=itemgetter(0), reverse = True)
         for el in sorted_arr:
-            if (el[1] not in framing[el[2]] and (sopare.config.MAX_WORD_START_RESULTS == 0 or len(framing[el[2]]) < sopare.config.MAX_WORD_START_RESULTS)):
+            if (el[1] not in framing[el[2]] and (self.cfg.getintoption('compare', 'MAX_WORD_START_RESULTS') == 0 or len(framing[el[2]]) < self.cfg.getintoption('compare', 'MAX_WORD_START_RESULTS'))):
                 framing[el[2]].append(el[1])
         return framing
 
     def row_validation(self, row, id):
-        if (row[0] == 0 or len(row) <= sopare.config.MIN_START_TOKENS):
+        if (row[0] == 0 or len(row) <= self.cfg.getintoption('compare', 'MIN_START_TOKENS')):
             return [ 0 ] * len(row)
         return row
 
@@ -95,27 +94,27 @@ def deep_search(self, framing, data):
         for match in framing_match:
             sorted_framing_match = sorted(match, key=lambda x: (x[1] + x[2], -x[0]))
             nobm = 1
-            if (hasattr(sopare.config, 'NUMBER_OF_BEST_MATCHES') and sopare.config.NUMBER_OF_BEST_MATCHES > 0):
-                nobm = sopare.config.NUMBER_OF_BEST_MATCHES
+            if (self.cfg.hasoption('compare', 'NUMBER_OF_BEST_MATCHES') and self.cfg.getintoption('compare', 'NUMBER_OF_BEST_MATCHES') > 0):
+                nobm = self.cfg.getintoption('compare', 'NUMBER_OF_BEST_MATCHES')
             for x in range(0, nobm):
                if (x < len(sorted_framing_match)):
                    best_match.append(sorted_framing_match[x])
         sorted_best_match = sorted(best_match, key=lambda x: (x[1] +  x[2], -x[0]))
         self.debug_info += str(sorted_best_match).join(['sorted_best_match: ', '\n\n'])
         for i, best in enumerate(sorted_best_match):
-            if (best[0] >= sopare.config.MIN_CROSS_SIMILARITY and best[1] <= sopare.config.MIN_LEFT_DISTANCE and best[2] <= sopare.config.MIN_RIGHT_DISTANCE):
+            if (best[0] >= self.cfg.getfloatoption('compare', 'MIN_CROSS_SIMILARITY') and best[1] <= self.cfg.getfloatoption('compare', 'MIN_LEFT_DISTANCE') and best[2] <= self.cfg.getfloatoption('compare', 'MIN_RIGHT_DISTANCE')):
                 for x in range(best[3], best[3] + best[4]):
                     if (match_results[x] == ''):
                         match_results[x] = best[5]
-            if (sopare.config.MAX_TOP_RESULTS > 0 and i > sopare.config.MAX_TOP_RESULTS):
+            if (self.cfg.getintoption('compare', 'MAX_TOP_RESULTS') > 0 and i > self.cfg.getintoption('compare', 'MAX_TOP_RESULTS')):
                 break
         self.debug_info += str(match_results).join(['match_results: ', '\n\n'])
         return match_results
 
     def token_sim(self, characteristic, dcharacteristic):
-        sim_norm = self.util.similarity(characteristic['norm'], dcharacteristic['norm']) * sopare.config.SIMILARITY_NORM
-        sim_token_peaks = self.util.similarity(characteristic['token_peaks'], dcharacteristic['token_peaks']) * sopare.config.SIMILARITY_HEIGHT
-        sim_df = self.util.single_similarity(characteristic['df'], dcharacteristic['df']) * sopare.config.SIMILARITY_DOMINANT_FREQUENCY
+        sim_norm = self.util.similarity(characteristic['norm'], dcharacteristic['norm']) * self.cfg.getfloatoption('compare', 'SIMILARITY_NORM')
+        sim_token_peaks = self.util.similarity(characteristic['token_peaks'], dcharacteristic['token_peaks']) * self.cfg.getfloatoption('compare', 'SIMILARITY_HEIGHT')
+        sim_df = self.util.single_similarity(characteristic['df'], dcharacteristic['df']) * self.cfg.getfloatoption('compare', 'SIMILARITY_DOMINANT_FREQUENCY')
         sim = sim_norm + sim_token_peaks + sim_df
         sl, sr = self.util.manhatten_distance(characteristic['norm'], dcharacteristic['norm'])
         return sim, sl, sr
@@ -146,12 +145,12 @@ def deep_inspection(self, id, startpos, data):
                         c += 1.0
                 if (c > 0):
                     token_sim[0] = token_sim[0] / c
-                    if (token_sim[0] > 1.0 and c >= sopare.config.MIN_START_TOKENS and c >= self.dict_analysis[id]['min_tokens']):
+                    if (token_sim[0] > 1.0 and c >= self.cfg.getintoption('compare', 'MIN_START_TOKENS') and c >= self.dict_analysis[id]['min_tokens']):
                         logging.warning('Your calculation basis seems to be wrong as we get results > 1.0!')
                     token_sim[1] = token_sim[1] / c
                     token_sim[2] = token_sim[2] / c
                     token_sim[4] = int(c)
-                if ((sopare.config.STRICT_LENGTH_CHECK == False and c >= sopare.config.MIN_START_TOKENS ) or c >= self.dict_analysis[id]['min_tokens'] - sopare.config.STRICT_LENGTH_UNDERMINING):
+                if ((self.cfg.getbool('compare', 'STRICT_LENGTH_CHECK') == False and c >= self.cfg.getintoption('compare', 'MIN_START_TOKENS')) or c >= self.dict_analysis[id]['min_tokens'] - self.cfg.getintoption('compare', 'STRICT_LENGTH_UNDERMINING')):
                     word_sim.append(token_sim)
         return word_sim
 
@@ -166,7 +165,7 @@ def get_match(self, framing):
                     match_results = self.validate_match_result(framing[s:], s, x, match_results)
             elif (x == len(framing)-1):
                 match_results = self.validate_match_result(framing[s:], s, x, match_results)
-        if (framing.count('') > len(framing) * sopare.config.FILL_RESULT_PERCENTAGE):
+        if (framing.count('') > len(framing) * self.cfg.getfloatoption('compare', 'FILL_RESULT_PERCENTAGE')):
             if (self.debug):
                 self.debug_info += 'Results contain too many empty tokens. ' + str(framing.count('')) + ' / ' + str(len(framing)) + ' Eliminating results'
             return [ ] * len(match_results)
@@ -175,7 +174,7 @@ def get_match(self, framing):
     def validate_match_result(self, result, start, end, match_results):
         if (len(result) == 0 or result[0] == ''):
             return match_results
-        if (sopare.config.STRICT_LENGTH_CHECK == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] - sopare.config.STRICT_LENGTH_UNDERMINING or len(result) > self.dict_analysis[result[0]]['max_tokens'])):
+        if (self.cfg.getbool('compare', 'STRICT_LENGTH_CHECK') == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] - self.cfg.getintoption('compare', 'STRICT_LENGTH_UNDERMINING') or len(result) > self.dict_analysis[result[0]]['max_tokens'])):
             if (self.debug):
                 self.debug_info += 'STRICT_LENGTH_CHECK failed for '+result[0] + ': ' + str(self.dict_analysis[result[0]]['min_tokens']) + ' > ' + str(len(result)) + ' < ' + str(self.dict_analysis[result[0]]['max_tokens']) + '\n'
             match_results.append('')

diff --git a/sopare/audio_factory.py b/sopare/audio_factory.py
@@ -19,13 +19,12 @@
 
 import pyaudio
 import logging
-import sopare.hatch
 
 class audio_factory():
 
-    def __init__(self, hatch):
-        self.hatch = hatch
-        self.logger = self.hatch.get('logger').getlog()
+    def __init__(self, cfg):
+        self.cfg = cfg
+        self.logger = self.cfg.getlogger().getlog()
         self.logger = logging.getLogger(__name__)
         self.stream = None
         self.pa = pyaudio.PyAudio()
@@ -43,7 +42,7 @@ def open(self, sample_rate, input_format=pyaudio.paInt16):
                 rate=sample_rate,
                 input=True,
                 output=False,
-                frames_per_buffer = sopare.config.CHUNK)
+                frames_per_buffer = self.cfg.getintoption('stream', 'CHUNK'))
         except IOError as e:
             self.logger.error("Error: " + str(e))
             return None

diff --git a/sopare/buffering.py b/sopare/buffering.py
@@ -20,26 +20,25 @@
 import multiprocessing
 import logging
 import sopare.processing
-import sopare.hatch
 
 class buffering(multiprocessing.Process):
 
-    def __init__(self, hatch, queue):
+    def __init__(self, cfg, queue):
         multiprocessing.Process.__init__(self, name="buffering queue")
-        self.hatch = hatch
+        self.cfg = cfg
         self.queue = queue
-        self.proc = sopare.processing.processor(hatch, self)
+        self.proc = sopare.processing.processor(self.cfg, self)
         self.PROCESS_ROUND_DONE = False
         self.test_counter = 0
-        self.logger = self.hatch.get('logger').getlog()
+        self.logger = self.cfg.getlogger().getlog()
         self.logger = logging.getLogger(__name__)
         self.start()
 
     def run(self):
         self.logger.info("buffering queue runner")
         while True:
             buf = self.queue.get()
-            if ((self.hatch.get('endless_loop') == False or self.hatch.get('outfile') != None) and self.PROCESS_ROUND_DONE):
+            if ((self.cfg.getbool('cmdlopt', 'endless_loop') == False or self.cfg.getoption('cmdlopt', 'outfile') != None) and self.PROCESS_ROUND_DONE):
                 break
             self.proc.check_silence(buf)
         self.logger.info("terminating queue runner")

diff --git a/sopare/characteristics.py b/sopare/characteristics.py
@@ -18,13 +18,11 @@
 """
 
 import numpy
-import sopare.config
-import sopare.hatch
 
 class characteristic:
 
-    def __init__(self, hatch):
-        self.hatch = hatch
+    def __init__(self, peak_factor):
+        self.peak_factor = peak_factor
 
     def getcharacteristic(self, fft, chunked_norm, meta):
         fft = numpy.abs(fft)
@@ -33,7 +31,7 @@ def getcharacteristic(self, fft, chunked_norm, meta):
         fc = 0
         peaks = [ ]
         if (len(chunked_norm) > 0):
-            where_range = numpy.mean(chunked_norm) / sopare.config.PEAK_FACTOR
+            where_range = numpy.mean(chunked_norm) / self.peak_factor
             peaks = list(numpy.array(numpy.where(chunked_norm > where_range))[0])
             where_range = numpy.mean(chunked_norm)
             npeaks = numpy.array(numpy.where(chunked_norm > where_range))