Skip to content

Commit

Permalink
Using new config file and format, cleanup and optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
bishoph committed Dec 2, 2017
1 parent 8507ae6 commit 6739f9c
Show file tree
Hide file tree
Showing 21 changed files with 258 additions and 301 deletions.
2 changes: 2 additions & 0 deletions readme.md
Expand Up @@ -115,6 +115,8 @@ Usage:
-d --delete [word] : delete [word] from dictionary and exits.
'*' deletes everything!
-i --ini [file] : use alternative configuration file
-a --analysis : show dictionary analysis and exits.
-u --unit : run unit tests
Expand Down
83 changes: 54 additions & 29 deletions sopare.py
Expand Up @@ -19,9 +19,9 @@

import sys
import getopt
import sopare.config as config
import sopare.util as util
import sopare.recorder as recorder
import sopare.hatch as hatch
import sopare.log as log
import test.unit_tests as tests
from sopare.version import __version__
Expand All @@ -35,14 +35,18 @@ def main(argv):
plot = False
wave = False
error = False
cfg_ini = None

recreate = False
unit = False

print ("sopare "+__version__)

if (len(argv) > 0):
try:
opts, args = getopt.getopt(argv, "ahelpv~cous:w:r:t:d:",
opts, args = getopt.getopt(argv, "ahelpv~cous:w:r:t:d:i:",
["analysis", "help", "error", "loop", "plot", "verbose", "wave", "create", "overview", "unit",
"show=", "write=", "read=", "train=", "delete="
"show=", "write=", "read=", "train=", "delete=", "ini="
])
except getopt.GetoptError:
usage()
Expand All @@ -66,8 +70,7 @@ def main(argv):
if (opt in ("-~", "--wave")):
wave = True
if opt in ("-c", "--create"):
recreate_dict(debug)
sys.exit(0)
recreate = True
if opt in ("-o", "--overview"):
show_dict_ids(debug)
sys.exit(0)
Expand All @@ -86,56 +89,77 @@ def main(argv):
if opt in ("-d", "--delete"):
delete_word(arg, debug)
sys.exit(0)
if opt in ("-i", "--ini"):
cfg_ini = arg
if opt in ("-u", "--unit"):
unit_tests(debug)
sys.exit(0)
unit = True

cfg = create_config(cfg_ini, endless_loop, debug, plot, wave, outfile, infile, dict, error)

if (recreate == True):
recreate_dict(debug, cfg)
sys.exit(0)

hatched = hatch.hatch()
hatched.add("endless_loop", endless_loop)
hatched.add("debug", debug)
hatched.add("plot", plot)
hatched.add("wave", wave)
hatched.add("outfile", outfile)
hatched.add("infile",infile )
hatched.add("dict", dict)
logger = log.log(debug, error)
hatched.add("logger", logger)
recorder.recorder(hatched)
if (unit == True):
unit_tests(debug, cfg)
sys.exit(0)

def recreate_dict(debug):

recorder.recorder(cfg)

def create_config(cfg_ini, endless_loop, debug, plot, wave, outfile, infile, dict, error):
    """Build the configuration object that is handed to the rest of the program.

    cfg_ini      -- path of an alternative configuration file, or None to
                    let config.config() pick its default
    endless_loop, debug, plot, wave
                 -- command line flags; stored as strings in 'cmdlopt'
    outfile, infile, dict
                 -- command line values; stored unchanged in 'cmdlopt'
    error        -- passed to the logger together with debug

    Returns the config object with the 'cmdlopt' section filled in and the
    logger attached.
    """
    # Identity test: None is a singleton, so "is" is the correct comparison.
    if cfg_ini is None:
        cfg = config.config()
    else:
        cfg = config.config(cfg_ini)
    logger = log.log(debug, error, cfg)
    cfg.addsection('cmdlopt')
    # Flags are converted with str() before they are stored.
    flags = (
        ('endless_loop', endless_loop),
        ('debug', debug),
        ('plot', plot),
        ('wave', wave),
    )
    for name, value in flags:
        cfg.setoption('cmdlopt', name, str(value))
    # These values are stored without conversion.
    # NOTE(review): they may be None when the option was not given -- confirm
    # that setoption accepts that.
    cfg.setoption('cmdlopt', 'outfile', outfile)
    cfg.setoption('cmdlopt', 'infile', infile)
    cfg.setoption('cmdlopt', 'dict', dict)
    cfg.addlogger(logger)
    return cfg

def recreate_dict(debug, cfg):
    """Recreate the dictionary from the raw input files.

    debug -- debug flag passed to util.util
    cfg   -- configuration object; the float option 'PEAK_FACTOR' of the
             'characteristic' section is read from it
    """
    print ("recreating dictionary from raw input files...")
    # Only the two-argument constructor call is kept; the one-argument call
    # that preceded it was overwritten immediately and had no effect.
    peak_factor = cfg.getfloatoption('characteristic', 'PEAK_FACTOR')
    utilities = util.util(debug, peak_factor)
    utilities.recreate_dict_from_raw_files()

def delete_word(dict, debug):
    """Announce and request the deletion of entries from the dictionary.

    dict  -- the word to delete; "*" is announced as deleting all entries
    debug -- debug flag passed to util.util
    """
    if (dict != "*"):
        print ("deleting "+dict+" from dictionary")
    else:
        # Message typo fixed ("enttries" -> "entries").
        print ("deleting all entries from dictionary")
    # The second constructor argument is passed as None here.
    utilities = util.util(debug, None)
    utilities.deletefromdict(dict)

def show_word_entries(dict, debug):
    """Print a header and let util.showdictentry handle the word ``dict``.

    dict  -- the word whose entries are requested
    debug -- debug flag passed to util.util
    """
    print (dict+" entries in dictionary:")
    # Bare print kept as in the original; under Python 2 it emits an empty line.
    print
    # The second constructor argument is passed as None here.
    utilities = util.util(debug, None)
    utilities.showdictentry(dict)

def show_dict_ids(debug):
    """Print a header and let util.showdictentriesbyid list the dictionary.

    debug -- debug flag passed to util.util
    """
    print ("current entries in dictionary:")
    # The second constructor argument is passed as None here.
    utilities = util.util(debug, None)
    utilities.showdictentriesbyid()

def show_dict_analysis(debug):
    """Print the compiled dictionary analysis, one block per dictionary id.

    debug -- debug flag passed to util.util
    """
    print ("dictionary analysis:")
    # The second constructor argument is passed as None here.
    utilities = util.util(debug, None)
    analysis = utilities.compile_analysis(utilities.getDICT())
    # "word_id" instead of "id" so the builtin is not shadowed.
    for word_id in analysis:
        print (word_id)
        # items() works on Python 2 and 3; each key/value pair is printed on
        # its own line below the id.
        for k, v in analysis[word_id].items():
            print (' ' + str(k) + ' ' + str(v))

def unit_tests(debug, cfg):
    """Run the unit tests from test.unit_tests and report start and end.

    debug -- debug flag forwarded to the tests
    cfg   -- configuration object forwarded to the tests
    """
    print ("starting unit tests...")
    # The unused util.util instance and the one-argument tests.unit_tests
    # call are dropped; only the current call signature is used.
    tests.unit_tests(debug, cfg)
    print ("done.")

def usage():
Expand All @@ -156,6 +180,7 @@ def usage():
print (" -t --train [word] : add raw data to raw dictionary file\n")
print (" -d --delete [word] : delete [word] from dictionary and exits.")
print (" '*' deletes everything!\n")
print (" -i --ini [file] : use alternative configuration file\n")
print (" -a --analysis : show dictionary analysis and exits.\n")
print (" -u --unit : run unit tests\n")

Expand Down
39 changes: 19 additions & 20 deletions sopare/analyze.py
Expand Up @@ -19,7 +19,6 @@

from operator import itemgetter
import sopare.characteristics
import sopare.config
import sopare.stm
import sopare.path
import sopare.util
Expand All @@ -29,13 +28,13 @@

class analyze():

def __init__(self, debug):
self.debug = debug
self.characteristic = sopare.characteristics.characteristic(debug)
self.util = sopare.util.util(debug)
def __init__(self, cfg):
self.cfg = cfg
self.debug = self.cfg.getbool('cmdlopt', 'debug')
self.util = sopare.util.util(self.debug, self.cfg.getfloatoption('characteristic', 'PEAK_FACTOR'))
self.learned_dict = self.util.getDICT()
self.dict_analysis = self.util.compile_analysis(self.learned_dict)
self.stm = sopare.stm.short_term_memory(debug)
self.stm = sopare.stm.short_term_memory(self.cfg)
self.plugins = [ ]
self.load_plugins()
self.last_results = None
Expand Down Expand Up @@ -67,18 +66,18 @@ def framing(self, results, data_length):
for i, row in enumerate(results[id]):
row = self.row_validation(row, id)
row_result = sum(row[0:len(row)]) / self.dict_analysis[id]['min_tokens']
if (row_result >= sopare.config.MARGINAL_VALUE):
if (row_result >= self.cfg.getfloatoption('compare', 'MARGINAL_VALUE')):
arr.append([row_result, i, id])
else:
logging.debug('removing '+id + ' from potential start position '+str(i) + ' bc MARGINAL_VALUE > ' +str(row_result))
sorted_arr = sorted(arr, key=itemgetter(0), reverse = True)
for el in sorted_arr:
if (el[1] not in framing[el[2]] and (sopare.config.MAX_WORD_START_RESULTS == 0 or len(framing[el[2]]) < sopare.config.MAX_WORD_START_RESULTS)):
if (el[1] not in framing[el[2]] and (self.cfg.getintoption('compare', 'MAX_WORD_START_RESULTS') == 0 or len(framing[el[2]]) < self.cfg.getintoption('compare', 'MAX_WORD_START_RESULTS'))):
framing[el[2]].append(el[1])
return framing

def row_validation(self, row, id):
if (row[0] == 0 or len(row) <= sopare.config.MIN_START_TOKENS):
if (row[0] == 0 or len(row) <= self.cfg.getintoption('compare', 'MIN_START_TOKENS')):
return [ 0 ] * len(row)
return row

Expand All @@ -95,27 +94,27 @@ def deep_search(self, framing, data):
for match in framing_match:
sorted_framing_match = sorted(match, key=lambda x: (x[1] + x[2], -x[0]))
nobm = 1
if (hasattr(sopare.config, 'NUMBER_OF_BEST_MATCHES') and sopare.config.NUMBER_OF_BEST_MATCHES > 0):
nobm = sopare.config.NUMBER_OF_BEST_MATCHES
if (self.cfg.hasoption('compare', 'NUMBER_OF_BEST_MATCHES') and self.cfg.getintoption('compare', 'NUMBER_OF_BEST_MATCHES') > 0):
nobm = self.cfg.getintoption('compare', 'NUMBER_OF_BEST_MATCHES')
for x in range(0, nobm):
if (x < len(sorted_framing_match)):
best_match.append(sorted_framing_match[x])
sorted_best_match = sorted(best_match, key=lambda x: (x[1] + x[2], -x[0]))
self.debug_info += str(sorted_best_match).join(['sorted_best_match: ', '\n\n'])
for i, best in enumerate(sorted_best_match):
if (best[0] >= sopare.config.MIN_CROSS_SIMILARITY and best[1] <= sopare.config.MIN_LEFT_DISTANCE and best[2] <= sopare.config.MIN_RIGHT_DISTANCE):
if (best[0] >= self.cfg.getfloatoption('compare', 'MIN_CROSS_SIMILARITY') and best[1] <= self.cfg.getfloatoption('compare', 'MIN_LEFT_DISTANCE') and best[2] <= self.cfg.getfloatoption('compare', 'MIN_RIGHT_DISTANCE')):
for x in range(best[3], best[3] + best[4]):
if (match_results[x] == ''):
match_results[x] = best[5]
if (sopare.config.MAX_TOP_RESULTS > 0 and i > sopare.config.MAX_TOP_RESULTS):
if (self.cfg.getintoption('compare', 'MAX_TOP_RESULTS') > 0 and i > self.cfg.getintoption('compare', 'MAX_TOP_RESULTS')):
break
self.debug_info += str(match_results).join(['match_results: ', '\n\n'])
return match_results

def token_sim(self, characteristic, dcharacteristic):
sim_norm = self.util.similarity(characteristic['norm'], dcharacteristic['norm']) * sopare.config.SIMILARITY_NORM
sim_token_peaks = self.util.similarity(characteristic['token_peaks'], dcharacteristic['token_peaks']) * sopare.config.SIMILARITY_HEIGHT
sim_df = self.util.single_similarity(characteristic['df'], dcharacteristic['df']) * sopare.config.SIMILARITY_DOMINANT_FREQUENCY
sim_norm = self.util.similarity(characteristic['norm'], dcharacteristic['norm']) * self.cfg.getfloatoption('compare', 'SIMILARITY_NORM')
sim_token_peaks = self.util.similarity(characteristic['token_peaks'], dcharacteristic['token_peaks']) * self.cfg.getfloatoption('compare', 'SIMILARITY_HEIGHT')
sim_df = self.util.single_similarity(characteristic['df'], dcharacteristic['df']) * self.cfg.getfloatoption('compare', 'SIMILARITY_DOMINANT_FREQUENCY')
sim = sim_norm + sim_token_peaks + sim_df
sl, sr = self.util.manhatten_distance(characteristic['norm'], dcharacteristic['norm'])
return sim, sl, sr
Expand Down Expand Up @@ -146,12 +145,12 @@ def deep_inspection(self, id, startpos, data):
c += 1.0
if (c > 0):
token_sim[0] = token_sim[0] / c
if (token_sim[0] > 1.0 and c >= sopare.config.MIN_START_TOKENS and c >= self.dict_analysis[id]['min_tokens']):
if (token_sim[0] > 1.0 and c >= self.cfg.getintoption('compare', 'MIN_START_TOKENS') and c >= self.dict_analysis[id]['min_tokens']):
logging.warning('Your calculation basis seems to be wrong as we get results > 1.0!')
token_sim[1] = token_sim[1] / c
token_sim[2] = token_sim[2] / c
token_sim[4] = int(c)
if ((sopare.config.STRICT_LENGTH_CHECK == False and c >= sopare.config.MIN_START_TOKENS ) or c >= self.dict_analysis[id]['min_tokens'] - sopare.config.STRICT_LENGTH_UNDERMINING):
if ((self.cfg.getbool('compare', 'STRICT_LENGTH_CHECK') == False and c >= self.cfg.getintoption('compare', 'MIN_START_TOKENS')) or c >= self.dict_analysis[id]['min_tokens'] - self.cfg.getintoption('compare', 'STRICT_LENGTH_UNDERMINING')):
word_sim.append(token_sim)
return word_sim

Expand All @@ -166,7 +165,7 @@ def get_match(self, framing):
match_results = self.validate_match_result(framing[s:], s, x, match_results)
elif (x == len(framing)-1):
match_results = self.validate_match_result(framing[s:], s, x, match_results)
if (framing.count('') > len(framing) * sopare.config.FILL_RESULT_PERCENTAGE):
if (framing.count('') > len(framing) * self.cfg.getfloatoption('compare', 'FILL_RESULT_PERCENTAGE')):
if (self.debug):
self.debug_info += 'Results contain too many empty tokens. ' + str(framing.count('')) + ' / ' + str(len(framing)) + ' Eliminating results'
return [ ] * len(match_results)
Expand All @@ -175,7 +174,7 @@ def get_match(self, framing):
def validate_match_result(self, result, start, end, match_results):
if (len(result) == 0 or result[0] == ''):
return match_results
if (sopare.config.STRICT_LENGTH_CHECK == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] - sopare.config.STRICT_LENGTH_UNDERMINING or len(result) > self.dict_analysis[result[0]]['max_tokens'])):
if (self.cfg.getbool('compare', 'STRICT_LENGTH_CHECK') == True and (len(result) < self.dict_analysis[result[0]]['min_tokens'] - self.cfg.getintoption('compare', 'STRICT_LENGTH_UNDERMINING') or len(result) > self.dict_analysis[result[0]]['max_tokens'])):
if (self.debug):
self.debug_info += 'STRICT_LENGTH_CHECK failed for '+result[0] + ': ' + str(self.dict_analysis[result[0]]['min_tokens']) + ' > ' + str(len(result)) + ' < ' + str(self.dict_analysis[result[0]]['max_tokens']) + '\n'
match_results.append('')
Expand Down
9 changes: 4 additions & 5 deletions sopare/audio_factory.py
Expand Up @@ -19,13 +19,12 @@

import pyaudio
import logging
import sopare.hatch

class audio_factory():

def __init__(self, hatch):
self.hatch = hatch
self.logger = self.hatch.get('logger').getlog()
def __init__(self, cfg):
self.cfg = cfg
self.logger = self.cfg.getlogger().getlog()
self.logger = logging.getLogger(__name__)
self.stream = None
self.pa = pyaudio.PyAudio()
Expand All @@ -43,7 +42,7 @@ def open(self, sample_rate, input_format=pyaudio.paInt16):
rate=sample_rate,
input=True,
output=False,
frames_per_buffer = sopare.config.CHUNK)
frames_per_buffer = self.cfg.getintoption('stream', 'CHUNK'))
except IOError as e:
self.logger.error("Error: " + str(e))
return None
Expand Down
11 changes: 5 additions & 6 deletions sopare/buffering.py
Expand Up @@ -20,26 +20,25 @@
import multiprocessing
import logging
import sopare.processing
import sopare.hatch

class buffering(multiprocessing.Process):

def __init__(self, hatch, queue):
def __init__(self, cfg, queue):
multiprocessing.Process.__init__(self, name="buffering queue")
self.hatch = hatch
self.cfg = cfg
self.queue = queue
self.proc = sopare.processing.processor(hatch, self)
self.proc = sopare.processing.processor(self.cfg, self)
self.PROCESS_ROUND_DONE = False
self.test_counter = 0
self.logger = self.hatch.get('logger').getlog()
self.logger = self.cfg.getlogger().getlog()
self.logger = logging.getLogger(__name__)
self.start()

def run(self):
self.logger.info("buffering queue runner")
while True:
buf = self.queue.get()
if ((self.hatch.get('endless_loop') == False or self.hatch.get('outfile') != None) and self.PROCESS_ROUND_DONE):
if ((self.cfg.getbool('cmdlopt', 'endless_loop') == False or self.cfg.getoption('cmdlopt', 'outfile') != None) and self.PROCESS_ROUND_DONE):
break
self.proc.check_silence(buf)
self.logger.info("terminating queue runner")
Expand Down
8 changes: 3 additions & 5 deletions sopare/characteristics.py
Expand Up @@ -18,13 +18,11 @@
"""

import numpy
import sopare.config
import sopare.hatch

class characteristic:

def __init__(self, hatch):
self.hatch = hatch
def __init__(self, peak_factor):
self.peak_factor = peak_factor

def getcharacteristic(self, fft, chunked_norm, meta):
fft = numpy.abs(fft)
Expand All @@ -33,7 +31,7 @@ def getcharacteristic(self, fft, chunked_norm, meta):
fc = 0
peaks = [ ]
if (len(chunked_norm) > 0):
where_range = numpy.mean(chunked_norm) / sopare.config.PEAK_FACTOR
where_range = numpy.mean(chunked_norm) / self.peak_factor
peaks = list(numpy.array(numpy.where(chunked_norm > where_range))[0])
where_range = numpy.mean(chunked_norm)
npeaks = numpy.array(numpy.where(chunked_norm > where_range))
Expand Down

0 comments on commit 6739f9c

Please sign in to comment.