From 3b75304f38def66410222f8394f7f39e5ef1aee3 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Thu, 30 Nov 2017 10:39:54 +0100
Subject: [PATCH 1/9] first working tensorflow version

---
 speech_recognition/__init__.py | 48 ++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 26ec8118..644a19de 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1201,6 +1201,54 @@ def recognize_ibm(self, audio_data, username, password, language="en-US", show_a
                     transcription.append(hypothesis["transcript"])
         return "\n".join(transcription)
 
+    def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_actions_frozen.pb', tensor_label='tensorflow-data/conv_actions_labels.txt', show_all=False):
+        """
+        Performs speech recognition on ``audio_data`` (an ``AudioData`` instance).
+
+        Tensor loaded from ``tensor_graph``.
+
+        Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the `raw API response <https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/#sessionless_methods>`__ as a JSON dictionary.
+
+        Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection.
+        """
+        assert isinstance(audio_data, AudioData), "Data must be audio data"
+        assert isinstance(tensor_graph, str), "``tensor_graph`` must be a string"
+        assert isinstance(tensor_label, str), "``tensor_label`` must be a string"
+
+        import tensorflow as tf
+        from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
+
+        wav_data = audio_data.get_wav_data(
+            convert_rate=16000, convert_width=2
+        )
+        # load graph
+        with tf.gfile.FastGFile(tensor_graph, 'rb') as f:
+            graph_def = tf.GraphDef()
+            graph_def.ParseFromString(f.read())
+            tf.import_graph_def(graph_def, name='')
+
+        labels = [line.rstrip() for line in tf.gfile.GFile(tensor_label)]
+
+        with tf.Session() as sess:
+            input_layer_name = 'wav_data:0'
+            output_layer_name = 'labels_softmax:0'
+            num_top_predictions = 1
+    # Feed the audio data as input to the graph.
+    #   predictions  will contain a two-dimensional array, where one
+    #   dimension represents the input image count, and the other has
+    #   predictions per class
+            softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name)
+            predictions, = sess.run(softmax_tensor, {input_layer_name: wav_data})
+
+    # Sort to show labels in order of confidence
+            top_k = predictions.argsort()[-num_top_predictions:][::-1]
+            for node_id in top_k:
+                human_string = labels[node_id]
+                score = predictions[node_id]
+                print('%s (score = %.5f)' % (human_string, score))
+                return human_string
+
+
 
 def get_flac_converter():
     """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""

From d46c28855c4df951405be348c28e6073e0c18f12 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Fri, 1 Dec 2017 12:06:53 +0100
Subject: [PATCH 2/9] tensorflow returns only best label

---
 speech_recognition/__init__.py | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 644a19de..31887f4b 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1201,22 +1201,23 @@ def recognize_ibm(self, audio_data, username, password, language="en-US", show_a
                     transcription.append(hypothesis["transcript"])
         return "\n".join(transcription)
 
-    def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_actions_frozen.pb', tensor_label='tensorflow-data/conv_actions_labels.txt', show_all=False):
+    def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_actions_frozen.pb', tensor_label='tensorflow-data/conv_actions_labels.txt'):
         """
         Performs speech recognition on ``audio_data`` (an ``AudioData`` instance).
 
-        Tensor loaded from ``tensor_graph``.
+        The TensorFlow graph is loaded from the file path given by ``tensor_graph``. You can download a model here: http://download.tensorflow.org/models/speech_commands_v0.01.zip
 
-        Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the `raw API response <https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/#sessionless_methods>`__ as a JSON dictionary.
-
-        Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection.
+        The labels are loaded from the file path given by ``tensor_label`` (one label per line). Returns the label with the highest prediction score.
         """
         assert isinstance(audio_data, AudioData), "Data must be audio data"
         assert isinstance(tensor_graph, str), "``tensor_graph`` must be a string"
         assert isinstance(tensor_label, str), "``tensor_label`` must be a string"
 
-        import tensorflow as tf
-        from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
+        try:
+            import tensorflow as tf
+            from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
+        except ImportError:
+            raise RequestError("missing tensorflow module: ensure that tensorflow is set up correctly.")
 
         wav_data = audio_data.get_wav_data(
             convert_rate=16000, convert_width=2
@@ -1226,30 +1227,22 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
             graph_def = tf.GraphDef()
             graph_def.ParseFromString(f.read())
             tf.import_graph_def(graph_def, name='')
-
+        # load labels
         labels = [line.rstrip() for line in tf.gfile.GFile(tensor_label)]
 
         with tf.Session() as sess:
             input_layer_name = 'wav_data:0'
             output_layer_name = 'labels_softmax:0'
             num_top_predictions = 1
-    # Feed the audio data as input to the graph.
-    #   predictions  will contain a two-dimensional array, where one
-    #   dimension represents the input image count, and the other has
-    #   predictions per class
             softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name)
             predictions, = sess.run(softmax_tensor, {input_layer_name: wav_data})
 
-    # Sort to show labels in order of confidence
-            top_k = predictions.argsort()[-num_top_predictions:][::-1]
+            # Sort labels in order of confidence
+            top_k = predictions.argsort()[-1:][::-1]
             for node_id in top_k:
                 human_string = labels[node_id]
-                score = predictions[node_id]
-                print('%s (score = %.5f)' % (human_string, score))
                 return human_string
 
-
-
 def get_flac_converter():
     """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
     flac_converter = shutil_which("flac")  # check for installed version first

From 890f1f4c488cfc3f4933b31f217c0f32c8c39421 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Fri, 1 Dec 2017 12:25:36 +0100
Subject: [PATCH 3/9] remove unused imports and vars

---
 speech_recognition/__init__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 31887f4b..fb4f27f6 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1215,7 +1215,6 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
 
         try:
             import tensorflow as tf
-            from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
         except ImportError:
             raise RequestError("missing tensorflow module: ensure that tensorflow is set up correctly.")
 
@@ -1233,7 +1232,6 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
         with tf.Session() as sess:
             input_layer_name = 'wav_data:0'
             output_layer_name = 'labels_softmax:0'
-            num_top_predictions = 1
             softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name)
             predictions, = sess.run(softmax_tensor, {input_layer_name: wav_data})
 

From b4507e6c7f4c9fe321dcab722341c424661ccba5 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Fri, 1 Dec 2017 14:57:36 +0100
Subject: [PATCH 4/9] restore audio_ops import: it looks unused but is needed

---
 speech_recognition/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index fb4f27f6..86850305 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1215,6 +1215,7 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
 
         try:
             import tensorflow as tf
+            from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
         except ImportError:
             raise RequestError("missing tensorflow module: ensure that tensorflow is set up correctly.")
 

From f5e72fb1a994f8df9484edb2da1700dc053e4e19 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Tue, 5 Dec 2017 11:04:26 +0100
Subject: [PATCH 5/9] load graph once

---
 speech_recognition/__init__.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 86850305..a67fd249 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1201,6 +1201,9 @@ def recognize_ibm(self, audio_data, username, password, language="en-US", show_a
                     transcription.append(hypothesis["transcript"])
         return "\n".join(transcription)
 
+    lasttfgraph = ''
+    tflabels = None
+
     def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_actions_frozen.pb', tensor_label='tensorflow-data/conv_actions_labels.txt'):
         """
         Performs speech recognition on ``audio_data`` (an ``AudioData`` instance).
@@ -1219,16 +1222,20 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
         except ImportError:
             raise RequestError("missing tensorflow module: ensure that tensorflow is set up correctly.")
 
+        if not (tensor_graph == self.lasttfgraph):
+            self.lasttfgraph = tensor_graph
+            
+            # load graph
+            with tf.gfile.FastGFile(tensor_graph, 'rb') as f:
+                graph_def = tf.GraphDef()
+                graph_def.ParseFromString(f.read())
+                tf.import_graph_def(graph_def, name='')
+            # load labels
+            self.tflabels = [line.rstrip() for line in tf.gfile.GFile(tensor_label)]
+        
         wav_data = audio_data.get_wav_data(
             convert_rate=16000, convert_width=2
         )
-        # load graph
-        with tf.gfile.FastGFile(tensor_graph, 'rb') as f:
-            graph_def = tf.GraphDef()
-            graph_def.ParseFromString(f.read())
-            tf.import_graph_def(graph_def, name='')
-        # load labels
-        labels = [line.rstrip() for line in tf.gfile.GFile(tensor_label)]
 
         with tf.Session() as sess:
             input_layer_name = 'wav_data:0'
@@ -1239,7 +1246,7 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
             # Sort labels in order of confidence
             top_k = predictions.argsort()[-1:][::-1]
             for node_id in top_k:
-                human_string = labels[node_id]
+                human_string = self.tflabels[node_id]
                 return human_string
 
 def get_flac_converter():

From 06c9353e78a26cc071d9ce577aaea81b26793fd1 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Wed, 6 Dec 2017 15:38:07 +0100
Subject: [PATCH 6/9] removed spaces in blank lines

---
 speech_recognition/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index f7a1eeae..ca101dfe 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1200,7 +1200,7 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
 
         if not (tensor_graph == self.lasttfgraph):
             self.lasttfgraph = tensor_graph
-            
+
             # load graph
             with tf.gfile.FastGFile(tensor_graph, 'rb') as f:
                 graph_def = tf.GraphDef()
@@ -1208,7 +1208,7 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
                 tf.import_graph_def(graph_def, name='')
             # load labels
             self.tflabels = [line.rstrip() for line in tf.gfile.GFile(tensor_label)]
-        
+
         wav_data = audio_data.get_wav_data(
             convert_rate=16000, convert_width=2
         )
@@ -1225,6 +1225,7 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
                 human_string = self.tflabels[node_id]
                 return human_string
 
+
 def get_flac_converter():
     """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
     flac_converter = shutil_which("flac")  # check for installed version first

From 5b2f5208a30a8f9ef412fffa75e03152acbf7592 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Mon, 11 Dec 2017 11:18:27 +0100
Subject: [PATCH 7/9] no unused import

---
 speech_recognition/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index ca101dfe..fbf1ccfb 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1194,7 +1194,6 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
 
         try:
             import tensorflow as tf
-            from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
         except ImportError:
             raise RequestError("missing tensorflow module: ensure that tensorflow is set up correctly.")
 

From 513959cf85c5439ef1ceca279006abdc9044f759 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Mon, 11 Dec 2017 11:19:52 +0100
Subject: [PATCH 8/9] added example for tensorflow

---
 examples/tensorflow_commands.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 examples/tensorflow_commands.py

diff --git a/examples/tensorflow_commands.py b/examples/tensorflow_commands.py
new file mode 100644
index 00000000..5028f5d8
--- /dev/null
+++ b/examples/tensorflow_commands.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+import time
+import speech_recognition as sr
+from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
+
+# obtain audio from the microphone
+r = sr.Recognizer()
+m = sr.Microphone()
+
+with m as source:
+    r.adjust_for_ambient_noise(source)
+
+def callback(recognizer, audio):
+    try:
+        # You can download the data here: http://download.tensorflow.org/models/speech_commands_v0.01.zip
+        spoken = recognizer.recognize_tensorflow(audio, tensor_graph='speech_recognition/tensorflow-data/conv_actions_frozen.pb', tensor_label='speech_recognition/tensorflow-data/conv_actions_labels.txt')
+        print(spoken)
+    except sr.UnknownValueError:
+        print("Tensorflow could not understand audio")
+    except sr.RequestError as e:
+        print("Could not request results from Tensorflow service; {0}".format(e))
+
+stop_listening = r.listen_in_background(m, callback, phrase_time_limit=0.6)
+time.sleep(100)
\ No newline at end of file

From 0a7bf7cb2b3ad3af6635535670f2f5a639211fc3 Mon Sep 17 00:00:00 2001
From: Christian <chriamue@gmail.com>
Date: Mon, 11 Dec 2017 11:35:09 +0100
Subject: [PATCH 9/9] remove linter errors

---
 examples/tensorflow_commands.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/tensorflow_commands.py b/examples/tensorflow_commands.py
index 5028f5d8..50306c6d 100644
--- a/examples/tensorflow_commands.py
+++ b/examples/tensorflow_commands.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 import time
 import speech_recognition as sr
-from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
+from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio # noqa
 
 # obtain audio from the microphone
 r = sr.Recognizer()
@@ -10,6 +10,7 @@
 with m as source:
     r.adjust_for_ambient_noise(source)
 
+
 def callback(recognizer, audio):
     try:
         # You can download the data here: http://download.tensorflow.org/models/speech_commands_v0.01.zip
@@ -20,5 +21,6 @@ def callback(recognizer, audio):
     except sr.RequestError as e:
         print("Could not request results from Tensorflow service; {0}".format(e))
 
+
 stop_listening = r.listen_in_background(m, callback, phrase_time_limit=0.6)
-time.sleep(100)
\ No newline at end of file
+time.sleep(100)