Throw warnings for misconfigurations

carlini · Oct 15, 2018 · fc322d0 · fc322d0
1 parent 9f81cb5
commit fc322d0
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 8 deletions.
diff --git a/README b/README
@@ -2,22 +2,17 @@ This is the code corresponding to the paper
 "Audio Adversarial Examples: Targeted Attacks on Speech-to-Text"
 Nicholas Carlini and David Wagner
 
-This release of the code is preliminary; it includes the CTC-based attack with a
-batch size of 1. This means it requires a slightly larger distortion, and is
-slower to run than the algorithm that is presented in the paper. Soon (TM) I
-will add these two improvements to this codebase.
-
 To generate adversarial examples for your own files, follow the process below
 and modify the arguments to attack.py. Ensure that the file is sampled at
-16KHz. You may want to modify the number of iterations that the attack algorithm
-is allowed to run.
+16KHz and uses signed 16-bit ints as the data type. You may want to modify
+the number of iterations that the attack algorithm is allowed to run.
 
 
 Instructions for basic use:
 
 1. Install the dependencies
 
-pip3 install --user numpy scipy tensorflow-gpu pandas python_speech_features
+pip3 install --user numpy scipy tensorflow-gpu==1.8.0 pandas python_speech_features
 
 2. Clone the Mozilla DeepSpeech repository into a folder called DeepSpeech:
 
@@ -50,3 +45,12 @@ pip3 install deepspeech-gpu
 8. Classify the generated phrase
 
 deepspeech models/output_graph.pb adversarial.wav models/alphabet.txt
+
+
+---
+
+WARNING: THE CODE TO HOOK INTO DEEPSPEECH IS UGLY. This means I require a
+very specific version of DeepSpeech (0.1.1) and TensorFlow (1.8.0) using
+python 3.5. I can't promise it won't set your computer on fire if you use
+any other versioning setup. (In particular, it WILL NOT work with
+DeepSpeech 0.2.0+, and WILL NOT work with TensorFlow 1.10+.)
diff --git a/attack.py b/attack.py
@@ -342,6 +342,7 @@ def main():
         for i in range(len(args.input)):
             fs, audio = wav.read(args.input[i])
             assert fs == 16000
+            assert audio.dtype == np.int16
             print('source dB', 20*np.log10(np.max(np.abs(audio))))
             audios.append(list(audio))
             lengths.append(len(audio))

diff --git a/make_checkpoint.py b/make_checkpoint.py
@@ -9,12 +9,33 @@
 import numpy as np
 import tensorflow as tf
 
+if tf.__version__ != "1.8.0":  # warn only; the README requires exactly TensorFlow 1.8.0
+    print("-"*80)
+    print("-"*80)
+    print("WARNING")
+    print("It looks like you have the wrong version of TensorFlow installed.")
+    print("Please ensure you are using TensorFlow 1.8.0")
+    print("Everything may or may not work otherwise.")
+    print("-"*80)
+    print("-"*80)
+
 import sys
 sys.path.append("DeepSpeech")
 
 from util.audio import audiofile_to_input_vector
 from util.text import ctc_label_dense_to_sparse
 
+import binascii  # crc32 below fingerprints DeepSpeech/DeepSpeech.py
+if binascii.crc32(open("DeepSpeech/DeepSpeech.py","rb").read()) != 1142193310:  # NOTE(review): constant is presumably the CRC32 of the 0.1.1 file; the opened handle is never explicitly closed
+    print("-"*80)
+    print("-"*80)
+    print("WARNING")  # mismatch only prints a warning; execution continues
+    print("It looks like you have the wrong version of DeepSpeech installed.")
+    print("Please ensure you are using DeepSpeech 0.1.1")
+    print("Everything may or may not work otherwise.")
+    print("-"*80)
+    print("-"*80)
+
 # Okay, so this is ugly. We don't want DeepSpeech to crash
 # when we haven't built the language model.
 # So we're just going to monkeypatch TF and make it a no-op.

diff --git a/sample.wav b/sample.wav