Adding pre-built networks to releases and updating README

git-svn-id: https://cups.cs.cmu.edu/chilisvn/neural-network-cracking/trunk@310 2726a97d-1bfb-4365-8d96-b87bb51a8088
cupslab · Mar 14, 2016 · 5f95858 · 5f95858
1 parent 5f269ba
commit 5f95858
Show file tree

Hide file tree

Showing 12 changed files with 355 additions and 3 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,7 @@
 *.pyc
 __pycache__
 build
-generator.so
+generator.*so
 pwd_guess_ctypes.c
 js/examples/*.json
 js/node_modules

diff --git a/README b/README
@@ -615,3 +615,33 @@ Example guessing configuration for a complex policy
         "save_model_versioned" : true
     }
 }
+
+
+Pretrained Usage
+----------------
+
+
+Enumerating passwords
+
+Edit enumerate_config.json to replace "g1_enumerate.tsv" in the
+"enumerate_ofile" key with the output file you would like.
+
+If you want to guess more passwords, you should change the value of
+"lower_probability_threshold" to something lower, e.g. 1e-9.
+
+
+Monte Carlo Simulation
+
+Edit guess_len8_config.json to replace "g1_len8.tsv" in the "enumerate_ofile"
+key with the output file you would like. Edit "<input_file>" in the
+"password_test_fname" key to set the password input file. This key should
+point to a line-delimited password file in which each line is one password.
+
+
+Command:
+
+python3 <path_to_root>/pwd_guess.py --config-args <config_file.json>
+
+e.g.:
+
+python3 ../pwd_guess.py --config-args guess_len8_config.json
diff --git a/pre_built_networks/arch.json b/pre_built_networks/arch.json
@@ -0,0 +1 @@
+{"name": "Sequential", "loss": "categorical_crossentropy", "class_mode": "categorical", "theano_mode": null, "layers": [{"input_dim": null, "activation": "tanh", "return_sequences": false, "input_shape": [10, 51], "inner_init": "orthogonal", "input_length": null, "truncate_gradient": -1, "inner_activation": "sigmoid", "name": "JZS2", "go_backwards": true, "trainable": false, "output_dim": 1000, "init": "glorot_uniform"}, {"name": "RepeatVector", "n": 1, "trainable": false}, {"input_dim": null, "return_sequences": true, "truncate_gradient": -1, "inner_init": "orthogonal", "input_length": null, "init": "glorot_uniform", "inner_activation": "sigmoid", "name": "JZS2", "trainable": false, "activation": "tanh", "output_dim": 1000, "go_backwards": true}, {"input_dim": null, "return_sequences": true, "truncate_gradient": -1, "inner_init": "orthogonal", "input_length": null, "init": "glorot_uniform", "inner_activation": "sigmoid", "name": "JZS2", "trainable": false, "activation": "tanh", "output_dim": 1000, "go_backwards": true}, {"name": "TimeDistributedDense", "W_constraint": null, "input_dim": null, "activation": "linear", "W_regularizer": null, "activity_regularizer": null, "b_regularizer": null, "init": "glorot_uniform", "b_constraint": null, "output_dim": 512, "input_length": null}, {"name": "TimeDistributedDense", "W_constraint": null, "input_dim": null, "activation": "linear", "W_regularizer": null, "activity_regularizer": null, "b_regularizer": null, "init": "glorot_uniform", "b_constraint": null, "output_dim": 51, "input_length": null}, {"name": "Activation", "beta": 0.1, "target": 0, "activation": "softmax"}], "optimizer": {"name": "Adam", "lr": 0.0010000000474974513, "beta_2": 0.999, "beta_1": 0.9, "epsilon": 1e-08}}
diff --git a/pre_built_networks/enumerate_config.json b/pre_built_networks/enumerate_config.json
@@ -0,0 +1,32 @@
+{
+    "args" : {
+        "arch_file" : "arch.json",
+        "weight_file" : "nn_len8.h5",
+        "log_file" : "guess_log.txt",
+        "enumerate_ofile" : "g1_enumerate.tsv"
+    },
+    "config" : {
+        "training_chunk" : 10000,
+        "min_len" : 8,
+        "max_len" : 30,
+        "context_length" : 10,
+        "chunk_print_interval" : 100,
+        "layers" : 2,
+        "hidden_size" : 1000,
+        "model_type" : "JZS2",
+        "simulated_frequency_optimization" : true,
+        "intermediate_fname" : "intermediate_data.sqlite",
+        "randomize_training_order" : true,
+        "uppercase_character_optimization" : true,
+        "rare_character_optimization" : true,
+        "rare_character_optimization_guessing" : true,
+        "parallel_guessing" : false,
+        "lower_probability_threshold" : 1e-8,
+        "chunk_size_guesser" : 20000,
+        "guess_serialization_method" : "human",
+        "random_walk_seed_num" : 100000,
+        "max_gpu_prediction_size" : 20000,
+        "random_walk_seed_iterations" : 1,
+        "no_end_word_cache" : true
+    }
+}
diff --git a/pre_built_networks/fine_tune_len8_config.json b/pre_built_networks/fine_tune_len8_config.json
@@ -0,0 +1,55 @@
+{
+    "args" : {
+        "arch_file" : "arch.json",
+        "weight_file" : "all_trained_len8.h5",
+        "log_file" : "fine_tune_long_train_log.txt",
+        "pwd_file" : [
+            "../leaks/all_combined_long_feeder_v2.utf8.shuffled.txt"
+        ],
+        "pwd_format" : [
+            "list"
+        ],
+        "retrain" : true,
+        "train_secondary_only" : true
+    },
+    "config" : {
+        "training_chunk" : 500,
+        "training_main_memory_chunk": 10000000,
+        "min_len" : 4,
+        "max_len" : 30,
+        "context_length" : 10,
+        "chunk_print_interval" : 100,
+        "layers" : 2,
+        "hidden_size" : 1000,
+        "generations" : 3,
+        "training_accuracy_threshold" : -1,
+        "train_test_ratio" : 20,
+        "model_type" : "JZS2",
+        "tokenize_words" : false,
+        "bidirectional_rnn" : false,
+        "train_backwards" : true,
+        "dense_layers" : 1,
+        "dense_hidden_size" : 512,
+        "secondary_training" : true,
+        "secondary_train_sets" : {
+            "pwd_file" : [
+                "../leaks/all_combined_long_feeder_v2.len8.shuffled.txt"
+            ],
+            "pwd_format" : [
+                "list"
+            ]
+        },
+        "simulated_frequency_optimization" : false,
+        "randomize_training_order" : true,
+        "uppercase_character_optimization" : true,
+        "rare_character_optimization" : true,
+        "rare_character_optimization_guessing" : true,
+        "chunk_size_guesser" : 40000,
+        "random_walk_seed_num" : 100000,
+        "max_gpu_prediction_size" : 20000,
+        "random_walk_seed_iterations" : 1,
+        "no_end_word_cache" : true,
+        "intermediate_fname" : "intermediate_data.sqlite",
+        "save_model_versioned" : true
+    }
+}
diff --git a/pre_built_networks/g1_enumerate.tsv b/pre_built_networks/g1_enumerate.tsv
diff --git a/pre_built_networks/guess_len8_config.json b/pre_built_networks/guess_len8_config.json
@@ -0,0 +1,51 @@
+{
+    "args" : {
+        "arch_file" : "arch.json",
+        "weight_file" : "nn_len8.h5",
+        "log_file" : "guess_log.txt",
+        "enumerate_ofile": "g1_len8.tsv"
+    },
+    "config" : {
+        "training_chunk" : 1000,
+        "training_main_memory_chunk": 10000000,
+        "min_len" : 8,
+        "max_len" : 30,
+        "context_length" : 10,
+        "chunk_print_interval" : 100,
+        "layers" : 2,
+        "hidden_size" : 1000,
+        "generations" : 3,
+        "training_accuracy_threshold" : -1,
+        "train_test_ratio" : 20,
+        "model_type" : "JZS2",
+        "tokenize_words" : false,
+        "train_backwards" : true,
+        "dense_layers" : 1,
+        "dense_hidden_size" : 512,
+        "secondary_training" : true,
+        "secondary_train_sets" : {
+            "pwd_file" : [
+                "../leaks/all_combined_long_v2.txt"
+            ],
+            "pwd_format" : [
+                "list"
+            ]
+        },
+        "simulated_frequency_optimization" : false,
+        "randomize_training_order" : true,
+        "uppercase_character_optimization" : true,
+        "rare_character_optimization" : true,
+        "rare_character_optimization_guessing" : true,
+        "parallel_guessing" : false,
+        "lower_probability_threshold" : 1e-7,
+        "chunk_size_guesser" : 40000,
+        "guess_serialization_method" : "delamico_random_walk",
+        "password_test_fname" : "<input_file>",
+        "random_walk_seed_num" : 100000,
+        "max_gpu_prediction_size" : 10000,
+        "random_walk_seed_iterations" : 50,
+        "no_end_word_cache" : true,
+        "intermediate_fname" : "intermediate_data.sqlite",
+        "save_model_versioned" : true
+    }
+}
diff --git a/pre_built_networks/guess_log.txt b/pre_built_networks/guess_log.txt
@@ -0,0 +1,132 @@
+2016-03-14 10:43:57,892 INFO: Beginning...
+2016-03-14 10:43:57,893 INFO: Arguments: {
+    "args": null,
+    "log_level": "info",
+    "pwd_file": null,
+    "weight_file": "nn_len8.h5",
+    "stats_only": false,
+    "config": null,
+    "train_secondary_only": false,
+    "profile": null,
+    "enumerate_ofile": "g1_enumerate.tsv",
+    "pwd_format": "list",
+    "retrain": false,
+    "log_file": "guess_log.txt",
+    "arch_file": "arch.json",
+    "config_args": "enumerate_config.json",
+    "version": false,
+    "forked": null,
+    "calc_probability_only": false,
+    "pre_processing_only": false
+}
+2016-03-14 10:43:57,899 INFO: Version: 5f269ba9e2faac9f63528fa2dd991d9a37c6bd5d
+2016-03-14 10:43:57,899 INFO: Configuration: {
+    "random_walk_seed_iterations": 1,
+    "guess_serialization_method": "human",
+    "random_walk_confidence_bound_z_value": 1.96,
+    "parallel_guessing": false,
+    "trie_intermediate_storage": ":memory:",
+    "relevel_not_matching_passwords": true,
+    "enforced_policy": "basic",
+    "uppercase_character_optimization": true,
+    "cpu_limit": 8,
+    "trie_serializer_type": "reg",
+    "freeze_feature_layers_during_secondary_training": true,
+    "model_optimizer": "adam",
+    "layers": 2,
+    "most_common_token_count": 2000,
+    "trie_serializer_encoding": "utf8",
+    "use_mmap": true,
+    "trie_implementation": null,
+    "secondary_training_save_freqs": false,
+    "dropouts": false,
+    "secondary_training": false,
+    "random_walk_confidence_percent": 5,
+    "context_length": 10,
+    "no_end_word_cache": true,
+    "chunk_print_interval": 100,
+    "dropout_ratio": 0.25,
+    "training_chunk": 10000,
+    "rare_character_optimization_guessing": true,
+    "prob_striation_step": 0.05,
+    "simulated_frequency_optimization": true,
+    "intermediate_fname": "intermediate_data.sqlite",
+    "fuzzy_training_smoothing": false,
+    "max_gpu_prediction_size": 20000,
+    "secondary_train_sets": null,
+    "chunk_size_guesser": 20000,
+    "char_bag": "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789~!@#$%^&*(),.<>/?'\"{}[]\\|-_=+;: `\n",
+    "lower_probability_threshold": 1e-08,
+    "max_len": 30,
+    "dense_layers": 0,
+    "train_backwards": false,
+    "fork_length": 2,
+    "guesser_class": null,
+    "train_test_ratio": 10,
+    "bidirectional_rnn": false,
+    "compute_stats": false,
+    "dense_hidden_size": 128,
+    "preprocess_trie_on_disk_buff_size": 100000,
+    "rare_character_lowest_threshold": 20,
+    "hidden_size": 1000,
+    "guessing_secondary_training": false,
+    "tokenize_guessing": true,
+    "cleanup_guesser_files": true,
+    "probability_striation": false,
+    "pwd_list_weights": {},
+    "rare_character_optimization": true,
+    "training_accuracy_threshold": 1e-10,
+    "save_model_versioned": false,
+    "deep_model": false,
+    "model_type": "JZS2",
+    "randomize_training_order": true,
+    "min_len": 8,
+    "random_walk_upper_bound": 10,
+    "final_schedule_ratio": 0.05,
+    "tokenize_words": false,
+    "trie_fname": ":memory:",
+    "toc_chunk_size": 1000,
+    "scheduled_sampling": false,
+    "freq_format": "hex",
+    "password_test_fname": null,
+    "save_always": true,
+    "generations": 20,
+    "training_main_memory_chunksize": 1000000,
+    "guesser_intermediate_directory": "guesser_files",
+    "random_walk_seed_num": 100000,
+    "preprocess_trie_on_disk": false,
+    "gpu_fork_bias": 2
+}
+2016-03-14 10:43:57,899 INFO: Loading model...
+2016-03-14 10:43:57,917 INFO: Loading model architecture
+2016-03-14 10:43:57,918 CRITICAL: Uncaught exception
+Traceback (most recent call last):
+  File "../pwd_guess.py", line 3199, in <module>
+    main_bundle()
+  File "../pwd_guess.py", line 3195, in <lambda>
+    main_bundle = lambda: main(args)
+  File "../pwd_guess.py", line 3134, in main
+    guess(args, config)
+  File "../pwd_guess.py", line 3077, in guess
+    .add_file(args['enumerate_ofile'])).build()
+  File "../pwd_guess.py", line 2740, in build
+    model_or_serializer = self.serializer.load_model()
+  File "../pwd_guess.py", line 770, in load_model
+    model = self.model_creator_from_json(arch_data)
+  File "/usr/local/lib/python3.5/site-packages/Keras-0.2.0-py3.5.egg/keras/models.py", line 116, in model_from_json
+    return model_from_config(config, custom_layers=custom_layers)
+  File "/usr/local/lib/python3.5/site-packages/Keras-0.2.0-py3.5.egg/keras/models.py", line 125, in model_from_config
+    model = container_from_config(config, custom_layers=custom_layers)
+  File "/usr/local/lib/python3.5/site-packages/Keras-0.2.0-py3.5.egg/keras/utils/layer_utils.py", line 38, in container_from_config
+    init_layer = container_from_config(layer, custom_layers=custom_layers)
+  File "/usr/local/lib/python3.5/site-packages/Keras-0.2.0-py3.5.egg/keras/utils/layer_utils.py", line 85, in container_from_config
+    base_layer = get_layer(name, layer_dict, custom_layers=custom_layers)
+  File "/usr/local/lib/python3.5/site-packages/Keras-0.2.0-py3.5.egg/keras/utils/layer_utils.py", line 131, in get_layer
+    return get_from_module(identifier, globals(), 'layer', instantiate=True, kwargs=kwargs)
+  File "/usr/local/lib/python3.5/site-packages/Keras-0.2.0-py3.5.egg/keras/utils/generic_utils.py", line 16, in get_from_module
+    return res(**kwargs)
+  File "/usr/local/lib/python3.5/site-packages/Keras-0.2.0-py3.5.egg/keras/layers/recurrent.py", line 627, in __init__
+    super(JZS2, self).__init__(**kwargs)
+  File "/usr/local/lib/python3.5/site-packages/Keras-0.2.0-py3.5.egg/keras/layers/core.py", line 23, in __init__
+    assert kwarg in {'input_shape'}, "Keyword argument not understood: " + kwarg
+AssertionError: Keyword argument not understood: trainable
diff --git a/pre_built_networks/intermediate_data.sqlite b/pre_built_networks/intermediate_data.sqlite
diff --git a/pre_built_networks/nn_len8.h5 b/pre_built_networks/nn_len8.h5
diff --git a/pre_built_networks/train_config.json b/pre_built_networks/train_config.json
@@ -0,0 +1,49 @@
+{
+    "args" : {
+        "arch_file" : "arch.json",
+        "weight_file" : "weight.h5",
+        "log_file" : "train_log.txt",
+        "pwd_file" : [
+            "../leaks/all_combined_long_feeder_v2.utf8.shuffled.txt"
+        ],
+        "pwd_format" : [
+            "list"
+        ]
+    },
+    "config" : {
+        "training_chunk" : 1000,
+        "training_main_memory_chunk": 10000000,
+        "min_len" : 8,
+        "max_len" : 30,
+        "context_length" : 10,
+        "chunk_print_interval" : 100,
+        "layers" : 2,
+        "hidden_size" : 1000,
+        "generations" : 3,
+        "training_accuracy_threshold" : -1,
+        "train_test_ratio" : 20,
+        "model_type" : "JZS2",
+        "train_backwards" : true,
+
+        "dense_layers" : 1,
+        "dense_hidden_size" : 512,
+        "secondary_training" : true,
+        "secondary_train_sets" : {
+            "pwd_file" : [
+                "../leaks/all_combined_long_v2.txt"
+            ],
+            "pwd_format" : [
+                "list"
+            ]
+        },
+
+        "simulated_frequency_optimization" : false,
+        "randomize_training_order" : true,
+        "uppercase_character_optimization" : true,
+        "rare_character_optimization" : true,
+        "rare_character_optimization_guessing" : true,
+        "no_end_word_cache" : true,
+        "intermediate_fname" : "intermediate_data.sqlite",
+        "save_model_versioned" : true
+    }
+}
diff --git a/pwd_guess.py b/pwd_guess.py
@@ -10,8 +10,10 @@
 if 'KERAS_PATH' in os.environ:
     sys.path.insert(0, os.environ['KERAS_PATH'])
 import keras
-sys.stderr.write('Using keras version %s\n' % (keras.__version__))
-
+try:
+    sys.stderr.write('Using keras version %s\n' % (keras.__version__))
+except AttributeError as e:
+    pass
 
 from keras.models import Sequential, slice_X, model_from_json
 from keras.layers.core import Activation, Dense, RepeatVector, TimeDistributedDense, Dropout, Masking