In [None]:
%matplotlib inline

import time
import numpy as np
import pandas
import matplotlib.pyplot as plt
import matplotlib.cm as cm

def bargraph_results(stats, title=None, word_length=(8, 24), ylim=(0, 100), threshold=.75, filename=None):
    """Draw a 2x4 grid of panels summarising parsed statistics per password length.

    Panel 1 is a colour legend, panels 2-4 and 6-8 are stacked percentage bars
    (one bar per password length, one segment per bucket ``j`` in ``0..length``),
    and panel 5 plots, on a log scale, the counts that fall in buckets at or
    above ``threshold``.

    Parameters
    ----------
    stats : dict
        Parsed summary. ``'zeros'``, ``'ones'``, ``'combos'``, ``'distance'``,
        ``'direction'`` and ``'words'`` are indexed as ``table[length][j]``;
        ``'distlen'`` and ``'wordlen'`` are indexed as ``table[length]``.
        A bucket missing from a table is treated as a count of 0.
        NOTE(review): ``j`` presumably is a number of characters (that is what
        the legend panel labels it) - confirm against the keywalk output.
    title : str, optional
        Figure title.
    word_length : (int, int)
        Half-open range ``[first, end)`` of password lengths to plot.
    ylim : (float, float)
        Y limits applied to the six stacked percentage panels.
    threshold : float
        Buckets with ``j / length >= threshold`` are drawn in red and are the
        ones summed into the totals panel.
    filename : str, optional
        When given, the figure is saved there and closed; otherwise it is shown.

    Returns
    -------
    None. Prints a message and returns early when ``stats`` has no ``'zeros'``
    data.
    """
    # Validate before building anything, so an empty summary does not create
    # (and then immediately discard) a figure with eight subplots.
    if not stats.get('zeros'):
        print('No data found for zeros')
        return
    zeros = stats['zeros']
    ones = stats['ones']
    combos = stats['combos']
    distance = stats['distance']
    direction = stats['direction']
    words = stats['words']
    wordlen = stats['wordlen']
    distlen = stats['distlen']

    first_len, end_len = word_length[0], word_length[1]

    fig = plt.figure(figsize=(16, 12), dpi=100)
    if title:
        fig.suptitle(title, y=.92)
    fig.subplots_adjust(wspace=.3)
    # NOTE(review): this runs before any axes exist; it is kept in its original
    # position so the resulting layout is unchanged.
    fig.tight_layout()

    (ax_legend, ax_zeros, ax_ones, ax_combos,
     ax_totals, ax_dist, ax_dir, ax_words) = [fig.add_subplot(2, 4, k) for k in range(1, 9)]

    # Per-length totals, indexed directly by password length.
    total_zero = np.zeros(end_len)
    total_one = np.zeros(end_len)
    total_combo = np.zeros(end_len)
    total_distance = np.zeros(end_len)
    total_direction = np.zeros(end_len)
    total_wordlen = np.zeros(end_len)
    total_distlen = np.zeros(end_len)

    # Thin out the x ticks as the plotted range grows.
    span = end_len - first_len
    if span <= 10:
        tick_step = 1
    elif span <= 25:
        tick_step = 2
    else:
        tick_step = 5
    xticks = np.arange(first_len, end_len, tick_step)

    # (axis, counts table, per-length denominator, totals array or None, title).
    # The word-sequence panel has no totals array because the totals panel
    # does not plot a line for it.
    ratio_panels = [
        (ax_zeros, zeros, distlen, total_zero, 'Ratio of characters with distance=0'),
        (ax_ones, ones, distlen, total_one, 'Ratio of characters with distance=1'),
        (ax_combos, combos, distlen, total_combo, 'Ratio of characters with distance=0 or 1'),
        (ax_dist, distance, distlen, total_distance, 'Ratio of distances part of a sequence'),
        (ax_dir, direction, distlen, total_direction, 'Ratio of directions part of a sequence'),
        (ax_words, words, wordlen, None, 'Ratio of characters part of a sequence'),
    ]

    # iterate through password lengths
    for i in range(first_len, end_len):
        # Skip lengths without samples; length 0 is skipped as well because
        # the bucket ratio j / i is undefined for it.
        if i == 0 or distlen.get(i, 0) == 0 or wordlen.get(i, 0) == 0:
            continue

        # Buckets below the threshold are shaded grey, the rest red.
        n_red = sum(1 for j in range(i + 1) if j / i >= threshold)
        n_gray = (i + 1) - n_red
        colors = np.append(plt.cm.Greys(np.linspace(.2, .8, n_gray)),
                           plt.cm.Reds(np.linspace(.3, .9, n_red)), axis=0)

        # Reference sizes are recorded once per length. Adding them once per
        # above-threshold bucket would multiply them by the bucket count.
        total_distlen[i] = distlen[i]
        total_wordlen[i] = wordlen[i]

        bottoms = [0] * len(ratio_panels)
        legend_step = 100 / (i + 1)
        for j in range(i + 1):
            above_threshold = j / i >= threshold

            # Legend: i + 1 equal, non-overlapping bands filling 0..100.
            ax_legend.bar(i, legend_step, bottom=j * legend_step, color=colors[j], label=j)

            for k, (ax, table, sizes, totals, _) in enumerate(ratio_panels):
                count = _bucket_count(table, i, j)
                if above_threshold and totals is not None:
                    totals[i] += count
                percent = (count / sizes[i]) * 100
                ax.bar(i, percent, bottom=bottoms[k], color=colors[j], label=j)
                bottoms[k] += percent

    # Axis decoration does not depend on the length being drawn, so it is
    # applied once, after the bars, instead of on every loop iteration.
    ax_legend.set_ylabel('Color Scale for Number of Characters')
    ax_legend.set_ylim((0, 100))
    ax_legend.set_xticks(xticks)
    ax_legend.set_yticks([])
    ax_legend.set_xlabel('Password Length')
    ax_legend.set_title('Legend for %d%% threshold' % (threshold * 100))
    for ax, _, _, _, panel_title in ratio_panels:
        _style_ratio_axis(ax, panel_title, ylim, xticks)

    # Totals panel: two grey reference lines followed by five red count lines.
    lengths = np.arange(first_len, end_len)
    line_colors = np.append(plt.cm.Greys(np.linspace(.2, .8, 2)),
                            plt.cm.Reds(np.linspace(.3, .9, 5)), axis=0)
    series = [
        (total_distlen, 'Distance Vector Size'),
        (total_wordlen, 'Word Size'),
        (total_zero, 'Zeros'),
        (total_one, 'Ones'),
        (total_combo, 'Zeros or Ones'),
        (total_distance, 'Distance Seq'),
        (total_direction, 'Direction Seq'),
    ]
    for color, (values, label) in zip(line_colors, series):
        ax_totals.plot(lengths, values[first_len:end_len], ls='-', color=color, marker='o', label=label)
    ax_totals.legend()
    ax_totals.set_yscale('log')
    ax_totals.set_ylim(bottom=1)
    ax_totals.set_ylabel('Count')
    ax_totals.set_xticks(xticks)
    ax_totals.set_xlabel('Password Length')
    ax_totals.set_title('Total Counts above Threshold')

    if filename:
        fig.savefig(filename, bbox_inches='tight')
        # Close this specific figure rather than whichever one is current.
        plt.close(fig)
    else:
        plt.show()


def _bucket_count(table, length, bucket):
    """Return table[length][bucket], treating a missing entry as a count of 0."""
    return table.get(length, {}).get(bucket, 0)


def _style_ratio_axis(ax, panel_title, ylim, xticks):
    """Apply the labels, limits and ticks shared by the stacked percentage panels."""
    ax.set_ylabel('Percent')
    ax.set_ylim(ylim)
    ax.set_xticks(xticks)
    ax.set_xlabel('Password Length')
    ax.set_title(panel_title)


In [None]:
import os
import subprocess as sp

def parse_1d_stat(fields):
    """Convert ``value=count`` tokens into a ``{value: count}`` dict of ints.

    A later token with the same value overwrites an earlier one.
    """
    pairs = (token.split('=') for token in fields)
    return {int(value): int(count) for value, count in pairs}
    
def parse_2d_stat(fields):
    """Convert ``length:value=count`` tokens into ``(length, {value: count})``.

    The returned length is the one carried by the last token; all numbers are
    converted to int.
    """
    histogram = {}
    for token in fields:
        length_text, pair = token.split(':')
        value_text, count_text = pair.split('=')
        histogram[int(value_text)] = int(count_text)
    return int(length_text), histogram

def parse_summary(data):
    """Parse summary text into a dict keyed by dataset name.

    A line starting with a letter names the dataset that the following lines
    belong to. A line starting with a digit holds whitespace-separated tokens:
    if the first token has no ``:`` the line is parsed with ``parse_1d_stat``
    and stored as the dataset's value, otherwise it is parsed with
    ``parse_2d_stat`` and stored under the dataset keyed by its length.
    Empty lines and lines starting with any other character are ignored.
    """
    stats = {}
    for line in data.split('\n'):
        if not line:
            continue
        lead = line[0]
        if lead.isalpha():
            # Header line: the whole line is used as the dataset key.
            dataset = line
        elif lead.isdigit():
            tokens = line.split()
            if ':' in tokens[0]:
                length, histogram = parse_2d_stat(tokens)
                by_length = stats.get(dataset) or {}
                by_length[length] = histogram
                stats[dataset] = by_length
            else:
                stats[dataset] = parse_1d_stat(tokens)
    return stats

def create_summary_graphic(path, passmin=8, passmax=24, suffix='', ylim=(0, 100), graphonly=False, threshold=.75):
    """Run ``./keywalk`` on a wordlist and plot the parsed summary.

    Parameters
    ----------
    path : str
        Wordlist passed to keywalk; its basename is used in the figure title
        and in the output file name.
    passmin, passmax : int
        Passed to keywalk as ``-n`` / ``-x`` and used as the plotted length
        range.
    suffix : str
        Appended to the basename in the output file name.
    ylim : (float, float)
        Y limits forwarded to ``bargraph_results``.
    graphonly : bool
        When true the figure is shown instead of being saved to
        ``notebooks/images/<basename><suffix>.png``.
    threshold : float
        Threshold forwarded to ``bargraph_results``.

    Raises
    ------
    subprocess.CalledProcessError
        If keywalk exits with a non-zero status.
    """
    basename = os.path.basename(path)
    # Build the argument list directly: formatting one string and splitting it
    # on whitespace would break a path that contains spaces into several args.
    cmd = ['./keywalk', '-n', '%d' % passmin, '-x', '%d' % passmax, '-w', '-s', str(path)]
    data = sp.check_output(cmd)
    s = parse_summary(data.decode('utf-8'))

    filename = None if graphonly else 'notebooks/images/%s%s.png' % (basename, suffix)
    bargraph_results(s, word_length=(passmin, passmax), title='%s Data' % (basename),
                     ylim=ylim, threshold=threshold, filename=filename)

In [None]:
# One entry per figure, in rendering order: (wordlist path, keyword overrides).
# Entries with a '_zoom' suffix redraw the same wordlist with a narrower y range.
summary_jobs = [
    ('notebooks/wordlists/hashcat_kwprocessor.txt', dict(passmax=17)),
    ('notebooks/wordlists/english.txt', dict(passmax=24)),
    ('notebooks/wordlists/english.txt', dict(passmax=24, ylim=(94, 100), suffix='_zoom')),
    ('notebooks/wordlists/pwqgen.txt', dict(passmax=40)),
    ('notebooks/wordlists/pwqgen.txt', dict(passmax=40, ylim=(95, 100), suffix='_zoom')),
    ('notebooks/wordlists/keepassxc.txt', dict(passmin=10, passmax=20)),
    ('notebooks/wordlists/keepassxc.txt', dict(passmin=10, passmax=20, ylim=(99, 100), suffix='_zoom')),
    ('notebooks/wordlists/rockyou.txt', dict(passmax=50)),
    ('notebooks/wordlists/rockyou.txt', dict(passmax=30, ylim=(93, 100), suffix='_zoom')),
]
for wordlist_path, overrides in summary_jobs:
    create_summary_graphic(wordlist_path, **overrides)

## Processing rockyou2021.txt can't be done in real time, so just parse the existing results

In [None]:
import os
import subprocess as sp

passmin = 8
passmax = 20
ylim = (0, 100)
path = 'notebooks/wordlists/results/rockyou2021.summary'
basename = 'rockyou2021.txt'

# Read the precomputed summary directly. Spawning `cat` through a
# whitespace-split command needs a Unix shell tool and breaks on paths that
# contain spaces.
with open(path, 'rb') as summary_file:
    data = summary_file.read()
s = parse_summary(data.decode('utf-8'))

# Full-range figure first, then the same data with a narrower y range.
bargraph_results(s, word_length=(passmin, passmax), title='%s Data' % (basename), ylim=ylim, threshold=.75, filename='notebooks/images/%s%s.png' % (basename, ''))
bargraph_results(s, word_length=(passmin, passmax), title='%s Data' % (basename), ylim=(86, 100), threshold=.75, filename='notebooks/images/%s%s.png' % (basename, '_zoom'))
