# Coupon Code Generator

We're implementing a coupon code redemption system at work. Since I was in a hurry, I purchased a block of codes from a commercial service, but I thought recreating it would be a fun programming exercise.

In [1]:
import string
import random
import pandas as pd
import re

def generate(pattern, disallow='O0ILil'):
    '''
    Generate a single coupon code based on a given pattern.

    The pattern is processed character-by-character, so new placeholder
    types can be added by extending the lookup table below.
    X = Uppercase (A, B, C, ...)
    x = Lowercase (a, b, c, ...)
    9 = Digits (0, 1, 2, ...)
    A = Uppercase + Digits (A, B, .. + 0, 1 ..)
    a = Lowercase + Digits (a, b, .. + 0, 1 ..)
    # = Special characters (@, #, $, %, ^, &, *)
    v = Vowels (a, e, i, ...)
    V = Uppercase Vowels (A, E, I, ...)
    c = Consonants (b, c, d, ...)
    C = Uppercase Consonants (B, C, D, ...)
    ? = Random from uppercase, lowercase, digits and special characters
    - = -
    / = Escape character: the next pattern character is emitted literally
    Any character not on this list will be used as-is.

    pattern:  the pattern string described above.
    disallow: characters that must never be picked for a placeholder
              (such as the always-confusing 0/O). Literal and escaped
              pattern characters are NOT filtered.

    Returns the generated code as a string. A trailing, unpaired '/'
    produces no output character.

    Raises IndexError (from random.choice) if a placeholder is used whose
    character pool is empty after removing the disallowed characters.
    '''
    def allowed(chars):
        # Drop every disallowed character from a candidate pool.
        return ''.join(ch for ch in chars if ch not in disallow)

    upper = string.ascii_uppercase
    lower = string.ascii_lowercase
    digits = string.digits
    special = '@#$%^&*'

    # Every pool is filtered, including the special characters, so that
    # '#' honours `disallow` exactly the way '?' does.
    pools = {
        'X': allowed(upper),
        'x': allowed(lower),
        '9': allowed(digits),
        'A': allowed(upper + digits),
        'a': allowed(lower + digits),
        '#': allowed(special),
        'v': allowed('aeiou'),
        'V': allowed('AEIOU'),
        'c': allowed('bcdfghjklmnpqrstvwxz'),
        'C': allowed('BCDFGHJKLMNPQRSTVWXZ'),
        '?': allowed(upper + lower + digits + special),
    }

    pieces = []
    escaped = False
    for option in pattern:
        if escaped:
            # Character following '/' is copied verbatim.
            pieces.append(option)
            escaped = False
        elif option == '/':
            escaped = True
        elif option in pools:
            pieces.append(random.choice(pools[option]))
        else:
            # Anything else (including '-') is a literal.
            pieces.append(option)
    return ''.join(pieces)

# Smoke test: print one code from a pattern that uses every placeholder,
# two escaped characters (/9 and /C) and a literal suffix.
print(generate('Xx9A-a#vV-cC?/9/C-1234'))

Ub97-y%uU-rVh9C-1234


In [2]:
def load(path='codetest.csv'):
    '''
    Read the CSV file at `path` and return its contents as a pandas DataFrame.
    '''
    return pd.read_csv(path)

In [3]:
def generate_and_save_naive(pattern, disallow='O0ILil', quantity=100, path='codes.csv'):
    '''
    Generate many unique codes and save them to a file.
    This naive implementation runs in O(n^2), since every element
    has to check the whole list for membership as it is added.

    pattern:  pattern string passed to generate().
    disallow: characters passed to generate() as its disallow set.
    quantity: number of unique codes to produce. Numeric strings such as
              '100' are accepted and converted with int().
    path:     CSV file the codes are written to (single 'codes' column,
              plus the DataFrame index).

    NOTE: the loop only ends once `quantity` distinct codes exist, so a
    pattern that cannot produce that many distinct codes never returns.
    '''
    # The comparison below needs a number; a string quantity would raise
    # TypeError on Python 3.
    quantity = int(quantity)

    data = []
    while len(data) < quantity:
        code = generate(pattern, disallow)
        # Linear membership scan - deliberately naive.
        if code not in data:
            data.append(code)

    # If we're using a subset of the codes, we might not want them
    # to share several characters at the start. TODO: make optional
    random.shuffle(data)

    df = pd.DataFrame({'codes': data}, columns=['codes'])
    df.to_csv(path)

In [4]:
def generate_and_save_lists(pattern, disallow='O0ILil', quantity=100, path='codes.csv'):
    '''
    Generate many unique codes and save them to a file.
    This implementation runs in roughly O(n), with the last little bit in O(n^2)

    pattern:  pattern string passed to generate().
    disallow: characters passed to generate() as its disallow set.
    quantity: number of unique codes to produce. Numeric strings such as
              '100' are accepted and converted with int().
    path:     CSV file the codes are written to (single 'codes' column,
              plus the DataFrame index).

    NOTE: the top-up loop only ends once `quantity` distinct codes exist,
    so a pattern that cannot produce that many distinct codes never returns.
    '''
    # range() and the comparison below need a number, not a string.
    quantity = int(quantity)

    # Generate codes, sort them, strip out duplicates
    data = [generate(pattern, disallow) for _ in range(quantity)]
    data.sort()
    # After sorting, duplicates are adjacent: keep an element when it differs
    # from its successor. The final element has no successor and is always
    # kept (it is the last copy of its value).
    last = len(data) - 1
    data = [x for ind, x in enumerate(data) if ind == last or x != data[ind + 1]]

    # Generate the last few codes if we had to remove any duplicates
    while len(data) < quantity:
        code = generate(pattern, disallow)
        if code not in data:
            data.append(code)

    # If we're using a subset of the codes, we might not want them
    # to share several characters at the start. TODO: make optional
    random.shuffle(data)

    df = pd.DataFrame({'codes': data}, columns=['codes'])
    df.to_csv(path)
    


In [1]:
def generate_and_save(pattern, disallow='O0ILil', quantity=100, path='codes.csv'):
    '''
    Generate many unique codes and save them to a file.
    This implementation runs as a slightly faster O(n), because sets natively support
    adding items without duplication, where we had to strip out the duplicates with lists.

    pattern:  pattern string passed to generate().
    disallow: characters passed to generate() as its disallow set.
    quantity: number of unique codes to produce. Numeric strings such as
              '100' are accepted and converted with int().
    path:     CSV file the codes are written to (single 'codes' column,
              no index).

    If more than quantity + 100 codes have been generated without reaching
    `quantity` unique ones, a message is printed and the codes collected so
    far are written anyway.

    TODO: Generate once and break up into sets of specified size (default 50000)
    '''
    # The comparisons below need a number; a string quantity would raise
    # TypeError on Python 3.
    quantity = int(quantity)

    # Generate codes
    # Added in an artificial termination in case the code space is small
    codes = set()
    count = 0
    while len(codes) < quantity:
        codes.add(generate(pattern, disallow))
        count += 1
        if count > quantity + 100:
            print("Unable to generate enough codes, aborting. Try a longer code pattern.")
            break

    df = pd.DataFrame({'codes': list(codes)}, columns=['codes'])
    df.to_csv(path, index=False)

In [6]:
# Time the three implementations with the same pattern and quantity.
# Every run writes to the same file, codestest.csv.
%timeit generate_and_save_naive('Xx9A-a#vV-cC?/9/C-1234', quantity=50000, path='codestest.csv')
%timeit generate_and_save_lists('Xx9A-a#vV-cC?/9/C-1234', quantity=50000, path='codestest.csv')
%timeit generate_and_save('Xx9A-a#vV-cC?/9/C-1234', quantity=50000, path='codestest.csv')

1 loop, best of 3: 18.8 s per loop
1 loop, best of 3: 1.51 s per loop
1 loop, best of 3: 1.47 s per loop


In [7]:
def validate(code, pattern, disallow='O0ILil'):
    '''
    Given a code and the rules for code generation, determine whether the code is valid.

    code:     the candidate code. Anything that is not a string (for
              example a NaN read from an empty CSV cell) is invalid.
    pattern:  the generation pattern; placeholders become character
              classes, '/' escapes the next character, and every other
              character must appear literally.
    disallow: characters that may not appear anywhere in the code.

    Returns the string 'True' or 'False' (strings, not booleans).
    The whole code must match the pattern; extra trailing characters
    make it invalid.

    TODO: Let this loop through a list of codes without generating the regex over and over.
    '''
    if not isinstance(code, str):
        return 'False'

    # Check for disallowed characters here rather than make the regex crazy
    if any(char in code for char in disallow):
        return 'False'

    # Regex fragment for each placeholder understood by the generator.
    classes = {
        'X': '[A-Z]',
        'x': '[a-z]',
        '9': '[0-9]',
        'A': '[A-Z0-9]',
        'a': '[a-z0-9]',
        '#': '[@#$%^&*]',
        'v': '[aeiou]',
        'V': '[AEIOU]',
        'c': '[bcdfghjklmnpqrstvwxz]',
        'C': '[BCDFGHJKLMNPQRSTVWXZ]',
        '?': r'\S',
    }

    parts = []
    escape = False
    for char in pattern:
        if escape:
            # Escaped characters are literals, whatever they are.
            parts.append(re.escape(char))
            escape = False
        elif char == '/':
            escape = True
        elif char in classes:
            parts.append(classes[char])
        else:
            # Literal character: escape it so regex metacharacters such
            # as '.', '+' or '(' only match themselves.
            parts.append(re.escape(char))

    # fullmatch: the pattern has to account for the entire code.
    if re.fullmatch(''.join(parts), code):
        return 'True'
    return 'False'

In [8]:
def validate_from_file(pattern, path='codetest.csv', disallow='O0ILil', output='validated.csv'):
    '''
    Validate every code in a CSV file against a pattern.

    pattern:  generation pattern handed to validate().
    path:     CSV file to read; it must contain a 'codes' column.
    disallow: characters handed to validate() as its disallow set.
    output:   CSV file the annotated table is written to.

    Adds a 'valid' column holding validate()'s result for each code,
    writes the table (including its index) to `output`, and returns
    the DataFrame.
    '''
    df = load(path)

    # Extra keyword arguments to apply() are forwarded to validate().
    df['valid'] = df['codes'].apply(validate, pattern=pattern, disallow=disallow)

    df.to_csv(output)
    return df

In [10]:
# Pattern the codes in the file are checked against
pattern = 'Xx9A-a#vV-cC?/9/C-1234'

# I manually inserted some invalid codes into codetest
# Called with the default path, so this reads codetest.csv
validate_from_file(pattern)










































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































Unnamed: 0.1,Unnamed: 0,codes,error added,valid
0,0,Qc38-5&eU-sPZ9C-1234,,True
1,1,Cn85-b$uA-qT79C-1234,,True
2,2,Ws6Y-d@eE-rVA9C-1234,,True
3,3,Du3Q-p#aA-mX#9C-1234,,True
4,4,Km7F-3$oU-mK%9C-1234,,True
5,5,Dy55-5@eU-pSW9C-1234,,True
6,6,Zu7H-h&eE-mWV9C-1234,,True
7,7,Sf1F-p&oU-vX%9C-1234,,True
8,8,Cb11-3$uU-rKD9C-1234,,True
9,9,Yx2K-5%oU-mS&9C-1234,,True
