# Helper functions

In [6]:
import numpy as np
import string
import random
import io

In [1]:
# From Xavier
def strToBits(string):
    res = []
    byte_string = string.encode('utf-8')
    for b in byte_string:
        bit_array = bin(b)[2:]
        bit_array = '00000000'[len(bit_array):] + bit_array
        res.extend(bit_array[:])
    return np.array(res, dtype='int64')

def stringToChannelInput(string):
    bits = np.array(strToBits(string), dtype='int64')
    return 2*bits - 1

def channelOutputToString(channel_output):
    bits = ((channel_output+1)/2).astype('int64').tolist()
    byte_string = ""
    for char_index in range(len(bits)//8):
        bit_list = bits[char_index*8:(char_index+1)*8]
        byte = chr(int(''.join([str(bit) for bit in bit_list]), 2))
        byte_string += byte
    return byte_string

def binaryToString(bits):
    bits = bits.tolist()
    byte_string = ""
    for char_index in range(len(bits)//8):
        bit_list = bits[char_index*8:(char_index+1)*8]
        byte = chr(int(''.join([str(bit) for bit in bit_list]), 2))
        byte_string += byte
    return byte_string

In [27]:
# From handout
def channel(chan_input, erasedIndex=None):
    chan_input = np.clip(chan_input,-1,1)
    if (erasedIndex == None):
        erasedIndex = np.random.randint(3)
        print("erase index is: " + str(erasedIndex))
    chan_input[erasedIndex:len(chan_input):3] = 0
    return chan_input + np.sqrt(10)*np.random.randn(len(chan_input))

In [3]:
def channelWoNoise(chan_input, erasedIndex):
    """
    Emulates communication channel without noise.
    erasedIndex is used to specify H
    """
    chan_input = np.clip(chan_input,-1,1)
    chan_input[erasedIndex:len(chan_input):3] = 0
    return chan_input

In [4]:
def generateTestString(characters=80):
    """
    Function to generate a random utf-8 encoded string.
    """
    return ''.join(random.choices(string.ascii_letters + string.digits + string.punctuation, k=characters))

In [5]:
def generateTestFile(characters=80, filename="scratch"):
    """
    Function to generate a file containing a random utf-8 encoded string.
    Returns the generated text.
    """
    text = generateTestString(characters)
    with io.open(filename+".txt", "w", encoding='utf8') as f:
        f.write(text)
    return text

In [7]:
def readTestFile(filename="scratch"):
    """
    Function to read a text file as channel input.
    Returns the file text in the channel input format.
    """
    text = ''
    with io.open(filename+".txt", encoding='utf8') as f:
        text = f.read()
    return stringToChannelInput(text)

In [8]:
def genCodebook(initial=[[1]], k=5):

    def inverseSign(word):
        return [-i for i in word]

    if k == 0:
        return initial
    else:
        new = [word + inverseSign(word) for word in initial] + [word + word for word in initial]
        return genCodebook(new, k-1)


def pruneFirstInCodebook(codebook):
    return [codeword[1:] for codeword in codebook]

def binaryToDecimal(bits):
    res = 0
    for i in range(len(bits)):
        res += bits[i] * (2**i)
    return int(res)

def decimalToBinary(d, s):
    res = np.zeros(s, dtype='int64')
    tmp = list(bin(d).replace("0b", ""))[::-1]
    if (len(tmp) > s):
        print("WARNING number too big to fit into expected binary length!")
    res[:len(tmp)] = tmp
    return res

In [9]:
def encode(chan_input, codebook, k):
    """
    Makes channel input ready for transmission.
    Each input bit pair is mapped to a length 3 signal which is then repeated

    param: chan_input the input bit string
    param: codebook the orthogonal code codebook
    returns: the encoded input string ready for transmission through the channel
    """
    # first do the utf-8 encoding of the string
    chan_input = strToBits(chan_input)

    # pad with zeros
    n = len(chan_input)
    n_blocks = n // k  

    # map each block of bits to a code from the codebook
    res = np.array([], dtype='int64')
    
    for i in range(n_blocks):
        res = np.append(res, codebook[binaryToDecimal(chan_input[i*k:(i+1)*k])])

    chan_input = res

    # map each input pair to the signal whose two first elements are said pair
    def encodeBitPairs(bits):
        if (np.array_equal(bits, [1, 1])):
            return signal_set[0]
        elif (np.array_equal(bits, [1, -1])):
            return signal_set[1]
        elif (np.array_equal(bits, [-1, 1])):
            return signal_set[2]
        elif (np.array_equal(bits, [-1, -1])):
            return signal_set[3]

    chan_input = np.split(chan_input, len(chan_input)/2)
    chan_input = np.array([encodeBitPairs(bit) for bit in chan_input])

    # flatten result
    chan_input = chan_input.flatten()
    return chan_input

In [28]:
def decode(chan_output, codebook, k):
    """
    Decodes channel output.
    The value of the erasure index is computed for each signal and the most occuring one is kept.

    param: chan_output the channel output
    param: codebook the orthogonal codebook used
    return: the decoded string
    """
    # split output into triplets
    chan_output = np.split(chan_output, len(chan_output)/len(signal_set[0]))

    # decide on erasure index
    def decide_on_erasure_index(triplet):
        return np.argmin(np.array([x**2 for x in triplet]))
    
    erasure_index_candidates = np.array([decide_on_erasure_index(triplet) for triplet in chan_output])
    erasure_index = np.bincount(erasure_index_candidates).argmax()

    # print("decided on erasure index: " + str(erasure_index))

    # decide on index of codeword
    def decoder_H0(input):
        if (input[1] > 0 and input[2] > 0):
            return 0
        elif (input[1] > 0 and input[2] < 0):
            return 2
        elif (input[1] < 0 and input[2] < 0):
            return 1
        elif (input[1] < 0 and input[2] > 0):
            return 3

    def decoder_H1(input):
        if (input[0] > 0 and input[2] > 0):
            return 0
        elif (input[0] > 0 and input[2] < 0):
            return 1
        elif (input[0] < 0 and input[2] < 0):
            return 2
        elif (input[0] < 0 and input[2] > 0):
            return 3

    def decoder_H2(input):
        if (input[0] > 0 and input[1] > 0):
            return 0
        elif (input[0] > 0 and input[1] < 0):
            return 1
        elif (input[0] < 0 and input[1] < 0):
            return 3
        elif (input[0] < 0 and input[1] > 0):
            return 2

    def triplet_decoder(triplet):
        if (erasure_index == 0):
            return decoder_H0(triplet)
        elif (erasure_index == 1):
            return decoder_H1(triplet)
        elif (erasure_index == 2):
            return decoder_H2(triplet)

    chan_output = np.array([triplet_decoder(triplet) for triplet in chan_output])

    # reconstitute triplets
    def retrieveInput(index):
        return signal_set[index][:2]

    chan_output = np.array([retrieveInput(i) for i in chan_output])
    chan_output = chan_output.flatten()

    # retrieve input
    def find(codebook, codeword):
        res = np.array([])
        for c in codebook:
            res = np.append(res, np.inner(c, codeword))
        return res.argmax()

    chan_output = np.split(chan_output, len(chan_output)/len(codebook[0]))
    chan_output = np.array([find(codebook, codeword) for codeword in chan_output])
    chan_output = np.array([decimalToBinary(codeword, k) for codeword in chan_output]).flatten()

    # transform back into string
    chan_output = binaryToString(chan_output)

    return chan_output

In [26]:
signal_set = np.array([[1, 1, 1], [1, -1, -1], [-1, 1, -1], [-1, -1, 1]])
k = 9
codebook = genCodebook(k=k)
# random.shuffle(codebook)
text_in = generateTestString(characters=80)
print("input: \t\t" + text_in)
chan_input = encode(text_in, codebook, k)
print("input length: \t" + str(len(chan_input)))
chan_output = channel(chan_input)
text_out = decode(chan_output, codebook, k)
print("output: \t" + text_out)

diff = sum(text_in[i] != text_out[i] for i in range(len(text_out)))
print("diff: \t\t" + str(diff))

input: 		(7,A:mPd8c5?5]n+Pc6&Iu9>5$kNnMv*|Xj0^?fsQbHsHEHUB\]J8f6.HIr8$(\Rw/}'fv{{.h2D'&a}
input length: 	54528
erase index is: 2
decided on erasure index: 2
output: 	(7,A:mPd8c5?5]n+Pc6&Iu9:Q$kNnMv*|Xj0^?fsQbI!HEHUB\]J8f6.HIr8$(\Rw/}'fv{{.h2D'&a
diff: 		4


# find best number of repetitions

In [38]:
def computeAccuracy(tests, k, tolerance, erasedIndex):
    init = True
    errors = 0
    average = 0
    codebook = genCodebook(k=k)
    for i in np.arange(tests):
        text_in = generateTestString(characters=80)
        chan_input = encode(text_in, codebook, k)
        if (init):
            print("Input length is n=" + str(len(chan_input)))
            init = False
        chan_output = channel(chan_input, erasedIndex)
        text_out = decode(chan_output, codebook, k)
        diff = sum(text_in[i] != text_out[i] for i in range(len(text_out)))
        if (diff > tolerance):
            errors += 1
        average += diff
    percentage = (tests - errors) / tests * 100
    average = average / tests
    print("Percentage of decodings with less than " + str(tolerance) + " mistakes in " + str(tests) + " attempts is " + str(percentage) + "%" + " with an average of " + str(average) + " errors.")

In [35]:
computeAccuracy(tests=10, k=9, tolerance=0, erasedIndex=0)

Input length is n=54528
Percentage of decodings with less than 0 mistakes in 10 attempts is 0.0% with an average of 38.1 errors.


In [36]:
computeAccuracy(tests=10, k=9, tolerance=0, erasedIndex=1)

Input length is n=54528
Percentage of decodings with less than 0 mistakes in 10 attempts is 0.0% with an average of 39.4 errors.


In [37]:
computeAccuracy(tests=10, k=9, tolerance=0, erasedIndex=2)

Input length is n=54528
Percentage of decodings with less than 0 mistakes in 10 attempts is 40.0% with an average of 1.4 errors.
