# Helper functions

In [1]:
import numpy as np
import string
import random
import io

In [2]:
def strToBits(string):
    res = []
    byte_string = string.encode('utf-8')
    for b in byte_string:
        bit_array = bin(b)[2:]
        bit_array = '00000000'[len(bit_array):] + bit_array
        # we drop the first bit because it is always 0 for 1 byte utf-8 encoded chars
        res.extend(bit_array[1:])
    return np.array(res, dtype='int64')

def stringToChannelInput(string):
    bits = np.array(strToBits(string), dtype='int64')
    return 2*bits - 1

def channelOutputToString(channel_output):
    bits = ((channel_output+1)/2).astype('int64').tolist()
    byte_string = ""
    for char_index in range(len(bits)//7):
        bit_list = bits[char_index*7:(char_index+1)*7]
        byte = chr(int(''.join([str(bit) for bit in bit_list]), 2))
        byte_string += byte
    return byte_string

def binaryToString(bits):
    bits = bits.tolist()
    byte_string = ""
    for char_index in range(len(bits)//7):
        bit_list = bits[char_index*7:(char_index+1)*7]
        byte = chr(int(''.join([str(bit) for bit in bit_list]), 2))
        byte_string += byte
    return byte_string

In [3]:
# From handout
def channel(chan_input, erasedIndex=None):
    chan_input = np.clip(chan_input,-1,1)
    if (erasedIndex == None):
        erasedIndex = np.random.randint(3)
    chan_input[erasedIndex:len(chan_input):3] = 0
    return chan_input + np.sqrt(10)*np.random.randn(len(chan_input))

In [4]:
def channelWoNoise(chan_input, erasedIndex):
    """
    Emulates communication channel without noise.
    erasedIndex is used to specify H
    """
    chan_input = np.clip(chan_input,-1,1)
    chan_input[erasedIndex:len(chan_input):3] = 0
    return chan_input

In [5]:
def generateTestString(characters=80):
    """
    Function to generate a random utf-8 encoded string.
    """
    return ''.join(random.choices(string.ascii_letters + string.digits + string.punctuation, k=characters))

In [6]:
def generateTestFile(characters=80, filename="scratch"):
    """
    Function to generate a file containing a random utf-8 encoded string.
    Returns the generated text.
    """
    text = generateTestString(characters)
    with io.open(filename+".txt", "w", encoding='utf8') as f:
        f.write(text)
    return text

In [7]:
generateTestFile(filename="input_string")

'_0Y.{x<b7X44M/FU>?$~nU4jgklHwr",<ueQ[t~(AJT+l3|)pM/vdlE,!Ysi6E;SpC\\XM~UWR/U\\FAb@'

In [8]:
def readTestFile(filename="scratch"):
    """
    Function to read a text file as channel input.
    Returns the file text in the channel input format.
    """
    text = ''
    with io.open(filename+".txt", encoding='utf8') as f:
        text = f.read()
    return stringToChannelInput(text)

In [9]:
def genCodebook(initial=[[1]], k=5):

    def inverseSign(word):
        return [-i for i in word]

    if k == 0:
        return initial
    else:
        new = [word + inverseSign(word) for word in initial] + [word + word for word in initial]
        return genCodebook(new, k-1)


def pruneFirstInCodebook(codebook):
    return [codeword[1:] for codeword in codebook]

def binaryToDecimal(bits):
    res = 0
    for i in range(len(bits)):
        res += bits[i] * (2**i)
    return int(res)

def decimalToBinary(d, s):
    res = np.zeros(s, dtype='int64')
    tmp = list(bin(d).replace("0b", ""))[::-1]
    if (len(tmp) > s):
        print("WARNING number too big to fit into expected binary length!")
    res[:len(tmp)] = tmp
    return res

In [101]:
def encode(chan_input, codebook, k):
    """
    Makes channel input ready for transmission.
    Each input bit pair is mapped to a length 3 signal which is then repeated

    param: chan_input the input bit string
    param: codebook the orthogonal code codebook
    returns: the encoded input string ready for transmission through the channel
    """
    # first do the utf-8 encoding of the string
    chan_input = strToBits(chan_input)

    # pad with zeros
    n = len(chan_input)
    n_blocks = n // k
    print(n)
    
    padding_size = k - n % k
    padding = np.zeros(padding_size).astype('int64')
    print(padding)
    if (padding_size < 10):
        chan_input = np.append(chan_input, padding)
        print(decimalToBinary(padding_size, k))
        chan_input = np.append(chan_input, decimalToBinary(padding_size, k))
        n_blocks += 2

    # map each block of bits to a code from the codebook
    res = np.array([], dtype='int64')
    
    for i in range(n_blocks):
        res = np.append(res, codebook[binaryToDecimal(chan_input[i*k:(i+1)*k])])

    chan_input = res

    # flatten result
    chan_input = chan_input.flatten()
    return chan_input

In [102]:
def decode(chan_output, codebook, k):
    """
    Decodes channel output.
    The value of the erasure index is computed for each signal and the most occuring one is kept.

    param: chan_output the channel output
    param: codebook the orthogonal codebook used
    return: the decoded string
    """
    # retrieve input
    def find(codebook, codeword):
        res = np.array([])
        for c in codebook:
            res = np.append(res, np.inner(c, codeword))
        return res.argmax()

    chan_output = np.split(chan_output, len(chan_output)/len(codebook[0]))
    chan_output = np.array([find(codebook, codeword) for codeword in chan_output])

    # remove padding
    chan_output = np.array([decimalToBinary(codeword, k) for codeword in chan_output]).flatten()
    padding_size = binaryToDecimal(chan_output[-k:])
    print(padding_size)
    if (padding_size < k):
        chan_output = chan_output[:-(padding_size+k)]

    # transform back into string
    chan_output = binaryToString(chan_output)

    return chan_output

In [109]:
signal_set = np.array([[1, 1, 1], [1, -1, -1], [-1, 1, -1], [-1, -1, 1]])
k = 10
codebook = genCodebook(k=k)
text_in = generateTestString(characters=3)
print("input: \t\t" + text_in)
chan_input = encode(text_in, codebook, k)
print("input length: \t" + str(len(chan_input)))
chan_output = channel(chan_input)
text_out = decode(chan_output, codebook, k)
print("output: \t" + text_out)

diff = sum(text_in[i] != text_out[i] for i in range(len(text_out)))
print("diff: \t\t" + str(diff))

input: 		8Vu
21
[0 0 0 0 0 0 0 0 0]
[1 0 0 1 0 0 0 0 0 0]
input length: 	4096
9
output: 	8Vu
diff: 		0


# find best number of repetitions

In [13]:
def computeAccuracy(tests, k, tolerance, erasedIndex):
    init = True
    errors = 0
    average = 0
    codebook = genCodebook(k=k)
    for i in np.arange(tests):
        text_in = generateTestString(characters=80)
        chan_input = encode(text_in, codebook, k)
        if (init):
            print("Input length is n=" + str(len(chan_input)))
            init = False
        chan_output = channel(chan_input, erasedIndex)
        text_out = decode(chan_output, codebook, k)
        diff = sum(text_in[i] != text_out[i] for i in range(len(text_out)))
        if (diff > tolerance):
            errors += 1
        average += diff
    percentage = (tests - errors) / tests * 100
    average = average / tests
    print("Percentage of decodings with less than " + str(tolerance) + " mistakes in " + str(tests) + " attempts is " + str(percentage) + "%" + " with an average of " + str(average) + " errors.")

In [154]:
computeAccuracy(tests=50, k=10, tolerance=0, erasedIndex=None)

Input length is n=57344
Percentage of decodings with less than 0 mistakes in 50 attempts is 98.0% with an average of 0.04 errors.
