In [11]:
# Initialize Otter
import otter
grader = otter.Notebook("assignment7.ipynb")

# DS 453 / 653: Programming Assignment 7

**Due date**: Friday, April 5 at 8pm on [Gradescope](https://www.gradescope.com/courses/710247).

_You must follow the Academic Code of Conduct and Collaboration Policy stated in the course syllabus at all times while working on this assignment._

This assignment contains 5 questions worth a total of 5 points. You must receive at least 4 points to pass the assignment.

Note that some of the tests in this week's assignment are hidden. So: make sure to check how many points you receive from the autograder. Simply seeing a message of the form "all test cases passed" is *not* sufficient, because it only means that all of the public tests hvae passed.

To begin the assignment, please execute the code block below:

In [12]:
import otter
grader = otter.Notebook()

In [13]:
# Execute this block only if you are using Google Colab.
# If you are running the notebook file locally, install pycryptodome yourself but do NOT install the package pycrypto.

%pip install pycryptodome

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


## Assignment Overview

#### Helper functions

In [14]:
from Crypto.Util.strxor import strxor

def slice_into_blocks(message, block_size):
    len_message = len(message)
    assert(len_message >= block_size)
    return [message[i: i + block_size] for i in range(0, len_message, block_size)]

class Sample_Cipher(object):
    def __init__(self):
        """
            Sample Cipher class that will be passed to some of the methods in the lab
        """
        self.BLOCK_SIZE = 16  # bytes
        return

    def sub_word(self, word):
        """performs an s-box substitution on the 8-bit input "word"
        NOTE: Using this function directly will NOT give you full-credit for question 1
        Args:
            word    (bytes/bytearray): bytes/bytearray object of length 8 bits (1 byte).
        Return:
            bytes : bytes object of length 8 bits (1 byte)
        """
        sbox = (  # Sample S-Box
            0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
            0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
            0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
            0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
            0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
            0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
            0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
            0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
            0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
            0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
            0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
            0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
            0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
            0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
            0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
            0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
        )
        return bytes([sbox[b] for b in word])

    def inv_sub_word(self, word):
        """ performs an inverse s-box substitution on the 8-bit input "word"
        NOTE: Using this function directly will NOT give you full-credit for question 1
        Args:
            word    (bytes/bytearray): bytes/bytearray object of length 8 bits (1 byte).
        Return:
            bytes : bytes object of length 8 bits (1 byte)
        """
        sbox_inv = (  # Sample S-Box-Inverse
            0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
            0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
            0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
            0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
            0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
            0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
            0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
            0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
            0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
            0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
            0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
            0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
            0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
            0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
            0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
            0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
        )
        return bytes([sbox_inv[b] for b in word])

    def encipher(self, key, message):
        """preforms an encipher on the input "message" given the key "key"
        Args:
            key     (bytes):   bytestring of length self.BLOCK_SIZE bytes.
            message (bytes):   bytestring of length self.BLOCK_SIZE bytes.
        Return:
            bytes : bytestring of length self.BLOCK_SIZE bytes.
        """
        return self.__TOY(message, key)

    def __TOY(self, message, key):
        assert(len(key) == self.BLOCK_SIZE)
        assert(len(message) % self.BLOCK_SIZE == 0)

        pre_xor = strxor(message, key)
        sub_word = b''.join(self.sub_word(word)
                            for word in slice_into_blocks(pre_xor, self.BLOCK_SIZE))
        post_xor = strxor(sub_word, key)

        return post_xor

    def decipher(self, key, ciphertext):
        """preforms a TOY decipher on the self.BlOCK_SIZE length input "ciphertext" given the key "key"
        Args:
            key         (str):   hex-encoded string of self.BlOCK_SIZE length
            ciphertext  (str):   hex-encoded string of self.BlOCK_SIZE length
        Return:
            str : hex-encoded string of self.BlOCK_SIZE length
        """
        return self.__TOY_inv(ciphertext, key)

    def __TOY_inv(self, ciphertext, key):
        assert(len(key) == self.BLOCK_SIZE)
        assert(len(ciphertext) % self.BLOCK_SIZE == 0)

        post_xor = strxor(ciphertext, key)
        sub_word = b''.join(self.inv_sub_word(word)
                            for word in slice_into_blocks(post_xor, self.BLOCK_SIZE))
        pre_xor = strxor(sub_word, key)

        return pre_xor


### Question 1: Create PKCS#7 padding

In this question, you must create methods to add and remove padding according to the PKCS#7 standard described in class. The PKCS#7 padding standard is a common method to pad messages to a multiple of the block length. Given a string `s` that is $n$ bytes short of being a multiple of the block length, PKCS#7 padding simply adds $n$ bytes each of which have the byte value $n$.

For instance, let's take AES as an example, in which case the block length is 16 bytes. The string `b"TEST STRING"` is 11 characters long and thus needs 5 bytes of padding. So, it gets padded to the string: `b"TEST STRING\x05\x05\x05\x05\x05"` Here, the "\x05" denotes the byte value 5 in hex form (this is valid Python syntax, by the way).

If we choose to use padding, then we must **always** do so because the person on the other end of the wire is planning to remove the padding. In particular, if the string length is already a multiple of the block length, then we must add a new block and fill it with padding.

For instance, the 16-byte string `b"A COMPLETE BLOCK"` gets PKCS#7 padded to the following 32-byte string:
    `b"A COMPLETE BLOCK\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10"`
where "\x10" denotes the hex value 10 (i.e., decimal value 16, the block length).

__Your task__: Implement the following methods.
- `pad(msg, block_size)` should take an input of arbitrary length and return a padded string based on the block_size and following the PKCS#7 standard.
- `unpad(padded_msg, block_size)` should remove the padding from the padded_msg and return the original un-padded message.

_Points:_ 1

In [15]:
def q1_pad(msg, block_size):
    """Adds PKCS#7 padding to an arbitrary length message based on the 'block_size'
    Inputs:
        msg      (bytes): bytestring of arbitrary length
        block_size (int): length of a block
    
    ret (bytes): padded message whose length is a multiple of block_size 
    """
    
    bytes_to_pad = block_size - len(msg) % block_size
    return msg + bytes([bytes_to_pad] * bytes_to_pad)

In [16]:
def q1_unpad(padded_msg, block_size):
    """Removes PKCS#7 padding if it exists and returns the un-padded message
    Args:
        padded_msg (bytes): message that is claimed to have PKCS#7 padding already applied
    ret (bytes): un-padded message if the padding is valid, None otherwise 
    """

    if len(padded_msg) % block_size != 0:
        return None

    padding = padded_msg[-1]
    if padding == 0 or padding > block_size:
        return None

    if padded_msg[-padding:] != bytes([padding] * padding):
        return None

    return padded_msg[:-padding]
    

In [17]:
grader.check("q1")

### Question 2: Implement CBC Mode encryption, with PKCS#7 padding and the provided block cipher `cipher`

__Your Task__: Write the function `q2_enc_cbc_mode` that encrypts a message using CBC mode. (We recommend that you review the picture of CBC mode from Lecture 17.) Rather than using AES though, we are going to use the `Sample_Cipher` class at the top of this Jupyter notebook.

Inputs:
- key     (bytes): bytestring corresponding to the symmetric key (cipher.BLOCK_SIZE-bytes long)
- message (bytes): bytestring corresponding to an arbitrary-length message
- iv      (bytes): bytestring of an IV that should be used for the CBC encryption (cipher.BLOCK_SIZE long)
- cipher (Cipher): Cipher class that provides access to a sample block cipher, via the command cipher.encipher(hexKey, message)

Output:
- ret     (str):  hex-encoded ciphertext (do **not** return the IV as part of the ciphertext)

You might find useful the helper function `slice_into_blocks` that breaks apart the message into bytestrings of size `block_length`.

Test vector:

    q2_enc_cbc_mode(key=bytes.fromhex("a8c0eeef71c4f0ad7942cb2eefb0dff0"), message=b"w)0EA@W`j-3O~FhxwS~OixkV$D<2'v[apPoW[", iv=bytes.fromhex("45054c1d141b6ae136b45c37800c7840"), cipher=Sample_Cipher()) == bytes.fromhex("100ea146471f4ddc46fb829f6d9d5518229e2961bece0661d61656c2e989e157856b2cda53b8a46b308d5bba38934961")


__Your response:__

_Points:_ 1

In [31]:
def q2_enc_cbc_mode(key, message, iv, cipher=Sample_Cipher):

    block_size = len(key)

    # pad message so that it is a multiple of block_size
    padded_msg = q1_pad(message, block_size) 
    
    # slice the padded message into blocks
    blocks = slice_into_blocks(padded_msg, block_size) 
    
    ciphertext = b''

    # set first value to be XORed to iv
    prev_block = iv

    for block in blocks:
        block = strxor(block, prev_block)
        prev_block = cipher.encipher(key, block)

        # append the encrypted block to the ciphertext
        ciphertext += prev_block

    return ciphertext
    

In [32]:
grader.check("q2")

### Question 3: Implement CBC Mode **decryption** (with PKCS#7 padding, using the provided block cipher `cipher`)

__Your Task:__ The problem description is similar to the one in the previous problem, just note the different inputs and expected outputs. This time, you must implement the **decryption** direction of CBC mode.

__Args:__
- key        (bytes): bytestring containing the symmetric key (cipher.BLOCK_SIZE-bytes long)
- ciphertext (bytes): bytestring corresponding to the ciphertext (multiple cipher.BLOCK_SIZE-bytes long)
- iv         (bytes): bytestring of an IV that should be used for the CBC decryption (cipher.BLOCK_SIZE-bytes long)
- cipher    (Cipher): Cipher class (check `sample_cipher.py`) that provides access to a sample block cipher, where you can use the command cipher.decipher(hexKey, ciphertext)

__Output:__
ret (bytes): bytestring with an arbitrary length, with the padding removed
    
__Test vectors:__
    You can use the same test vectors from `q2_enc_cbc_mode` in the reverse order to double check your solution.

_Points:_ 1

In [35]:
def q3_dec_cbc_mode(key, ciphertext, iv, cipher=Sample_Cipher):

    # get block size
    block_size = len(key)

    # slice the ciphertext into blocks
    cipher_blocks = slice_into_blocks(ciphertext, block_size)

    message = b''

    # set first value to be XORed to iv
    prev_block = iv

    for block in cipher_blocks:
        decrypted_block = cipher.decipher(key, block)
        message += strxor(decrypted_block, prev_block)
        prev_block = block

    return q1_unpad(message, block_size)
    

In [43]:
grader.check("q3")

### Question 4: Implement Counter (CTR) Mode encryption (using the provided block cipher `cipher`)

__Your Task__: Write the function `q4_enc_ctr_mode` that encrypts a message using Counter (CTR) mode. As before, we recommend that you review the picture of CTR mode from Lecture 17.

_Note:_ You can assume that the `BLOCK_SIZE` is at least 4 bytes, so the nonce you get as an input will always have a length of 4 bytes less than the `BLOCK_SIZE` of the cipher given. So make sure to append a counter of size 4 bytes to your nonce when using it. You can also assume that we would never  send a message with more than $2^{32}$ blocks; in other words, the counter will never go past `UINT32_MAX` (`0xFFFFFFFF`).  

__Args:__ Same inputs as in Question 2 (except the `iv` is now called a `nonce`).
- key     (bytes)
- message (bytes)
- nonce   (bytes)
- cipher (Cipher)

                        
__Output:__
ret     (bytes):  ciphertext (arbitrary length)

__Test vectors:__

    q4_enc_ctr_mode(key=bytes.fromhex("4c55061b9e3d802b64897306af2389a1"), message=b"qeN", nonce=bytes.fromhex("a7314e0f243701914bf02b08"), cipher=Sample_Cipher()) == bytes.fromhex("d4731a")
    q4_enc_ctr_mode(key=bytes.fromhex("7b2937e962319e03aec2d26c8d681e06"), message=b"}9&|:WQ",nonce=bytes.fromhex("a5466611ff4369a8267ebd60"), cipher=Sample_Cipher()) == bytes.fromhex("1bb8c0d40626a7")

__Your response:__

_Points:_ 1

In [95]:
def q4_enc_ctr_mode(key, message, nonce, cipher=Sample_Cipher):

    block_size = len(key) 
    padded_msg = q1_pad(message, block_size) 
    blocks = slice_into_blocks(padded_msg, block_size)

    ciphertext = b''
    counter = 0

    for block in blocks:
        keystream = cipher.encipher(key, nonce + counter.to_bytes(4, 'big'))
        ciphertext += strxor(block, keystream)
        counter += 1

    return ciphertext

In [96]:
q4_enc_ctr_mode(key=bytes.fromhex("4c55061b9e3d802b64897306af2389a1"), message=b"qeN", nonce=bytes.fromhex("a7314e0f243701914bf02b08"), cipher=Sample_Cipher()) == bytes.fromhex("d4731a")

4


False

In [69]:
grader.check("q4")

### Question 5: Implement Counter (CTR) Mode **decryption** (using the provided block cipher `cipher`)
__Your Task:__
    The problem description is similar to the one in the previous problem, just note the different inputs and expected outputs

__Args:__ Same inputs as in Question 3 (except the `iv` is now called a `nonce`).
- key (bytes)
- ciphertext (bytes)
- nonce (bytes)
- cipher (Cipher)

__output:__ ret (bytes): bytestring corresponding to the decrypted message

__Test vectors:__
You can use the same test vectors from `q4_enc_ctr_mode` in the reverse order to double check your solution

_Points:_ 1

In [57]:
def q5_dec_ctr_mode(key, ciphertext, nonce, cipher=Sample_Cipher):

    block_size = len(key)
    padded_msg = q1_pad(ciphertext, block_size) 
    blocks = slice_into_blocks(padded_msg, block_size)

    ciphertext = b''
    counter = 0

    for block in blocks:
        keystream = cipher.encipher(key, nonce + bytes(counter))
        ciphertext += strxor(block, keystream)
        counter += 1

    return ciphertext

In [58]:
grader.check("q5")

## Submitting the Assignment

Please follow these instructions to complete the assignment and submit it for credit.

**Documenting collaborators, sources, and AI tools:** In accordance with the collaboration policy, use the space below to report if you used any resources to complete this homework assignment, aside from the lecture notes and the course textbooks/videos. Specifically, please report:

1. Names of all classmates you worked with, and a short description of the work that you performed together.
2. All written materials that you used, such as books or websites (besides the lecture notes or textbooks). Please include links to any web-based resources, or citations to any physical works.
3. All code that you used from other sources. In particular, if you used an AI tool, then you must include the entire exchange with the AI tool, as per the [CDS Generative AI Assistance Policy](https://www.bu.edu/cds-faculty/culture-community/gaia-policy/).

Remember that if we discover any undocumented collaborators, sources, or AI tools then this is grounds for a grade penalty and referral to BU's Academic Conduct Committee (as described in the syllabus).

_Your response:_

1.

2.

3.

**Sending to Gradescope:** After completing the assignment:
- if you did the assignment on Colab, download it in `.ipynb` format.
- if you did the assignment locally on your machine, all you need to do is to find it in your directory.

Then, submit only the `.ipynb` file to this week's programming assignment on Gradescope. It may take a few seconds or a minute for the auto-grading system to check your work.

Remember that you can submit as many times as you want until the deadline for the assignment; only your last score counts.

## Submission

Make sure you have run all cells in your notebook in order before running the cell below, so that all images/graphs appear in the output. The cell below will generate a zip file for you to submit. **Please save before exporting!**

Submit your assignment on Gradescope once you have passed enough tests to receive at least 4 of the 5 available points.

In [26]:
# Save your notebook first, then run this cell to export your submission.
grader.export(run_tests=True)



UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 16829: character maps to <undefined>