# **Lab 04** - ISA Assembler Design (Part 2)
In this lab, your task is to utilize the **pre-processed assembly code** obtained in part 1 and convert it to **machine code**.

**TODO**
* Complete tasks
* Save code to a `.bin` file

_Note:_ Some portions of the code have already been implemented for you, such as reading the input file (`_out1.txt`), adjusting the `.txt` file format into a list of instructions, printing the processed instructions, and getting the instruction's `format`, `opcode`, `funct3`, and `funct7` based on its name. Also, assume that inputs for a task are outputs from the previous task.

In [None]:
from google.colab import files

files.upload()

Saving example2_out1.txt to example2_out1.txt


{'example2_out1.txt': b'lw 28 9 0\naddi 10 0 550\naddi 11 0 20\naddi 5 0 0\nbeq 10 0 20\nbeq 11 0 16\nadd 5 10 5\naddi 11 11 -1\nbne 11 0 -8\nadd 10 5 0\nsw 9 10 0\n'}

In [None]:
import re
import csv

## Function to read .txt file with pre-processed assembly code
def read_processed(filename):
    '''Load pre-processed assembly from `filename`, one instruction per line.

    Every line is split on whitespace. Tokens that look like an optionally
    signed decimal integer are converted to `int`; all other tokens are kept
    as strings. A blank line yields an empty list.

    Returns a list with one token list per line of the file.
    '''
    def convert(token):
        # Only plain decimal literals (with optional sign) become integers.
        if re.fullmatch("[+-]?[0-9]+", token):
            return int(token)
        return token

    with open(filename, 'r') as source:
        return [[convert(token) for token in row.split()] for row in source]

## Function to print the instructions
def print_asm_inst(inst_asm):
    '''Print a header line followed by every entry of `inst_asm`.

    Each entry is printed on its own line; an empty sequence prints `None`
    in place of the entries.
    '''
    print("Assembly Instructions:")
    # An empty listing is shown as a single "None" line.
    entries = [None] if len(inst_asm) == 0 else inst_asm
    for entry in entries:
        print(entry)

## Read csv file containing the format for each type of instruction
def get_isa(filename):
    '''Build the ISA lookup table from a CSV file.

    The first CSV row is the header. Every following row is stored under the
    value of its first column, as a dict that maps the header names of
    columns 1-4 to that row's values in the same columns.

    Returns a dict of dicts: ``{row[0]: {header[i]: row[i] for i in 1..4}}``.
    '''
    with open(filename, newline='') as csv_file:
        rows = csv.reader(csv_file)
        column_names = next(rows)
        return {
            row[0]: {column_names[col]: row[col] for col in range(1, 5)}
            for row in rows
        }

# Module-level ISA table, loaded once from 'rv32im_isa.csv'. It is bound as the
# default `isa` argument of the get_inst_*() lookup helpers defined below.
isa = get_isa('rv32im_isa.csv')

## Gets the encoding for the respective instruction
def get_inst_format(inst_name, isa=isa):
    '''Return the 'format' entry of instruction `inst_name` from the ISA table.

    Args:
        inst_name: instruction name used as the key into `isa`.
        isa: ISA table mapping instruction names to their field dicts
            (defaults to the module-level table).

    Raises:
        KeyError: if `inst_name` cannot be looked up in `isa`.
        ValueError: if the table stores the string 'None' as the format.
    '''
    try:
        val = isa[inst_name]['format']
    except (KeyError, TypeError) as err:
        # Only lookup failures are translated; KeyboardInterrupt and friends
        # must propagate untouched.
        raise KeyError(f"Invalid instruction: {inst_name}") from err
    if val == 'None':
        raise ValueError(f"Instruction '{inst_name}' does not have a format")
    return val

def get_inst_opcode(inst_name, isa=isa):
    '''Return the 'opcode' entry of instruction `inst_name` from the ISA table.

    Args:
        inst_name: instruction name used as the key into `isa`.
        isa: ISA table mapping instruction names to their field dicts
            (defaults to the module-level table).

    Raises:
        KeyError: if `inst_name` cannot be looked up in `isa`.
        ValueError: if the table stores the string 'None' as the opcode.
    '''
    try:
        val = isa[inst_name]['opcode']
    except (KeyError, TypeError) as err:
        # Only lookup failures are translated; KeyboardInterrupt and friends
        # must propagate untouched.
        raise KeyError(f"Invalid instruction: {inst_name}") from err
    if val == 'None':
        raise ValueError(f"Instruction '{inst_name}' does not have an opcode")
    return val

def get_inst_funct3(inst_name, isa=isa):
    '''Return the 'funct3' entry of instruction `inst_name` from the ISA table.

    Args:
        inst_name: instruction name used as the key into `isa`.
        isa: ISA table mapping instruction names to their field dicts
            (defaults to the module-level table).

    Raises:
        KeyError: if `inst_name` cannot be looked up in `isa`.
        ValueError: if the table stores the string 'None' as funct3.
    '''
    try:
        val = isa[inst_name]['funct3']
    except (KeyError, TypeError) as err:
        # Only lookup failures are translated; KeyboardInterrupt and friends
        # must propagate untouched.
        raise KeyError(f"Invalid instruction: {inst_name}") from err
    if val == 'None':
        raise ValueError(f"Instruction '{inst_name}' does not have 'funct3'")
    return val

def get_inst_funct7(inst_name, isa=isa):
    '''Return the 'funct7' entry of instruction `inst_name` from the ISA table.

    Args:
        inst_name: instruction name used as the key into `isa`.
        isa: ISA table mapping instruction names to their field dicts
            (defaults to the module-level table).

    Raises:
        KeyError: if `inst_name` cannot be looked up in `isa`.
        ValueError: if the table stores the string 'None' as funct7.
    '''
    try:
        val = isa[inst_name]['funct7']
    except (KeyError, TypeError) as err:
        # Only lookup failures are translated; KeyboardInterrupt and friends
        # must propagate untouched.
        raise KeyError(f"Invalid instruction: {inst_name}") from err
    if val == 'None':
        raise ValueError(f"Instruction '{inst_name}' does not have 'funct7'")
    return val



## Convert int to bin (signed 2's C or unsigned)
def get_2c_binary(integer:int, bits=32, is_signed=True):
    '''Convert an integer to a binary string that is exactly `bits` wide.

    Args:
        integer: value to convert (anything accepted by `int()`).
        bits: width of the resulting string.
        is_signed: if True, encode as two's complement and accept values in
            [-2**(bits-1), 2**(bits-1)); otherwise encode as unsigned and
            accept values in [0, 2**bits).

    Returns:
        A string of '0'/'1' characters, most significant bit first.

    Raises:
        ValueError: if the value does not fit in the requested width.
    '''
    value = int(integer)
    limit = 2 ** bits
    if is_signed:
        # Compare against 2*value so the check stays in exact integer
        # arithmetic (no float division, no overflow for wide fields).
        if (2 * value < -limit) or (2 * value >= limit):
            half = limit // 2
            raise ValueError(f"Value outside of range: {integer}.\nMust be between [{-half}, {half}).")
    else:
        if (value < 0) or (value >= limit):
            raise ValueError(f"Value outside of range: {integer}.\nMust be between [0, {limit}).")
    # Masking maps negative values onto their two's-complement bit pattern.
    return format(value & (limit - 1), f"0{bits}b")

In [None]:
 # list to store instructions
# start from an empty list; it is replaced by the parsed file contents below
inst_asm = []

## reads assembly code and stores it in list of lists 'inst_asm' where axis 0 (rows) corresponds
## to each line in the file and axis 1 (columns) corresponds to each argument in that instruction
filename = "example2_out1.txt"
inst_asm = read_processed(filename)
# read_processed() already splits every line into arguments, so this extra split is not needed:
# inst_asm = [[arg for arg in line.split()] for line in inst_asm]
print_asm_inst(inst_asm)

Assembly Instructions:
['lw', 28, 9, 0]
['addi', 10, 0, 550]
['addi', 11, 0, 20]
['addi', 5, 0, 0]
['beq', 10, 0, 20]
['beq', 11, 0, 16]
['add', 5, 10, 5]
['addi', 11, 11, -1]
['bne', 11, 0, -8]
['add', 10, 5, 0]
['sw', 9, 10, 0]


## **Task 1**
<span style="color:black; background-color:#C5E0B4; border: 1px solid; padding: 5px;">Implement a function that converts the above instructions from `inst_asm` into machine code.</span>

You will need the following functions:
- `get_inst_format()` ✓
- `get_inst_opcode()`
- `get_inst_funct3()`
- `get_inst_funct7()`
- `get_2c_binary()`

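_**Note:** Python string indexing is <u>backwards</u> compared to how we conventionally index bits in hardware design: index 0 of a binary string is the most significant bit, whereas hardware numbers the least significant bit as bit 0. This is important to know when implementing the `imm` (immediate) field._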

_**Note:** since immediate values can be <u>negative</u>, we must account for this when converting integers._

In [None]:
def get_machine_code(inst_asm):
    '''converts the assembly code to machine code'''
    inst_bin = [] # holds the final result after calling the appropriate functions

    for line in inst_asm:
        inst_name = line[0]
        match (get_inst_format(inst_name)):

            # -- enter your code here

            # R-type
            case 'R':
                # get fields
                opcode = get_inst_opcode(inst_name)
                funct3 = get_inst_funct3(inst_name)
                funct7 = get_inst_funct7(inst_name)
                rd = get_2c_binary(line[1],5, False)
                rs1 = get_2c_binary(line[2],5, False)
                rs2 = get_2c_binary(line[3], 5, False)

                # assemble instruction
                code = f"{funct7}{rs2}{rs1}{funct3}{rd}{opcode}"
                inst_bin.append(code) # append machine code to result

            # I-type
            ## ... ##
            case 'I':
                opcode = get_inst_opcode(inst_name)
                funct3 = get_inst_funct3(inst_name)
                rd = get_2c_binary(line[1] , 5, False)
                rs1 = get_2c_binary(line[2], 5,  False)
                imm = get_2c_binary(int(line[3]), 12) # Adjust the number of bits for the immediate field

                # assemble instruction
                code = f"{imm}{rs1}{funct3}{rd}{opcode}"
                inst_bin.append(code) # append machine code to result

            # S-type
            case 'S':
                opcode = get_inst_opcode(inst_name)
                funct3 = get_inst_funct3(inst_name)
                rs1 = get_2c_binary(line[2], 5, False)  # Source register 1
                rs2 = get_2c_binary(line[1], 5, False)  # Source register 2

                # Extract and format the immediate value bits
                imm1 = get_2c_binary(line[3])[20:27]
                imm2 = get_2c_binary(line[3])[27:32]

                code = f"{imm1}{rs2}{rs1}{funct3}{imm2}{opcode}"
                inst_bin.append(code) # append machine code to result

            # B-type
            case 'B':
                opcode = get_inst_opcode(inst_name)
                funct3 = get_inst_funct3(inst_name)
                rs1 = get_2c_binary(line[1], 5,  False)
                rs2 = get_2c_binary(line[2], 5, False)
                imm1 = get_2c_binary(line[3],12)[0:7]
                imm2 = get_2c_binary(line[3],12)[7:12]

                # assemble instruction
                code = f"{imm1}{rs2}{rs1}{funct3}{imm2}{opcode}"
                inst_bin.append(code) # append machine code to result


            # J-type
            case 'J':
                opcode = get_inst_opcode(inst_name)
                rd = get_2c_binary(line[1], 5, False)
                imm = get_2c_binary(line[2], 21)[0:20]

                # assemble instruction
                code = f"{imm}{rd}{opcode}"
                inst_bin.append(code) # append machine code to result



            # -- end your code here


            case _: # Other (default)
                code = "0"*32 # assemble instruction: NOP
                inst_bin.append(code) # append machine code to result

    return inst_bin


## -- assemble the loaded instructions and print the resulting machine code -- ##
inst_bin = get_machine_code(inst_asm)
# NOTE: print_asm_inst() always prints the "Assembly Instructions:" header, even for machine code
print_asm_inst(inst_bin)

Assembly Instructions:
00000000000001001000111000000011
00100010011000000000010100010011
00000001010000000000010110010011
00000000000000000000001010010011
00000000000001010000101001100011
00000000000001011000100001100011
00000000010101010000001010110011
11111111111101011000010110010011
11111110000001011001110001100011
00000000000000101000010100110011
00000000100101010010000000100011


In [None]:
# Scratch check: format 10 as a zero-padded, 5-character binary string
'{0:05b}'.format(10)

'01010'

## **Task 2**
<span style="color:black; background-color:#C5E0B4; border: 1px solid; padding: 5px;">Implement a function to save the machine code produced in Task 1 to a `.bin` file.</span>

In [None]:
# Takes machine code as strings of '0'/'1' characters and a filename, packs
# the bits into bytes, and writes those bytes to the file.

def save_bin(inst_bin, filename):
    '''Write machine code given as bit strings to a binary file.

    Args:
        inst_bin: a string of '0'/'1' characters, or a list (or other
            iterable) of such strings, which are concatenated in order.
        filename: path of the file to create or overwrite.

    Raises:
        ValueError: if the data contains characters other than '0'/'1' or
            its total length is not a multiple of 8 bits.
    '''
    # If inst_bin is a collection of bit strings, join them without separators
    if not isinstance(inst_bin, str):
        inst_bin = ''.join(inst_bin)

    # Validate before touching the file so bad input cannot leave a
    # truncated output behind.
    if set(inst_bin) - {'0', '1'}:
        raise ValueError("inst_bin must contain only '0' and '1' characters")
    if len(inst_bin) % 8 != 0:
        raise ValueError(f"Bit count must be a multiple of 8, got {len(inst_bin)}")

    # Pack 8 bits per byte, in the same left-to-right order as the string.
    # NOTE(review): each instruction is therefore stored most-significant
    # byte first; confirm the byte order expected by whatever loads the file.
    if inst_bin:
        bin_data = int(inst_bin, 2).to_bytes(len(inst_bin) // 8, 'big')
    else:
        bin_data = b''

    with open(filename, 'wb') as bin_file:
        bin_file.write(bin_data)

## -- save the final machine code to a .bin file -- ##
# filename[:-5] drops the trailing "1.txt", so "example2_out1.txt" becomes "example2_out2.bin"
save_bin(inst_bin, filename[:-5]+"2.bin")