# **Lab 03** - ISA Assembler Design (Part 1)
In this lab, you will be designing an assembler to **pre-process** assembly code.

**TODO**
* Pre-process assembly code ('example.asm') by completing Tasks 1 - 8
* Save the pre-processed code to a '.txt' file (Task 9)

_Note:_ Some portions of the code have already been implemented for you such as reading the assembly code file, converting the register names to their equivalent values and printing the processed instructions and labels. Also, assume that inputs for a task are outputs from the previous task.

In [None]:
from google.colab import files

# Prompt for a file upload. The recorded cell output shows example.asm being
# saved, which is the file the later cells open by name.
files.upload()


Saving example.asm to example.asm


{'example.asm': b'# Implement a RISC-V program to multiply two numbers using loop adder\r\nmain:\r\n    lw   t3, 0(s1)\r\n    addi a0, zero, 550   # load first operand\r\n    addi a1, zero, 20    # load second operand\r\n    addi t0, zero, 0   \r\n    beq  a0, zero, done  # if the first operand is equal to 0, then goto done\r\n    beq  a1, zero, done  # if the second operand is equal to 0, then goto done\r\nloop:\r\n    add  t0, a0, t0      \r\n    addi a1, a1, -1\r\n    bne  a1, zero, loop\r\ndone:\r\n    add  a0, t0, zero\r\n    sw   a0, 0(s1)\r\n'}

lw 7 9 0
addi 10 0 10
addi 11 0 20
bge 10 11 12
add 12 11 0
jal 1 12
add 12 10 0
jal 1 4
sw 9 10 0


In [None]:
import re
import csv

## Function to read the assembly code file ##
def read(filename):
    '''Return every line of the file as a list of strings.

    Each entry keeps its trailing line ending, exactly as the text-mode
    file object yields it. An empty file gives an empty list.
    '''
    with open(filename, 'r') as source:
        return source.readlines()

## ABI register names mapped to their x-register numbers.
## Built once at import time instead of on every call; 'fp' is the
## alternate ABI name for x8 (same register as 's0').
_REG_ABI = {
    "zero": 0, "ra": 1, "sp": 2, "gp": 3, "tp": 4, "t0": 5, "t1": 6, "t2": 7,
    "s0": 8, "fp": 8, "s1": 9, "a0": 10, "a1": 11, "a2": 12, "a3": 13,
    "a4": 14, "a5": 15, "a6": 16, "a7": 17, "s2": 18, "s3": 19, "s4": 20,
    "s5": 21, "s6": 22, "s7": 23, "s8": 24, "s9": 25, "s10": 26, "s11": 27,
    "t3": 28, "t4": 29, "t5": 30, "t6": 31,
}


## Function to get the equivalent register's value
def get_reg_value(reg_name):
    '''Return the register number for a register name.

    Accepted spellings:
      * an ABI name such as "zero", "a0", "t3" or "fp" (case-sensitive),
      * "x<N>" / "X<N>" with N in 0..31,
      * a bare decimal string, returned as its integer value.

    Raises:
        ValueError: if reg_name is empty or is none of the forms above
            (this includes x-registers outside 0..31).
    '''
    # An empty string used to fail with IndexError on reg_name[0]; report it
    # with the same ValueError as every other invalid name.
    if reg_name == "":
        raise ValueError(f"Invalid register name/value: {reg_name}")

    if reg_name in _REG_ABI:
        return _REG_ABI[reg_name]

    if reg_name[0].lower() == 'x':
        index = reg_name[1:]
        # isdecimal() rejects signs and non-numeric suffixes (e.g. "xor"),
        # so int() cannot raise here; the bound keeps the result a real register.
        if index.isdecimal() and int(index) < 32:
            return int(index)
        raise ValueError(f"Invalid register name/value: {reg_name}")

    if reg_name.isdecimal():
        return int(reg_name)

    raise ValueError(f"Invalid register name/value: {reg_name}")


## FOR TESTING: Function to print the instructions
def print_asm_inst(inst_asm):
    '''Print a heading, then every entry of inst_asm on its own line.

    When inst_asm is empty, None is printed in place of the entries.
    '''
    print("Assembly Instructions:")
    if not len(inst_asm):
        print(None)
        return
    for entry in inst_asm:
        print(entry)

## FOR TESTING: Function to print the labels
def print_asm_labels(labels):
    '''Print the label table as two aligned columns: name | value.

    labels maps each label name to its value. The name column is as wide
    as the longest name (at least 5 characters, the width of the 'LABEL'
    heading); values are right-aligned in 5 characters. When labels is
    empty, None is printed instead of a table.
    '''
    print("Assembly Labels:")
    if not len(labels):
        print(None)
        return

    width = max(5, *(len(name) for name in labels))
    print(f"{'LABEL':<{width}} | {'VALUE':>5}")
    for name, value in labels.items():
        print(f"{name:<{width}} | {value:>5}")



inst_asm = [] # list to store instructions
labels   = {} # dict of label name -> address (rebound by seperate_labels() in Task 5)

## reads the assembly file into 'inst_asm' as a list of strings, one per line of the
## file with its line ending kept; Task 2 later splits each line into its arguments
filename = "example.asm"
inst_asm = read(filename)
print_asm_inst(inst_asm)

Assembly Instructions:
# Implement a RISC-V program to multiply two numbers using loop adder

main:

    lw   t3, 0(s1)

    addi a0, zero, 550   # load first operand

    addi a1, zero, 20    # load second operand

    addi t0, zero, 0   

    beq  a0, zero, done  # if the first operand is equal to 0, then goto done

    beq  a1, zero, done  # if the second operand is equal to 0, then goto done

loop:

    add  t0, a0, t0      

    addi a1, a1, -1

    bne  a1, zero, loop

done:

    add  a0, t0, zero

    sw   a0, 0(s1)



## **Task 1**
Implement a function to remove comments

_Note:_ A comment in assembly code starts with a '#' symbol. You are required to remove all comments from the code. (_Hint:_ Use Regular expressions)

In [None]:
def remove_comments(inst_asm):
    # -- enter your code here
    '''Return a new list with the '#' comment stripped from every line.

    Everything from the first '#' up to (not including) the line ending is
    dropped. Lines that held only a comment stay in the list, so the result
    has the same number of entries as the input.
    '''
    hash_to_eol = re.compile(r'#.*')
    return [hash_to_eol.sub('', text) for text in inst_asm]
    # -- end your code here

## -- Task 1 check: strip the comments from the raw lines and print what is left -- ##
inst_asm = remove_comments(inst_asm)
print_asm_inst(inst_asm)

Assembly Instructions:


main:

    lw   t3, 0(s1)

    addi a0, zero, 550   

    addi a1, zero, 20    

    addi t0, zero, 0   

    beq  a0, zero, done  

    beq  a1, zero, done  

loop:

    add  t0, a0, t0      

    addi a1, a1, -1

    bne  a1, zero, loop

done:

    add  a0, t0, zero

    sw   a0, 0(s1)



## **Task 2**
Implement a function to split each line (instruction or label) into separate arguments

_Note:_ Using `inst_asm` list as input, split each line into separate arguments. Possible delimiters are spaces, commas and parentheses. (_Hint:_ Use Regular expressions)

_Example:_

**addi a1, a2, 10**

changes to:

**['addi', 'a1', 'a2', '10']**




In [None]:
def split_arg(inst_asm):
    # -- enter your code here
    '''Tokenise every line into a list of its arguments.

    Runs of whitespace, commas and either parenthesis act as separators;
    empty fragments produced by adjacent separators are discarded. A line
    with no tokens yields an empty list.
    '''
    delimiters = re.compile(r'\s+|,|\(|\)')
    return [
        [token for token in delimiters.split(text) if token]
        for text in inst_asm
    ]
    # -- end your code here

## -- Task 2 check: split every line into its arguments and print the result -- ##
inst_asm = split_arg(inst_asm)
print_asm_inst(inst_asm)

Assembly Instructions:
[]
['main:']
['lw', 't3', '0', 's1']
['addi', 'a0', 'zero', '550']
['addi', 'a1', 'zero', '20']
['addi', 't0', 'zero', '0']
['beq', 'a0', 'zero', 'done']
['beq', 'a1', 'zero', 'done']
['loop:']
['add', 't0', 'a0', 't0']
['addi', 'a1', 'a1', '-1']
['bne', 'a1', 'zero', 'loop']
['done:']
['add', 'a0', 't0', 'zero']
['sw', 'a0', '0', 's1']


## **Task 3**
Implement a function to remove empty lines

_Note:_ Using `inst_asm` list as input, remove all the empty lists

In [None]:
def remove_empty(inst_asm):
    # -- enter your code here
    '''Return a new list holding only the non-empty entries of inst_asm.'''
    # filter(None, ...) keeps the truthy items, i.e. the non-empty lists.
    return list(filter(None, inst_asm))
    # -- end your code here

## -- Task 3 check: drop the empty lists and print the remaining lines -- ##
inst_asm = remove_empty(inst_asm)
print_asm_inst(inst_asm)

Assembly Instructions:
['start:']
['lw', 't2', '0', 's1']
['addi', 'a0', 'zero', '10']
['addi', 'a1', 'zero', '20']
['bge', 'a0', 'a1', 'done']
['add', 'a2', 'a1', 'zero']
['jal', 'ra', 'exit']
['done:']
['add', 'a2', 'a0', 'zero']
['jal', 'ra', 'exit']
['exit:']
['sw', 'a0', '0', 's1']


## **Task 4**
Implement a function to reorder arguments for load/save instructions

_Note:_ Load and save follow different instruction formats. Therefore, their arguments need to be re-ordered:

_Example:_ For `load` instructions -

**[instruction, rd, imm, rs1]**

should be changed to the following standard format:

**[instruction, rd, rs1, imm]**

_Example:_ For `store` instructions -

**[instruction, rs1, imm, rd]**

should be changed to the following standard format:

**[instruction, rd, rs1, imm]**

In [None]:
def loadsave_arg_reorder(inst_asm):
    # -- enter your code here
    '''Reorder load/store arguments into the lab's [op, rd, rs1, imm] layout.

    A load or store written as ``op reg, imm(base)`` arrives here split as
    [op, reg, imm, base]:
      * loads  become [op, reg, base, imm]
      * stores become [op, base, reg, imm]
    Label lines, single-token lines and every other instruction are passed
    through unchanged (the same list object is reused for them).

    Raises:
        ValueError: if a load/store does not have exactly three operands,
            so a malformed line is reported instead of failing with an
            IndexError or being silently truncated.
    '''
    # RV32I loads/stores plus the RV64I additions (lwu, ld, sd).
    load_ops = frozenset(('lb', 'lh', 'lw', 'lbu', 'lhu', 'lwu', 'ld'))
    store_ops = frozenset(('sb', 'sh', 'sw', 'sd'))

    reordered_asm = []
    for inst in inst_asm:
        # Labels and bare single-token lines are never load/store candidates.
        if len(inst) < 2 or ':' in inst[0]:
            reordered_asm.append(inst)
            continue

        op = inst[0]
        is_load = op in load_ops
        if not is_load and op not in store_ops:
            reordered_asm.append(inst)
            continue

        if len(inst) != 4:
            raise ValueError(
                f"Malformed load/store instruction (expected 'op reg, imm(base)'): {inst}"
            )

        _, reg, imm, base = inst
        if is_load:
            reordered_asm.append([op, reg, base, imm])
        else:
            reordered_asm.append([op, base, reg, imm])

    return reordered_asm
    # -- end your code here

## -- Task 4 check: reorder the load/store arguments and print the result -- ##
inst_asm = loadsave_arg_reorder(inst_asm)
print_asm_inst(inst_asm)

Assembly Instructions:
['start:']
['lw', 't2', 's1', '0']
['addi', 'a0', 'zero', '10']
['addi', 'a1', 'zero', '20']
['bge', 'a0', 'a1', 'done']
['add', 'a2', 'a1', 'zero']
['jal', 'ra', 'exit']
['done:']
['add', 'a2', 'a0', 'zero']
['jal', 'ra', 'exit']
['exit:']
['sw', 's1', 'a0', '0']


## **Task 5**
Implement a function to separate labels from instructions and save their equivalent value

_Note:_ Using `inst_asm` list as input, remove all lines that contain just labels to get the resulting list of only instructions. For the removed labels, calculate their equivalent address and store them in a dictionary called `labels`, where key will be the 'label name' and value will be its equivalent 'address'. Assume that the first instruction is stored at address 0 and every instruction requires 4 bytes; therefore, the second instruction will be at address 4, the third instruction will be at address 8 and so on. Also, a label's address is the same as the address of the first instruction that follows it.

In [None]:
def seperate_labels(inst_asm):
    '''Split the program into instructions and a label -> address table.

    A token ending in ':' at the start of a line is a label. Its address is
    that of the next instruction, counting from 0 in steps of 4 bytes.
    Labels may stand alone (['loop:']) or precede an instruction on the same
    line (['loop:', 'add', ...]); in the second case the label is recorded
    and the remaining tokens are kept as the instruction. Lines with no
    instruction tokens do not consume an address.

    Returns:
        (instructions, labels): the label-free instruction lists, and a dict
        mapping each label name (without the trailing ':') to its address.

    Raises:
        ValueError: if the same label name is defined more than once, since
            a silent overwrite would send branches to the wrong address.
    '''
    labels = dict() # dictionary to store the label name and its equivalent address as a key-value pair
    # -- enter your code here
    instructions = []
    address = 0  # address of the next instruction to be emitted

    for inst in inst_asm:
        # Consume every leading 'name:' token on this line.
        first = 0
        while (first < len(inst)
               and isinstance(inst[first], str)
               and inst[first].endswith(':')):
            name = inst[first].rstrip(':')
            if name in labels:
                raise ValueError(f"Duplicate label: {name}")
            labels[name] = address
            first += 1

        remainder = inst[first:]
        if remainder:
            instructions.append(remainder)
            address += 4

    return instructions, labels

    # -- end your code here

## -- Task 5 check: separate the labels from the instructions, then print both -- ##
inst_asm, labels = seperate_labels(inst_asm)
print_asm_inst(inst_asm)
print('-'*30)
print_asm_labels(labels)

Assembly Instructions:
['lw', 't2', 's1', '0']
['addi', 'a0', 'zero', '10']
['addi', 'a1', 'zero', '20']
['bge', 'a0', 'a1', 'done']
['add', 'a2', 'a1', 'zero']
['jal', 'ra', 'exit']
['add', 'a2', 'a0', 'zero']
['jal', 'ra', 'exit']
['sw', 's1', 'a0', '0']
------------------------------
Assembly Labels:
LABEL | VALUE
start |     0
done  |    24
exit  |    32


## **Task 6**
Implement a function to replace all integers in string format to `int` data type

_Note:_ Using `inst_asm` list as input, find integers in each instruction and change their data type from `string` to `int`

In [None]:
def replace_string_int(inst_asm):
    # -- enter your code here
    '''Convert integer literals held as strings into int values, in place.

    Recognised literals: optionally signed decimal ("20", "-1", "+5"),
    hexadecimal ("0x1F") and binary ("0b101"). Decimal literals with
    leading zeros are read as decimal. Anything else (mnemonics, register
    names, labels, malformed numbers such as "--5") is left untouched, and
    arguments that are not strings are skipped, so running the function a
    second time on its own output is harmless.

    The inner lists of inst_asm are modified in place and the same outer
    list is returned.
    '''
    # Anchored with fullmatch() below; ASCII digit classes on purpose so that
    # every accepted token is guaranteed to be parseable by int().
    int_literal = re.compile(r'[+-]?(?:0[xX][0-9a-fA-F]+|0[bB][01]+|[0-9]+)')

    for inst in inst_asm:
        for j, arg in enumerate(inst):
            if not isinstance(arg, str) or int_literal.fullmatch(arg) is None:
                continue
            unsigned = arg.lstrip('+-')
            # Base 0 lets int() honour the 0x/0b prefix; plain digits are
            # parsed as base 10 so that leading zeros stay legal.
            base = 0 if unsigned[:2].lower() in ('0x', '0b') else 10
            inst[j] = int(arg, base)

    return inst_asm
    # -- end your code here

## -- Task 6 check: convert the integer strings to int, then print instructions and labels -- ##
inst_asm = replace_string_int(inst_asm)
print_asm_inst(inst_asm)
print('-'*30)
print_asm_labels(labels)

Assembly Instructions:
['lw', 't2', 's1', 0]
['addi', 'a0', 'zero', 10]
['addi', 'a1', 'zero', 20]
['bge', 'a0', 'a1', 'done']
['add', 'a2', 'a1', 'zero']
['jal', 'ra', 'exit']
['add', 'a2', 'a0', 'zero']
['jal', 'ra', 'exit']
['sw', 's1', 'a0', 0]
------------------------------
Assembly Labels:
LABEL | VALUE
start |     0
done  |    24
exit  |    32


## **Task 7**
Implement a function to replace all the labels in instructions to their equivalent values

_Note:_ Using `inst_asm` list as input, find labels in each instruction and replace them with their equivalent value.

_Example:_ If there is an instruction **['bge', 'a0', 'a1', 'done']** at address 8 and label `done` is at address 20 (as calculated from Task 5), then the resulting instruction will be **['bge', 'a0', 'a1', 12]**

In [None]:
def replace_labels(inst_asm, labels):
    # -- enter your code here
    '''Return a copy of the program with label operands made PC-relative.

    Only the last argument of each instruction is examined. When it is a key
    of labels, it is replaced by (label address - instruction address),
    where the instruction at position i has address 4 * i. Every instruction
    is copied, so the input lists are not modified.
    '''
    resolved = []
    for position, inst in enumerate(inst_asm):
        target = inst[-1]
        if target in labels:
            offset = labels[target] - 4 * position
            resolved.append(inst[:-1] + [offset])
        else:
            resolved.append(inst[:])
    return resolved
    # -- end your code here

## -- Task 7 check: replace label operands with their relative values, then print instructions and labels -- ##
inst_asm = replace_labels(inst_asm, labels)
print_asm_inst(inst_asm)
print('-'*30)
print_asm_labels(labels)

Assembly Instructions:
['lw', 't2', 's1', 0]
['addi', 'a0', 'zero', 10]
['addi', 'a1', 'zero', 20]
['bge', 'a0', 'a1', 12]
['add', 'a2', 'a1', 'zero']
['jal', 'ra', 12]
['add', 'a2', 'a0', 'zero']
['jal', 'ra', 4]
['sw', 's1', 'a0', 0]
------------------------------
Assembly Labels:
LABEL | VALUE
start |     0
done  |    24
exit  |    32


## **Task 8**
Implement a function to replace all the register names with their equivalent values

_Note:_ Use the function `get_reg_value()`, already implemented for you to replace register names with their equivalent values

In [None]:
def replace_reg(inst_asm):
    # -- enter your code here
    '''Return a copy of the program with register names turned into numbers.

    Every string argument (the mnemonic included) is offered to
    get_reg_value(); when that raises ValueError the argument is kept as it
    is. Arguments that are not strings are copied through untouched.
    '''
    def _resolve(arg):
        # Non-strings are already numeric (or some other type): leave them.
        if not isinstance(arg, str):
            return arg
        try:
            return get_reg_value(arg)
        except ValueError:
            # Not a register name, e.g. the instruction mnemonic.
            return arg

    return [[_resolve(arg) for arg in inst] for inst in inst_asm]

    # -- end your code here

## -- Task 8 check: replace register names with their numbers, then print instructions and labels -- ##
inst_asm = replace_reg(inst_asm)
print_asm_inst(inst_asm)
print('-'*30)
print_asm_labels(labels)

Assembly Instructions:
['lw', 7, 9, 0]
['addi', 10, 0, 10]
['addi', 11, 0, 20]
['bge', 10, 11, 12]
['add', 12, 11, 0]
['jal', 1, 12]
['add', 12, 10, 0]
['jal', 1, 4]
['sw', 9, 10, 0]
------------------------------
Assembly Labels:
LABEL | VALUE
start |     0
done  |    24
exit  |    32


## **Task 9**
Implement a function to save the processed assembly code to a `.txt` file

In [None]:
def save_asm(inst_asm, filename):
    # -- enter your code here
    '''Write the program to filename, one instruction per line.

    Each instruction's arguments are converted with str() and joined by a
    single space; every line ends with a newline. An existing file is
    overwritten.
    '''
    with open(filename, 'w') as out:
        out.writelines(
            ' '.join(str(arg) for arg in inst) + '\n' for inst in inst_asm
        )

    # -- end your code here

## -- Task 9: save the final output to '<input name without its last 4 characters>_out1.txt' -- ##
## NOTE(review): filename[:-4] drops exactly four characters, which matches the '.asm'
## suffix used in this notebook; an input name with a different extension length
## would be cut in the wrong place.
save_asm(inst_asm, filename[:-4]+"_out1.txt")