# In [2]:
import re

# Mnemonics recognised by the assembler, grouped by encoding format.
r_type = [
    'add', 'sub', 'addu', 'subu', 'madd', 'maddu',
    'mul', 'and', 'or', 'not', 'xor', 'slt',
]
i_type = [
    'addi', 'addiu', 'andi', 'ori', 'xori',
    'sll', 'srl', 'sla', 'sra',
    'lw', 'sw', 'lui',
    'beq', 'bne', 'bgt', 'bgte', 'blt', 'bleq', 'bleu', 'bgtu',
    'slti', 'seq',
]
j_type = ['j', 'jr', 'jal']

# funct-field value for each R-type mnemonic (values 1..12, following the
# ALUControl numbering referred to by the original author).
r_type_funct = {
    'add': 1,
    'sub': 3,
    'addu': 2,
    'subu': 4,
    'madd': 5,
    'maddu': 6,
    'mul': 7,
    'and': 8,
    'or': 9,
    'not': 10,
    'xor': 11,
    'slt': 12,
}

# I-type opcodes are 0b100000 plus the mnemonic's 1-based position in i_type.
i_type_opcode = {
    name: 0b100000 + position
    for position, name in enumerate(i_type, start=1)
}

# J-type opcodes are assigned explicitly.
j_type_opcode = {
    'j': 0b010001,
    'jr': 0b010010,
    'jal': 0b010011,
}

# Register name -> register number.
registers = {'$zero': 0}
registers.update((f'$s{n}', n + 1) for n in range(8))    # $s0-$s7 -> 1-8
registers.update((f'$t{n}', n + 9) for n in range(9))    # $t0-$t8 -> 9-17
registers.update((f'$a{n}', n + 18) for n in range(4))   # $a0-$a3 -> 18-21
registers['$ra'] = 31  # return-address register, added for jal

def to_bin(value, width):
    """Render ``value`` as a binary string zero-padded to ``width`` digits.

    Negative values are emitted in two's-complement form, and anything that
    needs more than ``width`` bits is reduced modulo ``2**width``. The result
    carries no '0b' prefix.
    """
    modulus = 1 << width
    # Shift negatives into the non-negative range before masking.
    wrapped = value + modulus if value < 0 else value
    return f'{wrapped & (modulus - 1):0{width}b}'

def _parse_int(text):
    """Convert a numeric operand to int, accepting decimal or 0x/0o/0b forms."""
    try:
        return int(text, 0)
    except ValueError:
        # Base-0 parsing rejects zero-padded decimals such as '010'; fall back
        # to plain base-10 so those keep working.
        return int(text)


def _register_number(token):
    """Map a register operand ('$s0', '$ra', '$5', ...) to its 5-bit number."""
    # Look the name up first. Doing the numeric parse as the default argument
    # of dict.get() would run it eagerly and fail for every named register.
    if token in registers:
        return registers[token]
    try:
        number = int(token.replace('$', ''))
    except ValueError:
        raise ValueError(f"Unknown register: {token}") from None
    if not 0 <= number <= 31:
        raise ValueError(f"Register number out of range (0-31): {token}")
    return number


def parse_instruction(instruction):
    """Parse one line of MIPS-style assembly into a 32-character bit string.

    Anything after '#' is treated as a comment. Operands may be separated by
    commas and/or whitespace. Registers may be given by name (a key of
    ``registers``) or by number ('$5'). Numeric operands may be decimal or
    use a 0x/0o/0b prefix.

    Encodings produced:
        R-type  [opcode=0:6][rs:5][rt:5][rd:5][shamt=0:5][funct:6]
                syntax ``op $rd, $rs, $rt`` (``not $rd, $rs`` uses $zero as rt)
        I-type  [opcode:6][rs:5][rt:5][imm:16]
                ``lw/sw $rt, offset($rs)``, ``lui $rt, imm``,
                ``branch $rs, $rt, offset``, and ``op $rt, $rs, imm`` for
                everything else, including the shifts
        J-type  [opcode:6][address >> 2 : 26] for j/jal,
                [opcode:6][rs:5][0:21] for jr

    Args:
        instruction: A single source line.

    Returns:
        The encoding as a string of 32 '0'/'1' characters, or None when the
        line is blank or contains only a comment.

    Raises:
        ValueError: Unknown mnemonic, wrong operand count, malformed memory
            operand, unknown or out-of-range register, non-numeric immediate,
            or an immediate that does not fit in 16 bits.
    """
    # Drop any trailing comment, then surrounding whitespace.
    instruction = instruction.split('#')[0].strip()
    if not instruction:
        return None

    tokens = [t for t in re.split(r'[,\s]+', instruction) if t]
    if not tokens:
        return None

    inst = tokens[0].lower()
    args = tokens[1:]

    if inst in r_type:
        if inst == 'not':
            # Unary: not $rd, $rs. The unused rt field is filled with $zero.
            if len(args) != 2:
                raise ValueError(f"R-type 'not' expects 2 args, got: {args}")
            rd, rs = args
            rt = '$zero'
        else:
            if len(args) != 3:
                raise ValueError(f"R-type expects 3 args, got: {args}")
            rd, rs, rt = args

        return (to_bin(0, 6) +
                to_bin(_register_number(rs), 5) +
                to_bin(_register_number(rt), 5) +
                to_bin(_register_number(rd), 5) +
                to_bin(0, 5) +
                to_bin(r_type_funct[inst], 6))

    if inst in i_type:
        opcode = i_type_opcode[inst]

        if inst in ('lw', 'sw'):
            # Format: lw/sw $rt, offset($rs)
            if len(args) != 2:
                raise ValueError(f"{inst} expects 2 args, got: {args}")
            rt, offset_rs = args
            # fullmatch so trailing junk after the closing ')' is rejected.
            match = re.fullmatch(
                r'(-?(?:0[xX][0-9a-fA-F]+|\d+))\((\$\w+)\)', offset_rs)
            if not match:
                raise ValueError(f"Invalid memory format: {offset_rs}")
            imm = _parse_int(match.group(1))
            rs = match.group(2)
        elif inst == 'lui':
            # Format: lui $rt, imm (rs is unused and encoded as $zero)
            if len(args) != 2:
                raise ValueError(f"lui expects 2 args, got: {args}")
            rt, imm = args
            rs = '$zero'
            imm = _parse_int(imm)
        elif inst in ('beq', 'bne', 'bgt', 'bgte', 'blt', 'bleq', 'bleu', 'bgtu'):
            # Format: branch $rs, $rt, offset
            if len(args) != 3:
                raise ValueError(f"{inst} expects 3 args, got: {args}")
            rs, rt, imm = args
            imm = _parse_int(imm)
        else:
            # Format: inst $rt, $rs, imm
            # Shifts (sll/srl/sla/sra) use this layout too: destination in
            # rt, source in rs, shift amount in the immediate field, so the
            # source operand is actually encoded.
            if len(args) != 3:
                raise ValueError(f"{inst} expects 3 args, got: {args}")
            rt, rs, imm = args
            imm = _parse_int(imm)

        # Accept both the signed and unsigned 16-bit ranges; anything wider
        # would be silently truncated by to_bin and assemble to wrong code.
        if not -(1 << 15) <= imm < (1 << 16):
            raise ValueError(f"Immediate does not fit in 16 bits: {imm}")

        return (to_bin(opcode, 6) +
                to_bin(_register_number(rs), 5) +
                to_bin(_register_number(rt), 5) +
                to_bin(imm, 16))

    if inst in j_type:
        opcode = j_type_opcode[inst]
        if inst == 'jr':
            # jr $rs: register number in the rs field, remaining 21 bits zero.
            if len(args) != 1:
                raise ValueError(f"jr expects 1 arg, got: {args}")
            return (to_bin(opcode, 6) +
                    to_bin(_register_number(args[0]), 5) +
                    to_bin(0, 21))

        # j / jal: byte address, stored as a word address (low 2 bits dropped).
        if len(args) != 1:
            raise ValueError(f"{inst} expects 1 arg, got: {args}")
        addr = _parse_int(args[0]) >> 2
        return to_bin(opcode, 6) + to_bin(addr, 26)

    raise ValueError(f"Unknown instruction: {inst}")

def assemble(program):
    """Assemble newline-separated source text into a list of bit strings.

    Every line is handed to ``parse_instruction``. Lines that produce nothing
    (blank or comment-only) are skipped. A line that raises is reported on
    stdout and left out of the result; assembly then continues with the next
    line.
    """
    encoded = []
    for line in program.split('\n'):
        try:
            word = parse_instruction(line)
        except Exception as e:
            print(f"Error in line '{line}': {e}")
            continue
        if word:
            encoded.append(word)
    return encoded

# Demo: assemble a small sample program and print each encoded word in
# binary and hexadecimal.
if __name__ == "__main__":
    program = """
    addi $s0, $zero, 5
    add $s1, $s0, $s0
    beq $s0, $s1, 4
    j 0x1000
    lw $t0, 4($s0)
    sll $t1, $t0, 2
    jal 0x2000
    jr $ra
    """
    binary = assemble(program)
    # Build the report lines lazily, then emit them one per print call.
    report = (
        f"Instruction {i}: {bin_inst} (hex: {hex(int(bin_inst, 2))})"
        for i, bin_inst in enumerate(binary)
    )
    for row in report:
        print(row)


# hello
