Assembler for Nand2Tetris, Project 6

Purpose: Convert Hack Assembly files (symbolic) to Hack Machine Language files (binary)

In [1]:
# Colab-only step: files.upload() opens an upload widget for the .asm source to assemble
# (the cell output below shows Pong.asm being saved).
# NOTE(review): `uploaded` is not referenced again in the visible cells; later cells
# re-open the file by name instead.
from google.colab import files
uploaded = files.upload()

Saving Pong.asm to Pong.asm


In [2]:
def getFileString(fileName):
  """Return the complete text of the assembly file named fileName + '.asm'.

  fileName is the path WITHOUT the extension; '.asm' is appended here.
  The file is opened in text mode and read in a single call.
  """
  asmPath = fileName + '.asm'
  with open(asmPath, 'r') as asmFile:
      return asmFile.read()

def writeStringFile(fileName, HackStr):
  """Write HackStr to the file named fileName + '.hack'.

  fileName is the path WITHOUT the extension; '.hack' is appended here.
  The file is opened in 'w' mode, so an existing file of that name is overwritten.
  Returns None.
  """
  hackPath = fileName + '.hack'
  with open(hackPath, 'w') as hackFile:
      hackFile.write(HackStr)

In [3]:
# Base name (no extension) of the program to assemble: getFileString appends '.asm'
# when reading, and writeStringFile appends '.hack' when the result is exported.
fileName = "Pong"
fileString = getFileString(fileName)
print(fileString)  # echoes the whole source file into the cell output

// This file is part of www.nand2tetris.org
// and the book "The Elements of Computing Systems"
// by Nisan and Schocken, MIT Press.
// File name: projects/06/pong/Pong.asm

// The Pong game program was originally written in the high-level Jack language.
// The Jack code was then translated by the Jack compiler into VM code.
// The VM code was then translated by the VM translator into the Hack
// assembly code shown here.

@256
D=A
@SP
M=D
@133
0;JMP
@R15
M=D
@SP
AM=M-1
D=M
A=A-1
D=M-D
M=0
@END_EQ
D;JNE
@SP
A=M-1
M=-1
(END_EQ)
@R15
A=M
0;JMP
@R15
M=D
@SP
AM=M-1
D=M
A=A-1
D=M-D
M=0
@END_GT
D;JLE
@SP
A=M-1
M=-1
(END_GT)
@R15
A=M
0;JMP
@R15
M=D
@SP
AM=M-1
D=M
A=A-1
D=M-D
M=0
@END_LT
D;JGE
@SP
A=M-1
M=-1
(END_LT)
@R15
A=M
0;JMP
@5
D=A
@LCL
A=M-D
D=M
@R13
M=D
@SP
AM=M-1
D=M
@ARG
A=M
M=D
D=A
@SP
M=D+1
@LCL
D=M
@R14
AM=D-1
D=M
@THAT
M=D
@R14
AM=M-1
D=M
@THIS
M=D
@R14
AM=M-1
D=M
@ARG
M=D
@R14
AM=M-1
D=M
@LCL
M=D
@R13
A=M
0;JMP
@SP
A=M
M=D
@LCL
D=M
@SP
AM=M+1
M=D
@ARG
D=M
@SP
AM=M+1
M=D
@THIS
D

# File Preprocessing
First, we need to remove:
- comments (including inline comments)
- empty lines
- whitespace


In [4]:
import re
def removeComments(string):
    """Strip comments from Hack assembly text and return the result.

    Two comment forms are removed:
      * block comments  /* ... */  (may span several lines)
      * line comments   // ...     (up to the end of that line)

    `string` may be a single line or a whole multi-line file. Whitespace that
    preceded a removed comment is left in place; callers strip it separately.
    """
    # Raw pattern strings: "\*" inside a normal string literal is an invalid
    # escape sequence and triggers a warning on newer Python versions.
    string = re.sub(r"/\*.*?\*/", "", string, flags=re.DOTALL)
    # MULTILINE lets `$` match at the end of every line. Without it, `$` only
    # matches at the end of the whole string, so in multi-line input only a
    # comment on the last line was removed.
    string = re.sub(r"//.*?$", "", string, flags=re.MULTILINE)
    return string

In [5]:
lineList = fileString.split('\n')
print(lineList)
rawlines = []

for line in lineList:
  # Strip comments, then drop ALL whitespace: spaces, tabs, and a stray '\r'
  # left behind by split('\n') on CRLF files. str.split() with no argument
  # splits on any whitespace run, so joining the pieces removes it everywhere.
  cleanedLine = "".join(removeComments(line).split())
  # Test for emptiness only AFTER whitespace removal. Checking before it let
  # whitespace-only lines (e.g. an indented comment) through as '' entries,
  # which the second pass would then try to translate as instructions.
  if cleanedLine:
    rawlines.append(cleanedLine)

print(rawlines)

['// This file is part of www.nand2tetris.org', '// and the book "The Elements of Computing Systems"', '// by Nisan and Schocken, MIT Press.', '// File name: projects/06/pong/Pong.asm', '', '// The Pong game program was originally written in the high-level Jack language.', '// The Jack code was then translated by the Jack compiler into VM code.', '// The VM code was then translated by the VM translator into the Hack', '// assembly code shown here.', '', '@256', 'D=A', '@SP', 'M=D', '@133', '0;JMP', '@R15', 'M=D', '@SP', 'AM=M-1', 'D=M', 'A=A-1', 'D=M-D', 'M=0', '@END_EQ', 'D;JNE', '@SP', 'A=M-1', 'M=-1', '(END_EQ)', '@R15', 'A=M', '0;JMP', '@R15', 'M=D', '@SP', 'AM=M-1', 'D=M', 'A=A-1', 'D=M-D', 'M=0', '@END_GT', 'D;JLE', '@SP', 'A=M-1', 'M=-1', '(END_GT)', '@R15', 'A=M', '0;JMP', '@R15', 'M=D', '@SP', 'AM=M-1', 'D=M', 'A=A-1', 'D=M-D', 'M=0', '@END_LT', 'D;JGE', '@SP', 'A=M-1', 'M=-1', '(END_LT)', '@R15', 'A=M', '0;JMP', '@5', 'D=A', '@LCL', 'A=M-D', 'D=M', '@R13', 'M=D', '@SP', 'AM=M

# First Pass for Labels
Next, we must:

- Create a table (dictionary) for labels
- Add the pre-defined symbols to the table
- Each time we see a label declaration, e.g. `(LABEL)`, add a (label, instruction number) pair to the symbol table, where the number is that of the instruction following the label

In [6]:
# Symbol table shared by both passes, pre-loaded with the predefined symbols.
# Addresses are stored as decimal strings; Ainstr converts them with int().
labelTable = {"R" + str(regNum): str(regNum) for regNum in range(16)}  # R0..R15 -> "0".."15"
labelTable.update({
    "SCREEN": "16384",
    "KBD": "24576",
    "SP": "0",
    "LCL": "1",
    "ARG": "2",
    "THIS": "3",
    "THAT": "4",
})

# First pass: record each (LABEL) declaration and drop it from the instruction stream.
labelRemovedLines = []
lineIndex = 0  # number of real instructions kept so far, i.e. the address of the next one
for line in rawlines:
  if not line.startswith('('):
    # A real instruction: keep it and advance the instruction counter.
    labelRemovedLines.append(line)
    lineIndex += 1
    continue
  # A label declaration: strip the first and last character (the parentheses) and
  # bind the name to the current counter value. Declarations do not advance the counter.
  labelTable[line[1:-1]] = str(lineIndex)

print(labelRemovedLines)
print(labelTable)

['@256', 'D=A', '@SP', 'M=D', '@133', '0;JMP', '@R15', 'M=D', '@SP', 'AM=M-1', 'D=M', 'A=A-1', 'D=M-D', 'M=0', '@END_EQ', 'D;JNE', '@SP', 'A=M-1', 'M=-1', '@R15', 'A=M', '0;JMP', '@R15', 'M=D', '@SP', 'AM=M-1', 'D=M', 'A=A-1', 'D=M-D', 'M=0', '@END_GT', 'D;JLE', '@SP', 'A=M-1', 'M=-1', '@R15', 'A=M', '0;JMP', '@R15', 'M=D', '@SP', 'AM=M-1', 'D=M', 'A=A-1', 'D=M-D', 'M=0', '@END_LT', 'D;JGE', '@SP', 'A=M-1', 'M=-1', '@R15', 'A=M', '0;JMP', '@5', 'D=A', '@LCL', 'A=M-D', 'D=M', '@R13', 'M=D', '@SP', 'AM=M-1', 'D=M', '@ARG', 'A=M', 'M=D', 'D=A', '@SP', 'M=D+1', '@LCL', 'D=M', '@R14', 'AM=D-1', 'D=M', '@THAT', 'M=D', '@R14', 'AM=M-1', 'D=M', '@THIS', 'M=D', '@R14', 'AM=M-1', 'D=M', '@ARG', 'M=D', '@R14', 'AM=M-1', 'D=M', '@LCL', 'M=D', '@R13', 'A=M', '0;JMP', '@SP', 'A=M', 'M=D', '@LCL', 'D=M', '@SP', 'AM=M+1', 'M=D', '@ARG', 'D=M', '@SP', 'AM=M+1', 'M=D', '@THIS', 'D=M', '@SP', 'AM=M+1', 'M=D', '@THAT', 'D=M', '@SP', 'AM=M+1', 'M=D', '@4', 'D=A', '@R13', 'D=D+M', '@SP', 'D=M-D', '@ARG', 'M

# Second Pass: Creating binary Hack file
In the second pass, we identify which instructions are A or C instructions. 
For A instructions, we:
- replace labels with their assigned instruction numbers using the previously built label table
- add variables to the table, replace them with their register values (addresses in memory)
- convert these from decimal numbers to binary numbers

For C instructions, we:
- divide each instruction into its parts (computation bits, destination bits, and jump bits)
- use pre-defined tables to translate these to binary

In [7]:
# Jump mnemonic -> 3-bit jump field (codes defined by the Hack language designers,
# provided by the class). An instruction without a jump part uses "000"; see Cinstr.
jumpTable = dict(
  JGT="001",
  JEQ="010",
  JGE="011",
  JLT="100",
  JNE="101",
  JLE="110",
  JMP="111",
)
# Comp mnemonic -> 6-bit computation code (codes defined by the Hack language
# designers, provided by the class).
# Every computation that mentions A has an M twin with the same six bits; the
# leading "a" bit that tells them apart is added in Cinstr. The M entries are
# therefore generated from the A entries, each placed right after its A form.
compTable = {
  mnemonic: bits
  for aForm, bits in (
    ("0",   "101010"),
    ("1",   "111111"),
    ("-1",  "111010"),
    ("D",   "001100"),
    ("A",   "110000"),
    ("!D",  "001101"),
    ("!A",  "110001"),
    ("-D",  "001111"),
    ("-A",  "110011"),
    ("D+1", "011111"),
    ("A+1", "110111"),
    ("D-1", "001110"),
    ("A-1", "110010"),
    ("D+A", "000010"),
    ("D-A", "010011"),
    ("A-D", "000111"),
    ("D&A", "000000"),
    ("D|A", "010101"),
  )
  for mnemonic in ((aForm, aForm.replace("A", "M")) if "A" in aForm else (aForm,))
}

def Ainstr(symbolicStr, nextReg):
  """Translate one A-instruction ('@value') into its 16-bit binary string.

  symbolicStr: the instruction text; the first character ('@') is dropped and
    the remainder is either a decimal constant or a symbol (label or variable).
  nextReg: the address to give the next previously unseen variable.

  A symbol is resolved through the module-level labelTable. A symbol that is not
  in the table yet is treated as a new variable: it is stored in labelTable at
  nextReg and nextReg is advanced by one.

  Returns (binaryStr, nextReg): the opcode bit '0' followed by the 15-bit
  address, and the (possibly advanced) next free variable address.

  Raises ValueError if nothing follows the '@', or if the resolved address does
  not fit in 15 bits (0..32767).
  """
  instruction = symbolicStr[1:] #removes @
  if not instruction:
    # A bare '@' used to be silently allocated as a variable named ''.
    raise ValueError("A-instruction has no value or symbol: " + repr(symbolicStr))
  if not instruction.isnumeric():
    #is a symbol (variable or label), need to look in table for corresponding value or add to table
    if instruction not in labelTable:
      #new variable: assign to next available memory register and increment register
      labelTable[instruction] = str(nextReg)
      nextReg = nextReg + 1
    instruction = labelTable[instruction]
  address = int(instruction) #converts numeric string to int
  if not 0 <= address <= 32767:
    # '{0:015b}' only pads, it never truncates: a larger value would yield more
    # than 15 digits and therefore an instruction longer than 16 characters.
    raise ValueError("A-instruction address out of range 0..32767: " + repr(symbolicStr))
  binaryStr = '0' + '{0:015b}'.format(address) #opcode '0' + 15-bit address
  print(symbolicStr) #echo of the translated instruction, kept from the original behaviour
  return binaryStr, nextReg

def Cinstr(symbolicStr):
  """Translate one C-instruction ('dest=comp;jump') into its 16-bit binary string.

  Both 'dest=' and ';jump' are optional. Space characters are removed first.
  The result is '111' + a-bit + 6 comp bits + 3 dest bits + 3 jump bits.

  comp and jump are looked up in the module-level compTable / jumpTable, so an
  unknown mnemonic raises KeyError. The cleaned instruction is printed before
  the result is returned.
  """
  symbolicStr = symbolicStr.replace(" ", "") #remove whitespaces, just in case

  # Locate the two optional separators; find() returns -1 when one is absent.
  eqPos = symbolicStr.find('=')
  semiPos = symbolicStr.find(';')

  # dest: one bit per register, in the order A, D, M. The text before '=' may
  # name the registers in any order; with no '=' the field is "000".
  destStr = symbolicStr[:eqPos] if eqPos != -1 else ""
  dest = "".join("1" if register in destStr else "0" for register in "ADM")

  # jump: everything after ';' is looked up as-is; no ';' means no jump.
  if semiPos != -1:
    jump = jumpTable[symbolicStr[semiPos+1:]]
    compEnd = semiPos
  else:
    jump = "000"
    compEnd = len(symbolicStr)

  # comp: what lies between '=' and ';'. eqPos+1 is 0 when there is no '='.
  compStr = symbolicStr[eqPos+1:compEnd]
  aBit = "1" if "M" in compStr else "0" #the "a" bit selects M instead of A
  comp = aBit + compTable[compStr]

  print(symbolicStr)
  return "111" + comp + dest + jump #111 prefix as customary in C instructions (opcode + 11)

In [8]:
# Second pass: translate every remaining line into a 16-character binary string.
nextRegister = 16  # address handed to the first new variable Ainstr encounters
binaryLines = []
for line in labelRemovedLines:
  if not line.startswith('@'):
    # anything that is not an A-instruction is treated as a C-instruction
    newLine = Cinstr(line)
  else:
    # Ainstr may allocate a new variable, so it hands back the updated counter
    newLine, nextRegister = Ainstr(line, nextRegister)
  binaryLines.append(newLine)

print(binaryLines)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
M=D
@RET_ADDRESS_GT35
D=A
@22
0;JMP
@SP
AM=M-1
D=M
A=A-1
M=D|M
@SP
AM=M-1
D=M
@screen.drawrectangle$if_true0
D;JNE
@screen.drawrectangle$if_false0
0;JMP
@9
D=A
@SP
AM=M+1
A=A-1
M=D
@1
D=A
@R13
M=D
@sys.error
D=A
@R14
M=D
@RET_ADDRESS_CALL271
D=A
@95
0;JMP
@SP
AM=M-1
D=M
@R5
M=D
@ARG
A=M
D=M
@SP
AM=M+1
A=A-1
M=D
@16
D=A
@SP
AM=M+1
A=A-1
M=D
@2
D=A
@R13
M=D
@math.divide
D=A
@R14
M=D
@RET_ADDRESS_CALL272
D=A
@95
0;JMP
@SP
AM=M-1
D=M
@LCL
A=M+1
A=A+1
A=A+1
M=D
@ARG
A=M
D=M
@SP
AM=M+1
A=A-1
M=D
@LCL
D=M
@3
A=D+A
D=M
@SP
AM=M+1
A=A-1
M=D
@16
D=A
@SP
AM=M+1
A=A-1
M=D
@2
D=A
@R13
M=D
@math.multiply
D=A
@R14
M=D
@RET_ADDRESS_CALL273
D=A
@95
0;JMP
@SP
AM=M-1
D=M
A=A-1
M=M-D
@LCL
D=M
@7
D=D+A
@R13
M=D
@SP
AM=M-1
D=M
@R13
A=M
M=D
@ARG
A=M+1
A=A+1
D=M
@SP
AM=M+1
A=A-1
M=D
@16
D=A
@SP
AM=M+1
A=A-1
M=D
@2
D=A
@R13
M=D
@math.divide
D=A
@R14
M=D
@RET_ADDRESS_CALL274
D=A
@95
0;JMP
@SP
AM=M-1
D=M
@LCL
A=M+1
A=A+1
A=A+1
A=A+1
M=D
@ARG
A=M+1


In [9]:
# Export the translated program as a Hack machine language file:
# join the binary instructions one per line (no newline after the last one),
# echo the result, then write it to fileName + '.hack' via writeStringFile.
HackStr = '\n'.join(binaryLines)
print(HackStr)
writeStringFile(fileName, HackStr)

0000000100000000
1110110000010000
0000000000000000
1110001100001000
0000000010000101
1110101010000111
0000000000001111
1110001100001000
0000000000000000
1111110010101000
1111110000010000
1110110010100000
1111000111010000
1110101010001000
0000000000010011
1110001100000101
0000000000000000
1111110010100000
1110111010001000
0000000000001111
1111110000100000
1110101010000111
0000000000001111
1110001100001000
0000000000000000
1111110010101000
1111110000010000
1110110010100000
1111000111010000
1110101010001000
0000000000100011
1110001100000110
0000000000000000
1111110010100000
1110111010001000
0000000000001111
1111110000100000
1110101010000111
0000000000001111
1110001100001000
0000000000000000
1111110010101000
1111110000010000
1110110010100000
1111000111010000
1110101010001000
0000000000110011
1110001100000011
0000000000000000
1111110010100000
1110111010001000
0000000000001111
1111110000100000
1110101010000111
0000000000000101
1110110000010000
0000000000000001
1111000111100000
11111100000100