In [90]:
import os.path
import sys
class Database():
    """A tiny persistent key/value store for string keys and string values.

    Records are appended to a single binary data file. Each record is laid
    out as::

        [1 byte: key length][1 byte: value length][key bytes][value bytes]

    Keys and values are UTF-8 encoded, so each is limited to 255 encoded
    bytes. The in-memory ``index`` dict maps every key to the byte offset of
    its record. ``close()`` saves the index as text in ``<file>.idx`` (one
    ``key:offset`` line per key) and the constructor reloads it when the data
    file already exists.

    Known limitations:
      * ``set`` on an existing key appends a new record and repoints the
        index; the superseded record stays in the data file.
      * A key containing a newline cannot be round-tripped through the
        line-based index file.
      * There is no locking of any kind around file or index access.
    """

    # Largest length that fits in the one-byte size fields of a record.
    _MAX_FIELD_BYTES = 255

    def __init__(self, file, verbose=True):
        """Open the database stored at ``file``, creating it if necessary.

        Args:
            file: Path of the data file. The index is read from ``file + ".idx"``
                when the data file already exists.
            verbose: When true (the default), ``set`` and ``get`` print the
                file offsets they work with.

        Raises:
            FileNotFoundError: The data file exists but its ``.idx`` file does not.
            ValueError: A line of the index file is not of the form ``key:offset``.
        """
        self.file = file
        self.byteorder = sys.byteorder
        self.verbose = verbose
        if not os.path.exists(file):
            # "x" makes creation exclusive: fail rather than clobber a file
            # that appeared between the exists() check and the open().
            self.fd = open(file, "xb+", buffering=0)
            self.index = {}
        else:
            self.fd = open(file, "r+b", buffering=0)
            self.index = self._load_index()
        self.readptr = self.fd.tell()
        # New records are always appended at the current end of the file.
        self.fd.seek(0, 2)
        self.writeptr = self.fd.tell()

    def _load_index(self):
        """Parse ``<file>.idx`` into a ``{key: offset}`` dict."""
        index = {}
        with open(self.file + ".idx") as fdi:
            for line in fdi:
                line = line.rstrip("\n")
                if not line:
                    continue
                # Split on the LAST colon so keys may themselves contain ':'.
                key, sep, offset = line.rpartition(":")
                if not sep:
                    raise ValueError("Malformed index line: {!r}".format(line))
                index[key] = int(offset)
        return index

    def _encode_field(self, label, text):
        """Validate ``text`` and return ``(size_byte, utf8_bytes)`` for a record."""
        if not isinstance(text, str):
            raise ValueError("{} must be a string".format(label))
        data = text.encode('utf-8')
        if len(data) > self._MAX_FIELD_BYTES:
            # Same exception type int.to_bytes(1, ...) raises, but with a
            # message that explains the actual limit.
            raise OverflowError(
                "{} is {} bytes once UTF-8 encoded; at most {} are supported".format(
                    label, len(data), self._MAX_FIELD_BYTES))
        return len(data).to_bytes(1, byteorder=self.byteorder), data

    def _write_all(self, data):
        """Write every byte of ``data`` at the current position of ``self.fd``.

        The file is opened unbuffered, and a raw write may accept fewer bytes
        than requested, so keep writing until everything is out.
        """
        view = memoryview(data)
        while len(view):
            written = self.fd.write(view)
            if not written:
                raise OSError("Could not write to {}".format(self.file))
            view = view[written:]

    def _lookup(self, x):
        """Return the record offset of key ``x`` or raise ``ValueError``."""
        try:
            return self.index[x]
        except (KeyError, TypeError):
            # TypeError covers unhashable keys, which can never be present.
            raise ValueError("{} is not in index".format(x)) from None

    def _read_sizes(self):
        """Read the two one-byte size fields at the current file position."""
        sz_k = int.from_bytes(self.fd.read(1), byteorder=self.byteorder)
        sz_v = int.from_bytes(self.fd.read(1), byteorder=self.byteorder)
        return sz_k, sz_v

    def set(self, x, v):
        """Append a record mapping key ``x`` to value ``v`` and index it.

        Args:
            x: Key; a ``str`` of at most 255 UTF-8 bytes.
            v: Value; a ``str`` of at most 255 UTF-8 bytes.

        Raises:
            ValueError: ``x`` or ``v`` is not a string.
            OverflowError: ``x`` or ``v`` is longer than 255 encoded bytes.
            OSError: The record could not be written; the index is restored
                to its previous state before the error propagates.
        """
        sz_x, bin_x = self._encode_field("Key", x)
        sz_v, bin_v = self._encode_field("Value", v)

        missing = object()
        previous = self.index.get(x, missing)
        self.index[x] = self.writeptr
        try:
            self.fd.seek(self.writeptr)
            if self.verbose:
                print("currently", self.fd.tell())
            self._write_all(sz_x + sz_v + bin_x + bin_v)
        except BaseException:
            # Roll the index back to exactly what it was: drop a brand-new
            # key, but keep pointing at the old record of an existing one.
            if previous is missing:
                del self.index[x]
            else:
                self.index[x] = previous
            raise
        self.writeptr = self.fd.tell()

    def get(self, x):
        """Return the value stored for key ``x``.

        Raises:
            ValueError: ``x`` is not in the index.
        """
        offset = self._lookup(x)
        if self.verbose:
            print("offset is", offset)
        self.readptr = offset
        self.fd.seek(self.readptr)
        sz_k, sz_v = self._read_sizes()
        # Skip over the key bytes; only the value is returned.
        self.fd.seek(sz_k, 1)
        readit = self.fd.read(sz_v).decode('utf-8')
        if self.verbose:
            print("now", self.fd.tell())
        return readit

    def delete(self, x):
        """Remove the record of key ``x`` and compact the data file.

        Everything stored after the record is moved down over it, the file is
        truncated, and the offsets of all later records are shifted to match.

        Raises:
            ValueError: ``x`` is not in the index.
        """
        offset = self._lookup(x)
        self.readptr = offset
        self.fd.seek(self.readptr)
        sz_k, sz_v = self._read_sizes()
        record_size = sz_k + sz_v + 2  # payload plus the two size bytes

        # Slide the tail of the file down over the deleted record.
        self.fd.seek(sz_k + sz_v, 1)
        tail = self.fd.read()
        self.fd.seek(self.readptr)
        self._write_all(tail)
        self.fd.truncate()

        # Only touch the index once the file has actually been rewritten.
        del self.index[x]
        for key, position in self.index.items():
            if position > offset:
                self.index[key] = position - record_size

        # The file just shrank, so the append position must follow it;
        # otherwise the next set() would seek past EOF and leave a hole.
        self.writeptr = self.fd.tell()

    def close(self):
        """Save the index to ``<file>.idx`` and close the data file."""
        try:
            with open(self.file + ".idx", "w") as fdi:
                fdi.write("\n".join(k + ":" + str(v) for k, v in self.index.items()))
        finally:
            # Release the data file even if the index could not be saved.
            self.fd.close()

    def __enter__(self):
        """Allow ``with Database(path) as db:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Persist the index and close the file when the ``with`` block ends."""
        self.close()
        return False

    def __del__(self):
        """Close the data file on garbage collection (the index is NOT saved)."""
        # __init__ may have failed before self.fd was assigned.
        fd = getattr(self, "fd", None)
        if fd is not None:
            fd.close()

In [91]:
!rm /tmp/test.db

In [92]:
# Create a fresh database and append two records to it.
db = Database("/tmp/test.db")
# NOTE(review): this bare expression is not the last line of the cell, so its value is not displayed.
db.index
db.set("test1", "val1")
db.set("test2", "val2")

currently 0
currently 11


In [93]:
# Delete the first record; the records stored after it are moved down in the file.
db.delete("test1")

In [94]:
# Inspect the in-memory index: the remaining key should now point at offset 0.
db.index

{'test2': 0}

In [95]:
# Read the surviving record back from its shifted offset.
db.get("test2")

offset is 0
now 11


'val2'

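Concurrent access would break this design: `delete` rewrites the tail of the data file and shifts the offsets of all later records in the index, so another reader or writer using the file at the same time could follow a stale offset or read data that is only partly moved.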

## Today's exercises

1. Implement deletion (to submit next Monday)
2. Think about concurrency issues in our little database