Algoritmos

In [14]:
# @title Record
#Abstract Record
class Record:
  """Abstract fixed-size record backed by a bytearray.

  Subclasses implement `create`, `write` and `read`; this base class only
  owns the buffer and the helpers that do not depend on the payload layout.
  """

  def __init__(self, record_size, value=None):
    """Allocate a zero-filled buffer of `record_size` bytes and store `value`.

    Storing is delegated to `write`, so instantiating the base class itself
    raises NotImplementedError.
    """
    self.byte_array = bytearray([0] * record_size)
    self.write(value)

  # creates a record from a bytearray
  @staticmethod
  def create(record_size, byte_array):
    """Build a record of `record_size` bytes from its serialized `byte_array`.

    Declared static so it behaves the same whether it is called on the
    class or on an instance.
    """
    raise NotImplementedError("Subclasses must implement create")

  def write(self, value):
    """Serialize `value` into the record buffer (subclass responsibility)."""
    raise NotImplementedError("Subclasses must implement write")

  def read(self):
    """Return the value decoded from the record buffer (subclass responsibility)."""
    raise NotImplementedError("Subclasses must implement read")

  def size(self):
    """Return the length of the record buffer in bytes."""
    return len(self.byte_array)

  def bytes(self):
    """Return a copy of the buffer, so callers cannot mutate the record."""
    return self.byte_array.copy()

  def __str__(self):
    return f"value:{self.read()}, capacity:{self.size()}"

In [15]:
# @title DataEntry
import struct
class DataEntry(Record):
  """Record holding a (name, age) pair.

  Buffer layout:
    bytes 0-3  age, packed with struct format 'I'
    byte  4    length in bytes of the UTF-8 encoded name
    bytes 5..  the UTF-8 encoded name
  """

  def __init__(self, record_size, name, age):
    """Allocate a zero-filled buffer of `record_size` bytes and store the pair.

    Record.__init__ is not called because it invokes write() with a single
    value, while this class's write() takes (name, age).
    """
    self.byte_array = bytearray([0] * record_size)
    self.write(name, age)

  @staticmethod
  def create(record_size, byte_array):
    """Build a DataEntry of `record_size` bytes from a serialized `byte_array`."""
    (name, age) = DataEntry.__unpack(byte_array)
    return DataEntry(record_size, name, age)

  def write(self, name, age):
    """Serialize `name` and `age` into the buffer.

    All validation and encoding happens before the buffer is touched, so a
    rejected write leaves the record unchanged.

    Raises:
      TypeError: if `name` is not a str or `age` is not an int.
      ValueError: if the encoded name does not fit in the record, or is
        longer than the 255 bytes the one-byte length field can describe.
      struct.error: if `age` cannot be packed with format 'I'.
    """
    if(not isinstance(name, str) or not isinstance(age, int)):
       raise TypeError(f"The name must be str and age must be int!")

    string_bytes = name.encode('utf-8')
    name_length = len(string_bytes)

    if (5+name_length) > len(self.byte_array):
      raise ValueError(f"Record overflow!")

    if name_length > 255:
      raise ValueError("The encoded name must not exceed 255 bytes!")

    # Pack first: an out-of-range age raises here, before any mutation.
    age_bytes = struct.pack('I', age)

    self.byte_array[:4] = age_bytes
    self.byte_array[4] = name_length
    self.byte_array[5:5+name_length] = string_bytes

  def read(self):
    """Return the stored pair as a tuple (name, age)."""
    return DataEntry.__unpack(self.byte_array)

  @staticmethod
  def __unpack(byte_array):
    """Decode (name, age) from a buffer that follows the class layout."""
    age = struct.unpack('I', byte_array[0:4])[0]
    # Byte 4 holds the name length; the name starts right after it.
    string_bytes = byte_array[5:5+byte_array[4]]
    return (string_bytes.decode('utf-8'), age)

In [16]:
# @title DataEntry test
# Build a 10-byte record, then print its decoded value and its raw bytes.
rec = DataEntry(10,"João", 46)
print(rec)
print(f"Bytes: {rec.bytes()}")
# Round trip: rebuild a second record from the first one's serialized bytes.
rec2 = DataEntry.create(10, rec.bytes())
print(rec2)

value:('João', 46), capacity:10
Bytes: bytearray(b'.\x00\x00\x00\x05Jo\xc3\xa3o')
value:('João', 46), capacity:10


In [17]:
# @title Block
import struct

class Block:
  """Fixed-size container of records that serializes to `block_size` bytes.

  Serialized layout: a 2-byte big-endian record count, followed by the
  bytes of each record back to back; the remainder is zero padding.
  """

  def __init__(self, block_size, record_size):
    """Create an empty block.

    Two bytes of the block are reserved for the record count, so the
    capacity is (block_size - 2) // record_size records.
    """
    self.records = []
    self.block_size = block_size
    self.record_size = record_size
    self.capacity = (block_size-2) // record_size

  # creates a block from a bytearray
  @staticmethod
  def create(block_size, record_size, byte_array, Record):
    """Rebuild a block from its serialized form.

    `Record` is the record class; its `create(record_size, bytes)` is
    called once per stored record.

    Raises:
      ValueError: if the stored count exceeds the block capacity.
    """
    block = Block(block_size, record_size)

    num_records = struct.unpack('>H', byte_array[:2])[0]
    pos = 2
    for _ in range(num_records):
      block.add(Record.create(record_size, byte_array[pos:pos+record_size]))
      pos += record_size

    return block

  def add(self, record):
    """Append `record`; raise ValueError when the block is full."""
    if self.size() >= self.capacity:
      raise ValueError("The block is full!")
    self.records.append(record)

  def addIndex(self, index, record):
    """Insert `record` at position `index`; raise ValueError when the block is full."""
    if self.size() >= self.capacity:
      raise ValueError("The block is full!")
    self.records.insert(index, record)

  def remove(self, keyPos, keyValue):
    """Remove the first record whose field `keyPos` equals `keyValue`.

    Returns the removed record, or None when no record matches.

    Raises:
      TypeError: if `keyPos` is not an int.
    """
    if(not isinstance(keyPos, int)):
       raise TypeError("The keyPos must be an int!")

    # search() yields an index (-1 when absent), so test it explicitly:
    # index 0 is a valid hit and -1 must not be used to address the list.
    index = self.search(keyPos, keyValue)
    if index >= 0:
      return self.records.pop(index)
    return None

  def removeIndex(self, index):
    """Remove and return the record at `index`; raise TypeError if it is not an int."""
    if(not isinstance(index, int)):
       raise TypeError("The index must be int!")
    return self.records.pop(index)

  def removeLast(self):
    """Remove and return the last record, or None when the block is empty."""
    if self.records:
      return self.records.pop()
    return None

  def read(self):
    """Return the decoded value of every record, one per line."""
    return "".join(f"{rec.read()}\n" for rec in self.records)

  #returns the position of the record within the block
  def search(self, keyPos, keyValue):
    """Return the index of the first record whose field `keyPos` equals `keyValue`, or -1.

    Raises:
      TypeError: if `keyPos` is not an int.
    """
    if(not isinstance(keyPos, int)):
       raise TypeError("The key must be an int!")

    for index, rec in enumerate(self.records):
      if rec.read()[keyPos] == keyValue:
        return index
    return -1

  # returns the list of records, within the range keyA (inclusive) and KeyB (exclusive)
  def rangeSearch(self, keyPos, keyValueA, keyValueB):
    """Return the records whose field `keyPos` lies in [keyValueA, keyValueB).

    Raises:
      TypeError: if `keyPos` is not an int.
    """
    if(not isinstance(keyPos, int)):
       raise TypeError("The keyPos must be an int!")

    return [rec for rec in self.records
            if keyValueA <= rec.read()[keyPos] < keyValueB]

  def getRecord(self, index):
    """Return the record at `index`."""
    return self.records[index]

  def getFirstRecord(self):
    """Return the first record."""
    return self.records[0]

  # Original (misspelled) name, kept so existing callers continue to work.
  getFirtRecord = getFirstRecord

  def getLastRecord(self):
    """Return the last record."""
    return self.records[-1]

  def size(self):
    """Return the number of records currently stored."""
    return len(self.records)

  def isFull(self):
    """Return True when the block holds `capacity` records."""
    return self.size()==self.capacity

  def isEmpty(self):
    """Return True when the block holds no records."""
    return self.size()==0

  def bytes(self):
    """Serialize the block into a zero-padded bytearray of `block_size` bytes."""
    byte_array = bytearray([0] * self.block_size)

    # Header: the record count packed into two big-endian bytes.
    byte_array[:2] = struct.pack('>H', self.size())
    pos = 2
    for rec in self.records:
      byte_array[pos:pos+rec.size()] = rec.bytes()
      pos += rec.size()

    return byte_array

In [18]:
# A 64-byte block of 12-byte records: capacity is (64 - 2) // 12 = 5.
dataBlock = Block(64, 12)
print(f"block capacity: {dataBlock.capacity}")
print(f"block size: {dataBlock.size()}")

# Fill the block with three (name, age) records.
dataBlock.add(DataEntry(12, "Ana", 28))
dataBlock.add(DataEntry(12, "Dade", 33))
dataBlock.add(DataEntry(12, "Felix", 17))


print(dataBlock.read())

print(f"getRecord(0): {dataBlock.getRecord(0)}")
# Key position 1 is the age field of the (name, age) tuple returned by read().
print(f"search: {dataBlock.search(1, 33)}")
print(f"range search(1, 28, 30):")
for rec in dataBlock.rangeSearch(1, 28, 30):
  print(rec)
print(f"block bytes: {dataBlock.bytes()}")

# Round trip: rebuild a block from the serialized bytes and print its records.
block2 = Block.create(64, 12, dataBlock.bytes(), DataEntry)
print(block2.read())

block capacity: 5
block size: 0
('Ana', 28)
('Dade', 33)
('Felix', 17)

getRecord(0): value:('Ana', 28), capacity:12
search: 1
range search(1, 28, 30):
value:('Ana', 28), capacity:12
block bytes: bytearray(b'\x00\x03\x1c\x00\x00\x00\x03Ana\x00\x00\x00\x00!\x00\x00\x00\x04Dade\x00\x00\x00\x11\x00\x00\x00\x05Felix\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
('Ana', 28)
('Dade', 33)
('Felix', 17)



In [19]:
# @title HeapFile
import os
class HeapFile:
  """Unordered (heap) file of fixed-size blocks stored in one binary file.

  Block `i` occupies bytes [i * block_size, (i + 1) * block_size) of the
  file. Every operation opens the file, reads or writes whole blocks and
  closes it again; only the block count is kept in memory.
  """

  def __init__(self, filename, block_size, record_size, Record, recordKeyId, create=False):
    """Open the heap file at `filename`, or create it with one empty block.

    Args:
      filename: path of the backing file.
      block_size: size in bytes of every block.
      record_size: size in bytes of every record.
      Record: record class used to decode the records of a block.
      recordKeyId: position of the key inside the tuple returned by the
        record's read().
      create: when True, any existing file is deleted and recreated.
    """
    self.filename = filename
    self.block_size = block_size
    self.record_size = record_size
    self.Record = Record
    self.keyPos = recordKeyId

    if create and os.path.exists(filename):
      os.remove(filename)

    file_size = os.path.getsize(filename) if os.path.exists(filename) else 0

    if file_size > 0:
      self.blocks = file_size // block_size
    else:
      # Missing or empty file: start with a single empty block so that
      # the "last block" always exists for add() and readLast().
      self.blocks = 1
      with open(self.filename, "wb") as file:
        file.write(Block(block_size, record_size).bytes())

  def add(self, record):
    """Append `record` to the last block, starting a new block when it is full."""
    target_id = self.blocks-1
    block = self.read(target_id)

    if block.isFull():
      target_id = self.blocks
      block = Block(self.block_size, self.record_size)

    block.add(record)
    self.write(target_id, block)
    # Update the count only after the write succeeded, so a failure above
    # cannot leave it pointing past the end of the file.
    self.blocks = target_id+1

  def addNewBlock(self):
    """Append an empty block at the end of the file."""
    block = Block(self.block_size, self.record_size)
    self.write(self.blocks, block)
    self.blocks+=1

  def remove(self, key):
    """Remove the first record whose key equals `key`; do nothing if absent."""
    (block_id, rec_id) = self.search(key)
    if block_id>=0:
      block = self.read(block_id)
      block.removeIndex(rec_id)
      self.write(block_id, block)

  def scan(self, output=True):
    """Read every block in order, printing its records when `output` is True."""
    for i in range(self.blocks):
      content = self.read(i).read()
      if output:
        print(content)

  # returns the position (block id, record id) of the record containing the key
  def search(self, key):
    """Return (block_id, record_id) of the first record matching `key`, or (-1, -1)."""
    for i in range(self.blocks):
      rec_id = self.read(i).search(self.keyPos, key)
      if rec_id>=0:
        return (i, rec_id)
    return (-1, -1)

  # returns a list of records, within the range keyA (inclusive) and KeyB (exclusive)
  def rangeSearch(self, keyA, keyB):
    """Return every record whose key lies in [keyA, keyB), in file order."""
    result = []
    for i in range(self.blocks):
      result += self.read(i).rangeSearch(self.keyPos, keyA, keyB)
    return result

  def write(self, block_id, block):
    """Write `block` at position `block_id`; raise TypeError if it is not a Block."""
    if(not isinstance(block, Block)):
       raise TypeError(f"The block must by of Block type!")

    # "r+b" updates in place; "wb" would truncate the other blocks.
    with open(self.filename, "r+b") as file:
      file.seek(self.block_size * (block_id))
      file.write(block.bytes())

  def readFirst(self):
    """Return the first block of the file."""
    return self.read(0)

  def readLast(self):
    """Return the last block of the file."""
    return self.read(self.blocks-1)

  def read(self, block_id):
    """Read and decode the block stored at position `block_id`."""
    # Read-only access is enough here, so the file is not opened for update.
    with open(self.filename, "rb") as file:
      file.seek(self.block_size *(block_id))
      byte_array = file.read(self.block_size)
    return Block.create(self.block_size, self.record_size, byte_array, self.Record)



In [20]:
# @title HeapFile test
# Open (or create) a heap file of 64-byte blocks holding 12-byte records,
# keyed on position 1 of the record tuple (the age of a DataEntry).
filename  = "heapfile.bin"
fileType = HeapFile(filename, 64, 12, DataEntry, 1)
# Block capacity is (64 - 2) // 12 = 5 records, so adding seven records
# to a fresh file spills into a second block.
fileType.add(DataEntry(12, "V1", 18))
fileType.add(DataEntry(12, "V2", 28))
fileType.add(DataEntry(12, "V3", 32))
fileType.add(DataEntry(12, "V5", 56))
fileType.add(DataEntry(12, "V6", 81))
fileType.add(DataEntry(12, "V7", 14))
fileType.add(DataEntry(12, "V8", 17))

fileType.scan()

print(f"blocks:{fileType.blocks}")

block1 = fileType.readFirst()
print(f"block1.getRecord(0)={block1.getRecord(0)}")

block2 = fileType.readLast()
print(f"block2.getRecord(1)={block2.getRecord(1)}")

# NOTE(review): the label printed below says "(1, 28, 30)", but the call
# actually searches keys in [18, 32) — the label looks stale.
print(f"range search(1, 28, 30):")
for rec in fileType.rangeSearch(18, 32):
  print(rec)

# Clean up the backing file so the next run starts from scratch.
os.remove(filename)

('V1', 18)
('V2', 28)
('V3', 32)
('V5', 56)
('V6', 81)

('V7', 14)
('V8', 17)

blocks:2
block1.getRecord(0)=value:('V1', 18), capacity:12
block2.getRecord(1)=value:('V8', 17), capacity:12
range search(1, 28, 30):
value:('V1', 18), capacity:12
value:('V2', 28), capacity:12
