In [None]:
# This study contains a Python implementation of the IEEE 754 floating-point
# standard, specifically the 32-bit single-precision format (float32).

In [None]:
# Helper functions
import math


def limit_bits(bits: int, limit: int) -> int:
    """Keep only the `limit` least significant bits of `bits`.

    Args:
        bits: integer whose low bits are kept.
        limit: number of low-order bits to keep.

    Returns:
        `bits` with every bit above position `limit - 1` cleared.
    """
    # -1 << limit has zeros in the low `limit` positions and ones above;
    # inverting it yields the low-bit mask.
    low_mask = ~(-1 << limit)
    return bits & low_mask


def print_bits(bits: int) -> list[int]:
    """Return the bits of `bits` as a list, least significant bit first.

    Args:
        bits: non-negative integer to decompose.

    Returns:
        A list of 0/1 values from least to most significant bit, without
        leading (most significant) zeros. Zero yields an empty list.

    Raises:
        ValueError: if `bits` is negative. An arithmetic right shift never
            brings a negative Python int to 0, so the loop would not end.
    """
    if bits < 0:
        raise ValueError("print_bits expects a non-negative integer")

    result = []

    while bits != 0:
        result.append(bits & 1)
        bits >>= 1

    return result

def str_bits(bits: int) -> str:
    """Render `bits` as a string of '0'/'1' characters, least significant first.

    The digits come from print_bits, so zero renders as the empty string.
    """
    return "".join(str(bit) for bit in print_bits(bits))

def concat_bits(bits: list[int]) -> str:
    """Concatenate the string form of every element of `bits`, in order."""
    pieces = [str(bit) for bit in bits]
    return "".join(pieces)


def format_bits(bits: str) -> str:
    """Create a readable, big endian bitstring from str_bits-style input.

    Args:
        bits: bit characters ordered least significant first.

    Returns:
        The characters in most-significant-first order with "_" between
        groups of four, counted from the least significant end. No
        separator is emitted at either end of the string, even when the
        length is a multiple of four.
    """
    # Group while still little endian so the groups align on the low bits.
    groups = [bits[start:start + 4] for start in range(0, len(bits), 4)]
    # Reversing the joined string flips both the group order and the bits
    # inside each group, giving the big endian rendering.
    return "_".join(groups)[::-1]


def pad_bits(bits: list[int], size: int) -> list[int]:
    """Return a copy of `bits` extended with trailing zeros up to `size` items.

    The input list is never modified. When it already holds `size` or more
    items, the copy is returned unchanged (nothing is truncated).
    """
    missing = size - len(bits)
    return list(bits) + [0] * max(missing, 0)


def int_to_base(number: int, base: int) -> list[int]:
    """Convert a number to a list of little endian digits in `base`.

    Args:
        number: value to convert; values <= 0 produce an empty list.
        base: radix of the output digits, at least 2.

    Returns:
        The digits of `number`, least significant first.

    Raises:
        ValueError: if `base` is smaller than 2. Base 1 would never shrink
            `number` (endless loop) and base 0 would divide by zero.
    """
    if base < 2:
        raise ValueError(f"base must be at least 2, got {base}")

    result = []

    while number > 0:
        number, digit = divmod(number, base)
        result.append(digit)

    return result

def base_to_int(digits: list[int], base: int) -> int:
    """Convert little endian `digits` in `base` to a Python integer.

    The digit at index i carries the weight base**i; an empty list gives 0.
    """
    return sum(digit * base ** position for position, digit in enumerate(digits))

def decimal_to_base(decimal: float, base: int) -> list[int]:
    """Expand a fractional value into its digits after the radix point.

    Repeatedly multiplies by `base`, peels off the integer part as the next
    digit and continues with what is left, until nothing remains.

    Args:
        decimal: the value to expand; the loop only runs while it is > 0.
        base: radix of the produced digits.

    Returns:
        The digits in the order they were produced (first digit after the
        radix point first).
    """
    digits = []
    remainder = decimal

    while remainder > 0:
        scaled = remainder * base
        whole = math.floor(scaled)
        digits.append(whole)
        remainder = scaled - whole

    return digits

def str_to_int(value: str) -> int:
    """Parse a string of ASCII decimal digits into an integer.

    Args:
        value: digits '0'-'9' only; the empty string parses as 0.

    Returns:
        The non-negative integer spelled by `value`.

    Raises:
        ValueError: if `value` contains anything other than '0'-'9'
            (signs, dots, spaces, letters, non-ASCII digits).
    """
    total = 0
    for c in value:
        digit = ord(c) - ord('0')
        # Without this check a stray character would be folded into the
        # total as a bogus (possibly negative) digit.
        if not 0 <= digit <= 9:
            raise ValueError(f"invalid decimal digit {c!r} in {value!r}")
        total = total * 10 + digit

    return total

def custom_floor(number: float) -> int:
    """Return the largest integer that is <= `number`, without math.floor.

    Args:
        number: a finite number.

    Returns:
        The floor of `number` as an int.

    Raises:
        OverflowError: if `number` is infinite (raised by int()).
        ValueError: if `number` is NaN (raised by int()).
    """
    # int() truncates toward zero, which already equals the floor for
    # non-negative values and for exact integers.
    whole = int(number)

    # For negative non-integers truncation rounds up, so step down once.
    if number < whole:
        whole -= 1

    return whole


# print(decimal_to_base(0.52, 2))
# int_to_base(52, 10)
# custom_floor(1.2)

In [None]:
# FOR MORE INFORMATION ON THE ALGORITHMS
# REFER TO https://www.rfwireless-world.com/Tutorials/floating-point-tutorial.html

class Float32:
    """
    IEEE 754 single-precision (binary32) floating point implementation,
    albeit without the arithmetic.

    The value is kept in ``bits`` as an unsigned 32-bit pattern laid out as
    ``sign (1 bit) | exponent (8 bits) | mantissa (23 bits)``.
    """

    bits: int

    def __init__(self, sign: int, exponent: int, mantissa: int):
        """
        Pack the three raw fields into ``self.bits``.

        Each field is truncated to its width (1, 8 and 23 bits), so
        out-of-range values wrap around instead of raising.

        Args:
            sign: 0 for positive, 1 for negative.
            exponent: biased exponent field (0..255).
            mantissa: fraction field without the implicit leading 1.
        """
        # mantissa has 23 bits, exponent 8 bits, sign 1 bit
        # ordered like
        # sign | exponent | mantissa
        self.bits = (
            limit_bits(mantissa, 23)
            + (limit_bits(exponent, 8) << 23)
            + (limit_bits(sign, 1) << (23 + 8))
        )

    ### BUILDER FUNCTIONS ###
    @staticmethod
    def from_str(value: str) -> 'Float32':
        """
        Parse a plain decimal string such as "-12.375", "7", ".5" or "3.".

        The text is treated as the exact fraction digits / 10**places and
        rounded once to the nearest float32 (ties to even). Values too
        small for a normal number become subnormal or zero; values too
        large become infinity.

        Args:
            value: optional leading '-', then ASCII digits with at most one
                '.'. Exponent notation is not supported.

        Returns:
            The nearest Float32.

        Raises:
            ValueError: if `value` holds no digit or any other character.
        """
        text = value
        sign = 0
        if value.startswith('-'):
            value = value[1:]
            sign = 1

        real, _, dec = value.partition('.')
        digits = real + dec
        # A second '.' ends up inside `dec` and is rejected here as well.
        if digits == "" or any(c not in "0123456789" for c in digits):
            raise ValueError(f"invalid decimal literal for Float32: {text!r}")

        # Exact value = numerator / denominator.
        numerator = str_to_int(digits)
        denominator = 10 ** len(dec)

        if numerator == 0:
            return Float32(sign, 0, 0)

        # Find exp with 2**exp <= value < 2**(exp + 1). The bit lengths put
        # it within one of the answer; an exact comparison settles it.
        exp = numerator.bit_length() - denominator.bit_length()
        if exp >= 0:
            below = numerator < (denominator << exp)
        else:
            below = (numerator << -exp) < denominator
        if below:
            exp -= 1

        # Subnormals all share the exponent of the smallest normal number.
        scale_exp = max(exp, -126)

        # significand = value * 2**(23 - scale_exp), kept as an exact ratio.
        shift = 23 - scale_exp
        if shift >= 0:
            numerator <<= shift
        else:
            denominator <<= -shift
        significand, remainder = divmod(numerator, denominator)

        # Round to nearest, ties to even.
        twice = remainder * 2
        if twice > denominator or (twice == denominator and significand & 1):
            significand += 1

        # For normals the significand carries the implicit bit (2**23), so
        # adding it to (biased - 1) << 23 yields the biased exponent field.
        # A rounding carry (2**24, or 2**23 for subnormals) bumps the
        # exponent field by one and leaves a zero mantissa, as required.
        magnitude = ((scale_exp + 126) << 23) + significand

        infinity = 0xFF << 23
        if magnitude >= infinity:
            magnitude = infinity  # overflow saturates to infinity

        return Float32(sign, magnitude >> 23, magnitude & ((1 << 23) - 1))

    def sign(self):
        """Return the sign bit (0 or 1)."""
        offset = 23 + 8
        return (self.bits & (1 << offset)) >> offset

    def expo(self):
        """Return the raw (biased) 8-bit exponent field."""
        return (self.bits & ((1 << (23 + 8)) - 1)) >> 23

    def mantissa(self):
        """Return the raw 23-bit mantissa field (no implicit leading 1)."""
        return self.bits & ((1 << 23) - 1)

    def is_subnormal(self) -> bool:
        """True when the exponent field is 0; this includes zero itself."""
        return self.expo() == 0

    def is_infinity(self) -> bool:
        """True for +/- infinity: exponent all ones and a zero mantissa."""
        return self.expo() == (1 << 8) - 1 and self.mantissa() == 0

    def is_normal(self) -> bool:
        """True when the value is neither subnormal/zero, infinite nor NaN."""
        return not (self.is_subnormal() or self.is_infinity() or self.is_nan())

    def is_nan(self) -> bool:
        """True for NaN: exponent all ones and a non-zero mantissa."""
        return self.expo() == ((1 << 8) - 1) and self.mantissa() != 0

    def is_zero(self) -> bool:
        """True for +0 and -0: exponent and mantissa fields both zero."""
        return self.expo() == 0 and self.mantissa() == 0

    def as_float(self) -> float:
        """Return the represented value as a Python float."""
        # so i dont need to implement the operations
        # this is not cheating, just using the arithmetic operations from the float class
        sign_factor = -1.0 if self.sign() == 1 else 1.0

        if self.is_zero():
            return sign_factor * 0.0

        if self.is_infinity():
            return sign_factor * float('infinity')

        if self.is_nan():
            return float('nan')

        if self.is_subnormal():
            # no implicit leading 1, fixed exponent of -126
            return sign_factor * 2.0 ** -126 * (self.mantissa() / (2**23))

        return sign_factor * 2.0 ** (self.expo() - 127) * (1 + self.mantissa() / (2**23))

    def as_decimal(self) -> str:
        """
        Format the value in decimal scientific notation, e.g. "1.5e0".

        Zero, NaN and infinity render as "0.0", "nan" and "inf" (with a
        leading '-' where the sign bit is set, except for NaN). At most 13
        significant digits are produced.
        """
        sign_str = '-' if self.sign() == 1 else ''

        if self.is_zero():
            return f"{sign_str}0.0"

        if self.is_nan():
            return 'nan'

        if self.is_infinity():
            return f"{sign_str}inf"

        # this algorithm works for both normal and subnormal numbers
        # because it only uses arithmetic operations
        value = abs(self.as_float())  # only works on positive numbers

        # Normalise into [1, 10). Both bounds are re-checked every step so
        # that exactly 10.0 (or a product that rounds to 10.0) cannot slip
        # through and be emitted as a two-character "digit".
        expo = 0
        while not 1 <= value < 10:
            if value >= 10:
                value /= 10
                expo += 1
            else:
                value *= 10
                expo -= 1

        # compute the digits: one before the point, at most 12 after it
        result = []
        while value > 0 and len(result) <= 12:
            real = math.floor(value)
            result.append(real)
            value = (value - real) * 10

        decimal = concat_bits(result)
        return f"{sign_str}{decimal[0]}.{decimal[1:] if len(decimal) > 1 else '0'}e{expo}"

    def as_bits(self) -> str:
        """Return the 32 bits as "sign|exponent|mantissa", most significant first."""
        bits = pad_bits(print_bits(self.bits), 32)
        result = concat_bits(bits)
        # `result` is least significant first; slice the fields, then flip.
        return (result[:23] + '|' + result[23:23+8] + '|' + result[23+8])[::-1]

    def true_expo(self) -> int:
        """
        Return the unbiased exponent: field - 127 for normal numbers, -126
        for subnormals (and zero), 0 for infinity and NaN.
        """
        if self.is_normal():
            return self.expo() - 127
        elif self.is_subnormal():
            return -126

        return 0

    def true_mantissa(self) -> float:
        """
        Return the significand as a float: the mantissa field scaled by
        2**-23, plus the implicit leading 1 unless the exponent field is 0.
        """
        return Float32.compute_true_mantissa(self.mantissa(), self.is_subnormal())

    @staticmethod
    def compute_true_mantissa(mantissa: int, subnormal: bool) -> float:
        """
        Turn a raw 23-bit mantissa field into its significand value.

        Args:
            mantissa: raw mantissa field.
            subnormal: when False the implicit leading 1 is added.
        """
        dec_mantissa = 0
        # bit i (least significant first) weighs 2**-(23 - i)
        for i, b in enumerate(print_bits(mantissa)):
            dec_mantissa += b * 2.0**(-(23-i))

        if not subnormal:
            dec_mantissa += 1

        return dec_mantissa

    def __str__(self) -> str:
        return self.as_decimal()

    def __repr__(self) -> str:
        sign = self.sign()
        mantissa = self.mantissa()
        bin_mantissa = format_bits(str_bits(mantissa).ljust(23, '0'))
        expo = self.expo()
        bin_expo = format_bits(str_bits(expo).ljust(8, '0'))

        dec_expo = self.true_expo()
        dec_mantissa = self.true_mantissa()

        return f"Float32(sign={sign}, exponent=0b{bin_expo} or {dec_expo}, mantissa=0b{bin_mantissa} or {dec_mantissa})"

    def precision(self) -> tuple[float, float]:
        """
        Return the gaps to the neighbouring representable magnitudes.

        Returns:
            ``(lower_difference, upper_difference)``: the distance to the
            next smaller and the next larger magnitude. Both are NaN for
            infinity and NaN. For zero the lower difference is 0.0, as no
            smaller magnitude exists. For the largest finite number the
            upper difference is the regular spacing of its binade, although
            the next bit pattern is infinity.
        """
        if not (self.is_normal() or self.is_subnormal()):
            return (float('nan'), float('nan'))

        # Spacing between neighbours that share this value's exponent.
        ulp = 2.0 ** (self.true_expo() - 23)
        upper_difference = ulp

        if self.is_zero():
            lower_difference = 0.0
        elif self.mantissa() == 0 and self.expo() > 1:
            # Exact power of two: the neighbour below lives in the previous
            # binade, where values are twice as dense. The smallest normal
            # (exponent field 1) is excluded because the subnormals below it
            # use the same spacing.
            lower_difference = ulp / 2
        else:
            lower_difference = ulp

        return (lower_difference, upper_difference)
        

# Scratch experiments kept for reference:
# test_number = 0b11001
# test_number = limit_bits(test_number, 4)
# print_bits(test_number)
# fl = Float32(0, 0b0000_0000, 0b1100_0000_0000_0000_0000_000)

# Sample value with a zero exponent field and the two highest mantissa bits
# set. It must be assigned here: the prints below would otherwise fail with
# a NameError on a fresh kernel.
fl = Float32(0, 0, 0b1100_0000_0000_0000_0000_000)
# print_bits(fl.bits)
# print(fl.as_float())
# print(fl.as_decimal())
# print(fl.as_bits())
# print(fl.mantissa())
print(repr(fl))
print(fl.precision())

In [None]:
# Parse two decimal strings and show, for each one, the field breakdown
# (repr) followed by the decimal rendering (str).
fl1 = Float32.from_str("0.1")
fl3 = Float32.from_str("0.3")
for parsed in (fl1, fl3):
    print(repr(parsed))
    print(parsed)

In [None]:
import matplotlib.pyplot as plt
import numpy as np


# test the precision of the floating point number
def test_precision():
    """Plot the lower and upper gaps reported by Float32.precision().

    Sample points x are spread evenly over [0, 10]. The integer part of x is
    used as the exponent field and the scaled fractional part as the
    mantissa field of a positive Float32. The two gaps are drawn side by
    side on logarithmic y axes against x - 127.
    """
    # x var is expo
    # y var is precision
    # 0, 255 are reserved
    # NOTE(review): the y axes use matplotlib's default "log" scale while the
    # shared label says "log2" -- confirm which one is intended.
    mantissa_scale = math.floor(2 ** 23 / 100000)
    xs = np.linspace(0, 10, 9 * mantissa_scale)

    # compute ys
    lower, upper = [], []
    for x in xs:
        exp = math.floor(x)
        mantissa = math.ceil(x * mantissa_scale - exp * mantissa_scale)
        below, above = Float32(0, exp, mantissa).precision()
        lower.append(below)
        upper.append(above)

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    panels = (
        (axes[0], lower, 'red', "Lower Precision"),
        (axes[1], upper, 'blue', "Upper Precision"),
    )
    for axis, ys, color, title in panels:
        axis.plot(xs - 127, ys, color=color)
        axis.set_xlabel("EXP")
        axis.set_title(title)
        axis.set_yscale("log")

    fig.supylabel("log2 precision")

    plt.show()

# Run the precision experiment and render both plots.
test_precision()



In [None]:
# Show how Python's built-in float displays positive infinity.
float('infinity')