# Test Vectors for FP8 Module
- Generate binary test vectors for testing FP8 module. 
- Use Look-up Table (LUT) for conversion: Decimal to Binary.   

References:
1. https://onnx.ai/onnx/technical/float8.html
2. https://github.com/IntelLabs/FP8-Emulation-Toolkit
3. https://www.wikihow.com/Convert-a-Number-from-Decimal-to-IEEE-754-Floating-Point-Representation
4. https://www.geeksforgeeks.org/python-program-to-convert-floating-to-binary/
5. https://stackoverflow.com/questions/2566412/find-nearest-value-in-numpy-array

![image](./Image_ref/fp8_formats.png)

- Formulas for E4M3FN --> Binary to Decimal  
$s\_b_{6}b_{5}b_{4}b_{3}\_b_{2}b_{1}b_{0}$

$$FP8_{10}=
    \begin{cases}
        (-1)^{s} \cdot 2^{(\sum_{i=3}^{6}b_{i}2^{i-3})-7} \cdot (1 + \sum_{i=0}^{2}b_{i}2^{i-3}) & \text{exponent} \neq 0 \text{(normal)}\\
        (-1)^{s} \cdot \sum_{i=0}^{2}b_{i}2^{i-3} & \text{exponent} = 0 \text{(subnormal)}
    \end{cases}
$$

# Libraries

In [2]:
import numpy as np
import pandas as pd
import itertools
from tqdm import tqdm
from binary_fractions import Binary


# Functions

- Print all possible values for FP8 format.
- Generate the LUT tables.

In [3]:
"""
Show all 256 possible values for:
    - E4M3FN
Generate LUT for conversion Decimal to Binary

IN:
format <-- FP8 format
verbose <-- Show all the possible values

OUT:
val_fp8 --> array with all values for rouding
bin_fp8 --> dictionary for binary conversion

"""
def show_range(format='E4M3', verbose = False):
    # Array to store all possible floating point representations for FP8 (256)
    # This will be used to round any floating point value for binary conversion
    val_fp8 = np.zeros(256)

    # Dictionaty to store all floating point values (255) and correponding binary representation
    # LUT for binary conversion
    # Only +0
    bin_fp8 = {}
    
    # Generate all 256 binary values
    # Use the cartesian product to generate all possible permutations
    # Create a list of lists of integerer numbers
    # Number of bits
    n = 8
    lst_bin = [list(i) for i in itertools.product([0, 1], repeat=n)]
    # Debug
    # print(lst_bin)
    # print(len(lst_bin))
    # print(type(lst_bin[0][1]))

    # Bias for the corresponding format
    if format == 'E4M3':
        bias = 7.

    if verbose:
            print(f'All possible values for FP8 representation {format}:')

    # Loop over all the values
    for n, val_bin in enumerate(lst_bin):
        # Calculate decimal values following the corresponding formula
        # Using float number
        # Position of bits:  s  _ b6  b5  b4  b3  _ b2  b1  b0
        #                   [0] _ [1] [2] [3] [4] _ [5] [6] [7]
        
        # Sign in decimal
        # Multiplicative factor
        sign_f = (-1.)**(val_bin[0])
        
        # Exponent in decimal
        # Initialize the exponent
        exp_v = 0.
        # For E4M3 we have 4 bits for exponent
        # Considering the decimal equivalent of each bit
        for i in range(4):
            exp_v += val_bin[4-i]*(2**(i))
        # Multiplicative factor calculted from the exponent
        # it is different depending on normal or subnormal interval
        if exp_v != 0:
            # Normal number
            exp_f = 2**(exp_v - bias)
        else:
            # Subnormal number
            exp_f = 2**(1 - bias)
        # Debug
        # print(f'Exponent factor: {exp_f}')

        # Mantissa in decimal
        # Initialize the mantissa
        man_v = 0.
        # For E4M3 we have 3 bits for mantissa
        # Considering the decimal equivalent of each bit
        for i in range(3):
            man_v += val_bin[7-i]*(2**(i-3))
        # Multiplicative factor calculted from mantissa
        # it is different depending on normal or subnormal interval
        if exp_v != 0:
            # Normal number
            man_f = 1 + man_v
        else:
            # Subnormal number
            man_f = man_v
        # Debug
        # print(f'Mantissa factor: {man_f}')

        ## Decimal Representation
        val_d = sign_f*exp_f*man_f
        # Store Decimal representation
        val_fp8[n] = val_d

        ## Binary Representation
        # Sign in binary
        sign_b = str(val_bin[0])
        # Exponent in binary
        exp_b = ''.join([str(b) for b in val_bin[1:5]])
        # Mantissa in binary
        man_b = ''.join([str(b) for b in val_bin[5:]])

        # Store decimal and binary representation (string)
        # Only consider +0
        if not(val_d == 0 and sign_b == '1'):
            bin_fp8[val_d] = sign_b+exp_b+man_b    

        # Print all the possible corresponding values
        if verbose:
            ######################################################
            # Zero values
            ######################################################
            if val_d == 0:
                print('\nZero value:')

            ######################################################
            # NaN values
            ######################################################
            # Maximum possible value
            elif abs(val_d) == 480:
                print('\nNaN value:')
                # val_d = float('nan')
            
            ######################################################
            # Subnormal values
            ######################################################
            # Exponent is equal to zero
            elif exp_v == 0:
                print('\nSubnormal value:')
                # print(f'Exponent factor: {exp_f}')
                # print(f'Mantissa factor: {man_f}')
            
            ######################################################
            # Normal values
            ######################################################
            # Exponent is different to zero
            else:
                print('\nNormal value:')
            
            # Print binary and decimal representation
            print(f'{sign_b}_{exp_b}_{man_b} --> {val_d}')              

    return val_fp8, bin_fp8

In [4]:
# Test the function
val_fp8, bin_fp8 = show_range(format='E4M3', verbose = True)
print(val_fp8.shape)
print(val_fp8)

print(len(bin_fp8))
print(bin_fp8)

All possible values for FP8 representation E4M3:

Zero value:
0_0000_000 --> 0.0

Subnormal value:
0_0000_001 --> 0.001953125

Subnormal value:
0_0000_010 --> 0.00390625

Subnormal value:
0_0000_011 --> 0.005859375

Subnormal value:
0_0000_100 --> 0.0078125

Subnormal value:
0_0000_101 --> 0.009765625

Subnormal value:
0_0000_110 --> 0.01171875

Subnormal value:
0_0000_111 --> 0.013671875

Normal value:
0_0001_000 --> 0.015625

Normal value:
0_0001_001 --> 0.017578125

Normal value:
0_0001_010 --> 0.01953125

Normal value:
0_0001_011 --> 0.021484375

Normal value:
0_0001_100 --> 0.0234375

Normal value:
0_0001_101 --> 0.025390625

Normal value:
0_0001_110 --> 0.02734375

Normal value:
0_0001_111 --> 0.029296875

Normal value:
0_0010_000 --> 0.03125

Normal value:
0_0010_001 --> 0.03515625

Normal value:
0_0010_010 --> 0.0390625

Normal value:
0_0010_011 --> 0.04296875

Normal value:
0_0010_100 --> 0.046875

Normal value:
0_0010_101 --> 0.05078125

Normal value:
0_0010_110 --> 0.0546875

- Decimal to Binary FP8 (E4M3).
- Use LUT table since we only need 256 values.

In [4]:
'''
Auxiliary function to find the nearest value in an array
    - It is used to round the input floating point value to the closest representation in FP8.

IN:
array <-- numpy array
value <-- value to round

OUT:
array[idx] --> rounded value
'''
def find_nearest(array, value):
    # array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return array[idx]

In [5]:
'''
Conversion of floating point to binary FP8
    - Round the floating point to the nearest value represented in FP8
    - Use a LUT to determine the corresponding binary value

IN:
val_fp <-- floating point value for conversion
format <-- FP8 format

OUT:
val_round --> corresponding FP8 value representation
val_bin --> corresponding FP8 binary representation
'''
def to_FP8_bin(val_fp, format = 'E4M3'):
    # Generate the array and LUT
    val_fp8, bin_fp8 = show_range(format=format, verbose = False)
    
    # Round the input floating point
    # Handle the only exception: NaN
    if abs(val_fp) > 448:
        # Assign the corresponding key for NaN
        val_round = np.sign(val_fp)*480.
    else:
        # Round to the available keys in FP8 dictionary
        # Round to the nearest value
        val_round = find_nearest(array=val_fp8, value=val_fp)

    # Find the corresponding binary representation
    val_bin = bin_fp8[val_round]
    return val_bin, val_round

In [153]:
# Test the function
vals_fp = [-447, 450, 1.5e-2, 1.3e-2, 1.9e-3, 5.6e-2] 

for val_fp in vals_fp:
    val_bin, val_round = to_FP8_bin(val_fp=val_fp, format = 'E4M3')
    print(f'Original FP is {val_fp}, FP8 representation {val_round} --> Binary FP8 {val_bin}')

Original FP is -447, FP8 representation -448.0 --> Binary FP8 11111110
Original FP is 450, FP8 representation 480.0 --> Binary FP8 01111111
Original FP is 0.015, FP8 representation 0.015625 --> Binary FP8 00001000
Original FP is 0.013, FP8 representation 0.013671875 --> Binary FP8 00000111
Original FP is 0.0019, FP8 representation 0.001953125 --> Binary FP8 00000001
Original FP is 0.056, FP8 representation 0.0546875 --> Binary FP8 00010110


- Decimal to Binary FP16.
- Use for Raw scores and Attention scores

In [6]:
"""
Method to obtain the FP16 binary representation of any FP number

IN:
n_fp <-- floating-point number

OUT:
v_fp16 --> FP16 decimal representation
b_fp16 --> FP16 binary representation (string)
"""
def to_FP16_bin(n_fp):
    # Change precision to FP16
    v_fp16 = np.float16(n_fp)

    # H is used to get a 16-bit unsigned integer representation
    b_fp16 = bin(v_fp16.view('H'))[2:].zfill(16)
    
    return b_fp16, v_fp16 

In [191]:
# Example
a = 0.5
a_b, a_v = to_FP16_bin(a)
print(a_b)

b = -0.4375
b_b, b_v = to_FP16_bin(b)
print(b_b)

c = a + b
c_b, c_v = to_FP16_bin(c)
print(c_b)

print(f'Addition: {a_v} + {b_v} = {c_v}')

0011100000000000
1011011100000000
0010110000000000
Addition: 0.5 + -0.4375 = 0.0625


- From Hex to Binary (FP16).
- Binary elements are separeted into sign, exponent, and mantissa.
- fp16 formula (normal numbers) is applied:
$$(-1)^{sign bit} \times 2^{exponent - 15} \times \left( 1 + \frac{mantissa}{1024} \right)$$

In [7]:
"""
Convert Hex string to decimal numbers in fp16 representation
- Only works for normal numbers

IN:
hex_s <-- hexadecimal string representing a fp16 number

OUT:
fp16_n --> fp16 decimal number
"""
def hex_to_fp16(hex_s):
    # Hex string --> INT16 --> INT16 to binary string
    bin_s = bin(int(hex_s, 16))
    # Drop the '0b'
    # Complete the 16 bits
    bin_s = bin_s[2:].zfill(16)

    # Debug
    # print(bin_s)
    # print(type(bin_s))
    # print(len(bin_s))

    # Separate sign, exponent, and mantissa.
    # Get their numerical representation
    # Turn the number into fp16 to make sure that the reconstructed number will be fp16

    # 1 bit for sign
    sign_t = bin_s[0]
    # print(f'Sign in bits: {sign_t}')
    sign_t = int(sign_t, 2)
    sign_t = np.float16(sign_t)
    # print(f'Sign value is {sign_t} and type is {sign_t.dtype}\n')

    # 5 bits for exponent
    exp_t = bin_s[1:6]
    # print(f'Exponent in bits: {exp_t}')
    exp_t = int(exp_t, 2)
    exp_t = np.float16(exp_t)
    # print(f'Exponent value is {exp_t} and type is {exp_t.dtype}\n')

    # 10 bits for mantissa
    man_t = bin_s[6:]
    # print(f'Mantissa in bits: {man_t}')
    man_t = int(man_t, 2)
    man_t = np.float16(man_t)
    # print(f'Mantissa value is {man_t} and type is {man_t.dtype}\n')

    # Apply the formula to calculate fp16 number
    number_t = ((np.float16(-1))**sign_t) * (np.float16(2)**(exp_t - np.float16(15))) * (np.float16(1) + (man_t/np.float16(1024)))
    # print(f'fp16 Decimal number: {number_t}')
    # print(f'and type is {number_t.dtype}') 
    return number_t

In [197]:
# Simulate of the results from testbench
hex_string = '4cec' # 19.6875
# hex_string = '3fe0'
# hex_string = 'd0c1'

print(hex_to_fp16(hex_string))

19.69


# E4M3FN
- 1 bit for the sign, 4 bits for the exponents, 3 bits for the mantissa, only nan values and no infinite values (FN).
-  NVIDIA, Intel and ARM implement E4M3FN in its latest graphical processor.

In [155]:
_,_ = show_range(format='E4M3', verbose=True)

All possible values for FP8 representation E4M3:

Zero value:
0_0000_000 --> 0.0

Subnormal value:
0_0000_001 --> 0.001953125

Subnormal value:
0_0000_010 --> 0.00390625

Subnormal value:
0_0000_011 --> 0.005859375

Subnormal value:
0_0000_100 --> 0.0078125

Subnormal value:
0_0000_101 --> 0.009765625

Subnormal value:
0_0000_110 --> 0.01171875

Subnormal value:
0_0000_111 --> 0.013671875

Normal value:
0_0001_000 --> 0.015625

Normal value:
0_0001_001 --> 0.017578125

Normal value:
0_0001_010 --> 0.01953125

Normal value:
0_0001_011 --> 0.021484375

Normal value:
0_0001_100 --> 0.0234375

Normal value:
0_0001_101 --> 0.025390625

Normal value:
0_0001_110 --> 0.02734375

Normal value:
0_0001_111 --> 0.029296875

Normal value:
0_0010_000 --> 0.03125

Normal value:
0_0010_001 --> 0.03515625

Normal value:
0_0010_010 --> 0.0390625

Normal value:
0_0010_011 --> 0.04296875

Normal value:
0_0010_100 --> 0.046875

Normal value:
0_0010_101 --> 0.05078125

Normal value:
0_0010_110 --> 0.0546875

- Store binary and decimal FP8 representation.
- For Q, K, and V vectors.

In [184]:
# Files to modify
file_names = ['qv_fp8_attH0.csv', 'qv_fp8_attH1.csv', 'kv_fp8_attH0.csv', 'kv_fp8_attH1.csv', 'vv_fp8_attH0.csv', 'vv_fp8_attH1.csv']

In [187]:
# Create all the files based on the INTEL emulation results
for file_name in tqdm(file_names):

    # Read .csv file
    df = pd.read_csv('./test_vectors/'+file_name, header=None)
    # display(df)

    # Turn into an array
    fp_array = df.to_numpy()
    # print(fp_array.shape)
    # print(fp_array.dtype)
    # Get indexes
    n_i, n_j = fp_array.shape

    # Empty array to store FP8 decimal representation
    fp8_array = np.zeros_like(fp_array)
    # print(fp8_array.shape)
    # Empty Datframe for FP8 binary representation
    fp8_df = pd.DataFrame(index=range(n_i), columns=range(n_j))

    # Round each value to FP8 decimal representation
    # Also get the corresponding FP8 binary representation

    # Loop through all elements in the matrix
    for i in range(n_i):
        for j in range(n_j):
            # Get FP8 representations
            val_bin, val_round = to_FP8_bin(val_fp=fp_array[i,j], format = 'E4M3')

            # Store decimal representation
            fp8_array[i,j] = val_round
            # Store binary representation
            fp8_df.loc[i,j] = val_bin

    # Create a dataframe to store the values
    df = pd.DataFrame(fp8_array)
    # display(df)
    # Save the file
    name = file_name[0:7]+'r_'+file_name[7:]
    # print(name)
    df.to_csv('./test_vectors/'+name, index=False, header=False)

    # Dataframe for binary representation
    # display(fp8_df)
    # Save the file
    name = file_name[0:7]+'r_bin_'+file_name[7:]
    # print(name)
    fp8_df.to_csv('./test_vectors/'+name, index=False, header=False)

100%|██████████| 6/6 [00:24<00:00,  4.01s/it]


- Store binary and decimal FP16 representation.
- For raw and attention scores.

In [192]:
# Files to modify
file_names = ['raw_score_fp8_attH0.csv', 'raw_score_fp8_attH1.csv', 'att_score_fp8_attH0.csv', 'att_score_fp8_attH1.csv']

In [194]:
# Create all the files based on the INTEL emulation results
for file_name in tqdm(file_names):

    # Read .csv file
    df = pd.read_csv('./test_vectors/'+file_name, header=None)
    # display(df)

    # Turn into an array
    fp_array = df.to_numpy()
    # print(fp_array.shape)
    # print(fp_array.dtype)
    # Get indexes
    n_i, n_j = fp_array.shape

    # Empty array to store FP16 decimal representation
    fp16_array = np.zeros_like(fp_array)
    # print(fp16_array.shape)
    # Empty Dataframe for FP16 binary representation
    fp16_df = pd.DataFrame(index=range(n_i), columns=range(n_j))

    # Change precision to FP16
    # Also get the corresponding FP16 binary representation

    # Loop through all elements in the matrix
    for i in range(n_i):
        for j in range(n_j):
            # Get FP8 representations
            val_bin, val_round = to_FP16_bin(n_fp=fp_array[i,j])

            # Store decimal representation
            fp16_array[i,j] = val_round
            # Store binary representation
            fp16_df.loc[i,j] = val_bin

    # Create a dataframe to store the values
    df = pd.DataFrame(fp16_array)
    # display(df)
    # Save the file
    name = file_name[0:12]+'16'+file_name[13:]
    # print(name)
    df.to_csv('./test_vectors/'+name, index=False, header=False)

    # Dataframe for binary representation
    # display(fp16_df)
    # Save the file
    name = file_name[0:12]+'16_bin'+file_name[13:]
    # print(name)
    fp16_df.to_csv('./test_vectors/'+name, index=False, header=False)

100%|██████████| 4/4 [00:00<00:00,  7.92it/s]


# Binary Operations
- Calculate **one Raw Score** using binary representation (E4M3).
- Generate the exact binary result.

- Get FP8 decimal representation of Q-vector and K-vector from Attention Head 0.

In [84]:
# Get vector q1
# Read .csv file
df = pd.read_csv('./test_vectors/qv_fp8_r_attH0.csv', header=None)
# display(df)

# Get the 1st vector
q1_v = df.iloc[0].to_numpy()
print(f'q1 Vector of dimension: {q1_v.shape} \n{q1_v}')

# Get vector k1
# Read .csv file
df = pd.read_csv('./test_vectors/kv_fp8_r_attH0.csv', header=None)
# display(df)

# Get the 1st vector
k1_v = df.iloc[0].to_numpy()
print(f'\nk1 Vector of dimension: {k1_v.shape} \n{k1_v}')

q1 Vector of dimension: (64,) 
[ 0.8125     -0.1875     -0.8125      0.0703125   0.3125      0.140625
  0.25        0.34375    -0.1875      1.75       -0.625      -0.75
  0.0859375   0.234375   -0.40625    -0.8125     -1.375      -1.25
  0.03515625  0.28125     0.28125    -1.375       0.46875    -0.4375
 -0.625      -0.8125     -0.15625    -0.00390625  0.40625     0.6875
 -1.125      -0.5         0.0859375  -0.09375    -0.5         0.6875
 -0.625      -0.203125    0.0859375  -0.05859375 -0.4375     -0.375
 -0.05078125  0.9375     -0.625      -0.5625      0.5         0.9375
 -0.00390625  0.6875      1.625      -0.109375   -0.203125    0.75
 -0.046875   -0.3125      0.40625     0.04296875 -1.125       0.875
  0.875      -0.5625     -0.5625     -0.625     ]

k1 Vector of dimension: (64,) 
[ 0.140625    0.25       -0.8125      0.1015625  -1.375      -3.
 -0.234375   -1.375       3.75        0.9375     -0.625       1.125
 -1.         -0.01367188 -0.25        0.46875     0.5         0.875
 -

- Partial products.

In [85]:
# Binary representation of q1
q1_b =[]
# FP8(E4M3) Binary representation of q1
q1_fp8_b =[]
# Binary representation of k1
k1_b =[]
# FP8(E4M3) Binary representation of k1
k1_fp8_b =[]
# Binary representation of q1[i]*k1[i]
partial_q1xk1_b =[]
# FP16 Binary representation of q1[i]*k1[i]
partial_q1xk1_fp16_b =[]
# FP16 Decimal representation of q1[i]*k1[i]
partial_q1xk1_fp16_d =[]

for i in range(64):
    # Binary representation
    q1_b.append(Binary(q1_v[i]))
    k1_b.append(Binary(k1_v[i]))
    # FP8 binary representation
    val_bin, _ = to_FP8_bin(val_fp=q1_v[i], format = 'E4M3')
    q1_fp8_b.append(val_bin)
    val_bin, _ = to_FP8_bin(val_fp=k1_v[i], format = 'E4M3')
    k1_fp8_b.append(val_bin)

    # Partial result in binary (multiplication)
    partial_q1xk1_b.append(Binary(q1_v[i])*Binary(k1_v[i]))
    
    # Partial result in FP16 (multiplication)
    fp16_b, fp16_v = to_FP16_bin(float(partial_q1xk1_b[i]))
    partial_q1xk1_fp16_b.append(fp16_b)
    partial_q1xk1_fp16_d.append(fp16_v)

# print(q1_b[0].to_sci_exponent())
# print(k1_b[0].to_sci_exponent())
# print(partial_q1xk1_b[0].to_sci_exponent())
# print(partial_q1xk1_fp16_b)
# print(partial_q1xk1_fp16_d)

In [86]:
# Show all partial results
for i in range(64):
    print(f'q1[{i}] x k1[{i}]: {q1_b[i].to_sci_exponent()}({q1_v[i]}) x {k1_b[i].to_sci_exponent()}({k1_v[i]}) = {partial_q1xk1_b[i].to_sci_exponent()}({float(partial_q1xk1_b[i])})')
    print(f'   FP8-->FP16: {q1_fp8_b[i]}({q1_v[i]}) x {k1_fp8_b[i]}({k1_v[i]}) = {partial_q1xk1_fp16_b[i]}({partial_q1xk1_fp16_d[i]})\n')

q1[0] x k1[0]: 0b1.101e-1(0.8125) x 0b1.001e-3(0.140625) = 0b1.110101e-4(0.1142578125)
   FP8-->FP16: 00110101(0.8125) x 00100001(0.140625) = 0010111101010000(0.1142578125)

q1[1] x k1[1]: -0b1.1e-3(-0.1875) x 0b1e-2(0.25) = -0b1.1e-5(-0.046875)
   FP8-->FP16: 10100100(-0.1875) x 00101000(0.25) = 1010101000000000(-0.046875)

q1[2] x k1[2]: -0b1.101e-1(-0.8125) x -0b1.101e-1(-0.8125) = 0b1.0101001e-1(0.66015625)
   FP8-->FP16: 10110101(-0.8125) x 10110101(-0.8125) = 0011100101001000(0.66015625)

q1[3] x k1[3]: 0b1.001e-4(0.0703125) x 0b1.101e-4(0.1015625) = 0b1.110101e-8(0.00714111328125)
   FP8-->FP16: 00011001(0.0703125) x 00011101(0.1015625) = 0001111101010000(0.00714111328125)

q1[4] x k1[4]: 0b1.01e-2(0.3125) x -0b1.011e0(-1.375) = -0b1.10111e-2(-0.4296875)
   FP8-->FP16: 00101010(0.3125) x 10111011(-1.375) = 1011011011100000(-0.4296875)

q1[5] x k1[5]: 0b1.001e-3(0.140625) x -0b1.1e1(-3.0) = -0b1.1011e-2(-0.421875)
   FP8-->FP16: 00100001(0.140625) x 11000100(-3.0) = 1011011011000

- Raw score.

In [87]:
# Sum all partial products
for i in range(64):
    if i == 0:
        # Initial partial product
        raw_score_b = partial_q1xk1_b[0]
    else:
        # Accumulate the other partial products
        raw_score_b += partial_q1xk1_b[i]

# FP16 representation
fp16_b, fp16_v = to_FP16_bin(float(raw_score_b))

print(f'Raw Score q1xk1: {raw_score_b.to_sci_exponent()}({float(raw_score_b)}) --> FP16: {fp16_b}({fp16_v})')

Raw Score q1xk1: 0b1.010111000000110011e2(5.4382781982421875) --> FP16: 0100010101110000(5.4375)


- Generate test vectors:
    - 1st row --> q1 vector
    - 2nd row --> k1 vector
    - 3rd row --> partial products (q1[i] x k1[i])
    - 4th row --> row score padded with zeros on the right (sum(q1[i] x k1[i]))

In [82]:
## Decimal representation

# 1st Row
q1_v = np.reshape(q1_v, (1,-1))
# print(q1_v.shape)

# 2nd row
k1_v = np.reshape(k1_v, (1,-1))
# print(k1_v.shape)

# 3rd row
# Array for partial products
partial_prod = np.zeros_like(q1_v)
# print(partial_prod.shape)
# Fill array with decimal representation of partial products
for i in range(64):
    partial_prod[0,i] = float(partial_q1xk1_b[i])
# print(partial_prod.shape)
    
# 4th row
# Array for raw score
raw_score = np.zeros_like(q1_v)
# print(raw_score.shape)
# Load the raw score and leave the other 68 values as zero
raw_score[0,0] = fp16_v
# print(raw_score.shape)

# Concatenate all rows
test_1 = np.concatenate((q1_v, k1_v, partial_prod, raw_score), axis=0)
# print(test_1)

# Dataframe to store
# Create a dataframe to store the values
df = pd.DataFrame(test_1)
display(df)

# Save the file
df.to_csv('./test_vectors/q1xk1_exact.csv', index=False, header=False)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.8125,-0.1875,-0.8125,0.070312,0.3125,0.140625,0.25,0.34375,-0.1875,1.75,...,-0.046875,-0.3125,0.40625,0.042969,-1.125,0.875,0.875,-0.5625,-0.5625,-0.625
1,0.140625,0.25,-0.8125,0.101562,-1.375,-3.0,-0.234375,-1.375,3.75,0.9375,...,-0.25,2.0,0.5625,-0.050781,-1.625,-0.8125,1.625,0.40625,0.15625,-1.25
2,0.114258,-0.046875,0.660156,0.007141,-0.429688,-0.421875,-0.058594,-0.472656,-0.703125,1.640625,...,0.011719,-0.625,0.228516,-0.002182,1.828125,-0.710938,1.421875,-0.228516,-0.087891,0.78125
3,5.4375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [83]:
## Binary representation
# FP8 (q1 and k1) and FP16 (partial products and raw score)

# Empty Dataframe for binary representation: dimension (4, 64)
df_fp = pd.DataFrame('0', index=range(4), columns=range(64))
# display(df_fp)

# 1st row
# Loop through all columns
for j in range(64):
    # Store binary representation
    df_fp.loc[0,j] = q1_fp8_b[j]

# 2st row
# Loop through all columns
for j in range(64):
    # Get FP8 representations
    val_bin, _ = to_FP8_bin(val_fp=k1_v[0,j], format = 'E4M3')
    # Store binary representation
    df_fp.loc[1,j] = k1_fp8_b[j]

# 3st row
# Loop through all columns
for j in range(64):
    # Store binary representation
    df_fp.loc[2,j] = partial_q1xk1_fp16_b[j]

# 4th row
fp16_b, _ = to_FP16_bin(float(raw_score_b))
# Store binary representation
df_fp.loc[3,0] = fp16_b

display(df_fp)

# Save the file
df_fp.to_csv('./test_vectors/q1xk1_exact_b.csv', index=False, header=False)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,110101,10100100,10110101,11001,101010,100001,101000,101011,10100100,111110,...,10010100,10101010,101101,10011,10111001,110110,110110,10110001,10110001,10110010
1,100001,101000,10110101,11101,10111011,11000100,10100111,10111011,1000111,110111,...,10101000,1000000,110001,10010101,10111101,10110101,111101,101101,100010,10111010
2,10111101010000,1010101000000000,11100101001000,1111101010000,1011011011100000,1011011011000000,1010101110000000,1011011110010000,1011100110100000,11111010010000,...,10001000000000,1011100100000000,11001101010000,1001100001111000,11111101010000,1011100110110000,11110110110000,1011001101010000,1010110110100000,11101001000000
3,100010101110000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Test

In [None]:
 # compare all bit after the sign bit
if all(b==0 for b in val_bin[1:]):
    print('\n Zero value:')
    print(f'{sign_b}_{exp_b}_{man_b} --> {sign_v}0')

In [121]:
aux1 = {0.72: 'option1', 1.35: 'option2'}
print(aux1[72.e-2])

option1


In [138]:
aux1 = Binary(1.5625e-2)
print(aux1)
print(type(aux1))

aux1 = aux1.to_sci_exponent()
print(aux1)
print(type(aux1))

aux1 = str(aux1)
print(aux1)
print(type(aux1))

0b0.000001
<class 'binary_fractions.binary.Binary'>
0b1e-6
<class 'binary_fractions.binary.Binary'>
0b1e-6
<class 'str'>


In [139]:
aux1 = Binary(1.9e-03)
print(aux1)
print(type(aux1))

aux1 = aux1.to_sci_exponent()
print(aux1)
print(type(aux1))

aux1 = str(aux1)
print(aux1)
print(type(aux1))

0b0.00000000011111001000010010110101110111001100011000111111000101
<class 'binary_fractions.binary.Binary'>
0b1.1111001000010010110101110111001100011000111111000101e-10
<class 'binary_fractions.binary.Binary'>
0b1.1111001000010010110101110111001100011000111111000101e-10
<class 'str'>


In [9]:
# Example to FP8 x FP8 = FP16

# Obtain FP16 representation
a = np.float16(-0.0859375)
print(f'A = {a}')

b = np.float16(0.025390625)
print(f'B = {b}')

c = a*b
print(f'C = {c} in {c.dtype}')
print(f'C in binary: {to_FP16_bin(c)}')

A = -0.0859375
B = 0.025390625
C = -0.0021820068359375 in float16
C in binary: ('1001100001111000', -0.002182)


In [19]:
# Final result to achieve
bf1str: str = "-1.011e-4"  # -0.0859375
bf1: Binary = Binary(bf1str)
print(f"float({bf1}) = {float(bf1)}")

# Float
fl1: float = -0.0859375
print(f"Binary({fl1}) = {Binary(fl1).to_sci_exponent()}")

fl2: float = 0.025390625
print(f"Binary({fl2}) = {Binary(fl2).to_sci_exponent()}")

# Multiplication
c_b = Binary(fl1)*Binary(fl2)
print(f'Multiplication({float(c_b)}): {c_b.to_sci_exponent()}')

float(-0b1.011e-4) = -0.0859375
Binary(-0.0859375) = -0b1.011e-4
Binary(0.025390625) = 0b1.101e-6
Multiplication(-0.0021820068359375): -0b1.0001111e-9
