# Floating Point Numbers

*IEEE 754 Standard for Floating Point: Single and Double Precision*

### We'll start off by importing what we need

In [None]:
import math
import numpy as np
import numpy.linalg as npla

# New one!
import struct

# Lecture starts here

## Let's look at this example from lecture...

In [None]:
# Halve x until adding it to 1.0 no longer changes the result, counting the halvings.
# Plain Python floats are double precision (float64), so this probes float64.

x, count = 1.0, 0
while True:
    if 1.0 + x == 1.0:
        break
    # Remember the last x that still made a difference, then shrink it again.
    e, x = x, x / 2.0
    count += 1

print(e, count)
print(np.finfo(e))

In [None]:
# 2**-52 is the gap between 1.0 and the next larger float64.
s = 2.0 ** -52
print(s)

In [None]:
# Same experiment, but every value is an explicit np.float32 so the
# arithmetic stays in single precision.

x, count = np.float32(1.0), 0
while True:
    if np.float32(1.0) + x == np.float32(1.0):
        break
    # Remember the last x that still made a difference, then shrink it again.
    e, x = x, x / np.float32(2.0)
    count += 1

print(e, count)
print(np.finfo(e))

### You can use `np.float32()` and `np.float64()` to define the floating point type, per IEEE 754 standards.
#### Let's try these out with several examples

In [None]:
# 1/512 stored in single precision (a) and in double precision (b).
a, b = np.float32(1./512), np.float64(1./512)

print(a, b, sep='\n')

In [None]:
# 1/3 stored in single precision (a) and in double precision (b).
a, b = np.float32(1./3), np.float64(1./3)

print(a, b, sep='\n')

In [None]:
# 1/10 has no finite binary expansion, so both a and b hold rounded approximations of it.
a = np.float32(1./10)
b = np.float64(1./10)

# Each addition rounds again, so ten copies need not sum to exactly 1.
print(a+a+a+a+a+a+a+a+a+a)    # That's 10 a's in there...
print(b+b+b+b+b+b+b+b+b+b)    # That's 10 b's in there...

#### Defining a function `fprint()` that prints a 64-bit FP number in various formats and gives us information about it

In [None]:
# Hex digit -> its 4-bit binary string, e.g. 'a' -> '1010'.
bits = {format(n, 'x'): format(n, '04b') for n in range(16)}

# Hex digit -> the same digit with its top bit cleared, e.g. 'c' -> '4'.
# fprint() uses this to strip the sign bit off the first hex digit.
drop = {format(n, 'x'): format(n & 7, 'x') for n in range(16)}

def double_to_hex(f):
    """Return the 64-bit IEEE 754 pattern of ``f`` as 16 lowercase hex digits.

    The most significant digit comes first, so the sign bit is the top bit
    of the first digit.
    """
    # Big-endian packing puts the most significant byte first, so the hex
    # dump of the bytes already reads like the zero-padded 64-bit integer.
    return struct.pack('>d', f).hex()

def fprint(x):
    """Show how ``x`` is stored as a 64-bit floating-point number.

    Prints the value as given, as a float64 in scientific notation, its
    16-digit hex pattern, and the decoded sign and exponent fields.  For a
    normal number the 52 stored fraction bits are printed as well, after
    the implicit leading ``1.``.
    """
    print('input     :', x)
    # Everything below describes the float64 version of the input.
    x = np.float64(x)
    pattern = double_to_hex(x)
    print('as float64: {:.16e}'.format(x))
    print('as hex    : ' + pattern)

    # The sign is the top bit of the first hex digit.
    sign_bit = bits[pattern[0]][0]
    print('sign      :', '0 means +' if sign_bit == '0' else '1 means -')

    # The exponent field is the rest of the first digit plus the next two digits.
    field = drop[pattern[0]] + pattern[1:3]
    biased = int(field, 16)
    if biased == 0:
        print('exponent  :', field, 'means zero or denormal')
    elif biased == 2047:
        print('exponent  :', field, 'means inf or nan')
    else:
        print('exponent  :', field, 'means', biased, '- 1023 =', biased - 1023)
        # The remaining 13 hex digits are the stored fraction bits.
        print('mantissa  :', '1.' + ''.join(bits[digit] for digit in pattern[3:16]))
    print()

### Demonstration of `fprint()`

In [None]:
# 18.125 = 10010.001 in binary = 1.0010001 x 2^4, so the exponent line should end in "= 4".
fprint(18.125)
# Compare against the 64b examples done in lecture

In [None]:
# 2.0 = 1.0 x 2^1: exponent 1 and all-zero fraction bits.
fprint(2.)

In [None]:
# 1.0 = 1.0 x 2^0: the stored exponent field equals the bias, 1023 (hex 3ff).
fprint(1.)

In [None]:
# Zero has an all-zero exponent field, so fprint() takes its "zero or denormal" branch.
fprint(0)

### Special "numbers" in IEEE 754

In [None]:
# +infinity: the exponent field is all ones (2047), which fprint() reports as "inf or nan".
fprint(np.inf)

In [None]:
# -infinity: same exponent field as +infinity, but with the sign bit set.
fprint(-np.inf)

In [None]:
# NaN also has an all-ones exponent field, so fprint() reports it the same way as inf.
fprint(np.nan)

In [None]:
# A plain Python float is a double, so this repeats the 1/10 experiment in 64 bits.
a = 1/10
print(a)
print(a+a+a+a+a+a+a+a+a+a)    # That's 10 a's in there...
print()

# Show the stored bits of 1/10 and of the ten-term sum.
fprint(a)
fprint(a+a+a+a+a+a+a+a+a+a)

### Let's run the `1 + x == 1` routine (or something similar to it) with `fprint()` to get more info

In [None]:
# This is going to be helpful for one of your homework problems...! :)
# Keep halving x, showing the bits of x and of 1 + x, until the sum is exactly 1.
x = 1.0
for i in range(200):
    print('x:')
    fprint(x)
    print('1 + x:')
    fprint(1.0 + x)
    print('---------------------------------')
    if (1.0 + x) != 1.0:
        # x still registers when added to 1.0: halve it and go again.
        x = x/2.
        continue
    print('Stopped at iteration: ', i)
    break

### What are the parameters for double-precision (and single-precision) floats on my machine?
**These will look different on different CPUs**

In [None]:
# Print NumPy's summary of the float64 format (precision, eps, smallest and largest values, ...).
print(np.finfo(np.float64))

In [None]:
# Same summary for float32, for comparison with the float64 values.
print(np.finfo(np.float32))