In [1]:
import numpy as np
from utils.floating_point import *

In [2]:
0.1 + 0.2 == 0.3

False

In [3]:
1e16 + 1 == 1e16

True

In [4]:
f"{104:b}"  # I'm using the python f-string format code "b" to display my integer in binary

'1101000'

In [5]:
x = 104
x.bit_length()

7

In [6]:
print("DECIMAL | BINARY")
print("================")
for _ in range(11):
    print(f"   {_:02}   |  {_:04b}")

DECIMAL | BINARY
   00   |  0000
   01   |  0001
   02   |  0010
   03   |  0011
   04   |  0100
   05   |  0101
   06   |  0110
   07   |  0111
   08   |  1000
   09   |  1001
   10   |  1010


In [7]:
bits = 8
print(f"unsigned min: 0")
print(f"unsigned max: {2 ** bits - 1}\n")
print(f"signed min: {-2 ** (bits - 1)}")
print(f"signed max: {2 ** (bits - 1) - 1}")

unsigned min: 0
unsigned max: 255

signed min: -128
signed max: 127


In [8]:
np.iinfo("int8")  # use np.iinfo() to check out the limits of a dtype

iinfo(min=-128, max=127, dtype=int8)

In [9]:
np.iinfo("uint8")  # "uint8" is an unsigned 8-bit integer, so ranging from 0 to 2^7

iinfo(min=0, max=255, dtype=uint8)

In [10]:
np.array([-87, 31], "int8")  # this is fine as all numbers with the allowable range of int8

array([-87,  31], dtype=int8)

In [11]:
np.array([-129, 128], "int8")  # these numbers are outside the allowable range!

array([ 127, -128], dtype=int8)

In [12]:
np.array([-129, 128], "int16")

array([-129,  128], dtype=int16)

In [13]:
x = 2 ** 64
x

18446744073709551616

In [14]:
x.bit_length()

65

In [15]:
(2.0 ** np.arange(-4, 4, 1)).sum()

15.9375

In [16]:
2 ** -4

0.0625

In [17]:
(2.0 ** np.arange(-2, 6, 1)).sum()

63.75

In [18]:
2 ** -2

0.25

In [19]:
f"{1234:.3e}"

'1.234e+03'

In [20]:
binary(10)

 Decimal: 1.25 x 2^3
  Binary: 1.01 x 2^11

    Sign: 0 (+)
Mantissa: 01 (0.25)
Exponent: 11 (3)


In [21]:
binary(2.998e8)

 Decimal: 1.11684203147888184 x 2^28
  Binary: 1.0001110111101001010111 x 2^11100

    Sign: 0 (+)
Mantissa: 0001110111101001010111 (0.11684203147888184)
Exponent: 11100 (28)


In [22]:
binary(6.02214076e23)

 Decimal: 1.9925592330949422 x 2^78
  Binary: 1.1111111000011000010111001010010101111100010100010111 x 2^1001110

    Sign: 0 (+)
Mantissa: 1111111000011000010111001010010101111100010100010111 (0.9925592330949422)
Exponent: 1001110 (78)


In [23]:
binary(6.62607004e-34)

 Decimal: 1.720226132656187 x 2^-111
  Binary: 1.1011100001100000101111010110010101111011100111100001 x 2^-1101111

    Sign: 0 (+)
Mantissa: 1011100001100000101111010110010101111011100111100001 (0.720226132656187)
Exponent: -1101111 (-111)


In [24]:
f"{0.1:.60f}"

'0.100000000000000005551115123125782702118158340454101562500000'

In [25]:
float_rep(0.1)

You entered: 0.1
Which is inexactly stored as: 0.1000000000000000055511151231257827021181583404541015625


In [26]:
float_rep(0.25)

You entered: 0.25
Which is exactly stored as: 0.25


In [27]:
binary(1)

 Decimal: 1.0 x 2^0
  Binary: 1.0 x 2^0

    Sign: 0 (+)
Mantissa: 0 (0.0)
Exponent: 0 (0)


In [28]:
2 ** -52

2.220446049250313e-16

In [29]:
np.nextafter(1, 2)

1.0000000000000002

In [30]:
spacing = np.nextafter(1, 2) - 1
spacing

2.220446049250313e-16

In [31]:
spacing == 2 ** -52

True

In [32]:
1 + 0.4 * spacing == 1  # add less than half the spacing

True

In [33]:
1 + 0.6 * spacing == 1 # add a more than half the spacing

False

In [34]:
large_number = 1e25
spacing = calc_spacing(large_number)
print(f"{spacing:.5e}")

2.14748e+09


In [35]:
one_billion = 1e9
1e25 + one_billion == 1e25  # adding a billion didn't change our number!

True

In [36]:
two_billion = 2e9
1e25 + two_billion == 1e25  # adding two billion (more than half the spacing) did change our number

False

In [37]:
1 == 0.99999999999999999 and 1 == 1.00000000000000009

True

In [38]:
bits = 11
max_exponent = 2 ** (bits - 1) - 1
max_exponent

1023

In [39]:
max_mantissa = 1.999999999999999
max_mantissa

1.999999999999999

In [40]:
max_mantissa * 2 ** max_exponent

1.797693134862315e+308

In [41]:
np.finfo(np.float64).max

1.7976931348623157e+308

In [42]:
10 ** 309.0

OverflowError: (34, 'Result too large')

In [43]:
np.power(10, 309.0)

  """Entry point for launching an IPython kernel.


inf

In [44]:
2 ** -1022

2.2250738585072014e-308

In [45]:
np.finfo(np.float64).minexp  # minimum exponent value

-1022

In [46]:
np.finfo(np.float64).tiny  # smallest possible positive value

2.2250738585072014e-308

In [47]:
2 ** -1024

5.562684646268003e-309

In [48]:
2 ** -1074

5e-324

In [49]:
2 ** -1075

0.0

In [50]:
1e16 + 1 + 1 == 1 + 1 + 1e16

False

In [51]:
calc_spacing(1e16)

2.0

In [52]:
1e16 + 1 == 1e16

True

In [53]:
1e16 + (1 + 1) == 1e16

False

In [54]:
1e16 + 1 + 1 == 1e16

True

In [55]:
1 + 1 + 1e16 == 1e16

False

In [56]:
def loss_lr(w, x, y):
    
    return np.sum(np.log(1 + np.exp(-y * w * x)))

In [57]:
n = 10
x = np.random.randn(n) * 1e5
w = np.random.randn()
y = np.random.choice([-1, +1], size=n)
loss_lr(w, x, y)

  This is separate from the ipykernel package so we can avoid doing imports until


inf

In [58]:
np.log(np.finfo(np.float64).max)

709.782712893384

In [59]:
-y * w * x

array([ 64417.45688554,  13511.94138729, -64231.28413197, 181721.17102822,
        49393.2609363 , -17855.79895993,  19182.03532056, 108681.46256593,
        28894.16728872,  -1578.80520392])

In [60]:
z = 100
np.log(1 + np.exp(z))

100.0

In [61]:
@np.vectorize  # decorator to vectorize the function
def log_1_plus_exp_safe(z):
    if z > 100:
        print(f"Avoiding overflow error with approximation of {z:.0f}!")        
        return z
    else:
        return np.log(1 + np.exp(z))

In [62]:
log_1_plus_exp_safe([1, 50, 200, 500])

Avoiding overflow error with approximation of 200!
Avoiding overflow error with approximation of 500!


array([  1.31326169,  50.        , 200.        , 500.        ])

In [63]:
def loss_lr(w, x, y):
    
    return np.sum(log_1_plus_exp_safe(-y * w * x))

In [64]:
loss_lr(w, x, y)

Avoiding overflow error with approximation of 64417!
Avoiding overflow error with approximation of 64417!
Avoiding overflow error with approximation of 13512!
Avoiding overflow error with approximation of 181721!
Avoiding overflow error with approximation of 49393!
Avoiding overflow error with approximation of 19182!
Avoiding overflow error with approximation of 108681!
Avoiding overflow error with approximation of 28894!


465801.49541255436

In [65]:
np.float64().nbytes  # number of bytes consumed by a float64

8

In [66]:
np.float64().nbytes * 8  # recall 1 byte = 8 bits

64

In [67]:
x64 = np.random.randn(1000, 1000)
print(f"array size: {x64.shape}")
print(f"array type: {x64.dtype}")
print(f"mem. usage: {x64.nbytes * 8 / (1000 * 1000)} MB")

array size: (1000, 1000)
array type: float64
mem. usage: 64.0 MB


In [68]:
x32 = x64.astype('float32')
print(f"array type: {x32.dtype}")
print(f"mem. usage: {x32.nbytes * 8 / (1000 * 1000)} MB")

array type: float32
mem. usage: 32.0 MB


In [69]:
time64 = %timeit -q -o -r 3 x64 ** 2
time32 = %timeit -q -o -r 3 x32 ** 2
print(f"float32 array is {time64.average / time32.average:.2f}x faster than float64 array here.")

float32 array is 6.04x faster than float64 array here.
