# Generating Data with Numpy

### 1. Run these next few cells:

In [1]:
import numpy as np

In [2]:
array_1D = np.array([10,11,12,13, 14])
array_1D

array([10, 11, 12, 13, 14])

In [3]:
array_2D = np.array([[20,30,40,50,60], [43,54,65,76,87], [11,22,33,44,55]])
array_2D

array([[20, 30, 40, 50, 60],
       [43, 54, 65, 76, 87],
       [11, 22, 33, 44, 55]])

In [4]:
array_3D = np.array([[[1,2,3,4,5], [11,21,31,41,51]], [[11,12,13,14,15], [51,52,53,54,5]]])
array_3D

array([[[ 1,  2,  3,  4,  5],
        [11, 21, 31, 41, 51]],

       [[11, 12, 13, 14, 15],
        [51, 52, 53, 54,  5]]])

### 2. Generate 4 arrays of size 10:
    A) The first one should be "empty"
    B) The second one should be full of 0s
    C) The third one should be full of 1s
    D) The last one should be full of 2s
   (<b>Hint</b>: Try to use 4 different functions here.)

In [5]:
display(np.empty(10))
display(np.empty(shape=(2,5)))

array([nan,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

array([[nan,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [6]:
display(np.zeros(10))
display(np.zeros(shape=(2,5), dtype=np.int32))

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]], dtype=int32)

In [7]:
display(np.ones(10))
display(np.ones(shape=(2,5), dtype=np.int32))

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]], dtype=int32)

In [8]:
display(np.full(10, fill_value=2))
print(np.full(10, fill_value=2).dtype)
display(np.full(shape=(2,5), fill_value=2, dtype=str))

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

int64


array([['2', '2', '2', '2', '2'],
       ['2', '2', '2', '2', '2']], dtype='<U1')

### 3. Generate 4 more arrays. This time, they should be 2 by 4 arrays. 

In [9]:
shp = (2,4)
fv = np.int8(2)
for fun in (np.empty, np.zeros, np.ones, np.full):
    if fun != np.full:
        display(fun(shape=shp))
    else:
        display(fun(shape=shp, fill_value=fv))

array([[0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 7.80623720e-321, 7.56592337e-307,
        9.34588060e-307]])

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

array([[2, 2, 2, 2],
       [2, 2, 2, 2]], dtype=int8)

### 4. Use the <i> _like </i> functions to generate 4 more arrays:
    A) An empty array with the same shape as array_1D. 
    B) An 2-D array of 0s with the same shape as array_2D.
    C) A 3-D array of 1s with the same shappe as array_3D.
    D) A 3-D array of 2s with the same shape as array_3D. 

In [10]:
np.empty_like(array_1D)

array([214748364850, 214748364850, 214748364850, 214748364850,
       214748364850])

In [11]:
np.zeros_like(array_2D)

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [12]:
np.ones_like(array_3D)

array([[[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]],

       [[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]]])

In [13]:
np.full_like(array_3D, fill_value=2)

array([[[2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2]],

       [[2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2]]])

### 5. With the help of the np.arange() function, generate several sequences of numbers:
    A) The integers from 0 to 50, excluding 50. 
    B) The integers from 1 to 50, including 50. 
    C) The integers from 25 to 50, including 50. 
    D) Every 5-th integers from 25 to 50, including 50. 
    E) Every 5-th integers from 25 to 50, including 50, represented as decimals of up to 32 bits. 

In [14]:
display(np.arange(50))
display(np.arange(start=0, stop=50))

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

In [15]:
np.arange(start=1, stop=51, dtype=int)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])

In [16]:
np.arange(start=25, stop=51, dtype=np.int8)

array([25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
       42, 43, 44, 45, 46, 47, 48, 49, 50], dtype=int8)

In [17]:
np.arange(start=25, stop=51, step=5)

array([25, 30, 35, 40, 45, 50])

In [18]:
np.arange(start=25, stop=51, step=5, dtype=np.float32)

array([25., 30., 35., 40., 45., 50.], dtype=float32)

### 6. Import the following functions from the numpy.random module:
    A) Generator as "gen".
    B) PCG64 as "pcg"

In [19]:
from numpy.random import Generator as gen 
from numpy.random import PCG64 as pcg

### 7. Create a random generator object.

In [22]:
array_RG = gen(pcg(seed=365))

### 8. Using the .random() method, generate the following pseudo-random values:
    A) A single probability of an event occuring.
    B) An array of size 10 with the probabilities of 10 events. 
    C) A 5 by 10 2-D array with the probabilities of 50 events.  

In [33]:
array_RG = gen(pcg(seed=365))
display(array_RG.normal())
array_RG.random()

-0.13640898920107863

0.7662217951397283

In [36]:
array_RG.random(size=10)

array([0.7206516 , 0.84594003, 0.17159792, 0.72989385, 0.31220331,
       0.4966964 , 0.62633289, 0.25432223, 0.27184357, 0.58527261])

In [37]:
array_RG.random(size=(5,10))

array([[0.47582808, 0.31213978, 0.86326571, 0.69308801, 0.03810334,
        0.7685183 , 0.35663042, 0.10106293, 0.84993589, 0.33190844],
       [0.20269642, 0.33520176, 0.98973619, 0.64291474, 0.32662863,
        0.61190909, 0.35667588, 0.04772906, 0.90762694, 0.5903071 ],
       [0.02853661, 0.84445833, 0.91917457, 0.82386954, 0.7588933 ,
        0.06393185, 0.34507138, 0.12637865, 0.33319814, 0.13927789],
       [0.21461953, 0.5337677 , 0.36904177, 0.53629827, 0.81589813,
        0.07632435, 0.9389407 , 0.48395624, 0.9295958 , 0.23011817],
       [0.64945578, 0.02243555, 0.97401848, 0.35091164, 0.07297074,
        0.52586653, 0.01239245, 0.22309532, 0.89845526, 0.89214756]])

### 9. Set the seed for the random generator to 123, and generate the same 3 sets of values:
    A) A single probability of an event occuring.
    B) An array of size 10 with the probabilities of 10 events. 
    C) A 5 by 10 2-D array with the probabilities of 50 events.  
   <b> Note: </b> The seed only lasts for a single method before it gets reset. Hence, make sure you define the seed before calling the method every time. 

In [39]:
array_RG = gen(pcg(seed=123))
array_RG.random()

0.6823518632481435

In [40]:
array_RG = gen(pcg(seed=123))
array_RG.random(size=10)

array([0.68235186, 0.05382102, 0.22035987, 0.18437181, 0.1759059 ,
       0.81209451, 0.923345  , 0.2765744 , 0.81975456, 0.88989269])

In [41]:
array_RG = gen(pcg(seed=123))
array_RG.random(size=(5,10))

array([[0.68235186, 0.05382102, 0.22035987, 0.18437181, 0.1759059 ,
        0.81209451, 0.923345  , 0.2765744 , 0.81975456, 0.88989269],
       [0.51297046, 0.2449646 , 0.8242416 , 0.21376296, 0.74146705,
        0.6299402 , 0.92740726, 0.23190819, 0.79912513, 0.51816504],
       [0.23155562, 0.16590399, 0.49778897, 0.58272464, 0.18433799,
        0.01489492, 0.47113323, 0.72824333, 0.91860049, 0.62553401],
       [0.91712257, 0.86469025, 0.21814287, 0.86612743, 0.73075194,
        0.27786529, 0.79704355, 0.86522171, 0.2994379 , 0.52704208],
       [0.07148681, 0.58323841, 0.2379064 , 0.76496365, 0.17363164,
        0.31274226, 0.01447448, 0.03255192, 0.49670184, 0.46831253]])

### 10. Set the seed for the random generator to 123, and generate the following arrays of integers:
    A) A single integer between 0 and 10.
    B) A 1-D array of 10 integers between 0 and 100.
    C) A 5 by 10 array of two-digit integers.

In [51]:
array_RG = gen(pcg(seed=123))
display(array_RG.integers(10))
array_RG = gen(pcg(seed=123))
array_RG.integers(low=0, high=10)

np.int64(0)

np.int64(0)

In [53]:
array_RG = gen(pcg(seed=123))
array_RG.integers(100, size=10)

array([ 1, 68, 59,  5, 90, 22, 25, 18, 33, 17])

In [68]:
array_RG = gen(pcg(seed=123))
array_RG.integers(low=10, high=99, size=(5,10))

array([[11, 70, 62, 14, 90, 29, 32, 26, 39, 25],
       [40, 82, 50, 92, 50, 34, 80, 82, 86, 89],
       [12, 55, 33, 31, 31, 83, 80, 29, 46, 75],
       [23, 66, 49, 92, 75, 30, 84, 81, 28, 56],
       [80, 30, 31, 24, 10, 54, 11, 61, 47, 26]])

### 11. For this next bit, check the NumPy documentation and select 3 different probability distributions (e.g. normal, poisson, binomial, logistic) and generate the following:
    A) One "default" value from one distribution (without specifying any non-mandatory arguments).
    B) An array of 10 values from the second distribution. 
    C) A 5 by 10 2-D array with values from the third distribution.

In [83]:
array_RG = gen(pcg(seed=328))
display(array_RG.normal())
display(array_RG.poisson())
#display(array_RG.binomial())
display(array_RG.logistic())

0.5515532539351139

1

0.7034298047526156

In [86]:
display(array_RG.poisson(lam=14, size=10))
array_RG.binomial(n=100, p=0.4, size=10)

array([10, 12, 11, 18, 10, 14, 13,  8, 16, 17])

array([39, 47, 28, 44, 34, 43, 41, 41, 40, 44])

In [87]:
display(array_RG.binomial(n=100, p=0.4, size=(5,10)))
array_RG.logistic(loc=9, scale=2, size=(5,10))

array([[37, 39, 43, 36, 45, 33, 35, 34, 43, 39],
       [36, 42, 35, 32, 46, 40, 40, 46, 40, 39],
       [30, 47, 46, 33, 44, 38, 41, 38, 36, 37],
       [36, 36, 39, 46, 36, 49, 36, 42, 38, 36],
       [41, 44, 44, 46, 35, 53, 37, 39, 46, 45]])

array([[ 6.67771735, 10.20895911,  3.7722341 , 10.31744697,  8.46904351,
         7.91213937, 11.51767146,  6.01095713, 10.17039572, 12.99197827],
       [ 7.60943757,  6.19728446, 10.40785885, 15.03246183,  5.46801072,
        10.80934813, 15.1055401 ,  6.1015415 ,  4.77807486, 10.71277763],
       [ 8.99167662,  7.10855307,  8.7084992 , 12.6226263 ,  5.78727739,
         9.85170484,  7.96844794,  6.48156961,  9.24543465,  8.07336879],
       [10.7956467 ,  7.32366841,  8.96656125,  9.03788989,  5.42551661,
         8.51998693,  9.25256999,  9.60745486,  7.59994883, 11.90614286],
       [ 7.78316689,  4.39376511, 13.87698986,  4.60512832,  8.94221812,
         5.9061758 ,  5.66964925,  8.50535669,  8.32540222, 10.46231571]])