# Generating Data with NumPy

In [1]:
import numpy as np

### np.empty(), np.zeros(), np.ones(), np.full()

In [2]:
# np.empty() only allocates memory and does not initialise it, so the values
# shown are whatever was already there (they just happen to be zeros here).
array_empty = np.empty(shape=(2, 3))
array_empty

array([[0., 0., 0.],
       [0., 0., 0.]])

In [3]:
# A 2x3 array filled with zeros; with no dtype given the elements are floats.
array_zeros = np.zeros(shape=(2, 3))
array_zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [5]:
# Same shape as before, but the zeros are stored as 8-bit integers.
array_zeros = np.zeros(shape=(2, 3), dtype=np.int8)
array_zeros

array([[0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [6]:
# A 2x3 array filled with ones, stored as 8-bit integers.
array_ones = np.ones(shape=(2, 3), dtype=np.int8)
array_ones

array([[1, 1, 1],
       [1, 1, 1]], dtype=int8)

In [9]:
# np.full() fills every position with the given value (here the integer 2).
array_full = np.full(shape=(2, 3), fill_value=2)
array_full

array([[2, 2, 2],
       [2, 2, 2]])

In [10]:
# The fill value does not have to be numeric: a string produces a
# fixed-width Unicode array.
array_full = np.full(shape=(2, 3), fill_value="hello")
array_full

array([['hello', 'hello', 'hello'],
       ['hello', 'hello', 'hello']], dtype='<U5')

### "_like" functions

In [11]:
# A 3x5 integer matrix used as the template for the "_like" functions.
matrix_A = np.array(
    [
        [1, 0, 9, 2, 2],
        [3, 23, 4, 5, 1],
        [0, 2, 3, 4, 1],
    ]
)
matrix_A

array([[ 1,  0,  9,  2,  2],
       [ 3, 23,  4,  5,  1],
       [ 0,  2,  3,  4,  1]])

In [13]:
# Allocate an array shaped like matrix_A without initialising it: the contents
# are arbitrary leftovers from memory (note the stray 1316 in the output below).
array_empty_like = np.empty_like(matrix_A)
array_empty_like

array([[   0,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0],
       [1316,    0,    0,    0,    0]])

In [14]:
# An all-zero array with the same 3x5 layout as matrix_A.
array_0s_like = np.zeros_like(matrix_A)
array_0s_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

### np.arange()

In [15]:
# Python's built-in range is a lazy range object, not a list or an array.
range(30)

range(0, 30)

In [16]:
# Materialise the range as a list to see every value it yields (0 through 29).
list(range(30))

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29]

In [18]:
# NumPy's counterpart of range(): the same values 0..29, returned as an ndarray.
array_rng = np.arange(30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [22]:
# Spelling out start and stop gives the same array as np.arange(30);
# stop is exclusive.
array_rng = np.arange(start=0, stop=30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [23]:
# Unlike range(), np.arange accepts a non-integer step; the result is a float array.
array_rng = np.arange(start=0, stop=30, step=2.5)
array_rng

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. , 22.5, 25. ,
       27.5])

In [24]:
array_rng = np.arange(start = 0, stop =  30, step = 2.5, dtype = np.float32)
array_rng

# dtype sets the element type of the returned array. Every value here is exactly
# representable in float32, so the numbers match the float64 version above.
# NOTE(review): an earlier note claimed the cast happens after all the computations,
# but the int32 example that follows yields 0, 2, 4, ... rather than truncated
# multiples of 2.5, so do not rely on that for integer dtypes.

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. , 22.5, 25. ,
       27.5], dtype=float32)

In [25]:
# Gotcha: a non-integer step combined with an integer dtype does NOT give the
# truncated multiples of 2.5 (0, 2, 5, 7, ...). The output below is 0, 2, 4, ..., 22.
# The NumPy arange docs explain that the step actually used is
# dtype(start + step) - dtype(start), which is 2 here, while the length (12) is
# still derived from the original float step. Prefer np.linspace, or generate
# floats and cast explicitly, when this matters.
array_rng = np.arange(start = 0, stop =  30, step = 2.5, dtype = np.int32)
array_rng

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22])

## Random Generators

### Defining Random Generators

In [27]:
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg


## We load two classes from the numpy.random module: Generator (aliased as gen) and the PCG64 bit generator (aliased as pcg).

In [29]:
# Wrap a PCG64 bit generator (no seed supplied) in a Generator object.
array_RG = gen(pcg())
# Called with no arguments, normal() returns a single float.
array_RG.normal()

# RG is short for Random Generator.

0.0022557501485206983

In [33]:
# A fresh, unseeded generator; size=5 asks for a 1-D array of five draws.
array_RG = gen(pcg())
array_RG.normal(size=5)

array([ 0.69509364, -0.05223079,  0.29728155, -1.36689891, -0.18577339])

In [32]:
# A tuple for size gives a multi-dimensional result: here a 5x5 matrix of draws.
array_RG = gen(pcg())
array_RG.normal(size=(5, 5))

array([[ 0.13867839,  1.49853253,  0.10043548,  0.82187209, -0.28818085],
       [-0.79622882, -0.04463805,  1.50867129,  0.98687105, -0.0712671 ],
       [ 1.57498568,  0.26257698,  1.70319034,  0.01639261,  1.90086275],
       [ 0.97573978,  0.02996   , -0.5953379 ,  0.20043562,  0.60953567],
       [ 0.48858329,  0.46841493, -0.87738916,  0.46892909, -2.15908636]])

In [36]:
# Seeding the bit generator makes the draws reproducible: the same seed
# always yields the same 5x5 matrix.
array_RG = gen(pcg(seed=365))
array_RG.normal(size=(5, 5))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641, -0.6866818 ],
       [-0.50922173, -0.7999526 ,  0.73041825,  0.08825439, -2.1177576 ],
       [ 0.65526774, -0.48095012, -0.5519114 , -0.58578662, -0.98257896],
       [ 1.12378166, -1.30984316, -0.04703774,  0.955272  ,  0.26071745],
       [-0.20023668, -1.50172484, -1.4929163 ,  0.96535084,  1.18694633]])

In [None]:
# Draw again from the existing array_RG without re-creating it. The generator's
# state has already advanced, so this presumably demonstrates that the values
# differ from the seeded draw above (this cell has no recorded output).
array_RG.normal(size = (5,5))

### Generating Integers, Probabilities and Random Choices

In [38]:
# integers(10, ...) draws whole numbers from 0 up to (but not including) 10.
array_RG = gen(pcg(seed=365))
array_RG.integers(10, size=(5, 5))

array([[0, 7, 6, 7, 8],
       [6, 6, 2, 0, 6],
       [3, 0, 3, 7, 9],
       [1, 1, 8, 7, 4],
       [4, 8, 6, 4, 9]], dtype=int64)

In [None]:
# Re-creating the generator with the same seed restarts the random stream,
# so this repeats the previous integer draw.
array_RG = gen(pcg(seed=365))
array_RG.integers(10, size=(5, 5))

In [39]:
# Give both bounds explicitly: integers from 10 (inclusive) to 100 (exclusive).
array_RG = gen(pcg(seed=365))
array_RG.integers(low=10, high=100, size=(5, 5))

array([[18, 78, 64, 78, 84],
       [66, 67, 28, 10, 69],
       [45, 15, 37, 74, 96],
       [19, 21, 89, 73, 54],
       [53, 84, 66, 51, 92]], dtype=int64)

In [40]:
# random() returns floats in the half-open interval [0, 1), which can be
# read as probabilities.
array_RG = gen(pcg(seed=365))
array_RG.random(size=(5, 5))

array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599, 0.66501486],
       [0.06559111, 0.71326309, 0.10812106, 0.87969046, 0.49405844],
       [0.82472673, 0.45652944, 0.07367232, 0.69628564, 0.36690736],
       [0.29787156, 0.4996155 , 0.4865245 , 0.62740703, 0.54952637],
       [0.64894629, 0.04411757, 0.7206516 , 0.84594003, 0.17159792]])

In [41]:
# choice() samples from the supplied values; with no weights every value
# is equally likely.
array_RG = gen(pcg(seed=365))
array_RG.choice([1, 2, 3, 4, 5], size=(5, 5))

array([[1, 4, 4, 4, 5],
       [4, 4, 2, 1, 4],
       [2, 1, 2, 4, 5],
       [1, 1, 5, 4, 3],
       [3, 5, 4, 3, 5]])

In [42]:
# Chooses among a given set, optionally with weighted probabilities.
# p supplies one probability per candidate value, so 5 (weight 0.6) is drawn
# far more often than 1-4 (weight 0.1 each). Leaving p out gives the uniform
# behaviour of the previous cell. The candidates can be any 1-D sequence,
# for example a row of an existing array.
array_RG = gen(pcg(seed=365))
array_RG.choice((1, 2, 3, 4, 5), p=[0.1, 0.1, 0.1, 0.1, 0.6], size=(5, 5))

array([[5, 5, 5, 3, 5],
       [1, 5, 2, 5, 5],
       [5, 5, 1, 5, 4],
       [3, 5, 5, 5, 5],
       [5, 1, 5, 5, 2]])

### Generating Arrays From Known Distributions

In [43]:
# The default Poisson distribution: no lam is passed, so NumPy's default
# rate (lam = 1.0) is used.
array_RG = gen(pcg(seed=365))
array_RG.poisson(size=(5, 5))

array([[2, 0, 1, 1, 2],
       [1, 1, 0, 1, 1],
       [1, 2, 1, 1, 0],
       [0, 1, 0, 2, 1],
       [0, 1, 0, 0, 2]], dtype=int64)

In [44]:
# Poisson draws with an explicit rate: lam=10 centres the counts around 10.
array_RG = gen(pcg(seed=365))
array_RG.poisson(lam=10, size=(5, 5))

array([[11, 12, 12, 14, 13],
       [ 9, 10, 11, 11,  8],
       [11,  8, 10,  9, 14],
       [ 7,  8,  9, 15, 15],
       [13,  8,  8,  7,  9]], dtype=int64)

In [46]:
# Binomial draws: number of successes in n=100 trials, each with
# success probability p=0.4.
array_RG = gen(pcg(seed=365))
array_RG.binomial(n=100, p=0.4, size=(5, 5))

array([[42, 44, 30, 36, 45],
       [36, 41, 38, 42, 41],
       [35, 31, 35, 46, 29],
       [41, 41, 46, 34, 48],
       [45, 45, 45, 40, 43]], dtype=int64)

In [47]:
# Logistic draws: loc sets the centre of the distribution, scale its spread.
array_RG = gen(pcg(seed=365))
array_RG.logistic(loc=9, scale=1.2, size=(5, 5))

array([[10.37767822, 10.42451863,  9.63404367,  7.36153427,  9.82286787],
       [ 5.81223125, 10.09354231,  6.46790532, 11.38740256,  8.97147918],
       [10.85844698,  8.79081317,  5.962079  ,  9.99560681,  8.34539118],
       [ 7.97105522,  8.9981544 ,  8.93530194,  9.6253307 ,  9.23850869],
       [ 9.73729284,  5.3090678 , 10.13723528, 11.04372782,  7.11078651]])

For the full list of distributions a Generator can sample from, see the NumPy reference: https://numpy.org/doc/stable/reference/random/generator.html

### Applications of Random Generators

In [48]:
# Create the individual columns of the dataset we're building.
# Each column holds N_ROWS draws from a different distribution. All five come
# from the same seeded generator, so the order of the calls below matters:
# reordering them would change every column's values.
array_RG = gen(pcg(seed=365))

# Number of rows (observations) per column. The original passed size=(1000),
# which looks like a tuple but is just the int 1000; one named constant avoids
# both the confusion and the five-fold repetition.
N_ROWS = 1000

array_column_1 = array_RG.normal(loc=2, scale=3, size=N_ROWS)
array_column_2 = array_RG.normal(loc=7, scale=2, size=N_ROWS)
array_column_3 = array_RG.logistic(loc=11, scale=3, size=N_ROWS)
array_column_4 = array_RG.exponential(scale=4, size=N_ROWS)
array_column_5 = array_RG.geometric(p=0.7, size=N_ROWS)

In [51]:
# Stack the five column arrays into one 2-D array. np.array([...]) on its own
# makes each input a ROW, giving shape (5, 1000). Transposing flips that to
# (1000, 5), one row per observation and one column per variable, which is
# the layout we want for a dataset.
random_test_data = np.array(
    [
        array_column_1,
        array_column_2,
        array_column_3,
        array_column_4,
        array_column_5,
    ]
).T
random_test_data

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])

In [50]:
# Check the layout of the transposed array: (rows, columns).
random_test_data.shape

(1000, 5)

In [53]:
# Rebuild the dataset from the five column arrays (same construction as the
# earlier cell): stack them as rows, then transpose so each becomes a column.
random_test_data = np.array([array_column_1, array_column_2, array_column_3, array_column_4, array_column_5]).transpose()
random_test_data

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])

In [54]:
# Confirm the rebuilt array still has 1000 rows and 5 columns.
random_test_data.shape

(1000, 5)

In [55]:
# Save the array to an external file that this call creates.
#   file name -> "Random-Test-from-NumPy.csv"
#   data      -> random_test_data (the array we're exporting)
#   fmt       -> "%s", i.e. each value is written as a string
#   delimiter -> ","
# We'll talk more about these in just a bit.
np.savetxt(
    "Random-Test-from-NumPy.csv",
    random_test_data,
    fmt="%s",
    delimiter=",",
)

In [56]:
# Import the data back from the file we just created; the bare expression
# displays the parsed array so it can be compared with the original.
np.genfromtxt("Random-Test-from-NumPy.csv", delimiter=",")

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])

In [57]:
# Load the CSV again, this time keeping the result in a variable, and print it
# (print shows the plain-text form of the array rather than its repr).
rand_test_data = np.genfromtxt("Random-Test-from-NumPy.csv", delimiter=",")
print(rand_test_data)

[[ 1.59077303  6.42174295 10.14698427  6.91500737  1.        ]
 [ 2.28243293  8.57902322 15.93309953  6.243605    1.        ]
 [ 1.81098674  5.17270135 -0.46878789  2.44997251  1.        ]
 ...
 [ 0.1973629   4.3465854   2.66485989  0.80935387  1.        ]
 [-2.21015722  8.2176402  12.69328115  0.50644607  2.        ]
 [ 2.91161235  7.90337695 11.79840961  4.86816939  1.        ]]
