# Generating Data with NumPy

In [1]:
import numpy as np

### np.empty(), np.zeros(), np.ones(), np.full()
- `np.empty()` creates an array of the given shape without initialising its entries. The contents are whatever happened to be in memory: often zeros, but they may just as well be garbage values, so never rely on them.
- `np.zeros()` creates an array of the given shape filled with zeros.
- `np.ones()` creates an array of the given shape filled with ones.
- `np.full()` creates an array of the given shape filled with a scalar value that we provide.

In [3]:
# Reserve a 2x3 float64 array; np.empty leaves the entries uninitialised,
# so whatever is displayed is arbitrary memory content.
arr_empty = np.empty((2, 3), dtype=np.float64)
arr_empty

array([[0., 0., 0.],
       [0., 0., 0.]])

In [5]:
# 2x3 array of integer zeros.
arr_zeroes = np.zeros((2, 3), dtype=int)
arr_zeroes

array([[0, 0, 0],
       [0, 0, 0]])

In [7]:
# 2x3 array of integer ones.
arr_ones = np.ones((2, 3), dtype=int)
arr_ones

array([[1, 1, 1],
       [1, 1, 1]])

In [14]:
# 2x3 array with every entry set to the same string; no dtype is passed,
# so it is inferred from the fill value.
arr_full = np.full((2, 3), "Hello World!")
arr_full

array([['Hello World!', 'Hello World!', 'Hello World!'],
       ['Hello World!', 'Hello World!', 'Hello World!']], dtype='<U12')

### "_like" functions
- The `_like` functions do the same job as the non-random generators shown above.
- The only difference is that they take an existing array as input and create a new array of the same shape, filled according to the function used.
- A `_like` function also copies the dtype of the array whose shape is being used, so by default the new array has the same dtype unless another one is specified explicitly.

In [34]:
# Template array (2 rows x 4 columns of identical strings). Its contents are
# irrelevant; it only supplies the shape and dtype for the "_like" calls.
array = np.array([["heloooooooo"] * 4] * 2, dtype=str)
array

array([['heloooooooo', 'heloooooooo', 'heloooooooo', 'heloooooooo'],
       ['heloooooooo', 'heloooooooo', 'heloooooooo', 'heloooooooo']],
      dtype='<U11')

In [40]:
# New array with the shape and dtype of `array`; np.empty_like does not
# initialise the entries.
arr_empty = np.empty_like(array)
arr_empty

array([['', '', '', ''],
       ['', '', '', '']], dtype='<U11')

In [41]:
# New array with the shape and dtype of `array`, filled with the dtype's
# "zero" value (shown as empty strings in the output for this string dtype).
arr_zeroes = np.zeros_like(array)
arr_zeroes

array([['', '', '', ''],
       ['', '', '', '']], dtype='<U11')

In [42]:
# New array with the shape and dtype of `array`, filled with ones
# (shown as the string '1' in the output because the dtype is a string type).
arr_ones = np.ones_like(array)
arr_ones

array([['1', '1', '1', '1'],
       ['1', '1', '1', '1']], dtype='<U11')

In [39]:
# New array with the shape and dtype of `array`, every entry set to 'Hello'.
arr_full = np.full_like(array, "Hello")
arr_full

array([['Hello', 'Hello', 'Hello', 'Hello'],
       ['Hello', 'Hello', 'Hello', 'Hello']], dtype='<U11')

### np.arange()
- The name `arange` simply stands for "array range".
- The function generates a series of numbers, much like Python's built-in `range` function.
- The main difference is that `range` produces a lazy `range` object, which has to be converted to a list before its values can be seen.
- `np.arange`, on the other hand, directly returns an array built from the arguments passed to the function.
- `stop` must always be specified; `start` defaults to 0 and `step` defaults to 1.
- If only a single value is given, it is used as the stop value and the start value is taken to be 0. For example, `np.arange(30)` produces the integers 0 to 29. Note that recent NumPy versions reject a lone `start=` keyword without `stop`.

In [43]:
# A single positional argument is interpreted as `stop`; `start` defaults to 0,
# so this yields the integers 0..29.
# (Passing the value as the keyword `start=` with no `stop` raises a TypeError
# on recent NumPy releases, so the positional form is used here.)
range_array = np.arange(30)
range_array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [47]:
# Every second integer from 0 up to (but excluding) 30.
range_array_2 = np.arange(0, 30, 2)
range_array_2

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [48]:
# A fractional step is allowed and yields a float array.
range_array_3 = np.arange(0, 30, 2.5)
range_array_3

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. , 22.5, 25. ,
       27.5])

In [50]:
# Same call as before, but with an integer dtype. The result still has 12
# elements (as with the float step of 2.5), yet the displayed values advance
# by 2. So do not read the output as "all multiples of 2 below 30" - it is not.
# Combining a non-integer step with dtype=int is therefore best avoided.
range_array_3 = np.arange(0, 30, 2.5, dtype=int)
range_array_3

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22])

## Random Generators

### Defining Random Generators

In [53]:
from numpy.random import Generator as gen # Generator class: exposes the sampling methods (normal, integers, ...).
from numpy.random import PCG64 as pcg # PCG64 is a bit generator.
# Generator takes a bit generator as its input and creates generator objects.

# PCG stands for Permutation Congruential Generator; PCG64 supplies the raw
# random bits (as 64-bit values) that the Generator turns into samples.

In [54]:
 # Create the generator object from an unseeded PCG64 bit generator
 # (no seed, so the values differ on every run).
array_RG = gen(pcg())
 # normal() with no arguments pulls out a single random value from a
 # standard normal distribution.
array_RG.normal()

0.8330189237307865

In [58]:
# Passing size=5 returns an array of five draws; the values change on every call.
array_RG.normal(size=5)

array([-0.92699451, -1.8289533 ,  0.28369994, -0.11476081,  0.1916118 ])

In [60]:
# A tuple for `size` produces a multi-dimensional result - here a 5x5 array.
array_RG.normal(size=(5, 5))

array([[ 0.49502473, -0.39520211, -0.19603236, -0.15032579,  0.98584493],
       [ 1.35998942,  0.45217894, -0.29888139,  0.65475579,  2.44050888],
       [-1.23135067, -2.19229167,  2.04862471,  0.3664447 , -1.12300141],
       [ 0.47287314, -1.78188623, -0.13738788,  2.56328242, -0.14175104],
       [-0.6187722 , -0.25152134, -1.68570909, -1.85358494, -0.06528211]])

In [66]:
# Sometimes values must be generated randomly once and then reproduced
# consistently afterwards (use case: feeding a model the same input every time).
# Passing a seed to the bit generator achieves this: the same seed always
# produces the same sequence of numbers.
array_RG = gen(pcg(seed=365))
array_RG.normal(size=(5, 5))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641, -0.6866818 ],
       [-0.50922173, -0.7999526 ,  0.73041825,  0.08825439, -2.1177576 ],
       [ 0.65526774, -0.48095012, -0.5519114 , -0.58578662, -0.98257896],
       [ 1.12378166, -1.30984316, -0.04703774,  0.955272  ,  0.26071745],
       [-0.20023668, -1.50172484, -1.4929163 ,  0.96535084,  1.18694633]])

In [73]:
# Seeding only fixes the starting point of the generator: every draw advances
# its internal state. To obtain the same values again, the generator has to be
# re-created with the seed first, i.e. by re-running
#     array_RG = gen(pcg(seed = 365))
# Without that line, the call below simply continues the sequence and returns
# values different from the seeded draw above.
array_RG.normal(size=(5, 5))

array([[ 0.58830139,  0.43063835,  0.42871861, -0.49041682,  0.58326844],
       [-0.10015833, -1.27535497,  0.94226986,  0.01434193, -1.20783944],
       [ 0.16767707, -1.35242456,  0.51855231,  0.66726422,  0.5453976 ],
       [-0.46037542,  2.10611129,  0.79579772, -1.44055324, -0.62070809],
       [ 2.16041849, -0.50664074,  0.08321514,  0.87416817,  1.98530199]])

### Generating Integers, Probabilities and Random Choices
- So far we have seen how random generation works and covered the basic terminology: seeds, bit generators and generator objects.
- Next we use specific methods to generate integers, probabilities and random choices.

In [74]:
# Start from a freshly seeded random generator (RG) object.
array_RG = gen(pcg(seed=365))
array_RG.normal(size=(5, 5))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641, -0.6866818 ],
       [-0.50922173, -0.7999526 ,  0.73041825,  0.08825439, -2.1177576 ],
       [ 0.65526774, -0.48095012, -0.5519114 , -0.58578662, -0.98257896],
       [ 1.12378166, -1.30984316, -0.04703774,  0.955272  ,  0.26071745],
       [-0.20023668, -1.50172484, -1.4929163 ,  0.96535084,  1.18694633]])

In [84]:
# Array of random integers, drawn from low up to high - 1.
# When only `low` is supplied, it acts as the upper bound and the lower bound
# becomes 0, so this call draws from 0..11.
array_RG.integers(low=12, size=(5, 5))

array([[ 6, 10,  4,  5,  6],
       [ 1,  9,  7,  0,  3],
       [11,  0,  5,  6, 11],
       [ 0,  2,  3,  7,  8],
       [ 0,  5, 11,  9,  4]], dtype=int64)

In [86]:
# Probabilities need values between 0 and 1. `random` picks floats from a
# continuous uniform distribution over that interval, so no low/high bounds
# have to be given.
array_RG.random(size=(5, 5))

array([[0.07297074, 0.52586653, 0.01239245, 0.22309532, 0.89845526],
       [0.89214756, 0.54528016, 0.9196353 , 0.2630934 , 0.94166123],
       [0.28297619, 0.7369772 , 0.70370802, 0.29407794, 0.70988119],
       [0.75161463, 0.09717125, 0.8397204 , 0.66449766, 0.05516039],
       [0.33356683, 0.0034234 , 0.81017284, 0.3652647 , 0.01801856]])

In [104]:
# `choice` samples from an explicit collection of values rather than a range.
# By default every outcome is equally likely; `p` assigns our own probability
# to each element, so each value shows up with a relative frequency close to
# its entry in `p`.
# The probability list must have the same length as the list of choices.
array_RG.choice(a=[1, 2, 3, 4, 5], p=[0.50, 0.10, 0.10, 0.10, 0.20], size=(5, 5))

array([[1, 2, 2, 3, 1],
       [5, 1, 1, 3, 5],
       [1, 4, 1, 1, 3],
       [1, 1, 1, 1, 2],
       [1, 5, 1, 4, 1]])

### Generating Arrays From Known Distributions
- Here we generate random values from specific distributions, such as the Poisson, binomial and logistic distributions.

In [108]:
# Poisson-distributed counts with rate lam = 0.9, from a freshly seeded generator.
array_RG = gen(pcg(seed=365))
array_RG.poisson(lam=0.9, size=(5, 5))

array([[2, 0, 1, 1, 2],
       [1, 1, 0, 1, 1],
       [1, 2, 1, 1, 0],
       [0, 1, 0, 2, 1],
       [0, 1, 0, 0, 2]], dtype=int64)

In [111]:
# Each entry is the number of successes out of n = 100 trials with success
# probability p = 0.8, so the values cluster around 80.
array_RG = gen(pcg(seed=365))
array_RG.binomial(n=100, p=0.8, size=(5, 5))

array([[77, 77, 79, 83, 78],
       [86, 78, 85, 75, 80],
       [76, 81, 86, 78, 81],
       [82, 80, 80, 79, 80],
       [79, 87, 78, 76, 84]], dtype=int64)

Full list of available distributions: https://numpy.org/doc/stable/reference/random/generator.html

### Applications of Random Generators
- Random generators are mainly used to create test data for a model.
- Feeding the model random values in tests checks how well it copes with varied inputs.

In [125]:
# Five samples of equal length (1000 values each), drawn from different
# distributions, so that they can serve as equally sized columns.
array_RG = gen(pcg(seed=364))
array_1 = array_RG.poisson(lam=0.9, size=1000)
array_2 = array_RG.binomial(n=100, p=0.6, size=1000)
array_3 = array_RG.normal(loc=7, scale=2, size=1000)
array_4 = array_RG.exponential(scale=4, size=1000)
array_5 = array_RG.geometric(p=0.7, size=1000)

In [126]:
# Stack the five samples as rows, then transpose so that each sample becomes
# one column of the combined array.
array = np.array([array_1, array_2, array_3, array_4, array_5]).T
print(array.shape)

(1000, 5)


In [132]:
# Show the generated numbers, then save them to a CSV file with every value
# written in string format.
print(array)
np.savetxt("Random_Test_From_Numpy.csv", array, fmt="%s", delimiter=",")

[[ 0.         64.          7.72970997  5.6021137   1.        ]
 [ 1.         53.          6.02103397  3.82156739  1.        ]
 [ 0.         67.          4.54775707  4.86846517  1.        ]
 ...
 [ 3.         59.          6.62905739  1.25272944  1.        ]
 [ 0.         59.          8.95309233  0.48415405  1.        ]
 [ 2.         58.          4.89175141  1.08191056  1.        ]]


In [130]:
# Read the saved CSV file back into an array.
np.genfromtxt("Random_Test_From_Numpy.csv", delimiter=",")

array([[ 0.        , 64.        ,  7.72970997,  5.6021137 ,  1.        ],
       [ 1.        , 53.        ,  6.02103397,  3.82156739,  1.        ],
       [ 0.        , 67.        ,  4.54775707,  4.86846517,  1.        ],
       ...,
       [ 3.        , 59.        ,  6.62905739,  1.25272944,  1.        ],
       [ 0.        , 59.        ,  8.95309233,  0.48415405,  1.        ],
       [ 2.        , 58.        ,  4.89175141,  1.08191056,  1.        ]])