In [1]:
import numpy as np  # yup thats it

### Random Sampling
NumPy has some tools for generating random samples. Each has its specific usages and advantages. We will investigate a few of these briefly but again visit the NumPy documentation for a complete referrence. 

In [6]:
# you can create arrays with random values inside
a = np.random.rand(25)  # random number from 0-1 
a         # the 1 in the paranthesis is the dimension of array

array([ 0.72887397,  0.11010912,  0.10561209,  0.80414468,  0.64235741,
        0.33867329,  0.38464653,  0.91567963,  0.80467241,  0.81171195,
        0.16406302,  0.91776607,  0.61774358,  0.69428262,  0.71057708,
        0.71974008,  0.14178484,  0.5754328 ,  0.13908481,  0.15692219,
        0.28228817,  0.81787331,  0.97729425,  0.59645642,  0.30045334])

In [4]:
b = np.random.rand(5,5)
b

array([[ 0.3340914 ,  0.31203252,  0.47998479,  0.40733019,  0.34296724],
       [ 0.6889197 ,  0.754771  ,  0.50681148,  0.21336499,  0.80153957],
       [ 0.90361957,  0.1104925 ,  0.69393226,  0.22096957,  0.41783638],
       [ 0.11390563,  0.40156506,  0.76893991,  0.02137118,  0.56850509],
       [ 0.89166505,  0.90246817,  0.44323996,  0.99563277,  0.27215129]])

In [9]:
# very similar but this samples from normal distribution
c = np.random.randn(3) #chooses between -1 and 1
c

array([-0.28321369, -1.47304691, -0.48496634])

In [10]:
# random integers 
d = np.random.randint(3)  # 3 is upper bound
d

2

In [11]:
rand = np.random.randint(1,10, size = 5)  # (low, high, size)
rand

array([6, 5, 9, 3, 1])

In [13]:
# using a seed for repeatable results
np.random.seed(0) 
rand_arr = np.random.randn(4,3)
rand_arr #running this in the same cell will always print the same results

array([[ 1.76405235,  0.40015721,  0.97873798],
       [ 2.2408932 ,  1.86755799, -0.97727788],
       [ 0.95008842, -0.15135721, -0.10321885],
       [ 0.4105985 ,  0.14404357,  1.45427351]])

### Common functions for NumPy arrays

In [16]:
# mean of values in an array
print('avg',np.mean(rand))
calc_avg = np.sum(rand)/25
calc_avg

avg 4.8


0.95999999999999996

In [17]:
# variance 
np.var(rand)

7.3599999999999994

In [18]:
# standard deviation
np.std(rand)

2.7129319932501073

In [19]:
# sum of the values
np.sum(rand) #

24

In [25]:
# for matrices that are not 1d, specify axis
v = np.array([[1,2],[3,4]])
print(v)
row_sum = np.sum(v, axis=1) #axis=1 adds the rows(left to right) and axis = 0 adds the columns(up and down)
row_sum

[[1 2]
 [3 4]]


array([3, 7])

In [24]:
col_sum = np.sum(v, axis=0) #axis = 0 adds the columns
print(v)
col_sum

[[1 2]
 [3 4]]


array([4, 6])

In [27]:
# you can find the min and max 
sample = np.random.randn(10) #creates an array of 10 values
sample_max = np.max(sample) #takes the highest value in generated array
sample_min = np.min(sample) #takes the lowest value in generated array
print(sample) #prints the requested elements
print(sample_max)
print(sample_min)

[ 0.8644362  -0.74216502  2.26975462 -1.45436567  0.04575852 -0.18718385
  1.53277921  1.46935877  0.15494743  0.37816252]
2.26975462399
-1.4543656746


In [29]:
# use argmax and argmin to find the index for min and mx values
print(np.argmax(sample))
np.argmin(sample)  # python indexing starts from 0

2


3

In [31]:
# you can combine commands together
data = np.random.randn(4,4) # create random data
print('data',data)
col_totals = np.sum(data, axis=0)  # total the columns (adds up and down)
print('totals',col_totals)
max_col_total = (np.argmax(col_totals),np.max(col_totals))
max_col_total  # 1st output is the index, 2nd is the value for max

data [[ 0.17742614 -0.40178094 -1.63019835  0.46278226]
 [-0.90729836  0.0519454   0.72909056  0.12898291]
 [ 1.13940068 -1.23482582  0.40234164 -0.68481009]
 [-0.87079715 -0.57884966 -0.31155253  0.05616534]]
totals [-0.46126869 -2.16351103 -0.81031868 -0.03687958]


(3, -0.036879582427382887)

In [None]:
# heres how to do in 1 line
max_col_tot = (np.argmax(np.sum(data,axis=0)),
               np.max(np.sum(data,axis=0)))
max_col_tot

### PHYSICS EXAMPLE
Lets say everyday we record the average temperature for an object. We are interested in finding the day where the object had its maximum temperature. We will first generate random data and this use numpy functions to find the answer. 

In [32]:
# generate random velocities (must be positive)
temp_data = 50*np.random.rand(100)
print('day of max temp - ',np.argmax(temp_data)+1, 'days')
print('max temp - ',np.max(temp_data), 'degrees Celsius')

time of max temp -  52 days
max temp -  49.9423503284 degrees Celsius


In [35]:
# shape gets the dimensions
a = np.array([[1,2,3],[4,5,6],[7,8,9]])  # syntax for 3x3
print(a)
a.shape #displays the dimensions, only works when both row/column length is the same

[[1 2 3]
 [4 5 6]
 [7 8 9]]


(3, 3)

In [36]:
# len still works
len(a)

3

In [37]:
# you can index through the elements of the array
print(a[0][0])  # first element of first row
print(a[1][1]) # second element of second row
print(a[-1][-1])  # last element second row
print(a[0])  # first row
print(a[0:2])  # first 2 rows
a[:,0:2]  # first 2 columns

1
5
9
[1 2 3]
[[1 2 3]
 [4 5 6]]


array([[1, 2],
       [4, 5],
       [7, 8]])

In [38]:
# merging numpy arrays horizontally
first = np.array([1,2,3])
second = np.array([4,5,6])
frst_scnd = np.hstack((first,second))
frst_scnd

array([1, 2, 3, 4, 5, 6])

In [39]:
# merge vertically
new_arr = np.vstack((first,second))
new_arr

array([[1, 2, 3],
       [4, 5, 6]])