In [1]:
import random
import sys

import numpy as np # use np as convenient shorthand for the numpy module

In [2]:
# The most direct way to build the list: write out every element by hand.
x_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
print(x_values)

# A second way to get the integers 0 through 9, i.e. the half-open
# interval [0, 10) in math notation (0 is included, 10 is not):
# begin with an empty list ...
x_values = [] # rebinding x_values to a fresh empty list is perfectly fine
print(x_values)

# ... and append one number per loop iteration.
for i in range(10):
    x_values.append(i)
print(x_values)

# Creating a list and then filling it in a loop can be written far more
# succinctly as a single expression, known as a list comprehension.
x_values = [n for n in range(10)]
print(x_values)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [3]:
# We now populate our y_values with 10 random integers in the half-open range [0, 20).
# random.randrange excludes the upper bound; random.randint(0, 20) would also
# return 20, contradicting the stated range.
y_values = [random.randrange(0, 20) for _ in range(10)]

# Now we compute the (population) standard deviation, which is the square root
# of the average of (y - M)**2, where M is the average, or mean, of all y values
# in y_values.
M = sum(y_values) / len(y_values)
std_dev = (sum((y - M)**2 for y in y_values) / len(y_values))**0.5 # raising to the power 0.5 is the same as taking the square root
print(std_dev)

# The above calculations were done "by hand", for pedagogical value.
# Now we'll achieve the same with numpy in one swift line
# (np.std divides by N by default, just like the formula above):
print(np.std(y_values))

# Unsurprisingly, the results should match (up to floating-point rounding)!

4.935585071701226
4.935585071701226


A closing observation about the code above. You may have noticed that x_values and y_values are both vanilla Python lists. There is nothing wrong with using Python lists, but when manipulating large datasets, numpy arrays are more efficient. To illustrate this, we use the sys.getsizeof() function to determine the size of an object in bytes.

In [4]:
# sys is imported together with the other modules in the first cell.
# Note: for a list, sys.getsizeof() reports only the list object itself
# (its header plus its array of references), not the int objects it refers to.
print(sys.getsizeof(y_values))

184


In [5]:
# Build a numpy array of 32-bit integers from the y_values list,
# then report the size of the array object in bytes.
y_values_array = np.asarray(y_values, dtype=np.int32)
print(sys.getsizeof(y_values_array))

152


The gain may not look like much, but that's because we only have 10 items in the list / array. With more items, the gains are more significant.

In [6]:
# 1000 random ints in the half-open range [0, 20), the same range that
# np.random.randint(0, 20, ...) draws from. random.randrange excludes the upper
# bound, whereas random.randint(0, 20) would occasionally produce 20.
y_values = [random.randrange(0, 20) for _ in range(1000)]
y_values_array = np.array(y_values, dtype=np.int32) # convert y_values to numpy array

# Compare the size in bytes of the list object and of the numpy array.
print(sys.getsizeof(y_values))
print(sys.getsizeof(y_values_array))

8856
4112


In [7]:
# One final point for today: numpy can generate the whole array of random
# integers by itself, in a single call. Here we draw 1000 values from [0, 20)
# (the upper bound is excluded) and store them as 32-bit integers.
y_values_array = np.random.randint(low=0, high=20, size=1000, dtype=np.int32)