# 1. The Numpy Package

## 1.1 Numpy Array Basics

In [None]:
import numpy as np #import numpy

In [None]:
l = list(range(1,11)) #create a list with the integers 1 to 10
l

In [None]:
my_array = np.array(l) #transform list into a numpy array (ndarray)
print(my_array)

In [None]:
type(my_array) #check the type of the new object

In [None]:
for i in my_array: # same as lists, ndarrays store multiple elements and are iterable
    print(i)

In [None]:
l = [1, 2.5, "Dog", True] #lists can store different datatypes

In [None]:
for i in l: #check the datatype of each list element
    print(type(i))

In [None]:
a = np.array(l) #in ndarrays, all elements must have same datatype; numpy transforms automatically
a

In [None]:
for i in a: #check the datatype of each ndarray element after the conversion
    print(type(i))

In [None]:
b = np.array([1., "2", 3]) #float, string and integer input is converted to one common datatype as well
b

In [None]:
type(b) #type of the array object itself (not of its elements)

In [None]:
b.dtype #can check single datatype of all elements with attribute .dtype

## 1.2 Numpy Array (element-wise operations / vectorization)

In [None]:
import numpy as np

In [None]:
np.arange(1,11) #create new ndarray from 1(incl.) to 11(excl.)

In [None]:
np.arange(1,11,2) #only every second number is created

In [None]:
l = [1,2,3,4] #create a list
l

In [None]:
l*2 #this is not an element-wise operation (the list is repeated instead)

In [None]:
l1 = [] #element-wise operations with lists require a bunch of code
for i in l:
    l1.append(i*2)
l1

In [None]:
l+2 #this is not an element-wise operation and does not work at all (raises a TypeError)

In [None]:
a = np.arange(1,5) #create ndarray from 1 to 4 (both including)
a

In [None]:
a * 2 #element-wise (vectorized) operations are pretty simple with ndarrays

In [None]:
a + 2 #addition works as well

In [None]:
a**2 #all elements squared

In [None]:
2**a #can serve as exponent as well

In [None]:
np.sqrt(a) #square root of all elements

In [None]:
np.exp(a) #exponentiation with e

In [None]:
np.log(a) #natural logarithm

In [None]:
a.sum() #sum of all elements (ndarray method)

In [None]:
np.sum(a) #sum of all elements

In [None]:
sum(a) #Python's built-in sum() works on ndarrays as well

In [None]:
a.size #number of elements in ndarray (ndarray attribute)

In [None]:
len(a) #built-in len() gives the number of elements of this one-dimensional array as well

In [None]:
b = np.array([-2, -1, -0.5, 0, 1, 2, 3.5]) #array with negative and positive values
b

In [None]:
np.abs(b) #absolute values of all elements

In [None]:
c = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) #array with negative and positive decimals
c

In [None]:
np.ceil(c) #element-wise rounding up

In [None]:
np.floor(c) #element-wise rounding down

In [None]:
np.around([-3.23, -0.76, 1.44, 2.65, ], decimals = 0) #evenly round all elements to the given number of decimals.

## 1.3 Numpy Array (Indexing and Slicing)

In [None]:
import numpy as np

In [None]:
a = np.arange(1,11) #array from 1 to 10 (incl.)
a

In [None]:
a[0] #first element at index position 0 (zero-based indexing!)

In [None]:
a[1] #second element (index position 1)

In [None]:
a[-1] #last element

In [None]:
list(enumerate(a)) #list of index,value tuples

In [None]:
a[2:6] #slicing from index position 2 (incl.) till position 6 (excl.)

In [None]:
a[:] #all elements

In [None]:
a[:5] #all elements until index position 5 (excl.)

In [None]:
a[6:] #all elements from index position 6 (incl.) till the last element (incl.)

In [None]:
a[::2] #every second element, starting from first element

In [None]:
a[::3] #every third element, starting from first element

In [None]:
a[2::3] #every third element, starting from third element (index position 2)

In [None]:
a[0] = 100 #ndarrays are mutable, changing first element to 100
a

In [None]:
a[-1] = 101 #changing last element to 101
a

In [None]:
a[2:5] = 50 #in contrast to lists, ndarrays allow broadcasting, assigning one new value to multiple elements
a

In [None]:
a[2:5] = [50, 51, 52] #assigning multiple new values to multiple elements
a

In [None]:
a = np.arange(1,11) #creating new ndarray a
a

In [None]:
b = a[2:8] # making a slice of ndarray a and assigning it to the new variable b
b

In [None]:
b[0] = 100 #changing first element of ndarray b
b

In [None]:
a #respective element of ndarray a has changed as well!!!

In [None]:
l = list(range(1,11)) #lists behave differently
l

In [None]:
m = l[2:8] #here a copy of the slice of l is created
m

In [None]:
m[0] = 100 #changing first element of slice m
m

In [None]:
l #no effect on l !!!

## 1.4 Numpy Array (Shape and multiple Dimensions)

In [None]:
import numpy as np

In [None]:
a = np.arange(1,13) #creating array from 1 to 12
a

In [None]:
type(a) #check the type of a

In [None]:
a.shape #one-dimensional array, 12 elements in one dimension (vector)

In [None]:
a = a.reshape(2,6) #reshaping a: 2 rows / 6 columns

In [None]:
a

In [None]:
a.shape # two-dimensional array: 2 rows / 6 columns (matrix)

In [None]:
a = a.reshape(6,2) # two-dimensional array: 6 rows / 2 columns
a

In [None]:
a.shape #shape after reshape(6,2): 6 rows / 2 columns

In [None]:
a + 100 #element-wise operations still work

In [None]:
a.reshape(3,5) #not possible with 12 elements (3*5 = 15 elements would be needed), raises an error

In [None]:
a = a.reshape(2,2,3) #creating a three-dimensional array
a

In [None]:
a.shape #shape after reshape(2,2,3): three dimensions

In [None]:
b = np.arange(1,101).reshape(25,4) #creating 2-dim ndarray (25 rows / 4 columns) with one line of code
b

## 1.5 Numpy Array (Indexing and Slicing multi-dimensional arrays)

In [None]:
import numpy as np

In [None]:
a = np.arange(1,13) #creating array from 1 to 12
a

In [None]:
a = a.reshape(3,4, order = "C") #creating matrix with 3 rows and 4 columns (order "C": filled row by row)
a

In [None]:
a[0] #first row (index position 0)

In [None]:
a[1] #second row (index position 1)

In [None]:
a[2] #third row (index position 2)

In [None]:
a[-1] #last row (index position -1)

In [None]:
a[1][1] #second row, second column

In [None]:
a[1,1] #more convenient in one square bracket

In [None]:
a[2,-1] #third row, last column

In [None]:
a[:,0] #all rows, first column

In [None]:
a[:,1] #all rows, second column

In [None]:
a[:,-1] #all rows, last column

In [None]:
a[:2,1:3] #first two rows, column two and three

In [None]:
a

In [None]:
a.T #Transpose: switching axes (attribute)

In [None]:
a.transpose() #same (method)

In [None]:
a

In [None]:
a[:,-1] = a[:,-1] /4 #changing slice inplace: last column 4, 8, 12 becomes 1, 2, 3 (a keeps its integer datatype)

In [None]:
a

In [None]:
a = np.arange(1,13).reshape(3,4) #creating a 3x4 matrix
a

In [None]:
a.sum() #sum over all elements in matrix

In [None]:
a.sum(axis = 0) #sum of each column

In [None]:
a.sum(axis = 1) #sum of each row

In [None]:
a.cumsum() #cumulative sum of all elements

In [None]:
a.cumsum(axis = 0) #cumulative sum for each column

In [None]:
a.cumsum(axis = 1) #cumulative sum for each row

In [None]:
a

In [None]:
a.prod() #product over all elements

In [None]:
a.prod(axis = 0) #product over all elements in each column

In [None]:
a.prod(axis = 1) #product over all elements in each row

## 1.6 Boolean Indexing

In [None]:
import numpy as np

In [None]:
a = np.arange(1,11) #array from 1 to 10
a

In [None]:
mask1 = a > 5 #element-wise check if greater than 5
mask1

In [None]:
mask2 = a < 8 #element-wise check if smaller than 8
mask2

In [None]:
mask3 = (a > 5) & (a < 8) #element-wise check if greater 5 and smaller 8 (logical and)
mask3

In [None]:
mask4 = (a > 5) | (a < 8) #element-wise check if greater 5 or smaller 8 (logical or); true for every element here
mask4

In [None]:
mask5 = ~((a > 5) & (a < 8)) #the opposite of mask3
mask5

In [None]:
a[a>5] #slicing all elements that are greater 5 (fulfill condition of mask1)

In [None]:
a[mask1] #slicing all elements that fulfill condition of mask1

In [None]:
a[mask2] #slicing all elements that fulfill condition of mask2

In [None]:
a[mask3] #slicing all elements that fulfill condition of mask3

In [None]:
a[mask4] #slicing all elements that fulfill condition of mask4

In [None]:
a[mask5] #slicing all elements that fulfill condition of mask5

## 1.7 Random Numbers

In [None]:
import numpy as np

In [None]:
a = np.random.randint(1,101,10) #creating 10 random integers between 1 (incl.) and 101 (excl.)
a

In [None]:
np.random.seed(123) #setting a seed enables reproducibility
a = np.random.randint(1,101,10)
a

In [None]:
np.random.normal(5, 2,10) #creating 10 normally distributed numbers with mean 5 and std 2

In [None]:
b = np.arange(1,101) #creating array b from 1 to 100
b

In [None]:
np.random.shuffle(b) #randomly shuffle ndarray b (inplace)

In [None]:
b

In [None]:
b.sort() #sorting ndarray b again (inplace)

In [None]:
b[::-1] #b in reverse (descending) order; this is only a reversed view, b itself is not changed

In [None]:
b #b itself is still sorted in ascending order

In [None]:
np.random.seed(123)
b1 = np.random.choice(b, 100, replace = True) #randomly creating a 100 elements sample of ndarray b, here with replacement (replace = True)
b1

In [None]:
b1.sort() #sorting b1
b1

In [None]:
np.unique(b1) #unique elements of b1

In [None]:
np.array(list(set(b1))) #same unique elements (but a set does not guarantee sorted order)

In [None]:
np.unique(b1).size #how many unique elements?

In [None]:
np.unique(b1, return_index= True, return_counts=True) #np.unique() is quite informative: also returns first-occurrence indices and counts

## 1.8 Performance

In [None]:
import numpy as np

In [None]:
size = 1000000 #number of elements

In [None]:
a = np.arange(size) #ndarray
len(a) #check number of elements

In [None]:
l = list(range(size)) #list
len(l) #check number of elements

In [None]:
%timeit a+2 #ndarray: measuring time for element-wise addition

In [None]:
%timeit [i+2 for i in l] #list: measuring time for element-wise addition

In [None]:
%timeit a*2 #multiplication

In [None]:
%timeit [i*2 for i in l] #multiplication

In [None]:
%timeit a**2 #square

In [None]:
%timeit [i**2 for i in l] #square

In [None]:
%timeit np.sqrt(a) #square root

In [None]:
%timeit [i**0.5 for i in l] #square root

## 1.9 Case Study Numpy vs. Python Standard Library

In [None]:
import numpy as np

In [None]:
np.random.seed(122) #fixing the seed for a reproducible result
#10,000 rows with 100 random integers (1 to 10) each: count the 1s per row, then average the counts over all rows
(np.random.randint(1,11,1000000).reshape(10000,100) == 1).sum(axis = 1).mean() #using vectorization and method-chaining

In [None]:
np.random.seed(122)
(np.random.randint(1,11,10000*100).reshape(10000,100) == 1).sum(axis = 1).mean() #same simulation, sample size written as 10000*100

In [None]:
%timeit (np.random.randint(1,11,100*10000).reshape(10000,100) == 1).sum(axis = 1).mean() #measuring time of the vectorized simulation

In [None]:
import random

In [None]:
def simulation(n_trials = 10000, n_draws = 100): # using nested loops, if statements and lists
    """Estimate the average number of 1s per trial with the Python standard library.

    Pure-Python counterpart of the vectorized numpy one-liner: every trial draws
    ``n_draws`` random integers with ``random.randint(1,10)`` and counts how many
    of them are equal to 1. The nested loops are kept on purpose, as they are the
    baseline for the performance comparison against numpy.

    Parameters
    ----------
    n_trials : int, default 10000
        Number of independent trials (outer loop).
    n_draws : int, default 100
        Number of random integers drawn per trial (inner loop).

    Returns
    -------
    float
        Mean count of 1s per trial.

    Raises
    ------
    ValueError
        If ``n_trials`` is smaller than 1 (the mean over zero trials is undefined).
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")
    results = [] #number of 1s for each trial
    for _ in range(n_trials):
        hits = [] #True for every draw that equals 1, False otherwise
        for _ in range(n_draws):
            if random.randint(1,10) == 1:
                hits.append(True)
            else:
                hits.append(False)
        results.append(sum(hits)) #True counts as 1 when summing
    return (sum(results) / len(results))

In [None]:
simulation() #running the pure Python simulation once

In [None]:
%timeit simulation() #measuring time of the pure Python simulation

## 1.10 Summary Statistics

In [None]:
import numpy as np

In [None]:
np.random.seed(123) #fixing the seed for reproducibility
a = np.random.randint(1,101, 11) #creating an array with 11 random integers between 1 and 100
a

In [None]:
a.sort() #sorting a inplace
a

In [None]:
a.max() #maximum

In [None]:
np.max(a) #maximum

In [None]:
max(a) #maximum

In [None]:
a.min() #minimum

In [None]:
np.mean(a) #mean

In [None]:
a.mean() #mean

In [None]:
np.median(a) #median

In [None]:
np.std(a) #standard deviation

In [None]:
np.var(a) #variance

In [None]:
np.percentile(a, 10) #10th percentile

In [None]:
np.percentile(a, 90) #90th percentile

In [None]:
np.random.seed(123)
a = np.random.randint(1,101, 11) #creating an array a with 11 random integers between 1 and 100
a

In [None]:
np.random.seed(111) #different seed!!!
b = np.random.randint(1,101, 11) #creating an array b with 11 random integers between 1 and 100
b

In [None]:
np.cov(a,b) #covariance matrix

In [None]:
np.corrcoef(a,b) #correlation matrix

## 1.11 Visualization and (Linear) Regression

In [None]:
import numpy as np

In [None]:
import matplotlib.pyplot as plt
plt.style.use("seaborn-v0_8") #applying the seaborn-v0_8 plotting style to all following plots

In [None]:
y = np.random.normal(5,2,10000) #creating 10,000 normally distributed numbers with mean 5 and std 2
y

In [None]:
plt.figure(figsize = (10,6))
plt.hist(y, bins = 100, label = "Data") #Histogram
plt.title("Frequency Distribution of y")
plt.vlines(np.mean(y), 0, 350, label = "Mean") #vertical line at the mean of y, drawn from 0 to 350
plt.xlabel("y")
plt.ylabel("frequency")
plt.legend()
plt.show()

In [None]:
np.linspace(1,10,10) #creating evenly spaced numbers over a specified interval.



In [None]:
x = np.linspace(-10, 10, 1000) #creating 1,000 evenly spaced numbers over the interval -10 to 10
x

In [None]:
y = 3 * x**3 - 2* x**2 +5*x -5 #function over x (polynomial of degree 3)
y

In [None]:
y = np.sin(x) #function over x (sine); overwrites the polynomial y from the previous cell

In [None]:
plt.figure(figsize = (10,6))
plt.plot(x, y) #line plot of y over x
plt.xlabel("x")
plt.ylabel("y")
plt.show()

In [None]:
np.random.seed(123)
m = np.random.normal(10,2,20).reshape(2,10) #creating 20 normally distributed numbers in (2,10) shape with mean 10 and std 2
m

In [None]:
a = m[0] #array a represents the first row of m and contains 10 elements
a

In [None]:
b = m[1] #array b represents the second row of m and contains 10 elements
b

In [None]:
plt.figure(figsize = (10,6)) # scatter plot of a and b
plt.scatter(a, b)
plt.show()

In [None]:
reg1 = np.polyfit(a, b, 1) #linear regression (polynomial of degree 1; coefficients are returned highest power first)
reg1    #function: b = 12.51 - 0.1599 * a

In [None]:
x = np.linspace(min(a),max(a), num = 100) #creating x and y values for regression line
y = np.polyval(reg1, x) #evaluating the linear regression at the x values

In [None]:
plt.figure(figsize = (10,6))
plt.scatter(a, b, label = "Data") #Data points
plt.plot(x, y, 'b--', label='linear') #linear regression
plt.plot(x, np.polyval(reg2, x),'m-.', label='quadratic') #quadratic regression
plt.plot(x, np.polyval(reg3, x),'g.', label='cubic') #cubic regression
plt.legend()
plt.show()

In [None]:
reg2 = np.polyfit(a, b, 2) #quadratic regression
reg2 #function: b = 16.986 + 0.0535 * a**2 - 1.1738 * a

In [None]:
reg3 = np.polyfit(a, b, 3) #cubic regression
reg3 #function: b = 92.25 - 0.1178 * a**3 + 3.267 * a**2 - 28.899 * a

In [None]:
reg_perfect = np.polyfit(a,b, len(a)-1) #perfect regression, polynomial of degree 9 perfectly regresses 10 data points
reg_perfect

In [None]:
plt.figure(figsize = (10,6))
plt.scatter(a, b, label = "Data") #Data points
plt.plot(x,np.polyval(reg_perfect, x) , 'b--', label='perfect') #degree 9 polynomial evaluated at the x values
plt.legend()
plt.ylim(min(b)-0.5, max(b)+0.5) #limiting the y-axis to the range of the data points (plus a margin of 0.5)
plt.show()