In [None]:
import numpy as np

# Performance comparison: hold the same integers in a plain Python list and in
# a NumPy array so the timing cells can compare the two containers.
# The size lives in a single constant so both containers always have equal length.
N_ELEMENTS = 1_000_000
my_list = list(range(N_ELEMENTS))
my_arr = np.arange(N_ELEMENTS)
my_arr

In [None]:
# Baseline: double every element of the list with a comprehension, ten passes.
%time for _ in range(10): [x*2 for x in my_list]

In [None]:
# Same work on the NumPy array: one vectorized multiply per pass.
%time for _ in range(10): my_arr2 = my_arr * 2  # element-wise operation
# Rule of thumb: expect the NumPy version to be roughly 10-100x faster than the
# pure-Python one (compare the two %time reports).

In [None]:
# Seed the legacy global generator so the "random" values displayed in this
# notebook are identical after every Restart Kernel -> Run All.
np.random.seed(0)
data = np.random.randn(2, 3)   # 2 rows x 3 columns of standard-normal samples
data

In [None]:
# Array * scalar: the scalar is applied to every element.
data * 10  # mul element-wise

In [None]:
# Array + scalar works the same way.
data + 1000

In [None]:
# Element-wise reciprocal.
1 / data

In [None]:
# None of the expressions above were assigned back, so `data` is unchanged.
data  

In [None]:
data * 2

In [None]:
# Two arrays of the same shape are added element by element.
data + data  # matrix sum

In [None]:
# Array metadata, each printed next to the Python type that holds it:
# the element type (dtype) and the dimensions (shape).
print(data.dtype, type(data.dtype))
print(data.shape, type(data.shape))

In [None]:
# Ways of creating arrays
# From a flat list. The list mixes ints with one float (-1.2); the dtype shown
# below is the single element type NumPy chose for the whole array.
data1 = [1, 2, 3, 4, -1.2]
arr1 = np.array(data1)
arr1.dtype

In [None]:
# From a nested list: two inner lists of three items give a 2-D array.
data2 = [[1, 2, 3], [-1, -2, -3]]
arr2 = np.array(data2)
arr2

In [None]:
# All items were ints, so an integer dtype is inferred.
arr2.dtype

In [None]:
# 1-D array of ten zeros.
np.zeros(10)

In [None]:
# Pass a tuple to request more than one dimension.
np.zeros((5, 5))

In [None]:
np.zeros((2, 3, 4))

In [None]:
# np.empty only allocates; the contents are whatever was already in memory.
np.empty((5, 5))

In [None]:
np.empty(5)  # no guarantee that it is 0

In [None]:
# Like range(), but returns an ndarray.
np.arange(30)

In [None]:
# start=5, stop=39 (exclusive), step=3
np.arange(5, 39, 3)

In [None]:
# The dtype can be forced at creation time instead of being inferred.
arr = np.array([1, 2, 3], dtype=np.float64)
arr.dtype

In [None]:
# `np.string_` was only an alias of `np.bytes_` and was removed in NumPy 2.0;
# the canonical name works on old and new NumPy versions alike.
np.bytes_

In [None]:
# An array built from Python strings; the repr shows the string dtype NumPy picked.
np.array(['hello', 'world'])

In [None]:
# Casting: astype() returns a new array with the requested dtype and leaves
# the source array untouched.
arr = np.array([1, 2, 3, 4])
print(arr.dtype)
float_arr = arr.astype(np.float64)
print(float_arr)
# np.str_ is the canonical unicode string type; the old `np.unicode_` alias
# was removed in NumPy 2.0.
string_arr = arr.astype(np.str_)
print(string_arr)

In [None]:
# Float -> int casting drops the fractional part; it does not round.
arr = np.array([1.4, 3.9, -1.2, 5])
print(arr, arr.astype(np.int32), sep="\n")  # second line: truncated!

In [None]:
# astype() also accepts another array's dtype, which is handy for making two
# arrays agree on their element type.
arr = np.arange(10)
other_arr = np.array([1.2, -3.2, 0])
arr.astype(other_arr.dtype)

In [None]:
#
# Basic arithmetic
#
arr = np.array([[1, 2, 3], [4, 5, 6]], dtype='f8')  # 'f8' = 8-byte (64-bit) float -> np.float64
arr

In [None]:
arr + arr  # matrix sum (same as element-wise)

In [None]:
arr * arr  # element-wise product of same-shape arrays (not a matrix product); vectorized, no explicit loop

In [None]:
arr - arr

In [None]:
# Plain Python power operator, for comparison with the array version below.
2**5

In [None]:
# The same operator on an array raises every element to the power.
arr**2.3

In [None]:
# A second array with the same (2, 3) shape, used for the comparison below.
other_arr = np.array([[0, -2, 3], [0, 19, -123]])
print(arr)
print(other_arr)

In [None]:
# Comparisons are element-wise too and produce a boolean array.
arr > other_arr

In [None]:
#
# indexing
#
arr = np.arange(10)
print(arr)
arr[5]  # single element by position

In [None]:
# Slice: positions 2, 3 and 4 (the stop index is exclusive).
arr[2:5]

In [None]:
# Assigning to an index modifies the array in place.
arr[1] = 212121231

In [None]:
arr

In [None]:
# Assigning a scalar to a slice writes it to every selected position.
arr[:4] = -1
arr

In [None]:
slice = arr[5:]
slice

In [None]:
slice[1] = 1000
print(slice)
print(arr)

In [None]:
slice[:] = 0
arr

In [None]:
# .copy() gives an independent array instead of a view onto `arr`.
new_arr = arr[:3].copy()
new_arr

In [None]:
# 2-D array: two rows, three columns.
arr = np.array([[1, 2, 3], [4, 5, 6]])
arr

In [None]:
# A single index on a 2-D array selects a whole row.
arr[0]

In [None]:
# Row 0, then element 1 of that row (two indexing steps).
arr[0][1]

In [None]:
# Same element selected in one step with a (row, column) index.
arr[0, 1]

In [None]:
arr

In [None]:
# Replace the whole second row with the values 0, 1, 2.
arr[1] = np.arange(3)
arr

In [None]:
# Slicing the first axis: rows 0 and 1.
arr[:2]

In [None]:
# 5x5 grid where row r holds the five consecutive integers r, r+1, ..., r+4,
# i.e. the element at (r, c) is r + c.
arr = np.array([
    [row + col for col in range(5)]
    for row in range(5)
])
arr

In [None]:
# 2-D slice assignment: rows 0-1, columns 3 onward are all set to 10.
arr[:2, 3:] = 10
arr

In [None]:
# Seven labels; the next cell creates a `data` array with seven rows to pair with them.
names = np.array(['Pedro', 'Goncalo', 'Maria', 'Maria', 'Pedro', 'Joao', 'Pedro'])
names.shape

In [None]:
# 7 rows x 4 columns of standard-normal samples (rebinds `data`).
data = np.random.randn(7, 4)
data

In [None]:
# Comparing an array with a scalar yields a boolean mask of the same length.
print(names)
names == 'Pedro'

In [None]:
# A mask on the first axis keeps only the rows where it is True.
data[names == 'Pedro']

In [None]:
data[names == 'Goncalo']

In [None]:
# Masks can be combined with slices: matching rows, columns 2 onward.
data[names == 'Pedro', 2:]

In [None]:
# Matching rows, column 3 only (result is 1-D).
data[names == 'Pedro', 3]

In [None]:
# Everything except the 'Pedro' rows.
data[names != 'Pedro']

In [None]:
data[~(names == 'Pedro')]  # ~ inverts a boolean mask element by element

In [None]:
# The mask can be stored in a variable and inverted later.
cond = names == 'Pedro'
data[~cond]

In [None]:
cond = (names == 'Pedro') | (names == 'Goncalo')  # use '|' (element-wise OR); the Python keyword 'or' does not work on arrays
data[cond]

In [None]:
data

In [None]:
# A mask with the same shape as the array selects individual elements (flattened to 1-D).
data[data > 0]

In [None]:
# Mask on the left-hand side: every negative element is replaced by 0, in place.
data[data < 0] = 0
data

In [None]:
#
# Arrays & Algebra
#

In [None]:
# 0..63 laid out as an 8x8 matrix.
arr = np.arange(64).reshape((8, 8))
arr

In [None]:
# Same 64 values viewed as 4x4x4; the result is not assigned, so `arr` keeps its shape.
arr.reshape((4, 4, 4))

In [None]:
arr

In [None]:
# Fancy indexing: a list of row indices selects those rows, in that order (-1 = last row).
arr[[1, 2, 4, -1]]

In [None]:
arr

In [None]:
# Two steps: pick rows 3, 0 and last; then, from that result, columns 1 and second-to-last.
arr[[3, 0, -1]][:, [1, -2]]

In [None]:
# 0..14 as a 3x5 matrix.
arr = np.arange(15).reshape((3, 5))
arr

In [None]:
arr.T  # transpose

In [None]:
# matrix product
np.dot(arr, arr.T)  # (3, 5) x (5, 3) --> (3, 3)

In [None]:
np.dot(arr.T, arr)  # (5, 3) x (3, 5) --> (5, 5)


In [None]:
# 0..15 as a 3-D array of shape (2, 2, 4).
arr = np.arange(16).reshape((2, 2, 4))
arr

In [None]:
arr.transpose((1, 0, 2))  # reorder axes: swaps the first two, so the element at (0, 1, 0) moves to (1, 0, 0)

In [None]:
arr.swapaxes(1, 2)  # shape 2, 4, 2

In [None]:
# Writing through the swapped result changes `arr` itself (displayed below):
# position (0, 0, 1) after swapping axes 1 and 2 is position (0, 1, 0) in `arr`.
arr.swapaxes(1, 2)[0, 0, 1] = -5
arr

In [None]:
# element-wise array functions

arr = np.arange(10)
arr

In [None]:
# Unary function applied to every element.
np.sqrt(arr)

In [None]:
np.exp(arr)  # e^x

In [None]:
# Two random vectors for the binary functions; maximum() compares them position by position.
x = np.random.randn(5)
y = np.random.randn(5)
print(x)
print(y)
print(np.maximum(x, y))

In [None]:
np.minimum(x, y)

In [None]:
np.absolute(x)

In [None]:
# modf() returns two arrays: the fractional parts first, then the integral parts.
decimal_part, int_part = np.modf(x)
print(decimal_part)
print(int_part)

In [None]:
# The second positional argument is the output array, so the result overwrites x.
# Any negative sample has no real square root and becomes nan.
np.sqrt(x, x)  # inplace operation

In [None]:
x

In [None]:
# True wherever the in-place sqrt above produced nan.
np.isnan(x)

In [None]:
# Inputs for a conditional pick: take the element of x where cond is True,
# otherwise the element of y.
x = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
y = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

# Pure-Python version: walk the three arrays in lockstep and choose per position.
result = [
    from_x if take_x else from_y
    for take_x, from_x, from_y in zip(cond, x, y)
]
result

In [None]:
# Vectorized form of the list comprehension in the previous cell.
np.where(cond, x, y)

In [None]:
# 4x4 matrix of standard-normal samples for the examples below.
arr = np.random.randn(4, 4)
arr

In [None]:
arr > 0

In [None]:
# The two choices may be scalars: 1 where positive, 0 elsewhere.
np.where(arr > 0, 1, 0)

In [None]:
# Or mix an array with a scalar: keep positive values, replace the rest with 0.
np.where(arr > 0, arr, 0)

In [None]:
# np.where built new arrays each time; `arr` itself is unchanged.
arr

In [None]:
# Without an axis argument the reduction covers every element.
arr.mean()

In [None]:
arr.sum()

In [None]:
arr.mean(axis=0)  # axis=0 collapses the rows, giving one mean per column

In [None]:
# axis=1 collapses the columns, giving one sum per row.
arr.sum(axis=1)

In [None]:
# Running total of 0..9.
np.arange(10).cumsum()

In [None]:
# Running product of 1..10.
np.arange(1, 11).cumprod()

In [None]:
arr

In [None]:
# Boolean mask marking the positive entries.
bools = arr > 0
bools

In [None]:
# True if at least one element is True.
bools.any()

In [None]:
# True only if every element is True.
bools.all()

In [None]:
# Sanity check: after replacing non-positive values with 0, nothing is below 0.
(np.where(arr > 0, arr, 0) < 0).any()

In [None]:
arr

In [None]:
# ndarray.sort() sorts in place (along the last axis by default) and returns None,
# so this cell displays nothing.
arr.sort()

In [None]:
arr

In [None]:
# Sort in place along axis 0, i.e. within each column.
arr.sort(0)
arr

In [None]:
#
# More linear algebra
#


In [None]:
# Three equivalent spellings of the same matrix-vector product:
# a (2, 3) matrix times a length-3 vector of ones gives a length-2 vector.
arr = np.arange(6).reshape(2, 3)
print(
    np.dot(arr, np.ones(3)),  # function form
    arr.dot(np.ones(3)),      # method form
    arr @ np.ones(3),         # operator form
    sep="\n",
)

In [None]:
arr

In [None]:
# NOTE(review): this rebinds `my_arr`, which the performance-comparison cell at the
# top of the notebook bound to the 1,000,000-element array; re-running the timing
# cell after this point would time this 2x2 matrix instead.
my_arr = arr @ arr.T  # (2, 3) x (3, 2) => (2, 2)
my_arr

In [None]:
# Matrix inverse of the 2x2 product.
inverse_m = np.linalg.inv(my_arr)

In [None]:
# A matrix times its inverse should be the identity, up to floating-point rounding.
my_arr @ inverse_m

In [None]:
# QR decomposition of the same matrix.
np.linalg.qr(my_arr)

In [None]:
#
#
#  PANDAS basics
#
#

In [None]:
import pandas as pd

In [None]:
# A Series built from a list; no index is given here.
obj = pd.Series([2, -3, 5, 100])
obj

In [None]:
# The underlying values and the Python type that holds them.
print(obj.values, type(obj.values))

In [None]:
obj.index

In [None]:
# Same constructor with explicit string labels as the index.
obj = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
obj

In [None]:
obj.index

In [None]:
# Look up a value by its label.
obj['a']

In [None]:
# Attribute-style access to the same label.
obj.a

In [None]:
# Assign by label; the Series is modified in place.
obj['c'] = 100
obj

In [None]:
# Comparison gives a boolean Series.
obj > 3

In [None]:
# Boolean Series used as a filter.
obj[obj > 3]

In [None]:
# Arithmetic applies to every value.
obj*2

In [None]:
obj / 2

In [None]:
# NumPy functions can be applied to a Series directly.
np.exp(obj)

In [None]:
# `in` on a Series tests the index labels.
'a' in obj

In [None]:
'e' in obj

In [None]:
# Assigning to a label that is not in the index yet adds a new entry.
obj['e'] = 10

In [None]:
obj

In [None]:
# Series from a dict: keys become the index labels, values become the data.
some_data = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj1 = pd.Series(some_data)
obj1

In [None]:
# With an explicit index only the listed labels are kept, in that order.
# 'California' and 'New York' are not keys of some_data, so they come out as NaN;
# 'Oregon' and 'Utah' are not listed, so they are left out.
states = ['California', 'Ohio', 'New York', 'Texas']
obj2 = pd.Series(some_data, index=states)
obj2

In [None]:
# Keep only the entries that actually have a value.
obj2[obj2.notnull()]