In [1]:
import pandas as pd
import numpy as np

### 1. Vector Operations > For Loops

In [2]:
def matrix_mul(m1, m2):
    """Multiply two matrices using explicit Python loops.

    This is intentionally a non-vectorized triple loop: it is the slow
    baseline that the notebook times against ``np.dot``.

    Parameters
    ----------
    m1 : sequence of sequences or 2-D array
        Left operand with ``n`` rows of ``k`` entries each.
    m2 : sequence of sequences or 2-D array
        Right operand with ``k`` rows of ``p`` entries each.

    Returns
    -------
    numpy.ndarray
        The ``(n, p)`` product. When ``m1`` has no rows the result is an
        empty array.

    Raises
    ------
    ValueError
        If any row of ``m1`` does not have exactly ``len(m2)`` entries.
        Without this check a too-short row would silently ignore the extra
        rows of ``m2`` and return a wrong product.
    """
    inner = len(m2)
    for row_idx, left_row in enumerate(m1):
        if len(left_row) != inner:
            raise ValueError(
                f'shapes not aligned: row {row_idx} of m1 has {len(left_row)} '
                f'entries but m2 has {inner} rows'
            )

    if len(m1) == 0:
        return np.array([])

    # Loop-invariant: the number of output columns is fixed by m2.
    n_cols = len(m2[0])

    result = []
    for left_row in m1:
        row = []
        for j in range(n_cols):
            product = 0
            for v in range(inner):
                product += left_row[v] * m2[v][j]
            row.append(product)
        result.append(row)
    return np.array(result)

In [3]:
# Random operands with compatible shapes: (100 x 200) times (200 x 300).
m1, m2 = np.random.randn(100, 200), np.random.randn(200, 300)

In [4]:
import time

# perf_counter() is the clock intended for measuring elapsed intervals; unlike
# time.time() it does not follow wall-clock adjustments made while the cell runs.
start = time.perf_counter()
m3 = matrix_mul(m1, m2)
end = time.perf_counter()
print(f'Duration: {end - start} seconds')

Duration: 2.9443230628967285 seconds


In [5]:
# Benchmark source for the pure-Python triple-loop implementation, kept as a
# string so it can be passed to timeit as `stmt`. The function definition and
# the random matrix generation are inside the string, so they are part of the
# statement that gets timed.
code = '''
def matrix_mul(m1, m2):
    result = []
    for i in range(len(m1)):
        row = []
        for j in range(len(m2[0])):
            product = 0
            for v in range(len(m1[i])):
                product += m1[i][v] * m2[v][j]
            row.append(product)
        result.append(row)
    return np.array(result)
    
m1 = np.random.randn(100, 200)
m2 = np.random.randn(200, 300)
m3 = matrix_mul(m1, m2)
'''

In [6]:
import timeit

In [7]:
# Execute the loop-based benchmark string once and return the elapsed seconds.
timeit.timeit(
    stmt=code,
    setup='import numpy as np',
    number=1,
)

3.1048670000000005

In [8]:
# Benchmark source for the vectorized version based on np.dot, kept as a
# string so it can be passed to timeit as `stmt`. The random matrix generation
# is inside the string, so it is part of the statement that gets timed.
code2 = '''
m1 = np.random.randn(100, 200)
m2 = np.random.randn(200, 300)
m3 = np.dot(m1, m2)
'''

In [9]:
# Execute the np.dot benchmark string once and return the elapsed seconds.
timeit.timeit(
    stmt=code2,
    setup='import numpy as np',
    number=1,
)

0.00647209999999987

In [10]:
import time

# The vectorized product finishes in well under a millisecond, so use
# perf_counter(): it is the high-resolution clock intended for short intervals,
# whereas time.time() tracks the (adjustable, coarser) wall clock.
start = time.perf_counter()
m3 = np.dot(m1, m2)
end = time.perf_counter()
print(f'Duration: {end - start} seconds')

Duration: 0.0007224082946777344 seconds


### 2. Built-in Methods > Self Implementation

In [11]:
# 100,000 random integer grades in 0..100 (the upper bound 101 is exclusive).
grades = pd.Series(
    np.random.randint(low=0, high=101, size=100_000)
)

In [12]:
def pass_or_fail(val):
    """Return 'Pass' when ``val`` is at least 60, otherwise 'Fail'."""
    return 'Pass' if val >= 60 else 'Fail'

In [13]:
# Label every grade by calling the Python function pass_or_fail on each element.
grades.apply(pass_or_fail)

0        Fail
1        Pass
2        Fail
3        Pass
4        Pass
         ... 
99995    Fail
99996    Pass
99997    Pass
99998    Fail
99999    Fail
Length: 100000, dtype: object

In [14]:
# Same labelling using only pandas' built-in where(): each call keeps the
# entries where its condition holds and substitutes the label elsewhere.
(
    grades
    .where(grades >= 60, 'Fail')  # grades below 60 become 'Fail'
    .where(grades < 60, 'Pass')   # grades of 60 or more become 'Pass'
)

0        Fail
1        Pass
2        Fail
3        Pass
4        Pass
         ... 
99995    Fail
99996    Pass
99997    Pass
99998    Fail
99999    Fail
Length: 100000, dtype: object

In [15]:
%%timeit
# Time the element-wise apply() approach for comparison with the where() version.
grades.apply(pass_or_fail)

11.6 ms ± 667 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [16]:
%%timeit
grades.where(grades >= 60, 'Fail').where(grades < 60, 'Pass')

3.99 ms ± 141 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### 3. Appropriate Data Types

In [17]:
# Size labels listed from smallest to largest; the same list later supplies
# the category order for the ordered categorical.
levels = ['xs', 's', 'm', 'l', 'xl']

In [18]:
# Draw 10,000 size labels at random from `levels`.
ranks = pd.Series(
    np.random.choice(levels, size=10_000)
)

In [19]:
# Convert the string labels to pandas' categorical dtype.
ranks_cat = ranks.astype('category')

In [20]:
# Size in bytes of the categorical Series' underlying data.
ranks_cat.nbytes

10040

In [21]:
# Impose the size order from `levels` and mark the categorical as ordered,
# which the comparison against 'm' in the following cell relies on.
ranks_ord = ranks_cat.cat.reorder_categories(levels, ordered=True)

In [22]:
# Ordered categoricals can be compared against a category label:
# keep only the sizes ranked below 'm'.
ranks_ord.loc[ranks_ord < 'm']

1        s
3       xs
5       xs
8        s
9       xs
        ..
9983     s
9988    xs
9996    xs
9997    xs
9998    xs
Length: 4015, dtype: category
Categories (5, object): ['xs' < 's' < 'm' < 'l' < 'xl']

In [23]:
# Size in bytes of the grades data at its default integer dtype.
grades.nbytes

400000

In [24]:
# Summary statistics; min and max give the value range to compare against
# the limits of a smaller integer dtype.
grades.describe()

count    100000.000000
mean         50.086660
std          29.177153
min           0.000000
25%          25.000000
50%          50.000000
75%          75.000000
max         100.000000
dtype: float64

In [25]:
# Representable range of int8, to check that it covers the grade range
# before downcasting.
np.iinfo('int8')

iinfo(min=-128, max=127, dtype=int8)

In [26]:
# A narrowing astype() does not range-check and can silently wrap values that
# do not fit, so confirm every grade is representable in int8 first.
assert grades.between(np.iinfo('int8').min, np.iinfo('int8').max).all(), \
    'grades contain values outside the int8 range'
grades2 = grades.astype('int8')

In [27]:
# Size in bytes after the int8 downcast, for comparison with grades.nbytes.
grades2.nbytes

100000