In [1]:
import time
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

## CASE 1: Finding the Sum of numbers

In [2]:
# Using Loops
start = time.time()

# iterative sum
total = 0
# iterating through 1.5 Million numbers
for item in range(0, 1500000):
    total = total + item

print('sum is:' + str(total))
end = time.time()
print(end - start)

sum is:1124999250000
0.17632627487182617


In [3]:
# Using Vectorization
start = time.time()

# vectorized sum - using numpy for vectorization
# np.arange create the sequence of numbers from 0 to 1499999
print(np.sum(np.arange(1500000)))
end = time.time()
print(end - start)

-282181552
0.0


## CASE 2: Mathematical Operations (on DataFrame)

In [4]:
df = pd.DataFrame(np.random.randint(0, 50, size=(100000, 4)), columns=('a','b','c','d'))
print(df.shape)
df.head()

(100000, 4)


Unnamed: 0,a,b,c,d
0,43,3,21,7
1,47,20,12,20
2,21,22,42,10
3,16,22,49,20
4,42,18,19,33


In [5]:
# Using Loops
start = time.time()

# Iterating through DataFrame using iterrows
for idx, row in df.iterrows():
    # creating a new column 
    df.at[idx,'ratio'] = 100 * (row['d'] / row['c'])  
end = time.time()
print(end - start)

6.394658088684082


In [6]:
# Using Vectorization
start = time.time()

df['ratio'] = 100 * (df['d'] / df['c'])

end = time.time()
print(end - start)

0.0010027885437011719


## CASE 3: If-else Statements (on DataFrame)

In [7]:
# Using Loops
start = time.time()

# Iterating through DataFrame using iterrows
for idx, row in df.iterrows():
    if row.a == 0:
        df.at[idx,'e'] = row.d    
    elif (row.a <= 25) & (row.a > 0):
        df.at[idx,'e'] = (row.b)-(row.c)    
    else:
        df.at[idx,'e'] = row.b + row.c
end = time.time()
print(end - start)

9.41081190109253


In [8]:
# Using Vectorization

start = time.time()
df['e'] = df['b'] + df['c']
df.loc[df['a'] <= 25, 'e'] = df['b'] -df['c']
df.loc[df['a']==0, 'e'] = df['d']
end = time.time()
print(end - start)

0.0
