### Writing Efficient Python Code

In [None]:
# Non-pythonic
non_pythonic = []
numbers = 0

for i in range(len(numbers)):
    non_pythonic.append(numbers[i] * 2)
# Pythonic
non_pythonic = [x * 2 for x in numbers]

Building with built-ins

**Python Built-in types:**
- list
- tuple
- set
- dict
- .... many more

**Python Built-in functions:**
- print()
- len()
- range()
- round()
- enumerate()
- map()
- zip()
- .... many more

**Python Built-in modules:**
- os
- sys
- itertools
- collections
- math
- .... many more

In [None]:
# range(start, stop)
num = range(0,11)

num_list = list(num)
print(num_list)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


In [4]:
# range(stop)
num = range(11)

num_list = list(num)
print(num_list)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


In [5]:
# range with a step value
even_num = range(2, 11, 2)
even_num_list = list(even_num)

print(even_num_list)

[2, 4, 6, 8, 10]


In [6]:
# using enumerate()
letters = ['a', 'b', 'c']

index_letters = enumerate(letters)

index_letters_list = list(index_letters)
print(index_letters_list)

[(0, 'a'), (1, 'b'), (2, 'c')]


In [7]:
# using map
num = [1.5, 2.3, 3.4, 4.6, 5.0]

round_num = map(round, num)

print(list(round_num))

[2, 2, 3, 5, 5]


In [8]:
# using lambda (annonymous function)
num = [1,2,3,4,5]

square_num = map(lambda x: x ** 2, num)

print(list(square_num))

[1, 4, 9, 16, 25]


Efficient Code In Numpy Arrays

In [2]:
# Numpy array
import numpy as np

num_np = np.array(range(5))


num_np

array([0, 1, 2, 3, 4])

In [None]:
# Numpy array homogeneity
nums_ints = np.array([1,2,3])
print(nums_ints.dtype)

nums_float = np.array([1,2.5,3])
print(nums_float.dtype)


# dtype used for data type checking

int64
float64


In [None]:
# Alternative example before introduced numpy array broadcasting
sqr_nums = []
nums = [1,2,3,4] #sample data

for num in nums:
    sqr_nums.append(num ** 2)
print(sqr_nums)

[1, 4, 9, 16]


In [None]:
# List comprehension (better option but not the best)
sqrd_nums = [num ** 2 for num in nums]

print(sqrd_nums)

[1, 4, 9, 16]


In [None]:
# numpy array broadcasting
nums_np = np.array([-2,-1,0,1,2])

nums_np ** 2

array([4, 1, 0, 1, 4])

Examining Runtime

In [None]:
# using %timeit
import numpy as np

#random = np.random.rand(1000)
%timeit random = np.random.rand(1000)

6.01 μs ± 30.8 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [3]:
# Set numbers of runs to 2 (-r2)
# Set numbers of loops to 10 (-n10)
%timeit -r2 -n10 random = np.random.rand(1000)

20.7 μs ± 3.68 μs per loop (mean ± std. dev. of 2 runs, 10 loops each)


In [16]:
# Using in single line of code
%timeit num = [x for x in range(10)]

188 ns ± 2.42 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [35]:
%%timeit
nums = []
for x in range(10):
    nums.append(x)

# using for multiple lines

241 ns ± 5.27 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [43]:
# saving output
times = %timeit -o random = np.random.rand(1000)
times.timings


5.99 μs ± 121 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


[6.083393999979307e-06,
 6.13083899999765e-06,
 6.166964000003645e-06,
 5.954119000016363e-06,
 5.893202000006567e-06,
 5.854494000013801e-06,
 5.8751789999951145e-06]

In [42]:
times = %timeit -o random = np.random.rand(1000)
times.best

6.14 μs ± 90.9 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


6.025280000030762e-06

In [44]:
times = %timeit -o random = np.random.rand(1000)
times.worst

6.25 μs ± 60.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


6.370698999999149e-06

In [47]:
# Comparing times using dictionary
f_time = %timeit -o formal_dict = dict()

# Using literals syntax
l_time = %timeit -o formal_dict = {}

38.6 ns ± 0.906 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
19.3 ns ± 0.294 ns per loop (mean ± std. dev. of 7 runs, 100,000,000 loops each)


In [48]:
# compare the two
diff = (f_time.average - l_time.average) * (10**9)
print('l_time better than f_time by {} ns'.format(diff))


l_time better than f_time by 19.277724285613143 ns


In [50]:
# Comparing times (formal vs literals)
%timeit formal = dict()
%timeit literal = {}

36.2 ns ± 1.57 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
19.3 ns ± 0.543 ns per loop (mean ± std. dev. of 7 runs, 100,000,000 loops each)


Code profiling for runtime

In [None]:
# pip install line_profiler

In [6]:
import numpy as np

# Code profiling: runtime
heroes = ['Batman', 'Superman','Spider-man']
hts = np.array([188.0, 191.0, 183.0])
wts = np.array([95.0, 101.0, 74.0])

In [7]:
def convert_units(heroes, heights, weights):
    new_hts = [ht * 0.39370 for ht in heights]
    new_wts = [wt * 2.20462 for wt in weights]

    hero_data = {}

    for i, hero in enumerate(heroes):
        hero_data[hero] = (new_hts[i], new_wts[i])
    
    return hero_data

In [8]:
convert_units(heroes, hts, wts)

{'Batman': (np.float64(74.01559999999999), np.float64(209.4389)),
 'Superman': (np.float64(75.19669999999999), np.float64(222.66661999999997)),
 'Spider-man': (np.float64(72.0471), np.float64(163.14188))}

In [9]:
# runtime
%timeit convert_units(heroes, hts, wts)

1.74 μs ± 17 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [14]:
# how long each line runtime

%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [15]:
%lprun -f convert_units convert_units(heroes, hts, wts)

Timer unit: 1e-07 s

Total time: 2.22e-05 s
File: C:\Users\jetta\AppData\Local\Temp\ipykernel_22620\3078075682.py
Function: convert_units at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def convert_units(heroes, heights, weights):
     2         1        103.0    103.0     46.4      new_hts = [ht * 0.39370 for ht in heights]
     3         1         22.0     22.0      9.9      new_wts = [wt * 2.20462 for wt in weights]
     4                                           
     5         1          7.0      7.0      3.2      hero_data = {}
     6                                           
     7         4         41.0     10.2     18.5      for i, hero in enumerate(heroes):
     8         3         20.0      6.7      9.0          hero_data[hero] = (new_hts[i], new_wts[i])
     9                                           
    10         1         29.0     29.0     13.1      return hero_data

Code Profiling for Memory Usage

In [60]:
# Quick and dirty approach
import sys

num = [*range(1000)]
sys.getsizeof(num)

8056

In [None]:
# With numpy
import numpy as np

num_np = np.array(range(1000))
sys.getsizeof(num_np)

8112

In [17]:
from efficientsample import convert_units

# pip install memory_profiler
%load_ext memory_profiler
%mprun -f convert_units convert_units(heroes, hts, wts)

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler



Filename: d:\Development\python\learn pandas\efficientsample.py

Line #    Mem usage    Increment  Occurrences   Line Contents
     8     99.7 MiB     99.7 MiB           1   def convert_units(heroes, heights, weights):
     9     99.7 MiB      0.0 MiB           4       new_hts = [ht * 0.39370 for ht in heights]
    10     99.7 MiB      0.0 MiB           4       new_wts = [wt * 2.20462 for wt in weights]
    11                                         
    12     99.7 MiB      0.0 MiB           1       hero_data = {}
    13                                         
    14     99.7 MiB      0.0 MiB           4       for i, hero in enumerate(heroes):
    15     99.7 MiB      0.0 MiB           3           hero_data[hero] = (new_hts[i], new_wts[i])
    16                                             
    17     99.7 MiB      0.0 MiB           1       return hero_data

Efficiently, combining, counting, and iterating

In [None]:
name = ['Bulbasaur','Charmander','Squirtle']
hp = [45,39,44]

[('Bulbasaur', 45), ('Charmander', 39), ('Squirtle', 44)]


In [21]:
# Combining objects
combined = []

for i, pokemon in enumerate(name):
    combined.append((pokemon, hp[i]))

print(combined)

[('Bulbasaur', 45), ('Charmander', 39), ('Squirtle', 44)]


In [None]:
# combined zip
combined_zip = zip(name, hp)

# Show class
print(type(combined_zip))

<class 'zip'>


In [None]:
# unpacked zip
combined_zip_list = [*combined_zip]

# Show unzip 
print(combined_zip_list)

[('Bulbasaur', 45), ('Charmander', 39), ('Squirtle', 44)]


In [26]:
# counting with loop
types = ['Grass', 'Dark', 'Fire']
type_counts = {}

for poke_type in types:
    if poke_type not in type_counts:
        type_counts[poke_type] = 1
    else:
        type_counts[poke_type] += 1

print(type_counts)

{'Grass': 1, 'Dark': 1, 'Fire': 1}


In [None]:
# Using counter
types = ['Grass', 'Dark', 'Fire']

from collections import Counter
type_counts = Counter(types)
print(type_counts)


Counter({'Grass': 1, 'Dark': 1, 'Fire': 1})


In [28]:
# combinations with loop
types = ['Grass', 'Dark', 'Fire']
combos = []

In [29]:
for x in types:
    for y in types:
        if x == y:
            continue
        if ((x,y) not in combos) & ((y,x) not in combos):
            combos.append((x,y))
print(combos)

[('Grass', 'Dark'), ('Grass', 'Fire'), ('Dark', 'Fire')]


In [31]:
# itertools.combinations

from itertools import combinations
combos_obj = combinations(types, 2)

print(type(combos_obj))

<class 'itertools.combinations'>


In [32]:
combos = [*combos_obj]
print(combos)

[('Grass', 'Dark'), ('Grass', 'Fire'), ('Dark', 'Fire')]


In [None]:
# Set theory

# intersection()
# difference()
# symmetric_difference()
# union()

In [34]:
list_a = ['Bulbasaur', 'Charmander', 'Squirtle']
list_b = ['Caterpie', 'Pidgey', 'Squirtle']

In [None]:
# inefficient code
in_common = []

for pokemon_a in list_a:
    for pokemon_b in list_b:
        if pokemon_a == pokemon_b:
            in_common.append(pokemon_a)

print(in_common)

['Squirtle']


In [None]:
# Using set : faster than previous code
set_a = set(list_a)
set_b = set(list_b)

set_a.intersection(set_b)

{'Squirtle'}

In [47]:
# set method difference
set_a.difference(set_b)

{'Bulbasaur', 'Charmander'}

In [46]:
set_b.difference(set_a)

{'Caterpie', 'Pidgey'}

In [48]:
# symmetric difference
set_a.symmetric_difference(set_b)

{'Bulbasaur', 'Caterpie', 'Charmander', 'Pidgey'}

In [49]:
set_a.union(set_b)

{'Bulbasaur', 'Caterpie', 'Charmander', 'Pidgey', 'Squirtle'}

In [51]:
pokemon = set_a.union(set_b)
# unique with sets
unique_set = set(pokemon)

print(unique_set)

{'Bulbasaur', 'Charmander', 'Squirtle', 'Caterpie', 'Pidgey'}


In [None]:
# Eliminating Loops

# for loop: iterate over sequence piece by piece
# while loop: repeat loop as long as the condition is met
# nested loop: use one looop inside another loop

In [52]:
stats = [
    [90,92,75,60],
    [25,20,15,90],
    [65,130,60,75]
]

In [56]:
# for loop approach
totals = []
for row in stats:
    totals.append(sum(row))

print(totals)

[317, 150, 330]


In [57]:
# list comprehension
totals_comp = [sum(row) for row in stats]

print(totals_comp)



[317, 150, 330]


In [None]:
# built-in map function
totals_map = [*map(sum, stats)] #faster then the 2 previous code types

print(totals_map)

[317, 150, 330]


In [59]:
# Eliminating loops with built-in modules
types = ['Grass', 'Dark', 'Fire']

In [72]:
# Nested approach
combos = []

for x in types:
    for y in types:
        if x == y:
            continue
        if ((x,y) not in combos) & ((y,x) not in combos):
            combos.append((x,y))
print(combos)

[('Grass', 'Dark'), ('Grass', 'Fire'), ('Dark', 'Fire')]


In [None]:
# built-in module approach
from itertools import combinations
combos2 = [*combinations(types, 2)]

print(combos2)


[('Grass', 'Dark'), ('Grass', 'Fire'), ('Dark', 'Fire')]


In [75]:
# Eliminate loops with numpy
import numpy as np

npstats = np.array([
    [90,92,75,60],
    [25,20,15,90],
    [65,130,60,75]
])

In [78]:
avgs = []
for row in npstats:
    avg = np.mean(row)
    avgs.append(avg)

print(avgs)

[np.float64(79.25), np.float64(37.5), np.float64(82.5)]


In [None]:
# short code
avgs_np = npstats.mean(axis=1) # faster than creating for loop
print(avgs_np)

[79.25 37.5  82.5 ]


Writing better loops

In [1]:
# moving calculations above a loop
import numpy as np

names = ['Absol', 'Aron', 'Jynx', 'Natu', 'Onyx']
attacks = np.array([130,70,50,50,45])

In [4]:
# for loop
for pokemon, attack in zip(names, attacks):
    total_attack_avg = attacks.mean()
    if attack > total_attack_avg:
        print(
            "{}'s attack: {} > average: {}!"
            .format(pokemon, attack, total_attack_avg)
        )

Absol's attack: 130 > average: 69.0!
Aron's attack: 70 > average: 69.0!


In [7]:
for pokemon, attack in zip(names, attacks):
    total_attack_avg = attacks.mean()
    if attack > total_attack_avg:
        print(
            "{}'s attack: {} > average: {}!"
            .format(pokemon, attack, total_attack_avg)
        )

Absol's attack: 130 > average: 69.0!
Aron's attack: 70 > average: 69.0!


In [None]:
# Calculate total average once (outside the loop)
names = ['Absol', 'Aron', 'Jynx', 'Natu', 'Onyx']
attacks = np.array([130,70,50,50,45])
total_attack_avg = attacks.mean()

In [9]:
# This for loop format is much efficient than the previous one
for pokemon, attack in zip(names, attacks):
    if attack > total_attack_avg:
        print(
            "{}'s attack: {} > average: {}!"
            .format(pokemon, attack, total_attack_avg)
        )

Absol's attack: 130 > average: 69.0!
Aron's attack: 70 > average: 69.0!


In [None]:
# using holistic conversions
names = ['Pikachu', 'Squirtle', 'Articuno']
legend = [False, False, True]
generations = [1,1,1]

In [11]:
poke_data = []

for poke_tuple in zip(names, legend, generations):
    poke_list = list(poke_tuple)
    poke_data.append(poke_list)
print(poke_data)

[['Pikachu', False, 1], ['Squirtle', False, 1], ['Articuno', True, 1]]


In [None]:
# Holistic conversion makes easier and efficient
poke_data_tuples = []

for poke_tuple in zip(names, legend, generations):
    poke_data_tuples.append(poke_tuple)
poke_data = [*map(list, poke_data_tuples)]
print(poke_data)

[['Pikachu', False, 1], ['Squirtle', False, 1], ['Articuno', True, 1]]


Basic pandas optimizations

In [None]:
# Loop over the DataFrame and print each row
for row in data_frame.itertuples():
  print(row)

In [None]:
# Iterate with .itertuples()

# Basic syntax for itertuples()
for row_namedtuple in df.itertuples():
    print(row_namedtuple)

# Accessing specific columns by index
for row_namedtuple in df.itertuples():
    print(row_namedtuple.Index)  # Access index
    print(row_namedtuple[1])     # Access first column by position
    print(row_namedtuple[2])     # Access second column by position

# Accessing columns by name (if column names are valid Python identifiers)
for row_namedtuple in df.itertuples():
    print(row_namedtuple.Team)   # Access 'Team' column by name
    print(row_namedtuple.Year)   # Access 'Year' column by name

# Unpacking tuples for easier access
for index, team, year, wins in df.itertuples():
    print(f"Index: {index}, Team: {team}, Year: {year}, Wins: {wins}")

# Using itertuples() with name parameter
for row in df.itertuples(name='TeamData'):
    print(row.Team)
    print(row.Year)

# Using itertuples() without index
for row in df.itertuples(index=False):
    print(row)  # Only column values, no index

In [None]:
def cal_run_diff(column1, column2):
    run_diff = column1 - column2
    return run_diff

# pandas alternative to looping using apply()
table = table_df.apply(
    lambda row: cal_run_diff(row['KW'], row['KA']),
    axis=1 # 0 for columns, rows for 1
)

table['KW'] = table
print(table)

In [None]:
# Optimal Pandas Iterating
table['T1'].values - table['T2'].values # Vectorization

# Run Differential with arrays
run_diff_np = table['col1'].values - table['col2'].values
table['new_column'] = run_diff_np