In [64]:
import pandas as pd
import numpy as np
import random
import time
import datetime as dt


# Lists and Dictionaries via Zipping and Comprehensions

In [65]:
# zipping two or more separate lists to generate a combined list of tuples

# example: calculate multiple metrics, e.g. prediction accuracy
# collect all these calculation results in a list
rmse = 1
mape = 0.04
rsq = 0.9

# collect multiple calculations results in a list
acc_values = [rmse, mape, rsq]

# define a list of names for these results
acc_names = ["RMSE", "MAPE", "R-sq"]

In [66]:
# combine the names of the metrics and their values via zipping to a list of tuples
acc_list = list(zip(acc_names, acc_values))
acc_list

[('RMSE', 1), ('MAPE', 0.04), ('R-sq', 0.9)]

In [67]:
# short digression #1 on zip: unzipping a list of tuples: use the asterisk *

names, values = zip(*acc_list)
print(names)
print(values)

('RMSE', 'MAPE', 'R-sq')
(1, 0.04, 0.9)


In [68]:
# short digression 2 on zip: zipping lists of unequal length
# zip takes the shorter list and skips the corresponding item of the longer partner list

acc_names2 = ["RMSE", "MAPE"]
acc_values2 = [rmse, mape, rsq]
acc_list2 = list(zip(acc_names2, acc_values2))
acc_list2

[('RMSE', 1), ('MAPE', 0.04)]

In [69]:
# short digression 3 on zip: zipping lists of unequal length by using zip_longest
# zip_longest also shows list items that don't have a partner item in the other list

from itertools import zip_longest
acc_names3 = ["RMSE", "MAPE", "R-sq", "MSE"]
acc_values3 = [rmse, mape, rsq]

acc_list3 = list(zip_longest(acc_names3, acc_values3))
acc_list3

[('RMSE', 1), ('MAPE', 0.04), ('R-sq', 0.9), ('MSE', None)]

In [70]:
# convert the list of zipped tuples to a dictionary

acc_dict = dict(acc_list)
acc_dict
# not ready for pretty-printing

{'RMSE': 1, 'MAPE': 0.04, 'R-sq': 0.9}

In [71]:
# faster: zip the names and values lists directly into a dictionary,
# without creating a list of tuples in between
acc_dict = {k: v for k, v in zip(acc_names, acc_values)}
acc_dict

{'RMSE': 1, 'MAPE': 0.04, 'R-sq': 0.9}

In [72]:
# print the values and their names line by line: list comprehension

[print(k, ":", v) for k,v in acc_dict.items()]

RMSE : 1
MAPE : 0.04
R-sq : 0.9


[None, None, None]

In [73]:
# list comprehension for printing, without list of Nones

[print(k, ":", v) for k,v in acc_dict.items()]
print("\n")

RMSE : 1
MAPE : 0.04
R-sq : 0.9




In [74]:
# list comprehension for printing, without list of Nones

[print(k, ":", v) for k,v in acc_dict.items()]
pass

RMSE : 1
MAPE : 0.04
R-sq : 0.9


In [75]:
# dictionary comprehension for printing, without list of Nones

y = [print(k, ":", v) for k,v in acc_dict.items()]

RMSE : 1
MAPE : 0.04
R-sq : 0.9


In [76]:
# dictionary comprehension for printing, without list of Nones

_ = [print(k, ":", v) for k,v in acc_dict.items()]

RMSE : 1
MAPE : 0.04
R-sq : 0.9


In [77]:
# list comprehension for pretty-printing, with number format

c = [print(k, ":", f'{v:.1f}') for k,v in acc_dict.items()]

RMSE : 1.0
MAPE : 0.0
R-sq : 0.9


In [78]:
# summary:
# if you have multiple values with individual names, e.g. a list of metrics
# you want to summarize
# collect all these calculation results in a list
rmse = 1
mape = 0.04
rsq = 0.9

# collect multiple calculations results in a list
acc_values = [rmse, mape, rsq]

# define a list of names for the metrics
acc_names = ["RMSE", "MAPE", "R-sq"]

# combine the lists of names and values via zipping to a dictionary
acc_list = list(zip(acc_names, acc_values))

# pretty-print the dictionary of metrics
_ = [print(k, ":", f'{v:.1f}') for k,v in acc_dict.items()]

RMSE : 1.0
MAPE : 0.0
R-sq : 0.9


# Performance of Comprehensions vs. Loops

In [79]:
# small list of random numbers: the jurassic way to square them in a for loop

rands = [random.randrange(1, 100, 1) for i in range(5)]
print(rands)

# list of random numbers: the jurassic way to square them in a for loop
rands2 = []
for n in rands:
    n = n**2
    rands2.append(n)
print(rands2)


# list comprehension to square them
c = [n**2 for n in rands]
c

[95, 59, 45, 71, 29]
[9025, 3481, 2025, 5041, 841]


[9025, 3481, 2025, 5041, 841]

In [80]:
# list comprehension to square each number in a LARGE list of 100,000 numbers

rands = [random.randrange(1, 100, 1) for i in range(100000)]

t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>
rands2 = []
for n in rands:
    n = n**2
    rands2.append(n)
# >>>>>>>>>>>>>>>>>>>>>>>>
tLoop = time.perf_counter() - t
print(f'{tLoop:.3f} sec')


t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
_ = [n**2 for n in rands]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tComp = time.perf_counter() - t
print(f'{tComp:.3f} sec: comprehension vs loop: {100*(tComp/tLoop-1):.1f}%')

0.056 sec
0.045 sec: comprehension vs loop: -20.9%


In [81]:
# expression with a condition (filter via if): 
# square only those of the 100,000 numbers in the list which exceed 90

rands = [random.randrange(1, 100, 1) for i in range(100000)]

t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>
rands2 = []
for n in rands:
    if n > 90:
        n = n**2
        rands2.append(n)
# >>>>>>>>>>>>>>>>>>>>>>>>
tLoop = time.perf_counter() - t
print(f'{tLoop:.3f} sec')


t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
c = [n**2 for n in rands if n > 90]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tComp = time.perf_counter() - t
print(f'{tComp:.3f} sec: comprehension vs loop: {100*(tComp/tLoop-1):.1f}%')

print("found: " + str(len(rands2)))
print("found: " + str(len(c)))

0.013 sec
0.007 sec: comprehension vs loop: -45.9%
found: 9044
found: 9044


In [82]:
# expression with condition (filter via if): square only the even numbers among the 100,000 in the list

rands = [random.randrange(1, 100, 1) for i in range(100000)]


t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>
rands2 = []
for n in rands:
    if n % 2 == 0:
        n = n**2
        rands2.append(n)
# >>>>>>>>>>>>>>>>>>>>>>>>
tLoop = time.perf_counter() - t
print(f'{tLoop:.3f} sec')


t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
c = [n**2 for n in rands if n % 2 == 0]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tComp = time.perf_counter() - t
print(f'{tComp:.3f} sec: comprehension vs loop: {100*(tComp/tLoop-1):.1f}%')

print("found: " + str(len(rands2)))
print("found: " + str(len(c)))

0.035 sec
0.028 sec: comprehension vs loop: -18.8%
found: 49381
found: 49381


In [83]:
# list comprehension for text

# create a list of 10,000 dates
datlist = pd.date_range(dt.datetime.today(), periods=10000).tolist()

# convert the dates to strings via list comprehension
datstrlist = [d.strftime("Day %d in %B of year %Y is a %A") for d in datlist]
datstrlist[:4]


['Day 15 in October of year 2021 is a Friday',
 'Day 16 in October of year 2021 is a Saturday',
 'Day 17 in October of year 2021 is a Sunday',
 'Day 18 in October of year 2021 is a Monday']

In [84]:
# filter the list of strings:
# example: add "weekend" to Saturdays and Sundays in October of each year

t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
strLoop = []
for d in datstrlist:
    if (d.endswith("urday") or d.endswith("unday")) and "Oc" in d:
        strLoop.append(d + " = Oct weekend")
    else:
        strLoop.append(d)
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tLoop = time.perf_counter() - t
print(f'{tLoop:.4f} sec')


t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
c = [d + " = Oct weekend" if ((d.endswith("urday") or d.endswith("unday")) and "Oc" in d) else d for d in datstrlist]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tComp = time.perf_counter() - t
print(f'{tComp:.4f} sec: comprehension vs loop: {100*(tComp/tLoop-1):.1f}%')

df = pd.DataFrame(list(zip(strLoop,c)), columns=("loop","comprehension"))
df

0.0048 sec
0.0047 sec: comprehension vs loop: -2.1%


Unnamed: 0,loop,comprehension
0,Day 15 in October of year 2021 is a Friday,Day 15 in October of year 2021 is a Friday
1,Day 16 in October of year 2021 is a Saturday =...,Day 16 in October of year 2021 is a Saturday =...
2,Day 17 in October of year 2021 is a Sunday = O...,Day 17 in October of year 2021 is a Sunday = O...
3,Day 18 in October of year 2021 is a Monday,Day 18 in October of year 2021 is a Monday
4,Day 19 in October of year 2021 is a Tuesday,Day 19 in October of year 2021 is a Tuesday
...,...,...
9995,Day 25 in February of year 2049 is a Thursday,Day 25 in February of year 2049 is a Thursday
9996,Day 26 in February of year 2049 is a Friday,Day 26 in February of year 2049 is a Friday
9997,Day 27 in February of year 2049 is a Saturday,Day 27 in February of year 2049 is a Saturday
9998,Day 28 in February of year 2049 is a Sunday,Day 28 in February of year 2049 is a Sunday


In [85]:
# comprehensions vs lambda-filters

rands = [random.randrange(1, 100, 1) for i in range(100000)]


t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
strLamb = filter(lambda d: ((d.endswith("urday") or d.endswith("unday")) and "Oc" in d), datstrlist)
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tLamb = time.perf_counter() - t
print(f'{tLamb:.4f} sec')


t = time.perf_counter()
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
c = [d for d in datstrlist if ((d.endswith("urday") or d.endswith("unday")) and "Oc" in d)]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tComp = time.perf_counter() - t
print(f'{tComp:.4f} sec: comprehension vs lambda-filter: {100*(tComp/tLamb-1):.1f}%')


df = pd.DataFrame(list(zip(strLamb,c)), columns=("lambda","comprehension"))
df


0.0001 sec
0.0033 sec: comprehension vs lambda-filter: 2271.2%


Unnamed: 0,lambda,comprehension
0,Day 16 in October of year 2021 is a Saturday,Day 16 in October of year 2021 is a Saturday
1,Day 17 in October of year 2021 is a Sunday,Day 17 in October of year 2021 is a Sunday
2,Day 23 in October of year 2021 is a Saturday,Day 23 in October of year 2021 is a Saturday
3,Day 24 in October of year 2021 is a Sunday,Day 24 in October of year 2021 is a Sunday
4,Day 30 in October of year 2021 is a Saturday,Day 30 in October of year 2021 is a Saturday
...,...,...
239,Day 17 in October of year 2048 is a Saturday,Day 17 in October of year 2048 is a Saturday
240,Day 18 in October of year 2048 is a Sunday,Day 18 in October of year 2048 is a Sunday
241,Day 24 in October of year 2048 is a Saturday,Day 24 in October of year 2048 is a Saturday
242,Day 25 in October of year 2048 is a Sunday,Day 25 in October of year 2048 is a Sunday
