# Multiprocessing Demo

In [1]:
import multiprocessing as mp
import time

import numpy as np
import pandas as pd

## Pool.map

In [2]:
def generate_df(seed, n_iter=3000, n_rows=1000):
    """Build a deliberately CPU-heavy random DataFrame, reproducible from ``seed``.

    Draws ``n_iter`` successive uniform samples of shape ``(n_rows, 4)`` from
    one seeded ``RandomState`` and wraps each in a DataFrame; only the last
    frame is returned. The repetition exists purely to keep a core busy so
    that the benefit of running several calls in parallel is visible.

    Parameters
    ----------
    seed : int
        Seed for the private ``np.random.RandomState``.
    n_iter : int, default 3000
        Number of draw-and-build rounds; must be >= 1.
    n_rows : int, default 1000
        Number of rows in each generated frame.

    Returns
    -------
    pandas.DataFrame
        ``n_rows`` x 4 frame with columns ``a``, ``b``, ``c``, ``d`` holding
        the final draw.

    Raises
    ------
    ValueError
        If ``n_iter`` is less than 1 (there would be no frame to return).
    """
    if n_iter < 1:
        raise ValueError(f'n_iter must be >= 1, got {n_iter}')
    random = np.random.RandomState(seed)
    columns = 'a b c d'.split()
    # loop enough to hammer the cpu; every pass advances the generator, so the
    # returned values depend on n_iter as well as on seed
    for _ in range(n_iter):
        raw_data = random.uniform(0, 1, (n_rows, 4))
        data = pd.DataFrame(raw_data, columns=columns)
    return data

In [3]:
# Run a single job serially in this process to inspect what one call returns
# before fanning the work out to a pool.
generate_df(1)

Unnamed: 0,a,b,c,d
0,0.137483,0.657284,0.562603,0.817594
1,0.725388,0.334668,0.836014,0.487029
2,0.766834,0.570595,0.544925,0.257220
3,0.285100,0.409054,0.419120,0.669106
4,0.996189,0.271018,0.918941,0.241401
...,...,...,...,...
995,0.094511,0.607388,0.832851,0.788585
996,0.958122,0.653742,0.096488,0.247951
997,0.884332,0.200825,0.542416,0.123100
998,0.184714,0.582153,0.664950,0.461583


In [4]:
%%time

# Map generate_df over 24 seeds (4x the pool size) using 6 worker processes,
# then stack the per-seed frames into one. pd.concat keeps each frame's own
# row index, so index labels repeat across seeds in all_data.
# NOTE(review): workers must be able to look up generate_df; under the 'spawn'
# start method a function defined in a notebook cell may not be importable by
# the workers -- confirm on your platform.
with mp.Pool(6) as p:
    all_data = pd.concat(p.map(generate_df, range(4*6)))

CPU times: user 34.8 ms, sys: 25.3 ms, total: 60.1 ms
Wall time: 3.71 s


In [5]:
all_data

Unnamed: 0,a,b,c,d
0,0.576708,0.195810,0.031729,0.646863
1,0.571561,0.925284,0.067544,0.518820
2,0.254498,0.189676,0.179997,0.992704
3,0.154942,0.376675,0.559175,0.986592
4,0.482726,0.855353,0.391881,0.001010
...,...,...,...,...
995,0.588729,0.540600,0.212360,0.297545
996,0.236846,0.832905,0.748435,0.291838
997,0.826149,0.736286,0.864892,0.469813
998,0.129048,0.745003,0.632109,0.171538


## Pool.starmap

In [6]:
def generate_df_more_args(seed, N, name, n_iter=3000):
    """Build a named random DataFrame, repeating the build to burn CPU time.

    The random values are drawn once from a ``RandomState`` seeded with
    ``seed``; the DataFrame is then constructed ``n_iter`` times from that
    same array and only the last frame is returned. The result therefore does
    not depend on ``n_iter`` -- the repetition only makes the call expensive
    enough for parallel execution to matter.

    Parameters
    ----------
    seed : int
        Seed for the private ``np.random.RandomState``.
    N : int
        Number of rows to generate.
    name : str
        Value written to every row of the ``name`` column.
    n_iter : int, default 3000
        Number of times the frame is rebuilt; must be >= 1.

    Returns
    -------
    pandas.DataFrame
        ``N`` rows with float columns ``a``, ``b``, ``c``, ``d`` and a
        constant ``name`` column.

    Raises
    ------
    ValueError
        If ``n_iter`` is less than 1 (there would be no frame to return).
    """
    if n_iter < 1:
        raise ValueError(f'n_iter must be >= 1, got {n_iter}')
    random = np.random.RandomState(seed)
    raw_data = random.uniform(0, 1, (N, 4))
    columns = 'a b c d'.split()
    # loop enough to hammer the cpu
    for _ in range(n_iter):
        data = pd.DataFrame(raw_data, columns=columns)
        data['name'] = name
    return data

In [7]:
# One (seed, N, name) job per capital letter: seed k, 2k + 1 rows, k-th letter.
args = [
    (seed, 2*seed + 1, letter)
    for seed, letter in enumerate(map(chr, range(ord('A'), ord('Z') + 1)))
]
args

[(0, 1, 'A'),
 (1, 3, 'B'),
 (2, 5, 'C'),
 (3, 7, 'D'),
 (4, 9, 'E'),
 (5, 11, 'F'),
 (6, 13, 'G'),
 (7, 15, 'H'),
 (8, 17, 'I'),
 (9, 19, 'J'),
 (10, 21, 'K'),
 (11, 23, 'L'),
 (12, 25, 'M'),
 (13, 27, 'N'),
 (14, 29, 'O'),
 (15, 31, 'P'),
 (16, 33, 'Q'),
 (17, 35, 'R'),
 (18, 37, 'S'),
 (19, 39, 'T'),
 (20, 41, 'U'),
 (21, 43, 'V'),
 (22, 45, 'W'),
 (23, 47, 'X'),
 (24, 49, 'Y'),
 (25, 51, 'Z')]

In [8]:
%%time

# starmap unpacks each (seed, N, name) tuple in args into the positional
# arguments of generate_df_more_args, running the calls on 6 worker processes.
# The per-job frames are then stacked; each keeps its own row index, so index
# labels repeat across names. This rebinds all_data from the Pool.map section.
with mp.Pool(6) as p:
    all_data = pd.concat(p.starmap(generate_df_more_args, args))

CPU times: user 44.1 ms, sys: 30 ms, total: 74.1 ms
Wall time: 15.3 s


In [9]:
all_data

Unnamed: 0,a,b,c,d,name
0,0.548814,0.715189,0.602763,0.544883,A
0,0.417022,0.720324,0.000114,0.302333,B
1,0.146756,0.092339,0.186260,0.345561,B
2,0.396767,0.538817,0.419195,0.685220,B
0,0.435995,0.025926,0.549662,0.435322,C
...,...,...,...,...,...
46,0.757259,0.975881,0.251659,0.968149,Z
47,0.139969,0.711813,0.069177,0.605039,Z
48,0.059525,0.362887,0.258474,0.026127,Z
49,0.732580,0.451459,0.863190,0.529924,Z


In [10]:
# Sanity check: the row count for each name should equal the N passed in its
# args tuple (2*i + 1 for the i-th letter).
all_data.groupby('name').size()

name
A     1
B     3
C     5
D     7
E     9
F    11
G    13
H    15
I    17
J    19
K    21
L    23
M    25
N    27
O    29
P    31
Q    33
R    35
S    37
T    39
U    41
V    43
W    45
X    47
Y    49
Z    51
dtype: int64

## Pool.starmap + output object

In [11]:
class LocalDB:
    """Container whose tables are generated in parallel and exposed as attributes.

    Table ``k`` (counting from 0) is named ``chr(ord('A') + k)`` and is the
    result of ``generate_df_more_args(k, 2*k + 1, <name>)``.
    """

    def __init__(self, ntables, ncpu=6):
        """Generate ``ntables`` tables on a pool of ``ncpu`` worker processes.

        Each resulting DataFrame is stored on the instance under its table
        name; the ordered list of names is kept in ``_table_names``.
        """
        # Describe every job up front as a (seed, N, name) tuple.
        jobs = [(k, 2*k + 1, chr(ord('A') + k)) for k in range(ntables)]
        # The table name is the last element of each job tuple.
        table_names = [job[-1] for job in jobs]
        self._table_names = table_names
        # Run all jobs on the worker pool; starmap returns results in job order.
        with mp.Pool(ncpu) as pool:
            frames = pool.starmap(generate_df_more_args, jobs)
        # Attach each frame to the instance under its table name.
        for table_name, frame in zip(table_names, frames):
            setattr(self, table_name, frame)

    def __repr__(self):
        """Return a short description listing the available table names."""
        return f'LocalDB(with tables {self._table_names})'

In [12]:
%%time

# Build 26 tables (named 'A' through 'Z') with the default pool of 6 workers.
db = LocalDB(26)

CPU times: user 39.7 ms, sys: 26.8 ms, total: 66.4 ms
Wall time: 17.4 s


In [13]:
db

LocalDB(with tables ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'])

In [14]:
db.M

Unnamed: 0,a,b,c,d,name
0,0.154163,0.74005,0.263315,0.533739,M
1,0.014575,0.918747,0.900715,0.033421,M
2,0.956949,0.137209,0.283828,0.606083,M
3,0.944225,0.852736,0.002259,0.521226,M
4,0.552038,0.485377,0.768134,0.160717,M
5,0.76456,0.02081,0.13521,0.116273,M
6,0.309898,0.671453,0.47123,0.816168,M
7,0.289587,0.733126,0.702622,0.327569,M
8,0.334648,0.978058,0.624582,0.950314,M
9,0.767476,0.825009,0.40664,0.451308,M
