In [1]:
import time
import tqdm
import numpy as np
import pandas as pd
import fireducks.pandas as fpd

In [2]:
d = {'a': np.linspace(0, 1000, 1000000)}

In [3]:
times = []
for i in tqdm.tqdm(range(1000)):
    t0 = time.time()
    df = pd.DataFrame(d)
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('Pandas DataFrame creation stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 1000/1000 [00:00<00:00, 1421.87it/s]

Pandas DataFrame creation stats:
Mean: 0.6982ms
5%:   0.5596ms
95%:  0.9269ms
Std:  0.1425ms





In [4]:
times = []
for i in tqdm.tqdm(range(1000)):
    t0 = time.time()
    fdf = fpd.DataFrame(d)
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('FireDucks DataFrame creation stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 1000/1000 [00:02<00:00, 400.02it/s]

FireDucks DataFrame creation stats:
Mean: 2.4863ms
5%:   1.6951ms
95%:  3.7628ms
Std:  1.0644ms





In [5]:
times = []
for i in tqdm.tqdm(range(1000)):
    t0 = time.time()
    df['b'] = df['a'].shift(1)
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('Pandas shifting stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 1000/1000 [00:01<00:00, 936.51it/s]

Pandas shifting stats:
Mean: 1.0607ms
5%:   0.9294ms
95%:  1.2353ms
Std:  0.2485ms





In [6]:
times = []
for i in tqdm.tqdm(range(1000)):
    t0 = time.time()
    fdf['b'] = fdf['a'].shift(1)
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('FireDucks shifting stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 1000/1000 [00:01<00:00, 748.93it/s]

FireDucks shifting stats:
Mean: 1.3304ms
5%:   0.4922ms
95%:  2.2243ms
Std:  1.1808ms





In [7]:
times = []
for i in tqdm.tqdm(range(1000)):
    t0 = time.time()
    df['b'] = df['a'].rolling(100).mean()
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('Pandas aggregation (mean) in rw stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 1000/1000 [00:11<00:00, 85.04it/s]

Pandas aggregation (mean) in rw stats:
Mean: 11.7027ms
5%:   10.9564ms
95%:  12.6807ms
Std:  0.7860ms





In [8]:
times = []
for i in tqdm.tqdm(range(1000)):
    t0 = time.time()
    fdf['b'] = fdf['a'].rolling(100).mean()
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('FireDucks aggregation (mean) in rw stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 1000/1000 [00:03<00:00, 267.34it/s]

FireDucks aggregation (mean) in rw stats:
Mean: 3.7234ms
5%:   2.5150ms
95%:  4.8681ms
Std:  0.7571ms





In [9]:
times = []
for i in tqdm.tqdm(range(1000)):
    t0 = time.time()
    df['b'] = df['a'].rolling(100).max()
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('Pandas aggregation (max) in rw stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 1000/1000 [00:11<00:00, 88.45it/s]

Pandas aggregation (max) in rw stats:
Mean: 11.2532ms
5%:   10.6221ms
95%:  11.9415ms
Std:  0.5963ms





In [10]:
times = []
for i in tqdm.tqdm(range(1000)):
    t0 = time.time()
    fdf['b'] = fdf['a'].rolling(100).max()
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('FireDucks aggregation (max) in rw stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 1000/1000 [00:06<00:00, 155.47it/s]

FireDucks aggregation (max) in rw stats:
Mean: 6.4024ms
5%:   5.1305ms
95%:  7.4944ms
Std:  1.5627ms





In [12]:
times = []
for i in tqdm.tqdm(range(100)):
    t0 = time.time()
    df['b'] = df['a'].rolling(100).median()
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('Pandas aggregation (median) in rw stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 100/100 [00:25<00:00,  3.90it/s]

Pandas aggregation (median) in rw stats:
Mean: 255.9605ms
5%:   251.0729ms
95%:  261.0982ms
Std:  3.2852ms





In [13]:
times = []
for i in tqdm.tqdm(range(100)):
    t0 = time.time()
    fdf['b'] = fdf['a'].rolling(100).median()
    t1 = time.time()
    times.append((t1 - t0) * 1000)
times = pd.Series(times)
print('FireDucks aggregation (median) in rw stats:')
print(f'Mean: {times.mean():.4f}ms')
print(f'5%:   {times.quantile(0.05):.4f}ms')
print(f'95%:  {times.quantile(0.95):.4f}ms')
print(f'Std:  {times.std():.4f}ms')

100%|██████████| 100/100 [00:00<00:00, 123.51it/s]

FireDucks aggregation (median) in rw stats:
Mean: 8.0589ms
5%:   7.2884ms
95%:  9.1744ms
Std:  0.9258ms



