In [31]:
# !pip install polars
# !pip install vaex
# !pip install duckdb
# !pip install fastparquet

In [1]:
from pathlib import Path

import pandas as pd
import vaex as vx
import polars as pl
import duckdb

from IPython.display import Markdown, display

In [2]:
def printmd(string):
    display(Markdown(string))

In [3]:
rawDataPath = Path('../data/raw')
csvFile = rawDataPath / 'large.csv'
parquetFile = rawDataPath / 'large.parquet'
csvFile.exists() and parquetFile.exists()

True

In [4]:
df.head()

NameError: name 'df' is not defined

In [None]:
# df = pl.concat([pl.read_csv(csvFile) for _ in range(100)],axis=1)
# df.to_csv(rawDataPath / 'large.csv')
# df.to_parquet(rawDataPath / 'large.parquet')
# df.head()

In [5]:
dfcsv = pl.read_csv(rawDataPath / 'large.csv')
dfpar = pl.read_parquet(rawDataPath / 'large.parquet')

In [6]:
dfcsv.shape

(19900, 3926)

In [7]:
dfpar.shape

(19900, 3926)

In [None]:
!du -h ../data/raw/large.*

The time benchmarks here should be taken with a pinch of salt. These libraries have many distinct features beyond that captured here. For example:
- DuckDB is a very lightweight dependancy that makes it highly portable
- DuckDB has aimed for a wide testing base rather than optimising for performance
- Vaex uses memory-mapping that makes it highly effective on large datasets
- etc

In [11]:
# to start an in-memory database
con = duckdb.connect(database=':memory:')

# Compare read time for a CSV

In [40]:
printmd("**Pandas with default C parser**")
%timeit -n 1 -r 1 pd.read_csv(csvFile)

Pandas with default C parser

48.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [41]:
printmd("**Pandas with pyarrow parser**")
%timeit -n 1 -r 1 pd.read_csv(csvFile,engine='pyarrow')

**Pandas with pyarrow parser**

1min 22s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [42]:
printmd("**Vaex**")
%timeit -n 1 -r 1 vx.read_csv(csvFile)

**Vaex**

49.4 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [43]:
printmd("**Polars**")
%timeit -n 1 -r 1 pl.read_csv(csvFile)

**Polars**

22.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [44]:
printmd("**DuckDB**")
%timeit -n 1 -r 1 con.execute(f"SELECT * FROM '{csvFile}';").fetchdf()

**DuckDB**

39.6 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Compare read time for a Parquet file

In [45]:
printmd("**Pandas with default parquet reader**")
%timeit -n 1 -r 1 pd.read_parquet(parquetFile)

**Pandas**

7.65 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [46]:
printmd("**Pandas with pyarrow parquet reader**")
%timeit -n 1 -r 1 pd.read_parquet(parquetFile,engine='pyarrow')

**Pandas with pyarrow parquet reader**

990 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [47]:
printmd("**Pandas with fastparquet reader**")
%timeit -n 1 -r 1 pd.read_parquet(parquetFile,engine='fastparquet')

**Pandas with fastparquet reader**

1.25 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [48]:
printmd("**Vaex**")
%timeit -n 1 -r 1 vx.open(parquetFile)

**Vaex**

703 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [56]:
printmd("**Polars**")
%timeit -n 1 -r 3 pl.read_parquet(parquetFile)

**Polars**

198 ms ± 55.9 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [55]:
printmd("**Polars with pyarrow**")
%timeit -n 1 -r 3 pl.read_parquet(parquetFile,use_pyarrow=True)

**Polars with pyarrow**

1.36 s ± 456 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [51]:
printmd("**DuckDB**")
%timeit -n 1 -r 1 con.execute(f"SELECT * FROM '{parquetFile}';").fetchdf()

**DuckDB**

12.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [70]:
def groupbyPandas(df:pd.DataFrame):
    return df.groupby('z').mean()
df = pl.read_parquet(parquetFile).to_pandas()
gbPandas = groupbyPandas(df=df)


In [79]:
def groupbyPolars(df:pl.DataFrame):
    gb = df.lazy().groupby('z').agg([pl.mean(df.columns[1:])]).sort("z", reverse=False)
    return gb.collect()
df = pl.read_parquet(parquetFile)
gbPolars = groupbyPolars(df=df)
pd.testing.assert_frame_equal(gbPandas,gbPolars.to_pandas().set_index('z'))

In [6]:
df.head()

#,z,0:June:2020,1:June:2020,2:July:2020,3:June:2021,4:June:2021,5:June:2021,6:April:2016,7:March:2018,8:May:2005,9:August:2016,10:June:2002,11:April:2002,12:November:2001,13:November:2001,14:November:2001,15:January:2003,16:December:2002,17:December:2002,18:December:2002,19:November:2002,20:November:2002,21:November:2002,22:October:2002,23:October:2002,24:October:2002,25:September:2002,26:September:2002,27:September:2002,28:August:2002,29:August:2002,30:August:2002,31:July:2002,32:July:2002,33:July:2002,34:June:2002,35:June:2002,36:June:2002,37:May:2002,38:April:2002,39:April:2002,40:April:2002,41:March:2002,42:March:2002,43:January:2002,44:January:2002,45:January:2002,46:December:2001,47:December:2001,48:December:2001,49:November:2001,50:November:2001,51:November:2001,52:October:2001,53:October:2001,54:October:2001,55:October:2001,56:September:2001,57:September:2001,58:August:2001,59:August:2001,60:July:2001,61:July:2001,62:June:2001,63:June:2001,64:September:2001,65:October:2001,66:October:2001,67:November:2001,68:December:2001,69:December:2001,70:December:2001,71:January:2002,72:January:2002,73:February:2002,74:March:2002,75:March:2002,76:March:2002,77:April:2002,78:May:2002,79:May:2002,80:June:2002,81:June:2002,82:July:2002,83:July:2002,84:August:2002,85:July:2005,86:June:2001,87:June:2001,88:July:2001,89:July:2001,90:August:2001,91:August:2001,92:September:2001,93:September:2001,94:September:2001,95:October:2001,96:October:2001,97:November:2001,98:November:2001,...,3825:June:2019,3826:November:2020,3827:November:2020,3828:November:2020,3829:December:2020,3830:August:2020,3831:August:2020,3832:September:2020,3833:September:2020,3834:September:2020,3835:October:2020,3836:October:2020,3837:October:2020,3838:January:2021,3839:January:2021,3840:July:2020,3841:July:2020,3842:July:2020,3843:July:2020,3844:August:2020,3845:August:2020,3846:August:2020,3847:August:2020,3848:September:2020,3849:September:2020,3850:September:2020,3851:October:2020,3852:October:2020,3853:October:2020,3854:November:2020,3855:July:2020,3856:July:2020,3857:July:2020,3858:July:2020,3859:August:2020,3860:August:2020,3861:August:2020,3862:September:2020,3863:September:2020,3864:September:2020,3865:October:2020,3866:October:2020,3867:October:2020,3868:November:2020,3869:November:2020,3870:December:2020,3871:December:2020,3872:December:2020,3873:January:2021,3874:January:2021,3875:January:2021,3876:February:2021,3877:February:2021,3878:February:2021,3879:March:2021,3880:March:2021,3881:March:2021,3882:April:2021,3883:April:2021,3884:April:2021,3885:June:2021,3886:June:2020,3887:July:2020,3888:July:2020,3889:July:2020,3890:August:2020,3891:August:2020,3892:August:2020,3893:September:2020,3894:September:2020,3895:September:2020,3896:October:2020,3897:October:2020,3898:October:2020,3899:December:2020,3900:January:2021,3901:January:2021,3902:January:2021,3903:February:2021,3904:April:2021,3905:April:2021,3906:April:2021,3907:May:2021,3908:May:2021,3909:June:2021,3910:June:2020,3911:July:2020,3912:July:2020,3913:July:2020,3914:August:2020,3915:August:2020,3916:August:2020,3917:September:2020,3918:September:2020,3919:September:2020,3920:October:2020,3921:October:2020,3922:October:2020,3923:November:2020,3924:November:2020
0,-3,15.655,14.549,15.049,12.237,14.069,14.178,11.103,10.482,13.578,14.729,14.81,12.85,14.77,15.0,15.42,12.12,12.14,12.35,12.59,13.17,13.69,14.23,14.78,15.44,16.45,16.65,16.28,17.4,17.37,16.86,16.78,16.51,16.38,14.91,14.71,14.08,14.03,13.13,12.09,11.88,12.19,12.38,12.3,12.37,12.29,12.5,12.6,12.9,13.63,13.96,14.54,14.91,15.65,16.02,16.41,17.3,17.82,17.53,17.19,17.71,16.14,16.84,16.11,15.06,17.183,16.488,15.033,13.423,13.269,12.936,12.698,12.896,12.43,11.964,11.851,11.909,12.079,12.378,13.163,13.582,14.176,15.055,15.663,17.038,17.28,16.539,15.24,15.83,16.67,15.92,17.48,17.2,17.53,17.87,17.54,16.02,15.62,14.94,14.51,...,15.575,12.069,11.719,11.564,10.98,17.155,17.018,16.609,17.163,15.879,15.694,15.317,14.06,11.932,12.024,16.993,15.109,15.051,17.267,17.501,19.045,17.901,17.197,17.514,17.687,16.704,15.71,15.215,14.435,14.099,14.666,16.112,17.334,16.932,18.043,18.479,17.079,17.486,17.579,16.57,15.847,15.473,14.326,13.677,13.242,12.191,11.981,11.433,11.194,11.142,11.104,11.04,11.318,11.017,11.496,11.454,10.975,11.211,11.164,11.542,13.454,15.236,15.414,16.373,16.573,17.586,18.425,16.367,16.703,17.144,15.987,15.662,15.003,14.172,11.998,11.836,11.716,11.643,11.59,11.338,11.421,11.848,12.084,12.221,14.537,15.1,15.664,16.684,17.5,17.613,18.713,17.341,17.504,18.089,16.77,15.748,15.24,14.937,14.277,14.013
1,-6,15.655,14.549,15.049,12.237,14.069,14.178,11.103,10.482,13.578,14.729,14.81,12.85,14.77,15.0,15.42,12.12,12.14,12.35,12.59,13.17,13.69,14.23,14.78,15.44,16.45,16.65,16.28,17.4,17.37,16.86,16.78,16.51,16.38,14.91,14.71,14.08,14.03,13.13,12.09,11.88,12.19,12.38,12.3,12.37,12.29,12.5,12.6,12.9,13.63,13.96,14.54,14.91,15.65,16.02,16.41,17.3,17.82,17.53,17.19,17.71,16.14,16.84,16.11,15.06,17.183,16.488,15.033,13.423,13.269,12.936,12.698,12.896,12.43,11.964,11.851,11.909,12.079,12.378,13.163,13.582,14.176,15.055,15.663,17.038,17.28,16.539,15.24,15.83,16.67,15.92,17.48,17.2,17.53,17.87,17.54,16.02,15.62,14.94,14.51,...,15.5752,12.069,11.719,11.564,10.98,17.155,17.018,16.609,17.1636,15.879,15.694,15.3171,14.06,11.932,12.024,16.993,15.109,15.051,17.267,17.501,19.0397,17.901,17.197,17.514,17.687,16.704,15.71,15.2154,14.435,14.099,14.666,16.112,17.3339,16.9318,18.0426,18.479,17.0788,17.486,17.579,16.57,15.847,15.4729,14.326,13.677,13.2428,12.1911,11.981,11.433,11.1939,11.142,11.104,11.04,11.3179,11.017,11.496,11.454,10.975,11.2111,11.164,11.542,13.4541,15.236,15.414,16.373,16.573,17.586,18.4239,16.367,16.703,17.144,15.987,15.662,15.003,14.172,11.998,11.8373,11.716,11.6429,11.59,11.338,11.4211,11.848,12.084,12.221,14.537,15.1,15.664,16.684,17.5004,17.613,18.713,17.341,17.504,18.0891,16.77,15.7479,15.24,14.937,14.277,14.013
2,-9,15.655,14.5485,15.0494,12.237,14.069,14.178,11.1025,10.482,13.578,14.729,14.81,12.85,14.77,15.0,15.42,12.12,12.14,12.35,12.59,13.17,13.6898,14.23,14.78,15.44,16.45,16.65,16.28,17.4,17.37,16.86,16.78,16.51,16.38,14.91,14.71,14.08,14.03,13.13,12.09,11.88,12.19,12.38,12.2999,12.37,12.29,12.5,12.6,12.9,13.63,13.96,14.54,14.91,15.65,16.02,16.41,17.3,17.82,17.53,17.19,17.71,16.14,16.84,16.11,15.06,17.183,16.4871,15.0322,13.423,13.269,12.9379,12.698,12.896,12.4308,11.964,11.8518,11.9081,12.079,12.3789,13.1638,13.4162,14.1767,15.0558,15.5894,17.0409,17.279,16.539,15.24,15.83,16.67,15.92,17.48,17.2,17.53,17.87,17.54,16.02,15.62,14.94,14.51,...,15.574,12.0673,11.72,11.564,10.9797,17.1573,17.018,16.6079,17.17,15.885,15.6937,15.3207,14.0746,11.9353,12.025,16.6791,15.1098,15.051,16.917,17.501,18.9165,17.9006,17.198,17.5141,17.6214,16.7052,15.7186,15.213,14.4351,14.1005,14.6716,16.1098,17.3237,16.9303,18.034,18.4782,17.079,17.482,17.5782,16.5731,15.846,15.4726,14.3261,13.7,13.251,12.192,11.982,11.433,11.197,11.1413,11.1052,11.046,11.3192,11.0182,11.505,11.458,10.976,11.212,11.1641,11.542,13.4574,15.236,15.4133,16.3758,16.574,17.285,18.1191,16.3676,16.7031,17.1446,15.986,15.6677,15.006,14.1733,11.9986,11.8465,11.721,11.642,11.5879,11.3396,11.423,11.848,12.0922,12.2205,14.241,15.098,15.6616,16.4906,17.4891,17.5363,18.6377,17.3406,17.508,17.9367,16.763,15.748,15.2406,14.9365,14.2748,14.0149
3,-12,15.655,14.545,15.053,12.237,14.0689,14.178,11.101,10.4825,13.578,14.729,14.8015,12.85,14.7687,15.0,15.42,12.12,12.14,12.35,12.59,13.17,13.6894,14.23,14.78,15.44,16.4458,16.65,16.28,17.4,17.3538,16.86,16.78,16.35,16.3533,14.9021,14.71,14.0733,14.03,13.12,12.0881,11.8796,12.12,12.3767,12.2996,12.3696,12.29,12.4992,12.6,12.9001,13.6298,13.96,14.54,14.9022,15.65,16.02,16.41,17.3,17.82,17.53,17.19,17.71,16.14,16.833,16.0575,15.06,17.1836,16.4864,15.0328,13.4239,13.269,12.9394,12.6988,12.8968,12.4331,11.9699,11.8528,11.9099,12.0795,12.379,13.1643,13.3015,14.1773,15.0563,15.5005,16.8412,17.2731,16.539,15.24,15.83,16.67,15.92,17.48,17.2,17.53,17.87,17.54,16.02,15.62,14.94,14.51,...,15.5229,12.0662,11.72,11.5623,10.9802,17.151,17.0187,16.6076,17.1687,15.885,15.6938,15.3199,14.0774,11.9408,12.024,16.5914,15.1087,15.0504,16.8653,17.5017,18.6004,17.901,17.1987,17.515,17.5548,16.7067,15.718,15.2138,14.436,14.1014,14.671,16.0947,17.2181,16.9317,17.9694,18.4819,17.0823,17.4803,17.579,16.574,15.8464,15.4728,14.3268,13.7101,13.2497,12.192,11.9827,11.433,11.1959,11.1406,11.1021,11.0523,11.3213,11.0183,11.506,11.4082,10.976,11.212,11.165,11.5408,13.4585,15.2357,15.4132,16.3289,16.5728,17.1478,17.5774,16.3727,16.6873,17.1394,15.9853,15.6742,15.006,14.1737,11.999,11.8517,11.7257,11.642,11.5877,11.339,11.4227,11.8487,12.094,12.2173,14.0994,15.0973,15.505,16.3894,17.4475,17.4589,18.5093,17.3398,17.4924,17.4356,16.7698,15.7486,15.2436,14.9357,14.2785,14.0153
4,-15,15.5266,14.5395,15.053,12.2294,14.066,14.1665,11.0999,10.4936,13.578,14.729,14.7888,12.85,14.7667,14.9987,15.4138,12.12,12.1401,12.3501,12.59,13.17,13.6889,14.23,14.78,15.4386,16.4331,16.6282,16.28,17.3914,17.3294,16.835,16.78,15.99,16.3267,14.8942,14.68,14.0533,14.005,13.07,12.0863,11.8791,12.1067,12.3717,12.2993,12.3691,12.29,12.4967,12.6,12.9003,13.6294,13.9589,14.54,14.8906,15.6491,16.0158,16.4052,17.285,17.82,17.465,17.1858,17.71,16.1392,16.812,15.9,15.06,17.1842,16.4859,15.0337,13.4248,13.269,12.9409,12.6997,12.8977,12.4355,11.9768,11.8537,11.9119,12.0802,12.379,13.1646,13.2109,14.1776,15.0566,15.4261,16.6004,17.2671,16.5398,15.24,15.83,16.67,15.92,17.48,17.1944,17.53,17.87,17.54,16.02,15.62,14.94,14.51,...,15.439,12.0726,11.72,11.5606,10.9795,17.1441,17.0195,16.6056,17.1694,15.885,15.694,15.321,14.081,11.9436,12.0245,16.4323,15.109,15.016,16.636,17.5002,18.3234,17.9002,17.1998,17.515,17.5456,16.7075,15.7191,15.2136,14.436,14.101,14.6724,16.0258,17.1437,16.9324,17.6616,18.4651,17.0816,17.4741,17.5794,16.5744,15.8464,15.4735,14.3279,13.7095,13.2522,12.192,11.9834,11.4334,11.1954,11.1418,11.1006,11.0587,11.321,11.0194,11.5056,11.3828,10.976,11.2124,11.165,11.538,13.459,15.2364,15.4117,16.2535,16.5726,17.0052,17.1823,16.3712,16.6665,17.1371,15.985,15.6771,15.0067,14.1732,11.9986,11.8529,11.7264,11.6424,11.5896,11.3402,11.4234,11.849,12.0935,12.2171,13.9757,15.0975,15.3604,16.3182,17.2366,17.3811,18.2731,17.3405,17.4317,17.0296,16.7724,15.7498,15.2468,14.9368,14.28,14.0156
5,-18,15.3089,14.5329,15.0533,12.2166,14.0209,14.0059,11.0996,10.5023,13.578,14.729,14.7762,12.85,14.7648,14.9969,15.4046,12.12,12.1403,12.3506,12.59,13.17,13.6885,14.23,14.78,15.4372,16.4204,16.5955,16.28,17.3657,17.305,16.81,16.7,15.76,16.3,14.8863,14.625,14.0333,13.9675,13.0322,12.0844,11.8786,12.0933,12.3667,12.299,12.3686,12.29,12.4943,12.6,12.9005,13.6289,13.9573,14.5387,14.8789,15.6482,16.0096,16.3908,17.27,17.712,17.292,17.1731,17.677,16.1381,16.791,15.61,15.0488,17.1848,16.4853,15.0347,13.4257,13.269,12.9424,12.7007,12.8985,12.4379,11.9836,11.8546,11.9139,12.0808,12.379,13.1649,13.1203,14.1779,15.0569,15.3517,16.3595,17.2612,16.541,15.24,15.56,16.67,15.92,17.48,17.1775,17.4979,17.87,17.54,16.02,15.62,14.94,14.51,...,15.2882,12.0809,11.72,11.56,10.9791,17.1291,17.02,16.602,17.17,15.885,15.694,15.322,14.0852,11.9458,12.025,16.2423,15.1092,14.965,16.3181,17.4623,18.084,17.8991,17.201,17.5151,17.536,16.708,15.7217,15.213,14.436,14.101,14.6738,15.9422,17.0688,16.9324,17.2976,18.4367,17.081,17.4667,17.58,16.575,15.847,15.474,14.3289,13.705,13.255,12.1921,11.984,11.434,11.196,11.143,11.1,11.0629,11.321,11.0201,11.505,11.3678,10.976,11.213,11.165,11.5341,13.459,15.2369,15.41,16.1842,16.572,16.8579,16.9111,16.3683,16.6436,17.1329,15.9851,15.6791,15.0079,14.1721,11.998,11.854,11.727,11.643,11.5918,11.3419,11.424,11.849,12.0931,12.2188,13.8789,15.0984,15.241,16.2599,16.946,17.2957,18.0178,17.341,17.3478,16.6852,16.773,15.751,15.2491,14.938,14.28,14.015
6,-21,15.1067,14.5243,15.0538,12.1997,13.8915,13.5495,11.0993,10.5033,13.5416,14.7275,14.7635,12.85,14.7628,14.995,15.3954,12.12,12.1405,12.351,12.59,13.17,13.688,14.23,14.78,15.4358,16.4077,16.5627,16.28,17.34,17.2806,16.785,16.62,15.34,16.165,14.8784,14.55,14.0133,13.93,13.0204,12.0825,11.878,12.08,12.3617,12.2988,12.3681,12.29,12.4918,12.6009,12.9008,13.6284,13.9556,14.5369,14.8672,15.6474,16.0033,16.3764,17.255,17.604,16.968,17.1604,17.644,16.1369,16.77,15.1129,15.0375,17.1842,16.4857,15.0333,13.4262,13.2694,12.9432,12.7014,12.8976,12.4388,11.9868,11.8552,11.8981,12.0814,12.3792,13.1645,13.0758,14.1492,15.0509,15.2105,16.1015,17.0724,16.5423,15.24,15.18,16.67,15.9085,17.3933,17.1606,17.4014,17.8271,17.54,16.018,15.62,14.94,14.51,...,15.1241,12.0791,11.7219,11.5636,10.9796,16.8913,17.02,16.5912,17.17,15.8863,15.6946,15.3214,14.0863,11.9448,12.025,15.888,15.1103,14.5695,15.962,17.1945,17.6897,17.8547,17.201,17.517,17.5115,16.708,15.7172,15.2141,14.4366,14.1016,14.6715,15.797,16.9247,16.9234,17.1231,18.3478,17.081,17.4563,17.5806,16.575,15.847,15.474,14.3296,13.7002,13.2544,12.1927,11.984,11.4347,11.1966,11.143,11.0994,11.063,11.3216,11.0213,11.5038,11.3734,10.9766,11.2124,11.1656,11.5059,13.4568,15.2346,15.4024,16.1478,16.461,16.6917,16.666,16.3674,16.6019,17.1024,15.9856,15.6797,15.0097,14.1731,11.9986,11.854,11.7276,11.6436,11.5903,11.3414,11.424,11.8502,12.0936,12.2175,13.8321,15.1006,15.199,16.0858,16.3247,17.1476,17.9521,17.3416,17.2506,16.3501,16.7742,15.7504,15.2496,14.9387,14.2806,14.0144
7,-24,14.959,14.5139,15.0544,12.1801,13.6392,13.1479,11.099,10.5048,13.5051,14.7226,14.7508,12.85,14.7609,14.9931,15.3862,12.12,12.1407,12.3514,12.59,13.17,13.6875,14.23,14.78,15.4344,16.395,16.53,16.28,17.3143,17.2562,16.76,16.48,14.98,16.03,14.8705,14.4067,13.9942,13.8925,13.0087,12.0806,11.8775,12.0718,12.3567,12.2985,12.3675,12.29,12.4893,12.6022,12.901,13.6279,13.954,14.535,14.8556,15.6465,15.9971,16.362,17.24,17.496,16.7714,17.1477,17.611,16.1358,16.749,14.9671,14.9425,17.183,16.4869,15.0307,13.4265,13.27,12.9435,12.702,12.8942,12.4385,11.9865,11.8555,11.8617,12.082,12.3795,13.1635,13.0666,14.0916,15.0408,14.9929,15.8266,16.6747,16.5163,15.205,15.1388,16.542,15.8969,16.555,17.1438,17.305,17.7843,17.54,16.012,15.62,14.94,14.51,...,14.96,12.0795,11.7235,11.5664,10.98,16.531,17.0203,16.5721,17.1704,15.8868,15.695,15.321,14.0872,11.9457,12.0252,15.501,15.1113,14.2866,15.6642,16.6368,17.1727,17.694,17.2015,17.5159,17.4336,16.7081,15.7136,15.2154,14.4368,14.1025,14.6691,15.6368,16.7594,16.7015,16.9915,18.1696,17.082,17.4331,17.581,16.575,15.8466,15.4742,14.3298,13.6936,13.2542,12.193,11.984,11.4353,11.1974,11.1432,11.0983,11.063,11.322,11.0216,11.5014,11.3799,10.9774,11.2114,11.1662,11.4734,13.4169,15.2268,15.3819,15.9876,16.1103,16.461,16.3567,16.3672,16.5525,17.0419,15.9862,15.6805,15.0116,14.1742,11.9992,11.8535,11.7282,11.644,11.5898,11.3396,11.4244,11.851,12.0942,12.2158,13.7778,15.1018,15.132,15.8325,15.8529,16.955,17.8745,17.3391,17.107,16.0331,16.7752,15.7504,15.2502,14.939,14.2812,14.0142
8,-27,14.8353,14.5027,15.0533,12.1588,13.3449,12.8057,11.099,10.5043,13.481,14.7223,14.7381,12.85,14.7589,14.9912,15.362,12.12,12.1409,12.3518,12.59,13.17,13.6871,14.23,14.78,15.433,16.3823,16.4567,16.28,17.2886,17.1771,16.24,16.372,14.44,15.895,14.8626,14.165,13.9767,13.715,12.997,12.0793,11.877,12.0636,12.3517,12.2982,12.367,12.29,12.4869,12.6034,12.9012,13.6275,13.9523,14.5331,14.8439,15.6456,15.9908,16.3476,17.225,17.06,16.6386,17.135,17.578,16.1346,16.728,14.8567,14.68,17.1818,16.4881,15.028,13.4268,13.2706,12.9438,12.7026,12.8909,12.4382,11.9862,11.8558,11.8254,12.0826,12.3798,13.1626,13.0574,14.0341,15.0307,14.7753,15.5516,16.277,16.4496,15.1167,15.0975,16.35,15.8854,16.1767,17.1269,17.2086,17.6,17.54,16.006,15.62,14.94,14.51,...,14.7319,12.0831,11.7246,11.5676,10.98,15.8333,17.0209,16.5255,17.1715,15.8862,15.695,15.321,14.0878,11.9503,12.0258,15.0397,15.1119,14.1984,15.4333,15.4991,16.463,17.1671,17.2033,17.5118,17.2634,16.7087,15.7125,15.2173,14.4362,14.1036,14.6664,15.451,16.5665,16.1602,16.909,17.8114,17.0844,17.3693,17.581,16.575,15.8455,15.4748,14.3292,13.6834,13.2548,12.193,11.984,11.4359,11.1986,11.1438,11.0958,11.063,11.322,11.0205,11.497,11.3879,10.9786,11.2097,11.1668,11.4352,13.2741,15.2035,15.3317,15.6414,15.4509,16.1335,15.933,16.3678,16.4954,16.9298,15.9868,15.6816,15.0135,14.1749,11.9999,11.8523,11.7288,11.644,11.5927,11.3361,11.4255,11.851,12.0947,12.2146,13.7124,15.1012,15.0151,15.4495,15.6051,16.6933,17.7833,17.3313,16.8946,15.7435,16.7759,15.7516,15.2508,14.939,14.2818,14.0148
9,-30,14.7204,14.4671,15.0274,12.1231,13.0881,12.7865,11.099,10.5059,13.463,14.7068,14.7254,12.85,14.757,14.9894,15.308,12.12,12.1411,12.3522,12.59,13.17,13.6866,14.23,14.78,15.4316,16.3696,16.3833,16.0533,17.1455,16.9886,16.145,16.185,14.0107,15.76,14.8175,14.07,13.9592,13.37,12.9852,12.0783,11.8764,12.0555,12.3469,12.2979,12.3665,12.29,12.4844,12.6047,12.9015,13.627,13.9507,14.5312,14.8182,15.6447,15.9846,16.3332,17.21,16.4367,16.43,17.1223,17.545,16.1335,16.707,14.7829,14.5027,17.1809,16.4889,15.0246,13.4271,13.271,12.9441,12.7033,12.888,12.4381,11.987,11.8562,11.7981,12.0796,12.38,13.1624,13.0485,13.9736,14.9535,14.5684,15.2807,15.858,16.3829,14.9567,14.995,16.158,15.8738,16.03,17.11,17.1121,17.18,17.54,16.0,15.62,14.94,14.51,...,14.4203,12.0859,11.7242,11.5684,10.9804,14.9542,17.0201,16.4715,16.9691,15.8845,15.6954,15.3221,14.088,11.9512,12.0264,14.6839,15.1092,14.1615,15.2463,14.6548,15.6543,16.2879,17.2044,17.4977,16.9925,16.7093,15.712,15.218,14.436,14.1053,14.6635,15.3633,16.353,15.543,16.7298,17.2582,17.0841,17.3386,17.5818,16.5754,15.8461,15.4746,14.329,13.651,13.2566,12.1934,11.9845,11.4365,11.199,11.144,11.0929,11.0622,11.3224,11.021,11.4955,11.39,10.9799,11.2098,11.1674,11.4187,12.9103,15.1558,15.2361,15.3604,15.081,15.775,15.4834,16.3387,16.3664,16.7887,15.981,15.6824,15.0149,14.1754,12.0005,11.8505,11.7294,11.6444,11.5944,11.3326,11.4267,11.8514,12.095,12.2116,13.5869,15.0574,14.8725,15.0616,15.4291,16.311,17.6314,17.3188,16.7751,15.5899,16.7755,15.7532,15.251,14.939,14.2828,14.0154


In [None]:
# def groupbyVaex(df:vx.dataframe):
#     return df.groupby('z').agg('mean')
# df = vx.open(parquetFile)
df.sample(10).groupby(by='z',agg='mean',progress=True)
# gbPolars = groupbyVaex(df=df)
# pd.testing.assert_frame_equal(gbPandas,gbPolars.to_pandas().set_index('z'))

groupby [####################--------------------] 50.00% estimated time:     0.11s =  0.0m =  0.0h 

In [80]:
df = pl.read_parquet(parquetFile).to_pandas()
printmd('**Pandas**')
%timeit -n 1 -r 1 groupbyPandas(df=df)

**Pandas**

2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [81]:
df = pl.read_parquet(parquetFile)
printmd('**Polars**')
%timeit -n 1 -r 1 groupbyPolars(df=df)

**Polars**

422 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
