## Requirements

In [3]:
import datetime
import pandas as pd
import polars as pl

## Reading CSV data

The file `large_data_0001.csv` is not part of this repository due to its size.  It can be generated using the `create_csv_data.py` script.  However, note that this takes a couple of minutes for 2,500,000 rows and 100 columns.

In [4]:
file_name = 'large_data_0001.csv'

In [7]:
%time df_polars = pl.read_csv('large_data_0001.csv', try_parse_dates=True)

CPU times: user 3.48 s, sys: 1.75 s, total: 5.22 s
Wall time: 890 ms


In [8]:
%time df_pandas = pd.read_csv(file_name, parse_dates=['timestamp',])

CPU times: user 8.52 s, sys: 2.33 s, total: 10.9 s
Wall time: 10.8 s


Note that polars is clearly much faster and using multiple threads as the CPU time exceeds the walltime by a factor of 4.

In [10]:
df_pandas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 788323 entries, 0 to 788322
Columns: 101 entries, timestamp to C100
dtypes: datetime64[ns](1), float64(100)
memory usage: 607.5 MB


In [11]:
df_polars.estimated_size()

636964984

The size of the dataframes in memory is comparable.

The data consists of a single column with timestamps, and 100 columns of random double precision floating point values.

## Group by

To compare performance

In [24]:
days_pandas = df_pandas.groupby(df_pandas.timestamp.dt.day).mean().drop('timestamp', axis=1)

In [25]:
days_pandas

Unnamed: 0_level_0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,...,C91,C92,C93,C94,C95,C96,C97,C98,C99,C100
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,1.616301,1.300036,0.817093,-0.91363,0.535643,-0.429099,1.379245,1.250309,0.573959,1.237401,...,1.028077,0.495149,0.427012,1.460489,1.121102,0.547403,0.544154,0.869386,0.418865,0.095361
9,1.786842,-0.056498,1.201573,-3.228591,0.872878,-1.87554,0.358851,1.010238,0.715874,0.827519,...,-0.706015,-1.444457,2.04932,3.110629,0.452043,0.017991,1.616743,2.704365,1.580477,0.372258
10,1.166582,-1.765113,2.616493,-4.584708,0.898354,-1.781852,-1.061848,3.12399,-1.051368,0.092403,...,-0.666805,-0.931251,5.258575,4.48965,0.336976,-2.257478,2.266737,3.005411,0.266224,0.728347
11,0.684858,0.075567,3.814915,-5.174099,-1.534156,-0.894954,-2.950941,6.056719,-2.593477,0.549919,...,-0.150487,-0.922968,4.638803,6.565021,-2.114639,-2.469373,2.944349,1.892101,0.34317,-0.24744
12,1.235231,1.158072,3.888712,-3.935502,-1.39885,0.48998,-3.206239,6.448509,-3.039776,-2.741378,...,2.216628,-0.64744,3.961334,8.398948,-4.383207,-1.800691,1.611947,0.44616,1.459684,1.974048
13,2.143065,0.349599,2.831058,-4.061167,-0.497423,2.322625,-3.918994,5.501107,-4.203526,-4.138458,...,4.040739,0.269732,2.153507,8.603117,-3.973901,-2.322601,1.501349,-0.178385,1.480074,3.288867
14,3.68937,0.905805,1.035925,-2.450461,0.197875,3.995446,-4.549313,6.753407,-5.230918,-5.653404,...,3.530662,1.607023,-1.462408,6.0478,-6.2362,-4.593256,1.584021,-0.868583,3.15644,3.118407
15,5.932281,2.894121,-0.157428,0.432282,1.267441,3.859335,-6.094502,5.347022,-4.258348,-3.587544,...,4.367099,-0.078792,-1.9476,6.475665,-8.298667,-7.190138,2.670641,-3.040704,5.743192,2.828484
16,7.517741,-0.668608,-2.349386,0.576194,3.205817,3.418862,-5.238684,5.185432,-2.962818,-2.843899,...,6.128017,-0.476227,-2.057771,5.692659,-10.03545,-8.959109,0.89354,-3.915458,6.361079,2.19918
17,8.268051,-2.470533,-2.288816,0.439305,2.777163,3.453478,-5.221394,6.291019,-4.465589,-2.325201,...,6.942416,0.633056,-2.2423,4.354759,-11.718617,-7.888048,-0.178953,-6.511815,5.713905,3.004642


In [18]:
days_polars = df_polars.group_by_dynamic('timestamp', every='1d').agg(pl.exclude('timestamp').mean())

In [19]:
days_polars

timestamp,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33,C34,C35,C36,…,C64,C65,C66,C67,C68,C69,C70,C71,C72,C73,C74,C75,C76,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86,C87,C88,C89,C90,C91,C92,C93,C94,C95,C96,C97,C98,C99,C100
datetime[μs],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2024-09-08 00:00:00,1.616301,1.300036,0.817093,-0.91363,0.535643,-0.429099,1.379245,1.250309,0.573959,1.237401,0.982781,1.159419,0.585748,2.077831,1.345749,1.448089,1.339835,1.6741,1.16905,1.125851,1.4459,1.007367,1.406774,1.651724,2.013898,0.903761,1.686244,2.16086,0.48558,0.60199,0.911072,1.105152,1.249857,2.314174,-0.020963,1.621261,…,1.973455,0.193607,-0.228364,2.240416,0.523938,-0.060897,2.703007,0.729973,1.013803,1.009531,0.740613,1.451533,2.310108,1.665348,0.983064,0.342018,0.904942,1.264219,1.118301,1.154367,0.917321,1.626718,0.423192,0.455271,0.949298,1.498886,0.752201,1.028077,0.495149,0.427012,1.460489,1.121102,0.547403,0.544154,0.869386,0.418865,0.095361
2024-09-09 00:00:00,1.786842,-0.056498,1.201573,-3.228591,0.872878,-1.87554,0.358851,1.010238,0.715874,0.827519,2.091645,1.052777,-0.118401,1.590183,2.055889,0.140329,2.170482,2.159051,1.638223,1.479489,2.707478,2.720873,2.553631,0.043101,2.904378,1.331285,2.855481,3.873791,1.276888,0.344223,3.335522,0.844653,1.134549,2.265679,0.05006,2.27837,…,0.460976,0.410341,0.108861,4.748316,1.627707,0.539094,3.540375,-0.552004,3.050409,0.69322,2.125061,0.401773,3.052611,1.326555,1.08147,-2.080009,-0.369088,0.746834,3.414918,0.333866,0.105144,-0.072555,-0.910073,-0.06145,0.458449,2.09503,1.380138,-0.706015,-1.444457,2.04932,3.110629,0.452043,0.017991,1.616743,2.704365,1.580477,0.372258
2024-09-10 00:00:00,1.166582,-1.765113,2.616493,-4.584708,0.898354,-1.781852,-1.061848,3.12399,-1.051368,0.092403,2.106916,-0.72748,0.498398,1.416709,0.363319,-0.391991,3.839071,-0.559622,1.432966,2.574934,3.257619,3.773844,1.94342,0.487923,4.609183,0.432212,2.489001,5.194257,1.394994,-1.117632,5.396862,1.058438,-0.544048,3.556528,-0.427315,2.231885,…,-2.881626,0.569615,-1.261293,5.996657,2.038671,1.031554,4.983702,1.927114,3.448345,0.929329,1.797745,1.851652,5.041373,2.039314,3.29611,-1.226162,0.158645,-1.090539,4.955187,-0.847299,2.466571,-0.566162,-0.115582,-1.271779,-0.900249,1.29768,-0.05552,-0.666805,-0.931251,5.258575,4.48965,0.336976,-2.257478,2.266737,3.005411,0.266224,0.728347
2024-09-11 00:00:00,0.684858,0.075567,3.814915,-5.174099,-1.534156,-0.894954,-2.950941,6.056719,-2.593477,0.549919,3.437001,-2.011032,1.015086,1.589247,-1.553823,-1.732432,5.557026,-0.534163,1.742049,2.292838,2.839152,2.494044,4.129099,3.513298,5.858024,0.533401,3.977967,3.367904,0.175142,-1.95852,5.924509,3.221549,-2.133666,2.881073,-0.571055,0.948886,…,-4.786809,1.142252,-3.188121,6.507497,-0.666797,0.535986,4.66567,3.929653,2.002436,2.061943,1.742559,3.647125,6.660319,0.926644,3.59704,-0.11981,0.077828,-3.857305,6.467044,-2.50863,2.708966,1.772397,0.719615,-1.489783,-2.405882,0.509248,-1.610418,-0.150487,-0.922968,4.638803,6.565021,-2.114639,-2.469373,2.944349,1.892101,0.34317,-0.24744
2024-09-12 00:00:00,1.235231,1.158072,3.888712,-3.935502,-1.39885,0.48998,-3.206239,6.448509,-3.039776,-2.741378,3.292258,-1.328935,1.091565,3.241458,-1.942603,-2.442468,4.576457,-0.607023,3.264525,2.481576,3.089234,1.256887,4.097257,4.340677,4.585012,0.637519,4.171094,0.218176,-3.239449,0.180732,4.846397,4.185399,-2.495979,3.723028,2.443621,-0.20868,…,-3.441645,1.108775,-1.878083,6.426544,-2.17253,2.38261,4.544575,5.203969,1.507189,3.275772,1.413667,3.411534,6.781933,0.198718,2.15762,1.793607,-1.232916,-4.271413,7.815731,-3.907045,3.654803,3.66498,1.935722,-0.288148,-3.070293,-0.746518,-1.875368,2.216628,-0.64744,3.961334,8.398948,-4.383207,-1.800691,1.611947,0.44616,1.459684,1.974048
2024-09-13 00:00:00,2.143065,0.349599,2.831058,-4.061167,-0.497423,2.322625,-3.918994,5.501107,-4.203526,-4.138458,2.289986,-0.387025,0.51745,3.327066,-0.729529,-4.005852,5.455174,-0.766733,1.826089,2.457233,2.775518,-2.161288,2.321094,6.338865,3.58725,1.011957,2.155379,-2.228234,-1.870754,1.196543,2.069739,5.737658,-2.350389,6.229373,6.908057,1.81484,…,-3.649144,-0.495649,-2.05956,6.667012,-3.24819,2.73069,4.887371,6.925909,2.023793,5.549064,0.025296,4.60181,5.296645,0.77579,5.825648,1.947121,-0.602369,-4.279528,8.019723,-2.550035,3.023903,3.01404,1.207319,-2.977805,-2.334758,-1.400146,-2.24348,4.040739,0.269732,2.153507,8.603117,-3.973901,-2.322601,1.501349,-0.178385,1.480074,3.288867
2024-09-14 00:00:00,3.68937,0.905805,1.035925,-2.450461,0.197875,3.995446,-4.549313,6.753407,-5.230918,-5.653404,2.938637,0.153726,1.197427,2.067732,-1.365605,-5.68802,4.539515,-1.777085,1.622908,3.525372,0.698851,-3.145046,1.997584,5.499385,3.285727,-1.987387,2.055214,-3.124402,-0.366389,1.446609,1.710554,6.587202,0.062492,7.041162,5.291422,1.38144,…,-3.494436,-1.194484,-1.848601,8.443746,-2.532608,2.584296,5.737177,10.316784,0.414589,4.43434,-0.519822,5.738226,5.924964,0.894796,5.122497,2.079483,0.913236,-2.081347,9.135655,-1.885221,1.692249,3.962532,1.388968,-1.824095,-4.063396,1.790472,-3.882066,3.530662,1.607023,-1.462408,6.0478,-6.2362,-4.593256,1.584021,-0.868583,3.15644,3.118407
2024-09-15 00:00:00,5.932281,2.894121,-0.157428,0.432282,1.267441,3.859335,-6.094502,5.347022,-4.258348,-3.587544,3.663918,-4.18298,1.212095,3.61274,-0.61018,-4.451037,2.743829,-3.875607,1.029112,5.290608,1.67304,-2.656575,2.080336,4.628141,1.544832,-1.280446,3.877778,-2.796863,-1.458438,1.727956,-1.082872,7.785599,1.103394,8.09311,4.422299,-0.447836,…,-1.69825,-0.576541,-1.86137,8.697586,-3.124559,2.479545,6.906934,10.803066,0.282833,2.284327,-0.326932,7.172197,5.945314,3.088151,2.197997,-0.27037,-1.429124,-1.30821,11.043346,-3.719491,-0.113464,4.767791,2.506543,-2.080722,-2.380697,2.936742,-6.838807,4.367099,-0.078792,-1.9476,6.475665,-8.298667,-7.190138,2.670641,-3.040704,5.743192,2.828484
2024-09-16 00:00:00,7.517741,-0.668608,-2.349386,0.576194,3.205817,3.418862,-5.238684,5.185432,-2.962818,-2.843899,4.891795,-4.547975,1.541345,2.686747,-0.905386,-3.911697,2.195091,-5.593515,0.622991,4.658961,1.252889,-4.243377,3.599806,5.847883,-0.225521,-1.491861,4.306071,-1.05586,-4.476028,4.9063,-3.422875,6.70291,-0.115326,10.419873,5.171338,-1.505992,…,-1.140233,-0.409036,-2.908541,9.416229,-4.08284,5.125313,7.58932,10.250572,-0.558202,2.438107,-1.391983,10.640271,4.15382,4.113465,2.457383,-2.969235,-1.040482,-2.388332,7.528498,-6.64598,0.688663,5.440681,2.436438,-2.117734,-3.446317,1.096174,-7.180263,6.128017,-0.476227,-2.057771,5.692659,-10.03545,-8.959109,0.89354,-3.915458,6.361079,2.19918
2024-09-17 00:00:00,8.268051,-2.470533,-2.288816,0.439305,2.777163,3.453478,-5.221394,6.291019,-4.465589,-2.325201,4.4229,-6.556634,0.562967,1.414037,-0.563072,-2.355096,1.524032,-4.137663,2.088169,4.103109,0.953313,-3.695529,4.471817,7.70909,-1.842608,-2.125707,4.269358,0.21231,-4.837307,3.963298,-3.396995,4.917516,-1.507746,10.185949,7.713718,-2.489582,…,-2.92,-1.0288,-1.021764,9.170211,-4.298342,6.045182,8.165771,9.693711,-1.630529,2.791592,-1.688646,11.959492,1.889026,3.290468,2.151743,-3.97619,-0.332475,-3.876011,7.277666,-7.432629,0.214592,2.865362,3.299368,-0.088393,-3.592759,2.012585,-7.478848,6.942416,0.633056,-2.2423,4.354759,-11.718617,-7.888048,-0.178953,-6.511815,5.713905,3.004642


In [26]:
%timeit days_pandas = df_pandas.groupby(df_pandas.timestamp.dt.day).mean().drop('timestamp', axis=1)

612 ms ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [27]:
%timeit days_polars = df_polars.group_by_dynamic('timestamp', every='1d').agg(pl.exclude('timestamp').mean())

130 ms ± 2.18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [28]:
%timeit df_pandas['avg'] = df_pandas[[f'C{i}' for i in range(1, 101)]].sum(axis=1)

255 ms ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [29]:
%timeit df_polars.select(pl.all(), pl.mean_horizontal(pl.exclude('timestamp')).alias('avg'))

54.5 ms ± 1.59 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


On these operations, polars is faster as well.