In [1]:
import pandas as pd
import numpy as np

def create_sample_dataframe(n_rows=1000000, n_cols=100, seed=None):
    """
    Build a synthetic benchmark frame in which every cell is a string that
    uses a comma (,) where Python would print a dot (.).

    The frame holds ``n_cols`` columns named ``column_0`` ... ``column_<n_cols-1>``
    filled with uniform random floats, followed by one ``CPF`` column of random
    integers drawn with ``randint(1, 999999999)``.  Each value is passed through
    ``str`` and then has its dots replaced by commas (``0.25`` -> ``'0,25'``).

    Parameters
    ----------
    n_rows : int
        Number of rows to generate.
    n_cols : int
        Number of random float columns (the ``CPF`` column is added on top).
    seed : int or None
        When given, values are drawn from a private ``np.random.RandomState(seed)``
        so repeated calls return identical frames.  ``None`` (the default) draws
        from NumPy's global random state, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Frame with ``n_rows`` rows and ``n_cols + 1`` columns of ``str`` values.
    """
    # The np.random module and RandomState expose the same randint /
    # random_sample functions, so one code path serves both cases.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # CPF is drawn first, then the float columns in order, which keeps the
    # consumption order of the random stream unchanged.
    cpf = rng.randint(1, 999999999, size=n_rows)
    variables = {f'column_{col_number}': rng.random_sample(n_rows) for col_number in range(n_cols)}
    variables['CPF'] = cpf

    frame = pd.DataFrame(variables)

    def to_comma_string(value):
        return str(value).replace('.', ',')

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
    # name deprecated; prefer the new name when this pandas version has it.
    if hasattr(pd.DataFrame, 'map'):
        return frame.map(to_comma_string)
    return frame.applymap(to_comma_string)

# Build the benchmark frame with the function's default size: 1,000,000 rows,
# 100 random-float columns plus CPF, every cell stored as a comma-decimal string.
df = create_sample_dataframe()

In [2]:
%%time

# applymap is element-wise, not vectorized: pandas calls the Python lambda once
# per cell, so the cost grows with rows x columns.
# Here the lambda both swaps the decimal comma for a dot and parses the float.
results_float = df.applymap(lambda x : float(x.replace(',','.')))

CPU times: user 1min 2s, sys: 11.9 s, total: 1min 14s
Wall time: 1min 18s


In [11]:
%%time

# Two-step variant: the lambda only swaps the comma for a dot (once per cell),
# then a single astype(float) parses the resulting frame of strings.
results_astype = df.applymap(lambda x : x.replace(',','.')).astype(float)

CPU times: user 1min 11s, sys: 1min 14s, total: 2min 26s
Wall time: 3min 26s


In [3]:
from tqdm.auto import tqdm

# tqdm.pandas() registers progress_* counterparts of the pandas apply-style
# methods; the description labels the progress bar.
tqdm.pandas(desc="Applying transformation")

# Now the method .progress_applymap can be used in place of .applymap.
results_tqdm = df.progress_applymap(
    lambda cell: float(cell.replace(',', '.'))
)

  from pandas import Panel


HBox(children=(FloatProgress(value=0.0, description='Applying transformation', max=101000000.0, style=Progress…




In [8]:
import swifter

In [None]:
from tqdm.auto import tqdm
import re

tqdm.pandas(desc="Applying transformation")

# progress_applymap is patched by tqdm onto pandas objects, not onto the swifter
# accessor; swifter shows a bar through its own progress_bar() switch, which
# returns the accessor so applymap can be chained.
# re.sub takes (pattern, replacement, string): the comma is the pattern and the
# cell text is the subject.  The previous argument order searched for the cell
# text inside the literal '.', returned '.' unchanged, and float('.') raised.
results_tqdm = df.swifter.progress_bar(True, desc="Applying transformation").applymap(
    lambda x: float(re.sub(',', '.', x))
)

In [None]:
from tqdm.auto import tqdm
import re

tqdm.pandas(desc="Applying transformation")

# Use swifter's own progress bar: progress_applymap is a tqdm patch on pandas
# objects and is not a method of the swifter accessor.  progress_bar() returns
# the accessor, so applymap can be chained onto it.
# re.sub's signature is (pattern, replacement, string); with the cell text passed
# first it was treated as the pattern and '.' as the subject, so the lambda
# always ended in float('.') and raised ValueError.
results_tqdm = df.swifter.progress_bar(True, desc="Applying transformation").applymap(
    lambda x: float(re.sub(',', '.', x))
)

In [2]:
%%time

# applymap is element-wise, not vectorized: the lambda runs once per cell.
# Here it only swaps the comma for a dot; the float parsing is left to a single
# astype(float) over the resulting frame.
results = df.applymap(lambda x : x.replace(',','.')).astype(float)

CPU times: user 1min 11s, sys: 1min 7s, total: 2min 19s
Wall time: 3min 7s


In [3]:
%%time

# applymap is element-wise, not vectorized: the lambda runs once per cell.
# This variant does the comma -> dot swap and the float parsing inside the lambda.
results_float = df.applymap(lambda x : float(x.replace(',','.')))

CPU times: user 1min 3s, sys: 14.1 s, total: 1min 17s
Wall time: 1min 27s


In [6]:
import swifter

In [5]:
# Preview the first rows of the generated frame (head() shows five by default).
df.head()

Unnamed: 0,column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,...,column_91,column_92,column_93,column_94,column_95,column_96,column_97,column_98,column_99,CPF
0,8944059737095,5681785827371026,7383304922962252,4011156405363565,3883991464218194,7403460143795301,6789134664944976,6454792524540731,5973447838822474,9093396495190387,...,20078413245212745,638813896903054,32572724048072055,628891495265339,7173676040506948,7141608953458511,9546820579643402,6761977692318312,48698139905629756,282103147
1,2864605597067462,5960995273531611,5848902820009593,8507212275581756,4160479518195267,26952187892745416,984567546754045,9691684542991319,387243360420228,580906132326933,...,6895151307030839,3177412870171712,17310886125306968,25133895523434724,27499701459683423,9391867343283499,496642819174105,8260141594475012,8032838912406364,808018254
2,8572805870949307,21293803700021596,7024097357738357,41111824249806206,42871668791853335,4702733717161294,7750936602559269,5375866547014164,18446808757978106,15440360640075423,...,111702738217479,8200482439758889,9123305277989886,17023948862046634,8508441569368141,6464777086957889,9247043146516686,5320035580357477,47885892036018707,688301128
3,33578700091268154,7312039746114422,7478343706042496,8594402075664087,43482935080050533,4274366757324706,2958175381631045,7316339869818789,7061935055444601,34516877552351566,...,9756653318443067,31671130071217846,4611728568168547,39120153514870415,6181653763205216,6372414425765245,2311577494178002,5135578013342201,816438066095404,330626865
4,6246383312332069,24270179178224294,4610584253361256,8681442796035721,5797251557288722,8963976926950546,4934501978239636,4268207780426093,2962149555880804,7880665363187249,...,54795184606254654,6592300357149661,1217410577764233,7030885605555837,9707525108176613,185167974031611,41372045072057917,17769071916766443,21620651852206618,874593677


In [8]:
%%time

# swifter's applymap over the whole frame.
# NOTE: the lambda only swaps the comma for a dot; nothing is parsed to float,
# so the result still holds strings and this timing does not include the
# conversion that the float(...) / astype(float) cells perform.
results_swifter = df.swifter.applymap(lambda x : x.replace(',','.'))

CPU times: user 5.3 s, sys: 17.6 s, total: 22.9 s
Wall time: 27.5 s


In [12]:
# Keep an independent snapshot of the string frame (DataFrame.copy() is a deep
# copy by default) so the original values survive any later change to df.
df_bkp = df.copy()

In [14]:
from tqdm.auto import tqdm


def _comma_to_dot(text):
    """Return ``text`` with every comma replaced by a dot."""
    return text.replace(',', '.')


# Column-by-column pass with a tqdm bar over the column labels.  The converted
# Series are not stored anywhere; the loop only exercises the per-column apply.
for col in tqdm(df.columns):
    df[col].apply(_comma_to_dot)

HBox(children=(FloatProgress(value=0.0, max=101.0), HTML(value='')))




In [36]:
# swifter's Series.apply on one column.  Select column_1 directly: going through
# a multi-column list selection first materializes a copy of columns that are
# never used.  The displayed result is the same Series of dot-decimal strings.
df['column_1'].swifter.apply(lambda x: x.replace(',', '.'))

  from pandas import Panel


HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=1000000.0, style=ProgressStyle(descrip…




0          0.5681785827371026
1         0.05960995273531611
2         0.21293803700021596
3          0.7312039746114422
4         0.24270179178224294
                 ...         
999995    0.05998580364287465
999996     0.5956784202268005
999997     0.2082541154143227
999998    0.07290311300121666
999999     0.5625249460169565
Name: column_1, Length: 1000000, dtype: object

In [17]:
%%timeit

# Single-column baseline: Series.apply runs the lambda once per element and the
# result is still a Series of strings (comma swapped for a dot, no float parsing).
df['column_0'].apply(lambda x : x.replace(',','.'), )

289 ms ± 3.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [24]:
%%prun 

# Vectorized route: the .str accessor swaps the comma for a dot across the whole
# column, then astype(float) parses it in one go.
# The result is bound to a new name instead of being written back into df.
# Overwriting df['column_0'] with floats makes this cell fail when re-run (.str
# requires strings) and breaks every cell that calls x.replace on each cell of df.
# regex=False states that the comma is a literal, independent of the pandas
# version's default for that argument.
column_0_float = df['column_0'].str.replace(',', '.', regex=False).astype(float)

 

In [40]:

def _parse_decimal_comma(text):
    """Parse one comma-decimal string (e.g. '0,25') into a float."""
    return float(text.replace(',', '.'))


# Per-element conversion of a single column; the resulting Series is the cell output.
df['column_1'].apply(_parse_decimal_comma)

 

In [38]:
%%prun 

# Profiled two-step variant: apply swaps the comma for a dot once per element,
# then a single pd.to_numeric call parses the resulting strings.
pd.to_numeric(df['column_1'].apply(lambda x : x.replace(',','.')))

 

In [None]:
# To see a tqdm bar for the applymap method (~twice as slow, though):
from tqdm.auto import tqdm

tqdm.pandas(desc="Applying transformation")

# Now the method .progress_applymap can be used in place of .applymap.
results_tqdm = (
    df
    .progress_applymap(lambda cell: cell.replace(',', '.'))
    .astype(float)
)

                          