<div style="line-height:0.5">
<h1 style="color:lime"> Time calculation </h1>
<span style="display: inline-block;">
    <h3 style="color: lightblue; display: inline;">Keywords:</h3> cProfile profiler + pandas IndexSlice() + pandas apply() + time.time()
</span>
</div>

In [50]:
import time
import timeit
import cProfile
import datetime
import numpy as np
import pandas as pd

<h2 style="color:lime"> <u> Example #1 </u></h2>
Using time

In [24]:
def method_to_measure_11():
    # Generate two random 
    size = 10000
    array1 = np.random.rand(size, size)
    array2 = np.random.rand(size, size)
    
    # Perform element-wise multiplication
    multiplied_array = np.multiply(array1, array2)
    # Perform element-wise addition
    added_array = np.add(multiplied_array, array1)
    # Calculate the sum of all elements
    result = np.sum(added_array)
    
    return result

In [25]:
def method_to_measure_12():
    size = 10000
    matrix1 = np.random.rand(size, size)
    matrix2 = np.random.rand(size, size)
    
    # Matrix multiplication
    result_matrix = np.matmul(matrix1, matrix2)
    # Square each element in the result_matrix
    squared_matrix = [[x**2 for x in row] for row in result_matrix]
    # Calculate the sum of squared elements
    sum_of_squares = sum(sum(row) for row in squared_matrix)
    
    return sum_of_squares

In [26]:
start_time = time.time()
result_numpy_complex = method_to_measure_11()
end_time = time.time()
elapsed_time = end_time - start_time
print(f"The execution time of Method 11 is : {elapsed_time} seconds")
print(f"Result: {result_numpy_complex}")

The execution time of Method 11 is : 8.13084602355957 seconds
Result: 75000272.60400487


In [27]:
start_time = time.time()
result_numpy_complex = method_to_measure_12()
end_time = time.time()
elapsed_time = end_time - start_time
print(f"The execution time of Method 12 is : {elapsed_time} seconds")
print(f"Result: {result_numpy_complex}")

The execution time of Method 12 is : 111.8476243019104 seconds
Result: 625017595248108.0


<h2 style="color:lime"> <u> Example #2 </u></h2>
Using time (but with context manager)

In [34]:
class Timer:
    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, *args):
        self.end_time = time.time()
        self.elapsed_time = self.end_time - self.start_time
        print(f"Method 12 Execution time: {self.elapsed_time} seconds")

# Use Timer within a context manager
with Timer():
    method_to_measure_12

Method 12 Execution time: 1.6689300537109375e-06 seconds


<h2 style="color:lime"> <u> Example #3 </u></h2>
Using timeit

In [9]:
level_1 = ['A1', 'A1', 'A1', 'B1', 'B1', 'B1', 'C1', 'C1', 'C1']
level_2 = ['a_1', 'b_1', 'c_1', 'a_1', 'b_1', 'c_1', 'a_1', 'b_1', 'c_1']

matri_data = [['a', 23, 'h', 'o', 45, 'v', 'a3', 1, 'b1'], ['b', 34, 'i', 'p', 3, 'w', 'a4', 32, 'b2'], 
        ['c', 5, 'j', 'q', 7, 'x', 'a5', 6, 'b3'], ['d', 2, 'k', 'r', 5, 'y', 'a6', 76, 'b4'], 
        ['e', 78, 'l', 's', 65, 'z', 'a7', 9, 'b5'], ['f', 98, 'm', 't', 23, 'a1', 'a8', 14, 'b6'], 
        ['g', 3, 'n', 'u', 1, 'a2', 'a9', 45, 'b7']]

In [10]:
""" Multi-level column labels. 
Create a MultiIndex object to set hierarchical column labels by taking a list of tuples which contains labels for each level. 
""" 
columns = pd.MultiIndex.from_tuples(list(zip(level_1, level_2)))
df1 = pd.DataFrame(matri_data, columns=columns)
date = ['1/1/2023','1/2/2023','1/3/2023','1/4/2023','1/5/2023','1/6/2023','1/7/2023']

## Insert date 
df1.insert(0, 'date', date)
df1.set_index('date', inplace=True)
df1

Unnamed: 0_level_0,A1,A1,A1,B1,B1,B1,C1,C1,C1
Unnamed: 0_level_1,a_1,b_1,c_1,a_1,b_1,c_1,a_1,b_1,c_1
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1/1/2023,a,23,h,o,45,v,a3,1,b1
1/2/2023,b,34,i,p,3,w,a4,32,b2
1/3/2023,c,5,j,q,7,x,a5,6,b3
1/4/2023,d,2,k,r,5,y,a6,76,b4
1/5/2023,e,78,l,s,65,z,a7,9,b5
1/6/2023,f,98,m,t,23,a1,a8,14,b6
1/7/2023,g,3,n,u,1,a2,a9,45,b7


In [11]:
""" Task 1 with loop.
Filter rows with conditions.
N.B.
For each column_name (A1,B1,C1):
    # 'b_1' column values greater than 30 
    # 'a_1' column values equal to 'c'.
"""
for column_name in df1.columns.get_level_values(0).unique():
    df1.loc[(df1[column_name, 'b_1'] > 30) | (df1[column_name, 'a_1'] == 'c'), (column_name,'e_1')] = 1

df1 

Unnamed: 0_level_0,A1,A1,A1,B1,B1,B1,C1,C1,C1,A1,B1,C1
Unnamed: 0_level_1,a_1,b_1,c_1,a_1,b_1,c_1,a_1,b_1,c_1,e_1,e_1,e_1
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1/1/2023,a,23,h,o,45,v,a3,1,b1,,1.0,
1/2/2023,b,34,i,p,3,w,a4,32,b2,1.0,,1.0
1/3/2023,c,5,j,q,7,x,a5,6,b3,1.0,,
1/4/2023,d,2,k,r,5,y,a6,76,b4,,,1.0
1/5/2023,e,78,l,s,65,z,a7,9,b5,1.0,1.0,
1/6/2023,f,98,m,t,23,a1,a8,14,b6,1.0,,
1/7/2023,g,3,n,u,1,a2,a9,45,b7,,,1.0


In [12]:
df2 = df1

In [13]:
""" Mask dataframe.
N.B.
"df1[mask]" => returns the very same df1 !
"""
mask_for_all_column_name_wrong = (df1.loc[:, pd.IndexSlice[:, 'b_1']] > 30).any(axis=1) | \
    (df1.loc[:, pd.IndexSlice[:, 'a_1']] == 'c').any(axis=1)
mask_for_all_column_name_wrong

date
1/1/2023    True
1/2/2023    True
1/3/2023    True
1/4/2023    True
1/5/2023    True
1/6/2023    True
1/7/2023    True
dtype: bool

In [14]:
""" Mask as a list of lists representing the conditions for each date and column combination. """
mask_correct = []

for date, data in df1.groupby(level=0):
    date_mask_correct = []
    for col in ['A1', 'B1', 'C1']:
        condition_b1 = (data[(col, 'b_1')] > 30).any()
        condition_a1 = (data[(col, 'a_1')] == 'c').any()

        # Append True if either condition is met, otherwise append False
        date_mask_correct.append(condition_b1 or condition_a1)

    # Append the mask for the current date to the main mask
    mask_correct.append(date_mask_correct)

mask_correct

[[False, True, False],
 [True, False, True],
 [True, False, False],
 [False, False, True],
 [True, True, False],
 [True, False, False],
 [False, False, True]]

In [15]:
""" Replace NaNs and 1.0 values in 'e_1' columns with 0 and 1, respectively """

df1[('A1', 'e_1')] = df1[('A1', 'e_1')].fillna(0).replace(1.0, 1)
df1[('B1', 'e_1')] = df1[('B1', 'e_1')].fillna(0).replace(1.0, 1)
df1[('C1', 'e_1')] = df1[('C1', 'e_1')].fillna(0).replace(1.0, 1)

print(df1)

          A1          B1          C1           A1   B1   C1
         a_1 b_1 c_1 a_1 b_1 c_1 a_1 b_1 c_1  e_1  e_1  e_1
date                                                       
1/1/2023   a  23   h   o  45   v  a3   1  b1  0.0  1.0  0.0
1/2/2023   b  34   i   p   3   w  a4  32  b2  1.0  0.0  1.0
1/3/2023   c   5   j   q   7   x  a5   6  b3  1.0  0.0  0.0
1/4/2023   d   2   k   r   5   y  a6  76  b4  0.0  0.0  1.0
1/5/2023   e  78   l   s  65   z  a7   9  b5  1.0  1.0  0.0
1/6/2023   f  98   m   t  23  a1  a8  14  b6  1.0  0.0  0.0
1/7/2023   g   3   n   u   1  a2  a9  45  b7  0.0  0.0  1.0


In [16]:
""" Merge the columns of first index  """
df_ok = df1.reindex(columns=['A1','B1','C1'], level=0)
df_ok

Unnamed: 0_level_0,A1,A1,A1,A1,B1,B1,B1,B1,C1,C1,C1,C1
Unnamed: 0_level_1,a_1,b_1,c_1,e_1,a_1,b_1,c_1,e_1,a_1,b_1,c_1,e_1
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1/1/2023,a,23,h,0.0,o,45,v,1.0,a3,1,b1,0.0
1/2/2023,b,34,i,1.0,p,3,w,0.0,a4,32,b2,1.0
1/3/2023,c,5,j,1.0,q,7,x,0.0,a5,6,b3,0.0
1/4/2023,d,2,k,0.0,r,5,y,0.0,a6,76,b4,1.0
1/5/2023,e,78,l,1.0,s,65,z,1.0,a7,9,b5,0.0
1/6/2023,f,98,m,1.0,t,23,a1,0.0,a8,14,b6,0.0
1/7/2023,g,3,n,0.0,u,1,a2,0.0,a9,45,b7,1.0


In [17]:
""" Set the value of the (column_name, 'e_1') column to 1 where the conditions are met """

mask = (df2.loc[:, pd.IndexSlice[:, 'b_1']] > 30).any(axis=1) | (df2.loc[:, pd.IndexSlice[:, 'a_1']] == 'c').any(axis=1)
df2.loc[mask, pd.IndexSlice[column_name, 'e_1']] = 1

df_ok2 = df2.reindex(columns=['A1','B1','C1'], level=0)
df_ok2

Unnamed: 0_level_0,A1,A1,A1,A1,B1,B1,B1,B1,C1,C1,C1,C1
Unnamed: 0_level_1,a_1,b_1,c_1,e_1,a_1,b_1,c_1,e_1,a_1,b_1,c_1,e_1
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1/1/2023,a,23,h,0.0,o,45,v,1.0,a3,1,b1,1.0
1/2/2023,b,34,i,1.0,p,3,w,0.0,a4,32,b2,1.0
1/3/2023,c,5,j,1.0,q,7,x,0.0,a5,6,b3,1.0
1/4/2023,d,2,k,0.0,r,5,y,0.0,a6,76,b4,1.0
1/5/2023,e,78,l,1.0,s,65,z,1.0,a7,9,b5,1.0
1/6/2023,f,98,m,1.0,t,23,a1,0.0,a8,14,b6,1.0
1/7/2023,g,3,n,0.0,u,1,a2,0.0,a9,45,b7,1.0


In [18]:
""" Store instruction in strings to use timeit """
code_with_loop = """
for column_name in ['B1']:
    mask = (df1.loc[:, pd.IndexSlice[:, 'b_1']] > 30).any(axis=1) | (df1.loc[:, pd.IndexSlice[:, 'a_1']] == 'c').any(axis=1)
    df1.loc[mask, pd.IndexSlice[column_name, 'e_1']] = 1
"""
code_without_loop = """
mask = (df1.loc[:, pd.IndexSlice[:, 'b_1']] > 30).any(axis=1) | (df1.loc[:, pd.IndexSlice[:, 'a_1']] == 'c').any(axis=1)
df1.loc[mask, pd.IndexSlice['B1', 'e_1']] = 1
"""

In [19]:
""" Measure time """
# instruction 1) with loop
time_with_loop = timeit.timeit(stmt=code_with_loop, globals=globals(), number=10000)
# instruction 2) without loop
time_without_loop = timeit.timeit(stmt=code_without_loop, globals=globals(), number=10000)

print("Time taken for method 1)  {:.6f} seconds".format(time_with_loop))
print("Time taken for method 2)  {:.6f} seconds".format(time_without_loop))

Time taken 1)  33.308959 seconds
Time taken 2)  33.634081 seconds


<h2 style="color:lime"> <u> Example #4 </u></h2>
Using timeit

In [39]:
def method_example_4():
    # Seed for reproducibility
    np.random.seed(0)  
    data = {
        'Original_A': np.random.rand(10),
        'Original_B': np.random.randint(1, 100, size=10),
        'Original_C': np.random.choice(['neck', 'arm', 'rib cage'], size=10)
    }
    df = pd.DataFrame(data)
    
    # Rename columns
    df.rename(columns={'Original_A': 'Modified_A', 'Original_B': 'Modified_B', 'Original_C': 'Modified_C'}, inplace=True)
    
    # Calculate the cumulative sum of 'Modified_A'
    df['Cumulative_Sum_A'] = np.cumsum(df['Modified_A'])
    # Calculate the rolling mean of 'Modified_B' using a window of 3
    df['Rolling_Mean_B'] = df['Modified_B'].rolling(window=3).mean()
    # Calculate the exponential moving average (EMA)
    alpha = 0.2
    df['EMA_A'] = df['Modified_A'].ewm(alpha=alpha, adjust=False).mean()
    
    return df

# Try the method 
modified_df = method_example_4()
modified_df


Unnamed: 0,Modified_A,Modified_B,Modified_C,Cumulative_Sum_A,Rolling_Mean_B,EMA_A
0,0.548814,40,neck,0.548814,,0.548814
1,0.715189,88,neck,1.264003,,0.582089
2,0.602763,47,arm,1.866766,58.333333,0.586224
3,0.544883,89,rib cage,2.411649,74.666667,0.577956
4,0.423655,82,neck,2.835304,72.666667,0.547095
5,0.645894,38,rib cage,3.481198,69.666667,0.566855
6,0.437587,26,neck,3.918786,48.666667,0.541002
7,0.891773,78,arm,4.810559,47.333333,0.611156
8,0.963663,73,arm,5.774221,59.0,0.681657
9,0.383442,10,rib cage,6.157663,53.666667,0.622014


In [40]:
""" Measure execution time using timeit.
The globals() function is necessary to access the global namespace (which includes the functions defined in the current module) 
to avoid to import the method explicitly, from another module and pass it as setup parameter.
"""
code = "method_example_4()"
execution_time = timeit.timeit(stmt=code, globals=globals(), number=1)
print(f"Execution time: {execution_time} seconds")

Execution time: 0.013795158010907471 seconds


<h2 style="color:lime"> <u> Example #5 </u></h2>
Using datetime

In [41]:
def method_used_for_5():
    size = 5000
    np_array = np.random.rand(size, size)
    df = pd.DataFrame(np_array)

    # Standardization
    df = df.apply(lambda x: (x - x.mean()) / x.std())      
    # Sum of squared values in each row
    df['sum_of_squares'] = df.apply(lambda row: sum(row**2), axis=1)
    # Mean of the sum_of_squares column
    result = df['sum_of_squares'].mean()
    
    return result

In [43]:
start_time = datetime.datetime.now()
result_complex = method_used_for_5()
end_time = datetime.datetime.now()
elapsed_time = end_time - start_time
elapsed_time = end_time - start_time
print(f"Execution time of method 5 with datatime : {elapsed_time} seconds")
print(f"Result: {result_complex}")

Execution time of method 5 with datatime : 0:00:14.021569 seconds
Result: 4999.0


<h2 style="color:lime"> <u> Example #6 </u></h2>
Using cProfile

In [60]:
def arimean(*args):
    return sum(args) / len(args)
def curry(func):
    curry.__curried_func_name__ = func.__name__
    f_args, f_kwargs = [], {}
    
    def funz_inside(*args, **kwargs):
        nonlocal f_args, f_kwargs
        if args or kwargs:
            f_args += args
            f_kwargs.update(kwargs)
            return funz_inside
        else:
            result = func(*f_args, *f_kwargs)
            f_args, f_kwargs = [], {}
            return result
    return funz_inside

curried_arimean = curry(arimean)
curried_arimean(2)(5)(9)(4, 5)
curried_arimean(5, 9)

# Arithmetic mean 
print(curried_arimean())
print(curried_arimean(3)(4)(7)())
print(curried_arimean(4)(3, 7)())

5.571428571428571
4.666666666666667
4.666666666666667


In [61]:
""" # Measuring time with cProfile.
N.B.
run command from terminal => python -m cProfile -o profile_results.prof your_script.py """

cProfile.run("curried_arimean(2)(5)(9)(4, 5)", sort="cumulative")

         11 function calls in 0.000 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 <string>:1(<module>)
        4    0.000    0.000    0.000    0.000 152946472.py:7(f)
        4    0.000    0.000    0.000    0.000 {method 'update' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




In [None]:
def arimean(*args):
    """ Calculate the arithmetic mean of some numbers.\\
    It takes any number of arguments using the *args syntax (e.g. individual nums or inside a list). 
    """
    return sum(args) / len(args)

In [75]:
def curry(func):
    """ Take another function as an argument, to return a new function that can be used to curry a function.\\ 
    
    Details: 
        Currying is a functional programming technique of breaking down the evaluation of a function,\\
        It can be partially applied with some arguments and a new function that accepts the remaining arguments is returned.
    """
    # Keep the name of the curried function
    curry.__curried_func_name__ = func.__name__
    f_args, f_kwargs = [], {}
    def func_curried(*args, **kwargs):
        """ Define the curried function 'f' that takes variable arguments and keyword arguments """
        nonlocal f_args, f_kwargs
        # Accumulate provided args in the lists and return 'f'
        if args or kwargs:
            f_args += args
            f_kwargs.update(kwargs)
            return func_curried
        else:
            # If no arguments are provided, call the original 'func' with accumulated arguments
            data = [i for i in range(1000000)]
            result = func(*data)
            # Reset the accumulated arguments for the next call
            f_args, f_kwargs = [], {}
            return result
    return func_curried

curried_arimean = curry(arimean)
curried_arimean

<function __main__.curry.<locals>.f(*args, **kwargs)>

In [76]:
""" Get time by profiling results of the method. 
Using the Python "__main__ guard" is necessary to ensure that the profiling code works.
"""
if __name__ == "__main__": 
    # Create a cProfile object
    profiler = cProfile.Profile()
    # Run your code within the profiler
    profiler.enable()
    
    # Run the curried_arimean function with a larger dataset
    curried_arimean()
    
    # Stop the profiler
    profiler.disable()
    profiler.print_stats(sort="cumulative")

         6 function calls in 0.158 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.028    0.028    0.158    0.158 817990695.py:8(f)
        1    0.115    0.115    0.115    0.115 817990695.py:16(<listcomp>)
        1    0.000    0.000    0.014    0.014 817990695.py:1(arimean)
        1    0.014    0.014    0.014    0.014 {built-in method builtins.sum}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.len}


