### Importing Performance Evaluator functions

In [1]:
# Import the PerformanceEvaluator class
from PerformanceEvaluator import PerformanceEvaluator

# Initialize the evaluator
evaluator = PerformanceEvaluator()

# Example: Testing Data Retrieval usage with Polars and Pandas
print("Testing Data Retrieval")


Testing Data Retrieval


### Testing the performance of Pandas and Polar Functions

In [2]:
# import pandas and polars libraries
import polars as pl
import pandas as pd

# Initialize the evaluator
evaluator = PerformanceEvaluator()

# Define the number of iterations
n = 100

# Load Data Frames
df_polars = pl.read_csv('data.csv')
df_pandas = pd.read_csv('data.csv')

In [3]:
# Testing Data Retrieval usage with Polars and Pandas
print("Testing Data Retrieval")
polars_mean_time = evaluator.averaging(pl.read_csv, 'data.csv', n=n, library_name="Polars")
pandas_mean_time = evaluator.averaging(pd.read_csv, 'data.csv', n=n, library_name="Pandas")

# Calculate the performance difference
evaluator.compare_execution_times(polars_mean_time, pandas_mean_time)

Testing Data Retrieval
Polars mean execution time over 100 runs: 0.547119 seconds
Pandas mean execution time over 100 runs: 6.809997 seconds
Polars Wins!
The execution time for Polars was 6.26 seconds faster than Pandas.
Relative Difference: 91.97% faster!


In [4]:
# Test Filtering
print("\nTesting Filtering")
polars_mean_time = evaluator.averaging(lambda: df_polars.filter(pl.col('Vict Age') > 2), n=n, library_name="Polars")
pandas_mean_time = evaluator.averaging(lambda: df_pandas[df_pandas['Vict Age'] > 2], n=n, library_name="Pandas")

# Calculate the performance difference
evaluator.compare_execution_times(polars_mean_time, pandas_mean_time)


Testing Filtering
Polars mean execution time over 100 runs: 0.068801 seconds
Pandas mean execution time over 100 runs: 0.202896 seconds
Polars Wins!
The execution time for Polars was 0.13 seconds faster than Pandas.
Relative Difference: 66.09% faster!


In [5]:
# Test Aggregation
print("\nTesting Aggregation")
polars_mean_time = evaluator.averaging(lambda: df_polars.groupby('AREA NAME').agg(pl.col('Vict Age').mean()), n=n, library_name="Polars")
pandas_mean_time = evaluator.averaging(lambda: df_pandas.groupby('AREA NAME')['Vict Age'].mean(), n=n, library_name="Pandas")

# Calculate the performance difference
evaluator.compare_execution_times(polars_mean_time, pandas_mean_time)


Testing Aggregation


  polars_mean_time = evaluator.averaging(lambda: df_polars.groupby('AREA NAME').agg(pl.col('Vict Age').mean()), n=n, library_name="Polars")


Polars mean execution time over 100 runs: 0.029085 seconds
Pandas mean execution time over 100 runs: 0.079509 seconds
Polars Wins!
The execution time for Polars was 0.05 seconds faster than Pandas.
Relative Difference: 63.42% faster!


In [6]:
# Test Joining
print("\nTesting Joining")
polars_mean_time = evaluator.averaging(lambda: df_polars.join(df_polars, on='id', how='inner'), n=n, library_name="Polars")
pandas_mean_time = evaluator.averaging(lambda: pd.merge(df_pandas, df_pandas, on='id', how='inner'), n=n, library_name="Pandas")

# Calculate the performance difference
evaluator.compare_execution_times(polars_mean_time, pandas_mean_time)


Testing Joining
Polars mean execution time over 100 runs: 0.434027 seconds
Pandas mean execution time over 100 runs: 2.035426 seconds
Polars Wins!
The execution time for Polars was 1.60 seconds faster than Pandas.
Relative Difference: 78.68% faster!


In [7]:
# Test Transformation
print("\nTesting Transformation")
polars_mean_time = evaluator.averaging(lambda: df_polars.with_columns((pl.col('Vict Sex') + pl.col('Vict Descent')).alias('C')), n=n, library_name="Polars")
pandas_mean_time = evaluator.averaging(lambda: df_pandas.assign(C=df_pandas['Vict Sex'] + df_pandas['Vict Descent']), n=n, library_name="Pandas")

# Calculate the performance difference
evaluator.compare_execution_times(polars_mean_time, pandas_mean_time)


Testing Transformation
Polars mean execution time over 100 runs: 0.016174 seconds
Pandas mean execution time over 100 runs: 0.305624 seconds
Polars Wins!
The execution time for Polars was 0.29 seconds faster than Pandas.
Relative Difference: 94.71% faster!


In [8]:
# Test Sorting
print("\nTesting Sorting")
polars_mean_time = evaluator.averaging(lambda: df_polars.sort('AREA NAME'), n=n, library_name="Polars")
pandas_mean_time = evaluator.averaging(lambda: df_pandas.sort_values('AREA NAME'), n=n, library_name="Pandas")

# Calculate the performance difference
evaluator.compare_execution_times(polars_mean_time, pandas_mean_time)


Testing Sorting
Polars mean execution time over 100 runs: 0.508413 seconds
Pandas mean execution time over 100 runs: 1.243820 seconds
Polars Wins!
The execution time for Polars was 0.74 seconds faster than Pandas.
Relative Difference: 59.12% faster!


In [9]:
# Test Window Functions
print("\nTesting Window Functions")
polars_mean_time = evaluator.averaging(lambda: df_polars.with_columns(pl.col('Vict Age').rolling_mean(window_size=3).alias('Vict_Sex_B')), n=n, library_name="Polars")
pandas_mean_time = evaluator.averaging(lambda: df_pandas.assign(rolling_mean_B=df_pandas['Vict Age'].rolling(window=3).mean()), n=n, library_name="Pandas")

# Calculate the performance difference
evaluator.compare_execution_times(polars_mean_time, pandas_mean_time)


Testing Window Functions
Polars mean execution time over 100 runs: 0.008542 seconds
Pandas mean execution time over 100 runs: 0.192828 seconds
Polars Wins!
The execution time for Polars was 0.18 seconds faster than Pandas.
Relative Difference: 95.57% faster!
