<a href="https://colab.research.google.com/github/imsumedhaa/Fireducks-vs-Pandas-The-comparison-test/blob/main/Demo_code_Fireducks_vs_pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

!pip install fireducks


import pandas as pd
import fireducks.pandas as fd
import time
import numpy as np

#Create dummy sales datasets

df = pd.DataFrame({
  "product": np.random.choice(["apple", "banana", "orange", "kiwi"], size=1_000_000),
  "region": np.random.choice(["north", "south", "east", "west"], size=1_000_000),
  "price": np.random.uniform(10, 200, size=1_000_000),
  "units_sold": np.random.randint(1, 50, size=1_000_000)
})
df.to_csv("sales_data.csv", index=False)
print("✅ Created 'sales_data.csv'")



Collecting fireducks
  Downloading fireducks-1.2.6-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting firefw==1.2.6 (from fireducks)
  Downloading firefw-1.2.6-py3-none-any.whl.metadata (818 bytes)
Collecting pyarrow<19.1,>=19.0 (from fireducks)
  Downloading pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Downloading fireducks-1.2.6-cp311-cp311-manylinux_2_28_x86_64.whl (7.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading firefw-1.2.6-py3-none-any.whl (12 kB)
Downloading pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl (42.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.1/42.1 MB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyarrow, firefw, fireducks
  Attempting uninstall: pyarrow
    Found existing installation: pyarrow 18.1.0
    Uninstalling pyarrow-18.1.0:
      Successfully uninstalled pya

In [3]:
# Read CSV

start = time.time()
pandas_df = pd.read_csv("sales_data.csv")
print("📄 Pandas Read Time:", round(time.time() - start, 4), "sec")
start = time.time()
fd_df = fd.read_csv("sales_data.csv")
print("🔥 FireDucks Read Time:", round(time.time() - start, 4), "sec")



📄 Pandas Read Time: 0.712 sec
🔥 FireDucks Read Time: 0.0514 sec


In [4]:
#Filter ->price>100

start = time.time()
filtered_pandas = pandas_df[pandas_df['price'] > 100]
print("📉 Pandas Filter Time:", round(time.time() - start, 4), "sec")
start = time.time()
filtered_fd = fd_df[fd_df['price'] > 100]
print("🔥 FireDucks Filter Time:", round(time.time() - start, 4), "sec")



📉 Pandas Filter Time: 0.0801 sec
🔥 FireDucks Filter Time: 0.0057 sec


In [5]:
#Group by region, average price

start = time.time()

group_pandas = filtered_pandas.groupby('region')['price'].mean().reset_index()

print("📊 Pandas GroupBy Time:", round(time.time() - start, 4), "sec")

start = time.time()
group_fd = filtered_fd.groupby('region')['price'].mean().reset_index()

print("🔥 FireDucks GroupBy Time:", round(time.time() - start, 4), "sec")


📊 Pandas GroupBy Time: 0.0626 sec
🔥 FireDucks GroupBy Time: 0.0076 sec


In [6]:
#Sort by average price descending

start = time.time()
sorted_pandas = group_pandas.sort_values(by='price', ascending=False)
print("🔽 Pandas Sort Time:", round(time.time() - start, 4), "sec")
start = time.time()
sorted_fd = group_fd.sort_values(by='price', ascending=False)
print("🔥 FireDucks Sort Time:", round(time.time() - start, 4), "sec")
# -----------------------------------
print("✅ Sales Benchmark Completed!")



🔽 Pandas Sort Time: 0.0044 sec
🔥 FireDucks Sort Time: 0.0007 sec
✅ Sales Benchmark Completed!
