<a href="https://colab.research.google.com/github/imsumedhaa/FireDucks-in-Finance/blob/main/FireDucks_in_Finance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 1️⃣ Install and Set Up FireDucks
!pip install fireducks

import os
os.environ["FIREDUCKS_FLAGS"] = "--benchmark-mode"

import pandas as pd
import fireducks.pandas as fd
import numpy as np
import time
# Build the synthetic transaction table used by every benchmark cell and write
# it to financial_data.csv in the working directory.
#
# A seeded generator makes the data identical on every run, so timings taken on
# different runs are measured against the same input.
N_ROWS = 10_000_000
rng = np.random.default_rng(42)

df = pd.DataFrame({
    # Upper bound is exclusive, matching np.random.randint.
    "account_id": rng.integers(10000, 50000, size=N_ROWS),
    "region": rng.choice(["North", "South", "East", "West"], size=N_ROWS),
    "amount": rng.exponential(scale=1200, size=N_ROWS),
    "transaction_type": rng.choice(["deposit", "withdrawal", "transfer"], size=N_ROWS),
    # One row per second, starting at 2023-01-01.
    "timestamp": pd.date_range(start="2023-01-01", periods=N_ROWS, freq='s'),
})
# index=False keeps the row labels out of the file.
df.to_csv("financial_data.csv", index=False)
print("✅ Dataset with 10M rows created: financial_data.csv")

✅ Dataset with 10M rows created: financial_data.csv


In [2]:
import multiprocessing
import platform

import fireducks
import pandas
import psutil

# Report the machine and library versions the benchmarks run on.
# The lines are collected first and then printed one per call, in order.
environment_report = [
    "🔍 System Information:",
    f"OS: {platform.system()} {platform.release()}",
    f"Processor: {platform.processor()}",
    f"CPU cores: {multiprocessing.cpu_count()}",
    # psutil reports bytes; dividing by 1024**3 expresses the total in GiB.
    f"Total RAM: {psutil.virtual_memory().total / (1024**3):.2f} GB",
    "\n📦 Library Versions:",
    f"Pandas version: {pandas.__version__}",
    f"FireDucks version: {fireducks.__version__}",
]
for report_line in environment_report:
    print(report_line)


🔍 System Information:
OS: Linux 6.1.123+
Processor: x86_64
CPU cores: 2
Total RAM: 12.67 GB

📦 Library Versions:
Pandas version: 2.2.2
FireDucks version: 1.2.8


In [None]:
#  Read CSV
# Load financial_data.csv once with pandas and once with FireDucks, timing each
# call. The resulting frames (pandas_df, fd_df) are reused by the later cells.
#
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the wall clock, which can be adjusted
# while a measurement is in progress.

# Pandas
start = time.perf_counter()
pandas_df = pd.read_csv("financial_data.csv")
elapsed_ms = (time.perf_counter() - start) * 1000
print("📄 Pandas Read Time:", round(elapsed_ms, 2), "ms")

# FireDucks
start = time.perf_counter()
fd_df = fd.read_csv("financial_data.csv")
elapsed_ms = (time.perf_counter() - start) * 1000
print("🔥 FireDucks Read Time:", round(elapsed_ms, 2), "ms")

📄 Pandas Read Time: 9287.68 ms
🔥 FireDucks Read Time: 1751.87 ms


In [None]:
# Group by Region - Full Dataset
# Compute the mean and sum of `amount` per region on the full frame with each
# library; reset_index() turns the region labels back into a regular column.
#
# Intervals are taken with time.perf_counter(), the monotonic high-resolution
# clock intended for benchmarking, rather than the adjustable wall clock.

# Pandas
start = time.perf_counter()
pandas_grouped = pandas_df.groupby("region")["amount"].agg(["mean", "sum"]).reset_index()
elapsed_ms = (time.perf_counter() - start) * 1000
print("📊 Pandas GroupBy Time:", round(elapsed_ms, 2), "ms")

# FireDucks
start = time.perf_counter()
fd_grouped = fd_df.groupby("region")["amount"].agg(["mean", "sum"]).reset_index()
elapsed_ms = (time.perf_counter() - start) * 1000
print("🔥 FireDucks GroupBy Time:", round(elapsed_ms, 2), "ms")

📊 Pandas GroupBy Time: 705.2 ms
🔥 FireDucks GroupBy Time: 19.0 ms


In [None]:
# Sort by Total Amount
# Order the per-region summary produced by the groupby step by its "sum"
# column, largest first.
# NOTE(review): the input is the aggregated table, not the full dataset, so
# these timings likely reflect per-call overhead rather than sort throughput.
#
# The measured intervals are very short, so the high-resolution monotonic
# time.perf_counter() is used instead of the wall-clock time.time().

# Pandas
start = time.perf_counter()
pandas_sorted = pandas_grouped.sort_values(by="sum", ascending=False)
elapsed_ms = (time.perf_counter() - start) * 1000
print("🔽 Pandas Sort Time:", round(elapsed_ms, 2), "ms")

# FireDucks
start = time.perf_counter()
fd_sorted = fd_grouped.sort_values(by="sum", ascending=False)
elapsed_ms = (time.perf_counter() - start) * 1000
print("🔥 FireDucks Sort Time:", round(elapsed_ms, 2), "ms")

🔽 Pandas Sort Time: 0.96 ms
🔥 FireDucks Sort Time: 7.35 ms


In [None]:
# Filter (amount > 5000)
# Keep only the rows whose `amount` exceeds 5000, using a boolean mask on the
# full frame with each library.
#
# Intervals are taken with time.perf_counter(), the monotonic high-resolution
# clock intended for benchmarking, rather than the adjustable wall clock.

# Pandas
start = time.perf_counter()
pandas_filtered = pandas_df[pandas_df['amount'] > 5000]
elapsed_ms = (time.perf_counter() - start) * 1000
print("📉 Pandas Filter Time:", round(elapsed_ms, 2), "ms")

# FireDucks
start = time.perf_counter()
fd_filtered = fd_df[fd_df['amount'] > 5000]
elapsed_ms = (time.perf_counter() - start) * 1000
print("🔥 FireDucks Filter Time:", round(elapsed_ms, 2), "ms")

📉 Pandas Filter Time: 70.42 ms
🔥 FireDucks Filter Time: 25.06 ms
