<a href="https://colab.research.google.com/github/imsumedhaa/Fireducks-vs-Pandas-AI-ML-Pipelines/blob/main/AI_%26_ML_Pipelines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install fireducks
FIREDUCKS_FLAGS="--benchmark-mode"
import pandas as pd
import fireducks.pandas as fd
import numpy as np
import time

# Function to evaluate FireDucks
def evaluate(df):
    """Call ``df._evaluate()`` when the object provides it; otherwise do nothing.

    Used by the benchmark cells so that the FireDucks result is computed
    inside the timed region. Objects without an ``_evaluate`` attribute
    (e.g. plain pandas frames) are accepted and left untouched.

    Args:
        df: Any object; typically a FireDucks or pandas DataFrame.

    Returns:
        None.

    Raises:
        Whatever ``df._evaluate()`` itself raises. In particular, an
        ``AttributeError`` raised *inside* ``_evaluate`` now propagates
        instead of being silently swallowed.
    """
    # Look the method up first so that only "attribute is missing" is treated
    # as the no-op case; the call itself runs outside any exception handler.
    force = getattr(df, "_evaluate", None)
    if force is not None:
        force()

# Create sample data
# A fixed seed makes every run (and both libraries) benchmark the same
# dataset, so timings are comparable across kernel restarts.
SEED = 42
N_ROWS = 1_000_000
rng = np.random.default_rng(SEED)

df = pd.DataFrame({
    'product': rng.choice(['apple', 'banana', 'orange', 'kiwi'], size=N_ROWS),
    'region': rng.choice(['north', 'south', 'east', 'west'], size=N_ROWS),
    'price': rng.uniform(10, 200, size=N_ROWS),
    # Generator.integers excludes the upper bound, matching the previous
    # np.random.randint(1, 50, ...) range of 1..49.
    'units_sold': rng.integers(1, 50, size=N_ROWS)
})
# Persist to CSV so the read benchmarks below load the same file.
df.to_csv('sales_data.csv', index=False)



In [18]:
## 1. **Pandas Benchmark: Read**
# time.perf_counter() is the clock intended for measuring elapsed intervals:
# it is monotonic and high-resolution, whereas time.time() follows the wall
# clock and can jump if the system time is adjusted.
start = time.perf_counter()
pdf = pd.read_csv('sales_data.csv')
print('📄 Pandas Read Time:', round(time.perf_counter() - start, 4), 'sec')

## 2. **FireDucks Benchmark: Read**
start = time.perf_counter()
fdf = fd.read_csv('sales_data.csv')
# Call the frame's _evaluate() hook (if any) before stopping the clock so the
# read work is included in the measured interval.
evaluate(fdf)
print('🔥 FireDucks Read Time:', round(time.perf_counter() - start, 4), 'sec')


📄 Pandas Read Time: 0.468 sec
🔥 FireDucks Read Time: 0.228 sec


In [11]:
## 3. **Pandas Benchmark: Filter**
# Keep only rows whose price exceeds 100. perf_counter() is used because it
# is a monotonic, high-resolution clock suited to short intervals.
start = time.perf_counter()
filtered_pdf = pdf[pdf['price'] > 100]
print('📉 Pandas Filter Time:', round(time.perf_counter() - start, 4), 'sec')

## 4. **FireDucks Benchmark: Filter**
start = time.perf_counter()
filtered_fdf = fdf[fdf['price'] > 100]
# Trigger the frame's _evaluate() hook (if any) inside the timed region.
evaluate(filtered_fdf)
print('🔥 FireDucks Filter Time:', round(time.perf_counter() - start, 4), 'sec')

📉 Pandas Filter Time: 0.0344 sec
🔥 FireDucks Filter Time: 0.0406 sec


In [13]:
## 5. **Pandas Benchmark: GroupBy**
# Mean price per region over the filtered rows; reset_index() turns the
# 'region' group key back into a regular column. perf_counter() is used
# because it is a monotonic, high-resolution clock suited to short intervals.
start = time.perf_counter()
grouped_pdf = filtered_pdf.groupby('region')['price'].mean().reset_index()
print('📊 Pandas GroupBy Time:', round(time.perf_counter() - start, 4), 'sec')

## 6. **FireDucks Benchmark: GroupBy**
start = time.perf_counter()
grouped_fdf = filtered_fdf.groupby('region')['price'].mean().reset_index()
# Trigger the frame's _evaluate() hook (if any) inside the timed region.
evaluate(grouped_fdf)
print('🔥 FireDucks GroupBy Time:', round(time.perf_counter() - start, 4), 'sec')

📊 Pandas GroupBy Time: 0.0383 sec
🔥 FireDucks GroupBy Time: 0.0153 sec


In [15]:
## 7. **Pandas Benchmark: Sort**
# The grouped frames hold one row per region, so this sort is tiny and the
# measured interval is sub-millisecond. perf_counter() is used because it is
# a monotonic, high-resolution clock; time.time() is too coarse and can jump
# with wall-clock adjustments.
start = time.perf_counter()
sorted_pdf = grouped_pdf.sort_values(by='price', ascending=False)
print('🔽 Pandas Sort Time:', round(time.perf_counter() - start, 4), 'sec')

## 8. **FireDucks Benchmark: Sort**
start = time.perf_counter()
sorted_fdf = grouped_fdf.sort_values(by='price', ascending=False)
# Trigger the frame's _evaluate() hook (if any) inside the timed region.
evaluate(sorted_fdf)
print('🔥 FireDucks Sort Time:', round(time.perf_counter() - start, 4), 'sec')

print("✅ Benchmark Completed!")

🔽 Pandas Sort Time: 0.0009 sec
🔥 FireDucks Sort Time: 0.0029 sec
✅ Benchmark Completed!
