## Using Window Functions Over Partitions (Pandas)

In [0]:
import pandas as pd

# Read the cleaned & merged datasets, one DataFrame per CSV file.
# The files are read in the order listed and bound to the names on the left.
(
    order_items_data_cleaned,
    orders_customers_data_cleaned,
    products_data_cleaned,
    order_payments_data_cleaned,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'order_items_data_cleaned.csv',
        'orders_customers_data_cleaned.csv',
        'products_data_cleaned.csv',
        'order_payments_data_cleaned.csv',
    )
)

In [0]:
# 1. Total Sales per Customer: a running (cumulative) total of `total_price`,
#    partitioned by `customer_id`. Rows are accumulated in the frame's current
#    row order; no sorting is applied before the cumulative sum.
price_by_customer = order_items_data_cleaned.groupby('customer_id')['total_price']
order_items_data_cleaned['total_sales_per_customer'] = price_by_customer.cumsum()

# Preview the partition key, the input value and the new running total.
preview_columns = ['customer_id', 'total_price', 'total_sales_per_customer']
print(order_items_data_cleaned[preview_columns].head())


                        customer_id  total_price  total_sales_per_customer
0  3ce436f183e68e07877b285a838db11a        72.19                     72.19
1  f6dd3ec061db4e3987629fe6b26e5cce       259.83                    259.83
2  6489ae5e4333f3693df5ad4372dab6d3       216.87                    216.87
3  d4eb9395c8c0431ee92fce09860c5a06        25.78                     25.78
4  58dbd0b2d70206bf40e62cd34e84d795       218.04                    218.04


In [0]:
# 2. Average Delivery Time per Product Category: a rolling mean of `delivery_time`
#    computed separately within each `product_category_name`.

# Window of up to 5 rows per category; min_periods=1 means the first rows of a
# category are averaged over however many rows are available so far.
# Rows are taken in the frame's existing order within each category (no explicit sort).
category_rolling_mean = (
    order_items_data_cleaned
    .groupby('product_category_name')['delivery_time']
    .rolling(window=5, min_periods=1)
    .mean()
)

# The grouped rolling result is indexed by (category, original row label). Dropping
# the category level leaves the original row labels, so the assignment below aligns
# each average with the row it was computed for.
order_items_data_cleaned['avg_delivery_time_per_category'] = category_rolling_mean.droplevel(0)

# Preview the partition key, the input value and the new rolling average.
preview_columns = ['product_category_name', 'delivery_time', 'avg_delivery_time_per_category']
print(order_items_data_cleaned[preview_columns].head())

  product_category_name  delivery_time  avg_delivery_time_per_category
0            cool_stuff              7                             7.0
1              pet_shop             16                            16.0
2      moveis_decoracao              7                             7.0
3            perfumaria              6                             6.0
4    ferramentas_jardim             25                            25.0


In [0]:
# Write the order items frame (including any columns added to it in this notebook)
# to CSV without the index column.
# NOTE: this is the same file name the frame was read from, so the input file is overwritten.
output_csv = 'order_items_data_cleaned.csv'
order_items_data_cleaned.to_csv(output_csv, index=False)