In [4]:
# -------------------------------------------------------------------
# Data Preparation for the Daily Sales Report Automator
# -------------------------------------------------------------------
# This notebook simulates a daily data file from the main Olist dataset.
# The output is a single CSV file that the main Python script will process.
# -------------------------------------------------------------------

import pandas as pd
import os

# --- 1. Load Original Datasets ---
print("Loading original Olist datasets...")
try:
    # Bare filenames are resolved against the current working directory,
    # which is expected to be the 'notebooks' folder for this preparation step.
    orders = pd.read_csv('olist_orders_dataset.csv')
    payments = pd.read_csv('olist_order_payments_dataset.csv')
except FileNotFoundError:
    # Print a human-readable hint, then re-raise so the cell stops here
    # instead of continuing with undefined dataframes.
    print("ERROR: Olist source files not found in the 'notebooks/' directory.")
    raise
else:
    print("Datasets loaded successfully.")

# --- 2. Process and Merge Data ---
print("Merging orders and payments tables...")

# Inner-join the two tables on their shared 'order_id' key, then replace the
# purchase timestamp column with parsed datetime values so that the `.dt`
# accessor can be used on it afterwards.
df_merged = orders.merge(payments, how='inner', on='order_id').assign(
    order_purchase_timestamp=lambda merged: pd.to_datetime(
        merged['order_purchase_timestamp']
    )
)

# --- 3. Filter for a Specific Day to Simulate a Daily Report ---
target_date = '2018-08-01'

# Truncate every purchase timestamp to midnight and compare it with the target
# day. This stays in vectorized datetime64 space instead of materializing an
# object-dtype column of Python `date` values via `.dt.date`. Normalizing the
# target as well keeps the match day-based even if `target_date` is ever given
# with a time component.
daily_data = df_merged[
    df_merged['order_purchase_timestamp'].dt.normalize()
    == pd.Timestamp(target_date).normalize()
].copy()  # .copy() detaches the slice from df_merged
print(f"Filtered data for {target_date}. Found {len(daily_data)} records.")

# A day with no matching rows would otherwise pass through silently; surface
# it explicitly without aborting the cell.
if daily_data.empty:
    print(f"WARNING: no records matched {target_date}; check the date or the source data.")

# --- 4. Define Output Path and Save the File ---
# '../data' is relative to the working directory: one level up, then into
# 'data/' (the project root's data folder when run from 'notebooks/').
output_folder = '../data'
os.makedirs(output_folder, exist_ok=True)  # no error if the folder already exists

# The file name embeds the report date, e.g. daily_sales_<target_date>.csv
output_filename = f'daily_sales_{target_date}.csv'
output_path = os.path.join(output_folder, output_filename)

# Write the filtered rows; index=False leaves the dataframe index out of the CSV
daily_data.to_csv(output_path, index=False)

# Report both the relative location and the resolved absolute path
print(f"\nSUCCESS! File '{output_filename}' has been created in the '{output_folder}' folder.")
print(f"Full path: {os.path.abspath(output_path)}")



Loading original Olist datasets...
Datasets loaded successfully.
Merging orders and payments tables...
Filtered data for 2018-08-01. Found 331 records.

SUCCESS! File 'daily_sales_2018-08-01.csv' has been created in the '../data' folder.
Full path: c:\Users\gisro\OneDrive\Área de Trabalho\report_automation\data\daily_sales_2018-08-01.csv
