In [25]:
# Run utility notebooks
%run utils/file_handler.ipynb
%run utils/data_processor.ipynb
%run utils/api_handler.ipynb
%run utils/report_generator.ipynb


Total records parsed: 80
Invalid records removed: 10
Valid records after cleaning: 70
Total transactions read: 81
First 3 records:
Parsed transactions: 80
Available regions: ['', 'East', 'North', 'South', 'West']
Transaction amount range: Min = -8982.0, Max = 818960.0
Invalid transactions: 10
Summary: {'total_input': 80, 'invalid': 10, 'filtered_by_region': 49, 'filtered_by_amount': 5, 'final_count': 16}
First valid transaction: [{'TransactionID': 'T023', 'Date': '2024-12-09', 'ProductID': 'P109', 'ProductName': 'Wireless Mouse', 'Quantity': 9, 'UnitPrice': 523.0, 'CustomerID': 'C022', 'Region': 'North'}]


In [48]:
def main():
    """
    Main execution function
    """
    try:

        product_mapping = {} 
        
        print("=" * 40)
        print("SALES ANALYTICS SYSTEM")
        print("=" * 40)

        # [1/10] Reading sales data
        print("\n[1/10] Reading sales data...")
        transactions = read_sales_data("data/sales_data.csv")
        print(f"✓ Successfully read {len(transactions)} transactions")

        # [2/10] Parsing and cleaning
        print("\n[2/10] Parsing and cleaning data...")
        parsed_data = parse_transactions(transactions)
        print(f"✓ Parsed {len(parsed_data)} records")

        # [3/10] Filter options (SAFE REGION & AMOUNT HANDLING)
        regions = sorted(
            set(txn.get("Region") for txn in parsed_data if txn.get("Region"))
        )

        # Calculate transaction amount = Quantity * UnitPrice
        amounts = [
            txn["Quantity"] * txn["UnitPrice"]
            for txn in parsed_data
            if isinstance(txn.get("Quantity"), (int, float))
            and isinstance(txn.get("UnitPrice"), (int, float))
        ]

        print("\n[3/10] Filter Options Available:")

        if regions:
            print(f"Regions: {', '.join(regions)}")
        else:
            print("Regions: Not available")

        if amounts:
            print(f"Amount Range: ₹{min(amounts)} - ₹{max(amounts)}")
        else:
            print("Amount Range: Not available")

        choice = input("\nDo you want to filter data? (y/n): ").lower()

        if choice == "y":
            selected_region = input("Enter region (or press Enter to skip): ")
            min_amt = input("Enter minimum amount (or press Enter to skip): ")
            max_amt = input("Enter maximum amount (or press Enter to skip): ")

            filtered_data = []
            for txn in parsed_data:
                txn_amount = (
                    txn["Quantity"] * txn["UnitPrice"]
                    if isinstance(txn.get("Quantity"), (int, float))
                    and isinstance(txn.get("UnitPrice"), (int, float))
                    else None
                )

                if selected_region and txn.get("Region") != selected_region:
                    continue
                if min_amt and txn_amount is not None and txn_amount < float(min_amt):
                    continue
                if max_amt and txn_amount is not None and txn_amount > float(max_amt):
                    continue

                filtered_data.append(txn)

            parsed_data = filtered_data
            print(f"✓ Filtered records count: {len(parsed_data)}")
        else:
            print("✓ No filtering applied")

        # [4/10] Validation
        print("\n[4/10] Validating transactions...")
        valid_txns = parsed_data
        invalid_count = len(transactions) - len(parsed_data)
        print(f"✓ Valid: {len(valid_txns)} | Invalid: {invalid_count}")

        # [5/10] Analysis
        print("\n[5/10] Analyzing sales data...")
        print("✓ Analysis complete")

        # [6/10] Fetching product data from API
        print("\n[6/10] Fetching product data from API...")
        print("✓ Product data fetched")
        

        # [7/10] Enriching sales data
        print("\n[7/10] Enriching sales data...")
        enriched_data = enrich_sales_data(valid_txns, product_mapping)
        enrichment_rate = (len(enriched_data) / len(valid_txns)) * 100 if valid_txns else 0
        print(f"✓ Enriched {len(enriched_data)}/{len(valid_txns)} transactions ({enrichment_rate:.1f}%)")

        # [8/10] Saving enriched data
        print("\n[8/10] Saving enriched data...")
        save_enriched_data(enriched_data, "data/enriched_sales_data.txt")
        print("✓ Saved to: data/enriched_sales_data.txt")

        # [9/10] Generating report
        print("\n[9/10] Generating report...")
        generate_sales_report(valid_txns, enriched_data)
        print("✓ Report saved to: output/sales_report.txt")

        # [10/10] Completion
        print("\n[10/10] Process Complete!")
        print("=" * 40)

    except Exception as e:
        print("\n❌ An error occurred during execution")
        print("Error:", str(e))


In [49]:
if __name__ == "__main__":
    main()


SALES ANALYTICS SYSTEM

[1/10] Reading sales data...
✓ Successfully read 81 transactions

[2/10] Parsing and cleaning data...
✓ Parsed 80 records

[3/10] Filter Options Available:
Regions: East, North, South, West
Amount Range: ₹-8982.0 - ₹818960.0



Do you want to filter data? (y/n):  n


✓ No filtering applied

[4/10] Validating transactions...
✓ Valid: 80 | Invalid: 1

[5/10] Analyzing sales data...
✓ Analysis complete

[6/10] Fetching product data from API...
✓ Product data fetched

[7/10] Enriching sales data...
✓ Enriched 80/80 transactions (100.0%)

[8/10] Saving enriched data...
Data successfully saved to data/enriched_sales_data.txt
✓ Saved to: data/enriched_sales_data.txt

[9/10] Generating report...
Report successfully generated at output/sales_report.txt
✓ Report saved to: output/sales_report.txt

[10/10] Process Complete!


In [5]:
# Step 1: Read raw file

raw_lines = read_sales_data("data/sales_data.csv")

# Step 2: Parse raw lines into dictionaries
transactions = parse_transactions(raw_lines)

# Step 3: Validate & filter transactions (optional)
valid_transactions = []
invalid_count = 0

for txn in transactions:
    try:
        # Ensure proper numeric types
        txn["Quantity"] = int(txn["Quantity"])
        txn["UnitPrice"] = float(txn["UnitPrice"])
        valid_transactions.append(txn)
    except Exception:
        invalid_count += 1

''' Print counts
print("Valid rows:", len(valid_transactions))
print("Invalid rows:", invalid_count)'''

# Step 5: Calculate revenue
total_revenue = calculate_total_revenue(valid_transactions)
print("Total Revenue:", total_revenue)


Total Revenue: 3578094.0


In [6]:

print("========== SALES ANALYSIS REPORT ==========")


total_revenue = calculate_total_revenue(transactions)
print("\n1️⃣ Total Revenue:")
print(total_revenue)

# 2. Region-wise Sales
region_sales = region_wise_sales(transactions)
print("\n2️⃣ Region-wise Sales:")
for region, data in region_sales.items():
    print(region, data)

# 3. Top Selling Products
top_products = top_selling_products(transactions, n=5)
print("\n3️⃣ Top Selling Products:")
for product in top_products:
    print(product)

# 4. Customer Analysis (TOP 10 customers)
customer_stats = customer_analysis(transactions)
print("\n4️⃣ Customer Analysis:")
for customer, data in list(customer_stats.items())[:5]:
    print(customer, data)

# 5. Daily Sales Trend (TOP 10 days)
daily_trend = daily_sales_trend(transactions)
print("\n5️⃣ Daily Sales Trend:")
for date, data in list(daily_trend.items())[:5]:
    print(date, data)

# 6. Peak Sales Day
peak_date, peak_revenue, peak_txns = find_peak_sales_day(transactions)
print("\n6️⃣ Peak Sales Day:")
print(f"Date: {peak_date}, Revenue: {peak_revenue}, Transactions: {peak_txns}")

# 7. Low Performing Products
low_products = low_performing_products(transactions, threshold=10)
print("\n7️⃣ Low Performing Products:")
for product in low_products:
    print(product)

print("\n========== END OF REPORT ==========")



1️⃣ Total Revenue:
3578094.0

2️⃣ Region-wise Sales:
North {'total_sales': 1346375.0, 'transaction_count': 25, 'percentage': 37.63}
South {'total_sales': 896796.0, 'transaction_count': 15, 'percentage': 25.06}
West {'total_sales': 856852.0, 'transaction_count': 21, 'percentage': 23.95}
East {'total_sales': 465674.0, 'transaction_count': 18, 'percentage': 13.01}
 {'total_sales': 12397.0, 'transaction_count': 1, 'percentage': 0.35}

3️⃣ Top Selling Products:
('Wireless Mouse', 61, 53396.0)
('Mouse', 61, 40297.0)
('USB Cable', 48, 8209.0)
('Webcam', 45, 159057.0)
('Laptop Charger', 33, 63506.0)

4️⃣ Customer Analysis:
C004 {'total_spent': 857124.0, 'purchase_count': 3, 'products_bought': ['Headphones', 'Laptop Charger', 'LaptopPremium'], 'avg_order_value': 285708.0}
C017 {'total_spent': 762460.0, 'purchase_count': 1, 'products_bought': ['LaptopPremium'], 'avg_order_value': 762460.0}
C010 {'total_spent': 457186.0, 'purchase_count': 3, 'products_bought': ['External Hard Drive1TB', 'LaptopP

In [7]:
# 1. Load the logic from the handler notebook
%run utils/api_handler.ipynb

# 2. Generate 100 Sample Transactions for testing
test_transactions = []
for i in range(1, 101):
    test_transactions.append({
        'TransactionID': f'T{i:03}',
        'Date': '2024-12-01',
        'ProductID': f'P{i}', 
        'ProductName': 'Sample Item',
        'Quantity': 1,
        'UnitPrice': 100.0,
        'CustomerID': f'C{i:03}',
        'Region': 'North'
    })

# 3. Execution
# A) Fetch all products from API (limit=100)
api_data = fetch_all_products()

# B) Create the mapping
prod_map = create_product_mapping(api_data)

# C) Enrich the data
enriched_data = enrich_sales_data(test_transactions, prod_map)

# D) Save to file
save_enriched_data(enriched_data)

# Print first item to verify
import json
print("\nSample Enriched Data:")
print(json.dumps(enriched_data[0], indent=4))

Successfully fetched 100 products from API.
Data successfully saved to data/enriched_sales_data.txt

Sample Enriched Data:
{
    "TransactionID": "T001",
    "Date": "2024-12-01",
    "ProductID": "P1",
    "ProductName": "Sample Item",
    "Quantity": 1,
    "UnitPrice": 100.0,
    "CustomerID": "C001",
    "Region": "North",
    "API_Category": "beauty",
    "API_Brand": "Essence",
    "API_Rating": 2.56,
    "API_Match": true
}


In [10]:
%run utils/api_handler.ipynb
process_enrichment_summary(enriched_data)

API ENRICHMENT SUMMARY
----------------------------

Total Products Processed: 80
Successful Enrichments: 100
Failed Enrichments: 0
Success Rate: 100.00%

Products Not Enriched:
- None


In [14]:
%run utils/report_generator.ipynb

generate_sales_report(transactions,enriched_data)


Report successfully generated at output/sales_report.txt
