In [47]:
import json

# Load the raw transaction records from disk. JSON text is UTF-8 by
# convention, so the encoding is pinned explicitly instead of relying on the
# platform's default locale encoding (which differs between systems).
with open('sample-dataset-3.json', 'r', encoding='utf-8') as file:
    transactions = json.load(file)

1. Create a function that transforms raw transaction data:
* Flatten nested JSON structures using lambda functions
* Calculate total transaction values

In [57]:
from functools import reduce

def transform_transaction_data(transactions):
    """Flatten nested transaction records into single-level dictionaries.

    Args:
        transactions: Iterable of raw transaction dicts. Each one is read for
            the keys 'transaction_id', 'timestamp', 'customer' (a mapping
            with 'id' and 'region'), 'payment_method', 'status' and 'items'
            (entries with 'product_id', 'category', 'price', 'quantity').

    Returns:
        list[dict]: One flattened record per input transaction, in input
        order. Each record holds the scalar fields, the customer id/region,
        'total_value' (sum of price * quantity over the items), a trimmed
        'items' list (product_id and quantity only) and the list of item
        categories.
    """

    def flatten(record):
        lines = record['items']
        # Total value of the transaction: price times quantity, summed over lines.
        order_total = sum(line['price'] * line['quantity'] for line in lines)

        flat = {
            'transaction_id': record['transaction_id'],
            'timestamp': record['timestamp'],
        }
        # Lift the nested customer fields up to the top level.
        buyer = record['customer']
        flat['customer_id'] = buyer['id']
        flat['customer_region'] = buyer['region']
        flat['payment_method'] = record['payment_method']
        flat['status'] = record['status']
        flat['total_value'] = order_total
        # Keep only what the downstream product ranking needs from each line.
        flat['items'] = [
            {'product_id': line['product_id'], 'quantity': line['quantity']}
            for line in lines
        ]
        flat['categories'] = [line['category'] for line in lines]
        return flat

    return [flatten(record) for record in transactions]


2. Analyze the transformed data:
* Extract unique product categories using map() and set()

In [49]:
def extract_unique_categories(transactions):
    """Collect the distinct product categories across flattened transactions.

    Args:
        transactions: Iterable of flattened transaction dicts; each one is
            read for its 'categories' entry (an iterable of hashable labels).

    Returns:
        set: Every category that occurs at least once. An empty set is
        returned when ``transactions`` is empty.
    """
    # map() extracts each transaction's category list and set().union(*...)
    # merges them in a single pass. Unlike reduce() without an initial value,
    # this does not raise TypeError on empty input, and it avoids rebuilding
    # an ever-growing list for every transaction.
    return set().union(*map(lambda record: record['categories'], transactions))


* Group transactions by region and calculate regional sales

In [50]:
from collections import defaultdict

def regional_sales(transactions):
    """Sum the transaction value recorded for each customer region.

    Args:
        transactions: Iterable of flattened transaction dicts, read for
            'customer_region' and 'total_value'.

    Returns:
        defaultdict: Maps each region to the sum of its 'total_value'
        entries; unseen regions default to 0.0 on lookup.
    """
    totals_by_region = defaultdict(float)

    for record in transactions:
        region = record['customer_region']
        totals_by_region[region] = totals_by_region[region] + record['total_value']

    return totals_by_region


* Find top-selling products using sorted() with custom key

In [58]:
def top_selling_products(transactions):
    """Rank products by the total quantity sold across all transactions.

    Args:
        transactions: Iterable of flattened transaction dicts whose 'items'
            entries each provide 'product_id' and 'quantity'.

    Returns:
        list[tuple]: (product_id, total_quantity) pairs ordered from the
        highest quantity to the lowest. Products with equal totals keep the
        order in which they were first encountered.
    """
    units_sold = defaultdict(int)

    for record in transactions:
        for line in record['items']:
            product = line['product_id']
            units_sold[product] = units_sold[product] + line['quantity']

    # Descending by accumulated quantity; the sort is stable, so ties stay
    # in first-seen order.
    ranking = list(units_sold.items())
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking


* Calculate average transaction value by payment method


In [52]:
def average_transaction_value_by_payment(transactions):
    """Compute the mean transaction value for every payment method.

    Args:
        transactions: Iterable of flattened transaction dicts, read for
            'payment_method' and 'total_value'.

    Returns:
        dict: Maps each payment method to the arithmetic mean of the
        'total_value' entries recorded for it, in first-seen order.
    """
    values_by_method = {}

    # Bucket the transaction values under their payment method.
    for record in transactions:
        values_by_method.setdefault(record['payment_method'], []).append(record['total_value'])

    # Every bucket holds at least one value, so the division is always defined.
    averages = {}
    for method, values in values_by_method.items():
        averages[method] = sum(values) / len(values)
    return averages


3. Create a report generation function that:
* Filters completed transactions using filter()


In [53]:
def filter_completed_transactions(transactions):
    """Keep only the transactions whose 'status' equals 'completed'.

    Args:
        transactions: Iterable of transaction dicts, read for 'status'.

    Returns:
        list: The matching transactions, in their original order.
    """

    def is_completed(record):
        return record['status'] == 'completed'

    return list(filter(is_completed, transactions))


* Sorts data by multiple criteria using lambda

In [54]:
def sort_transactions(transactions):
    """Order transactions by region, then by total value, both descending.

    Args:
        transactions: Iterable of flattened transaction dicts, read for
            'customer_region' and 'total_value'.

    Returns:
        list: A new list sorted on the (customer_region, total_value) pair
        in descending order; the input is not modified.
    """
    ordered = list(transactions)
    ordered.sort(
        key=lambda record: (record['customer_region'], record['total_value']),
        reverse=True,
    )
    return ordered


* Generates summary statistics for different time periods

In [55]:
from collections import defaultdict
from datetime import datetime

def generate_monthly_report(transactions):
    """Aggregate transaction value per calendar month.

    Args:
        transactions: Iterable of flattened transaction dicts, read for
            'timestamp' (parsed with the format '%Y-%m-%dT%H:%M:%S') and
            'total_value'.

    Returns:
        defaultdict: Maps each 'YYYY-MM' period to the sum of the
        'total_value' entries that fall in it; unseen periods default to 0.0.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """

    def period_of(timestamp):
        # Parse the full timestamp, then reduce it to its year-month label.
        parsed = datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S')
        return parsed.strftime('%Y-%m')

    sales_by_period = defaultdict(float)

    for record in transactions:
        period = period_of(record['timestamp'])
        sales_by_period[period] = sales_by_period[period] + record['total_value']

    return sales_by_period


Integrating all the functions

In [59]:
# Step 1: Transform raw transaction data into flat records
# (one dict per transaction, including the computed 'total_value').
flattened_transactions = transform_transaction_data(transactions)

# Print flattened transactions to inspect the structure.
# NOTE(review): this prints the entire list, not a sample; consider limiting
# the output if the dataset is large.
print(flattened_transactions)

# Step 2: Analysis — every call below receives all flattened transactions,
# regardless of their 'status'.
unique_categories = extract_unique_categories(flattened_transactions)
region_sales_report = regional_sales(flattened_transactions)
top_products = top_selling_products(flattened_transactions)  # reads the 'items' list kept by the flattening step
average_transaction_by_payment = average_transaction_value_by_payment(flattened_transactions)

# Step 3: Reports
# NOTE(review): completed_transactions and sorted_transactions are computed
# here but are not printed or passed on within this cell, and the monthly
# report is built from all transactions rather than from the completed ones —
# confirm whether that is intended.
completed_transactions = filter_completed_transactions(flattened_transactions)
sorted_transactions = sort_transactions(flattened_transactions)
monthly_sales_report = generate_monthly_report(flattened_transactions)

# Output results (one labelled line per computed summary)
print(f"Unique Categories: {unique_categories}")
print(f"Regional Sales: {region_sales_report}")
print(f"Top-Selling Products: {top_products}")
print(f"Average Transaction Value by Payment: {average_transaction_by_payment}")
print(f"Monthly Sales Report: {monthly_sales_report}")


[{'transaction_id': 'T123456', 'timestamp': '2024-01-15T14:30:00', 'customer_id': 'CUS123', 'customer_region': 'North', 'payment_method': 'credit_card', 'status': 'completed', 'total_value': 559.97, 'items': [{'product_id': 'P789', 'quantity': 1}, {'product_id': 'P456', 'quantity': 2}], 'categories': ['Electronics', 'Accessories']}, {'transaction_id': 'T123457', 'timestamp': '2024-01-15T14:45:00', 'customer_id': 'CUS456', 'customer_region': 'South', 'payment_method': 'paypal', 'status': 'completed', 'total_value': 74.97, 'items': [{'product_id': 'P234', 'quantity': 3}], 'categories': ['Books']}, {'transaction_id': 'T123458', 'timestamp': '2024-01-15T15:00:00', 'customer_id': 'CUS789', 'customer_region': 'West', 'payment_method': 'credit_card', 'status': 'pending', 'total_value': 1199.96, 'items': [{'product_id': 'P789', 'quantity': 1}, {'product_id': 'P555', 'quantity': 1}, {'product_id': 'P777', 'quantity': 2}], 'categories': ['Electronics', 'Electronics', 'Electronics']}, {'transacti