In [None]:
"""
:author: Excellent Willie-Pepple
:date: 2025-02-1
:title: Data Analysis with Python - Assignment 1
:description: This script reads a CSV file containing sales data, prints the first five rows, calculates and prints the total revenue and average cost per order, along with other features found in the readme.md.
:linkedin: https://www.linkedin.com/in/excell-pepple/
"""

In [601]:
def read_data(file_name: str):
    """
    Load a csv file and hand back its raw lines.

    :param file_name: Path of the csv file to open; it is decoded as UTF-8.
    :return: A list of strings, one per line of the file. Line endings are kept
             and the lines are not split into fields.
    """
    with open(file_name, encoding="utf-8") as csv_file:
        # iterating a text file yields exactly the lines readlines() would return
        return list(csv_file)

In [602]:
def print_head(data:[str]):
    """
    Show the first five orders of the sales data in a readable form.

    The first element of ``data`` is skipped (it is treated as the header line),
    so the lines printed are ``data[1]`` through ``data[5]``.

    :param data: The lines of the csv file, each one a comma separated string.
    """
    for line in data[1:6]:
        # break the comma separated line into its individual fields
        fields = line.split(",")
        # convert the columns left to right: ids first, then price, then the remaining integer columns
        order_id, category_id = int(fields[0]), int(fields[1])
        price = float(fields[2])
        quantity, payment, age, gender = (int(fields[column]) for column in (3, 4, 5, 6))
        # one formatted line per order
        print(f"Order ID: {order_id}, Category ID: {category_id}, Price: ${price:.2f}, Quantity: {quantity}, Payment Method: {payment}, Age: {age}, Gender: {gender}")

In [603]:
def key_metrics(data:[]):
    """
    Calculate and print the total revenue and average cost per order from the sales data.

    The revenue of one order is its price (column 2) multiplied by its quantity
    (column 3). The first element of ``data`` is treated as the header line and
    skipped; blank lines are ignored. When there are no orders at all, both
    figures are reported as 0.00 instead of raising ZeroDivisionError.

    :param data: The lines of the sales csv file, each one a comma separated string.
    """
    # skip the header and any blank line (e.g. a trailing empty line at the end of the file)
    orders = [line.split(',') for line in data[1:] if line.strip()]
    total_revenue = sum(float(order[2]) * int(order[3]) for order in orders)
    # an empty order list would otherwise divide by zero
    average_cost_per_order = total_revenue / len(orders) if orders else 0.0

    # print the results to the console
    print("Key Sales Metrics")
    print(f"Total Revenue: ${total_revenue:.2f}\nAverage Cost Per Order: ${average_cost_per_order:.2f}")

In [604]:
# TODO: Ask prof kim if we should use filter method or create our own
def filter_data_by_price(data:[], price:float):
    """
    Print the order ID and price of every order whose price is at least ``price``.

    The first element of ``data`` is treated as the header line and skipped.

    :param data: The lines of the sales csv file, each one a comma separated string.
    :param price: The minimum price threshold (inclusive).
    :return: None. The matching orders are printed to the console, not returned.
    """
    # split every data line into its fields; the header is removed here, exactly once
    row_data = [row.split(',') for row in data[1:]]
    # keep the rows that meet the price condition
    # (slicing row_data again here would silently drop the first order)
    filtered_data = [row for row in row_data if float(row[2]) >= price]

    # build the report line by line and print it in one go
    report_lines = [f"Order ID and Items Priced > ${price:.2f}\n"]
    report_lines.extend(
        f"Order ID: {row[0]}, Price: ${float(row[2]):.2f}\n" for row in filtered_data
    )
    print("".join(report_lines))

In [605]:
def gender_based_order(data:[]):
    """
    Count the orders per gender code and print both totals.

    The gender code is read from column 6 of every line after the header:
    "1" is counted as a female order and "2" as a male order. Any other
    value is counted for neither.

    :param data: The lines of the sales csv file, each one a comma separated string.
    """
    # split every data line (header excluded) into its fields
    rows = [line.split(',') for line in data[1:]]
    female_orders = 0
    male_orders = 0
    # single pass over the rows, tallying by gender code
    for fields in rows:
        gender_code = fields[6].strip()
        if gender_code == "1":
            female_orders += 1
        elif gender_code == "2":
            male_orders += 1
    # report the totals
    print("Gender Based Order Analysis")
    print(f"Female Orders: {female_orders}")
    print(f"Male Orders: {male_orders}")

In [606]:
# Load the sales csv into a list of raw lines; the path is relative, so it depends on the current working directory
data = read_data("../resources/sale_data.csv")

In [607]:
# data?  -- presumably a leftover IPython help query; kept commented out
# Show the first five orders of the loaded file
print_head(data)

Order ID: 1, Category ID: 4, Price: $15.06, Quantity: 7, Payment Method: 2, Age: 24, Gender: 1
Order ID: 2, Category ID: 1, Price: $15.98, Quantity: 17, Payment Method: 2, Age: 22, Gender: 1
Order ID: 3, Category ID: 4, Price: $15.76, Quantity: 3, Payment Method: 2, Age: 20, Gender: 2
Order ID: 4, Category ID: 1, Price: $6.04, Quantity: 14, Payment Method: 2, Age: 20, Gender: 1
Order ID: 5, Category ID: 1, Price: $20.66, Quantity: 9, Payment Method: 2, Age: 21, Gender: 1


In [608]:
# Print the total revenue and the average cost per order for the loaded file
key_metrics(data)

Key Sales Metrics
Total Revenue: $16395.78
Average Cost Per Order: $163.96


In [609]:
# Print the orders whose price is 30 or more
filter_data_by_price(data, 30)

Order ID and Items Priced > $30.00
Order ID: 8, Price: $31.78
Order ID: 18, Price: $33.79
Order ID: 20, Price: $32.27
Order ID: 21, Price: $31.97
Order ID: 29, Price: $34.33
Order ID: 39, Price: $34.00
Order ID: 41, Price: $34.66
Order ID: 48, Price: $30.79
Order ID: 49, Price: $30.86
Order ID: 56, Price: $34.96
Order ID: 66, Price: $31.49
Order ID: 76, Price: $34.04



In [610]:
# Print how many orders carry each gender code
gender_based_order(data)

Gender Based Order Analysis
Female Orders: 44
Male Orders: 55


In [611]:
# Split every data line (header excluded) into its comma separated fields.
# This module-level list is reused by the cells that follow.
row_data = [row.split(',') for row in data[1:]]


def payment_ratio(payment_method):
    """
    Calculate the percentage of orders made with a specific payment method.

    The payment method code is read from column 4 of each row in ``row_data``.

    :param payment_method: A string representing the payment method
                           ("1" for credit card and "2" for digital wallet).
    :return: A float representing the percentage of orders made with the
             specified payment method; 0.0 when there are no orders.
    """
    # without any rows the ratio is undefined; report 0% rather than dividing by zero
    if not row_data:
        return 0.0
    matching_orders = sum(1 for row in row_data if row[4].strip() == payment_method)
    return (matching_orders / len(row_data)) * 100


In [612]:
# Report the share of orders per payment method code, rounded to one decimal place
print("Payment Method Analysis")
print(f"Credit Card Orders: {payment_ratio('1'):.1f}%")
print(f"Digital Wallet Orders: {payment_ratio('2'):.1f}%")

Payment Method Analysis
Credit Card Orders: 43.0%
Digital Wallet Orders: 57.0%


In [613]:
# Running record of the category with the largest share of revenue seen so far.
# analyze_category() updates it in place.
popular_category = {"id": 0, "revenue": 0.00, "percentage": 0.0 }


def analyze_category(price:float, quantity:int, category_id: int):
    """
    Build the report text for one category and track the highest-revenue category.

    The category's share of revenue is computed against the total revenue of all
    rows in the module-level ``row_data`` (price in column 2 times quantity in
    column 3). As a side effect, ``popular_category`` is overwritten whenever this
    category's share is strictly larger than the one stored there.

    :param price: The total revenue of the category's orders.
    :param quantity: The number of orders in the category.
    :param category_id: The unique identifier for the category.
    :return: A formatted, tab-indented string with the number of orders, total
             revenue and percentage of revenue for the category.
    """
    # Get the total revenue for all the data points
    total_revenue = sum(float(row[2]) * int(row[3]) for row in row_data)
    # Share of the overall revenue; with no revenue at all the share is reported as 0%
    # instead of dividing by zero
    revenue_percentage = (price / total_revenue) * 100 if total_revenue else 0.0
    # Update the popular category if the current category has more revenue percentage than the previous one
    if revenue_percentage > popular_category["percentage"]:
        popular_category["id"] = category_id
        popular_category["revenue"] = price
        popular_category["percentage"] = revenue_percentage
    # format the data for readability
    formatted_string = f"\tCategory {category_id}:\n\t\tNumber of Orders: {quantity}\n\t\tTotal Revenue: ${(price):.2f}\n\t\tPercentage of Revenue: {revenue_percentage:.2f}%\n"
    return formatted_string

# Write the per-category statistics, followed by the top category, to the output file
with open("../output/category_analysis.txt", encoding="utf-8", mode="w") as f:
    f.write("Category Statistics\n")
    # Category ids 1 through 6 are reported. The loop variable is named category_id
    # so the builtin id() is not shadowed for the rest of the notebook.
    for category_id in range(1, 7):
        # rows whose category column (index 1) matches the current category
        category_data = [row for row in row_data if int(row[1]) == category_id]
        # revenue of the category (price * quantity per order) and its number of orders
        total_price = sum(float(row[2]) * int(row[3]) for row in category_data)
        total_quantity_of_orders_made = len(category_data)
        # format the statistics; this call also updates popular_category as a side effect
        new_line = analyze_category(total_price, total_quantity_of_orders_made, category_id)
        f.write(new_line)
        f.write("\n")
    # Write the data for the category recorded in popular_category
    f.write(f"""Most commonly purchased category: {popular_category["id"]}, Total revenue: ${popular_category["revenue"]:.2f}""")


In [614]:
def find_max_value(in_list: []):
    """
    Return the maximum value in a list using Python's built-in max function.

    :param in_list: The values to inspect.
    :return: The largest element of ``in_list``.
    :raises ValueError: If ``in_list`` is empty (raised by ``max``).
    """
    return max(in_list)

def find_min_value(in_list: []):
    """
    Return the minimum value in a list using Python's built-in min function.

    :param in_list: The values to inspect.
    :return: The smallest element of ``in_list``.
    :raises ValueError: If ``in_list`` is empty (raised by ``min``).
    """
    return min(in_list)

def find_average(in_list: []):
    """
    Return the arithmetic mean of the values in a list.

    :param in_list: The numbers to average.
    :return: The sum of the values divided by how many there are.
    :raises ZeroDivisionError: If ``in_list`` is empty.
    """
    total = sum(in_list)
    count = len(in_list)
    return total / count

def run_facts(in_list:[], func_array):
    """
    Apply every function in ``func_array`` to the same input list.

    :param in_list: The list handed to each function.
    :param func_array: An iterable of callables that each accept the list.
    :return: A list with one result per function, in the order the functions were given.
    """
    results = []
    for func in func_array:
        results.append(func(in_list))
    return results

In [615]:
# The order of the functions fixes the order of the results: max, min, average
function_array = [find_max_value, find_min_value, find_average]
# build the sample list from the price column (index 2) of every row in row_data
sample_data = [float(x[2]) for x in row_data]
# run every function on the sample data; the results are read back by position below
function_evaluation = run_facts(sample_data, function_array)
print(f"Max = {function_evaluation[0]:.2f}, Min = {function_evaluation[1]:.2f}, Average = {function_evaluation[2]:.2f}")

Max = 34.96, Min = 2.28, Average = 17.07
