In [6]:
# Project: Optimized Employee Data Storage
# Description: Using NumPy Structured Arrays to mimic C-structs for memory-efficient data storage.

import numpy as np

# 1. Define Data Structure
# Every record is laid out like a C struct (or one row of a SQL table / CSV):
#   name   -> "U10": fixed-width Unicode string holding up to 10 characters
#   age    -> "i4":  32-bit integer
#   salary -> "f8":  64-bit float
employee_dtype = [
    ("name", "U10"),
    ("age", "i4"),
    ("salary", "f8"),
]

# 2. Initialize Data
# Records of mixed field types are packed back to back in one contiguous
# buffer rather than being kept as separate Python objects.
# NOTE: names longer than the 10-character field width would not fit the
# "U10" field; widen the dtype before adding longer names.
employees = np.array(
    [
        ("Alice", 25, 50000.0),
        ("Bob", 30, 60000.0),
        ("Carol", 22, 45000.0),
        ("Dave", 40, 82000.0),
    ],
    dtype=employee_dtype,
)

# 3. Data Analysis
print("--- Employee Roster ---")
print(employees)

# Indexing by field name yields the whole column at once, so the mean is
# computed by NumPy without an explicit Python loop.
salaries = employees["salary"]
average_salary = np.mean(salaries)
print(f"\nAverage Salary: ${average_salary:,.2f}")

# Filtering: a boolean mask keeps only the records whose salary is strictly
# above the average.
above_average_mask = salaries > average_salary
high_earners = employees[above_average_mask]
print("\n--- High Earners ---")
print(high_earners["name"])

# 4. Memory Optimization
# itemsize is the number of bytes one record occupies. NumPy stores "U"
# strings at 4 bytes per character, so: 10 * 4 (name) + 4 (age) + 8 (salary).
print(f"\nItem Size (bytes): {employees.itemsize}")

--- Employee Roster ---
[('Alice', 25, 50000.) ('Bob', 30, 60000.) ('Carol', 22, 45000.)
 ('Dave', 40, 82000.)]

Average Salary: $59,250.00

--- High Earners ---
['Bob' 'Dave']

Item Size (bytes): 52
