In [5]:
import time

def square_numbers_loop(numbers):
    result = []
    for num in numbers:
        result.append(num ** 2)
    return result

# Let's test this with 100,000 numbers to see the performance
test_numbers = list(range(1000000))

start_time = time.time()
squared_loop = square_numbers_loop(test_numbers)
loop_time = time.time() - start_time
print(f"Loop time: {loop_time:.4f} seconds")


Loop time: 0.0840 seconds


In [6]:
def square_numbers_comprehension(numbers):
    return [num ** 2 for num in numbers]  # Create the entire list in one line

start_time = time.time()
squared_comprehension = square_numbers_comprehension(test_numbers)
comprehension_time = time.time() - start_time
print(f"Comprehension time: {comprehension_time:.4f} seconds")
print(f"Improvement: {loop_time / comprehension_time:.2f}x faster")


Comprehension time: 0.0736 seconds
Improvement: 1.14x faster


In [7]:
def find_common_elements_list(list1, list2):
    common = []
    for item in list1:  # Go through each item in the first list
        if item in list2:  # Check if it exists in the second list
            common.append(item)  # If yes, add it to our common list
    return common

# Test with reasonably large lists
large_list1 = list(range(10000))
large_list2 = list(range(5000, 15000))

start_time = time.time()
common_list = find_common_elements_list(large_list1, large_list2)
list_time = time.time() - start_time
print(f"List approach time: {list_time:.4f} seconds")


List approach time: 0.8478 seconds


In [8]:
def find_common_elements_set(list1, list2):
    set2 = set(list2)  # Convert list to a set (one-time cost)
    return [item for item in list1 if item in set2]  # Check membership in set

start_time = time.time()
common_set = find_common_elements_set(large_list1, large_list2)
set_time = time.time() - start_time
print(f"Set approach time: {set_time:.4f} seconds")
print(f"Improvement: {list_time / set_time:.2f}x faster")


Set approach time: 0.0010 seconds
Improvement: 863.53x faster


In [9]:
def calculate_sum_manual(numbers):
    total = 0
    for num in numbers:
        total += num
    return total

def find_max_manual(numbers):
    max_val = numbers[0]
    for num in numbers[1:]:
        if num > max_val:
            max_val = num
    return max_val

test_numbers = list(range(1000000))

start_time = time.time()
manual_sum = calculate_sum_manual(test_numbers)
manual_max = find_max_manual(test_numbers)
manual_time = time.time() - start_time
print(f"Manual approach time: {manual_time:.4f} seconds")



Manual approach time: 0.0805 seconds


In [10]:
start_time = time.time()
builtin_sum = sum(test_numbers)
builtin_max = max(test_numbers)
builtin_time = time.time() - start_time
print(f"Built-in approach time: {builtin_time:.4f} seconds")
print(f"Improvement: {manual_time / builtin_time:.2f}x faster")


Built-in approach time: 0.0413 seconds
Improvement: 1.95x faster


In [13]:
def create_csv_plus(data):
    result = ""  # Start with an empty string
    for row in data:  # Go through each row of data
        for i, item in enumerate(row):  # Go through each item in the row
            result += str(item)  # Add the item to our result string
            if i < len(row) - 1:  # If it's not the last item
                result += ","     # Add a comma
        result += "\n"  # Add a newline after each row
    return result

# Test data: 1000 rows with 10 columns each
test_data = [[f"item_{i}_{j}" for j in range(10)] for i in range(1000)]

start_time = time.time()
csv_plus = create_csv_plus(test_data)
plus_time = time.time() - start_time
print(f"String concatenation time: {plus_time:.4f} seconds")


String concatenation time: 0.0043 seconds


In [14]:
def create_csv_join(data):
    # For each row, join the items with commas, then join all rows with newlines
    return "\n".join(",".join(str(item) for item in row) for row in data)

start_time = time.time()
csv_join = create_csv_join(test_data)
join_time = time.time() - start_time
print(f"Join method time: {join_time:.4f} seconds")
print(f"Improvement: {plus_time / join_time:.2f}x faster")


Join method time: 0.0022 seconds
Improvement: 1.94x faster


In [16]:
import sys

def process_large_dataset_list(n):
    processed_data = []
    for i in range(n):
        # Simulate some data processing
        processed_value = i ** 2 + i * 3 + 42
        processed_data.append(processed_value)  # Store each processed value
    return processed_data

# Test with 100,000 items
n = 100000
list_result = process_large_dataset_list(n)
list_memory = sys.getsizeof(list_result)
print(f"List memory usage: {list_memory:,} bytes")

List memory usage: 800,984 bytes


In [17]:
def process_large_dataset_generator(n):
    for i in range(n):
        # Simulate some data processing
        processed_value = i ** 2 + i * 3 + 42
        yield processed_value  # Yield each value instead of storing it

# Create the generator (this doesn't process anything yet!)
gen_result = process_large_dataset_generator(n)
gen_memory = sys.getsizeof(gen_result)
print(f"Generator memory usage: {gen_memory:,} bytes")
print(f"Memory improvement: {list_memory / gen_memory:.0f}x less memory")

# Now we can process items one at a time
total = 0
for value in process_large_dataset_generator(n):
    total += value
    # Each value is processed on-demand and can be garbage collected



Generator memory usage: 224 bytes
Memory improvement: 3576x less memory
