# Imports

In [4]:
import multiprocessing
import random
import time
import threading

# Functions

In [5]:
def generate_and_add_numbers(n: int = 1000):
    total = 0
    for i in range(n):
        total += random.randint(0,1000000)
    return total


def generate_and_join_letters(n: int = 1000):
    letters = ''
    for i in range(n):
        letters += chr(random.randint(33, 126))
    return letters

# Sequential Execution

In [6]:
print("Starting the Program")
total_start_time = time.time()

generate_and_add_numbers(int(1e7))
generate_and_join_letters(int(1e7))

total_end_time = time.time()
print("Exiting the Program")
sequential_execution_time = total_end_time - total_start_time
print(f"It took {sequential_execution_time}s to execute the tasks.")

Starting the Program
Exiting the Program
It took 6.6638617515563965s to execute the tasks.


# Threads execution

In [7]:
print("Starting the  thread program")
total_start_time = time.time()

thread_numbers = threading.Thread(target=generate_and_add_numbers, args=[int(1e7)])
thread_letters = threading.Thread(target=generate_and_join_letters, args=[int(1e7)])

thread_numbers.start()
thread_letters.start()

thread_numbers.join()
thread_letters.join()

total_end_time = time.time()
print("Exiting the thread program")
thread_execution_time = total_end_time - total_start_time
print(f"It took {thread_execution_time}s to execute the tasks with thread.")

Starting the  thread program
Exiting the thread program
It took 6.890458345413208s to execute the tasks with thread.


# Trials with the same threads

In [8]:
print("Starting the two threads program for generate numbers")
total_start_time = time.time()

thread_numbers = threading.Thread(target=generate_and_add_numbers, args=[int(1e7)])
thread_letters = threading.Thread(target=generate_and_add_numbers, args=[int(1e7)])

thread_numbers.start()
thread_letters.start()

thread_numbers.join()
thread_letters.join()

total_end_time = time.time()
print("Exiting two threads program for generate numbers")
execution_time = total_end_time - total_start_time
print(f"It took {execution_time}s to execute the tasks with threads.")

Starting the two threads program for generate numbers
Exiting two threads program for generate numbers
It took 6.923914670944214s to execute the tasks with threads.


In [9]:
print("Starting the two threads program for generate letters")
total_start_time = time.time()

thread_numbers = threading.Thread(target=generate_and_add_numbers, args=[int(1e7)])
thread_letters = threading.Thread(target=generate_and_add_numbers, args=[int(1e7)])

thread_numbers.start()
thread_letters.start()

thread_numbers.join()
thread_letters.join()

total_end_time = time.time()
print("Exiting two threads program for generate letters")
execution_time = total_end_time - total_start_time
print(f"It took {execution_time}s to execute the tasks with threads.")

Starting the two threads program for generate letters
Exiting two threads program for generate letters
It took 7.013241529464722s to execute the tasks with threads.


# Trials With processes

In [10]:
print("Starting the two processs program for generate letters")
total_start_time = time.time()

process_numbers = multiprocessing.Process(target=generate_and_add_numbers, args=[int(1e7)])
process_letters = multiprocessing.Process(target=generate_and_add_numbers, args=[int(1e7)])

process_numbers.start()
process_letters.start()

process_numbers.join()
process_letters.join()

total_end_time = time.time()
print("Exiting two processs program for generate letters")
process_execution_time = total_end_time - total_start_time
print(f"It took {process_execution_time}s to execute the tasks with processs.")

Starting the two processs program for generate letters
Exiting two processs program for generate letters
It took 3.556506395339966s to execute the tasks with processs.


# Interpretations
- Computing the speedups

In [11]:
speedup_thread = sequential_execution_time/thread_execution_time
speedup_processes = sequential_execution_time/process_execution_time

print(f"The speedup using threads is {speedup_thread}")
print(f"The speedup using processes is {speedup_processes}")

The speedup using threads is 0.9671144381842681
The speedup using processes is 1.8737100431726903


- Computing the Efficiency 

In [12]:
np = 4
efficiency_thread = speedup_thread/np
efficiency_processes = speedup_processes/np

print(f"The efficiency using threads is {efficiency_thread}")
print(f"The efficiency using processes is {efficiency_processes}")

The efficiency using threads is 0.24177860954606703
The efficiency using processes is 0.4684275107931726


- Amdhal and Gustafson Laws

In [15]:
P = 0.88
amdhal_s=1/((1-P) + (P/np))
print(amdhal_s)

2.9411764705882355


In [17]:
alpha = 1-0.88
gustafson= np + alpha*(1-np)
print(gustafson)

3.64


# Conclusions

### **Key Takeaways:**

- **Threads**: Threads did not provide much benefit in this case. The speedup was close to 1, and efficiency was low. This suggests that threading may not be the best approach for this particular task or that the overhead of managing threads was high. Threading in Python, especially with the Global Interpreter Lock (GIL), might not always give optimal performance for CPU-bound tasks.

- **Processes**: Processes performed better with a speedup of **1.87** and efficiency of **0.468**, indicating that parallelizing with separate processes (which run in separate memory spaces) can be more effective for tasks that are CPU-bound. This aligns with the observed performance improvement.

- **Amdhal's Law**: The speedup observed using processes is far below the theoretical maximum suggested by Amdhal’s Law (**2.94**), indicating that there are inefficiencies in parallelization. It suggests that further improvements could be made to achieve a speedup closer to the theoretical maximum by reducing overhead or improving parallel workload distribution.

- **Gustafson's Law**: The speedup using Gustafson's Law (**3.64**) suggests that for larger problem sizes, parallelism could lead to even more significant improvements. This indicates that parallelization is more effective for large-scale problems where the workload is substantial enough to benefit from additional processors.

### **Conclusion:**

- **Parallelization with Processes** is more effective than with threads for this specific task, likely due to lower overhead and better CPU utilization with processes.
- **Threading may not always scale well** for CPU-bound tasks in Python, especially when dealing with Python's Global Interpreter Lock (GIL).
- **Amdhal’s Law** provides an upper bound on speedup, which may not always be achievable in real-world scenarios, especially with high overheads or imperfect parallelism.
- **Gustafson’s Law** is more optimistic, showing that parallelization could scale more effectively as the problem size grows, making it more suitable for larger workloads.

Therefore, if you're dealing with larger datasets or more complex tasks, parallelizing with processes is likely to yield better performance. For smaller tasks or workloads, the overhead of parallelism might outweigh the benefits.