# In [None]:
import numpy as np
import pandas as pd
import time as time

# Sample data: a small hand-written stand-in for a dataset of 1990s action
# movies. With only five rows the timings below are illustrative, not a
# rigorous benchmark.
data = {
    "title": ["Movie A", "Movie B", "Movie C", "Movie D", "Movie E"],
    "duration": [85, 120, 95, 88, 75],  # Movie durations in minutes
    "rating": [7.5, 8.2, 6.9, 7.8, 5.4],
}

action_movies_1990s = pd.DataFrame(data)

# A movie is "short" when its duration is strictly below this many minutes.
# Defined once so every section below applies exactly the same rule.
SHORT_MOVIE_MAX_MINUTES = 90

# --- 1. Inefficient approach using iterrows() ---
# iterrows() yields an (index, Series) pair per row, so the comparison runs
# once per row in Python. It is kept only as the slow baseline to compare
# against. perf_counter() is used instead of time.time() because it is a
# monotonic, high-resolution clock intended for measuring short intervals.
start_time = time.perf_counter()
short_movie_count = 0
for _, row in action_movies_1990s.iterrows():  # the index label is not needed
    if row["duration"] < SHORT_MOVIE_MAX_MINUTES:
        short_movie_count += 1
end_time = time.perf_counter()
loop_time = end_time - start_time

print(f"Short movies count (iterrows approach): {short_movie_count}")
print(f"Loop execution time: {loop_time:.6f} seconds")

# --- 2. Efficient vectorized approach ---
# Comparing the whole column at once produces a boolean Series; summing it
# counts the True entries.
start_time = time.perf_counter()
short_movie_count_vectorized = (
    action_movies_1990s["duration"] < SHORT_MOVIE_MAX_MINUTES
).sum()
end_time = time.perf_counter()
vectorized_time = end_time - start_time

print(f"Short movies count (vectorized approach): {short_movie_count_vectorized}")
print(f"Vectorized execution time: {vectorized_time:.6f} seconds")

# --- 3. Using apply() to categorize movies ---
# NOTE: Series.apply() invokes the lambda once per element, so it is more
# concise than iterrows() but is not a vectorized operation. Section 4 shows
# the vectorized way to express an element-wise condition.
action_movies_1990s["category"] = action_movies_1990s["duration"].apply(
    lambda minutes: "Short" if minutes < SHORT_MOVIE_MAX_MINUTES else "Long"
)
print("\nCategorized DataFrame:")
print(action_movies_1990s)

# --- 4. Using NumPy for conditional operations ---
# np.where(condition, a, b) picks a where the condition holds and b elsewhere,
# evaluated over the whole column in one call.
action_movies_1990s["fast_watch"] = np.where(
    action_movies_1990s["duration"] < SHORT_MOVIE_MAX_MINUTES, True, False
)
print("\nDataFrame with NumPy fast_watch column:")
print(action_movies_1990s)

# --- 5. Performance comparison summary ---
print("\nPerformance Summary:")
print(f"Loop execution time: {loop_time:.6f} seconds")
print(f"Vectorized execution time: {vectorized_time:.6f} seconds")