In [2]:
import dask
import dask.array as da
import dask.dataframe as dd
import dask.bag as db
import dask.delayed as delayed
from dask.distributed import Client

# Global flag to track the success of tests.
# Every test_* step below runs only while this is True and sets it to False
# when it catches an exception, so all steps after the first failure are skipped.
all_tests_successful = True

def test_dask_imports():
    """Step 1: check that the dask modules can be imported.

    Does nothing when an earlier step has already failed. Any exception is
    reported on stdout and clears the global ``all_tests_successful`` flag.
    """
    global all_tests_successful
    if not all_tests_successful:
        return

    try:
        # Imports are function-local on purpose: a failure is caught and
        # reported here. Order is kept so the first failing import is the
        # one that gets reported.
        import dask
        import dask.array as da
        import dask.dataframe as dd
        import dask.bag as db
        import dask.delayed as delayed
        import dask.distributed
        print("Step 1: Importing dask modules passed.")
    except Exception as err:
        print(f"Step 1 failed: {err!s}")
        all_tests_successful = False

def test_dask_array_operations():
    """Step 2: compute the sum and mean of a chunked random dask array.

    Does nothing when an earlier step has already failed. Any exception is
    reported on stdout and clears the global ``all_tests_successful`` flag.
    """
    global all_tests_successful
    if not all_tests_successful:
        return

    try:
        # 1000x1000 random array split into 100x100 chunks.
        arr = da.random.random((1000, 1000), chunks=(100, 100))
        total = arr.sum().compute()
        average = arr.mean().compute()
        print(f"Step 2: Dask array operations passed (sum={total}, mean={average}).")
    except Exception as err:
        print(f"Step 2 failed: {err!s}")
        all_tests_successful = False

def select_columns(df, columns=None):
    """Return ``df[columns]``, or ``df`` itself when ``columns`` is None.

    Only ``None`` means "no selection"; any other value, including falsy
    ones, is used to index ``df``.
    """
    return df if columns is None else df[columns]

def test_dask_dataframe_operations():
    """Step 3: build a demo timeseries DataFrame and reduce two of its columns.

    Does nothing when an earlier step has already failed. Any exception is
    reported on stdout and clears the global ``all_tests_successful`` flag.
    """
    global all_tests_successful
    if not all_tests_successful:
        return

    try:
        timeseries = dd.demo.make_timeseries(
            start='2000',
            end='2001',
            freq='1D',
            partition_freq='1ME',
            seed=42,
        )

        # Pass the column list as a keyword argument so each partition is
        # narrowed by select_columns to just these columns.
        wanted = ['x', 'y', 'id']
        projected = timeseries.map_partitions(select_columns, columns=wanted)

        x_mean = projected['x'].mean().compute()
        y_max = projected['y'].max().compute()
        print(f"Step 3: Dask DataFrame operations passed (mean_x={x_mean}, max_y={y_max}).")
    except Exception as err:
        print(f"Step 3 failed: {err!s}")
        all_tests_successful = False

def test_dask_bag_operations():
    """Step 4: compute the sum and mean of a dask bag built from ``range(1000)``.

    Does nothing when an earlier step has already failed. Any exception is
    reported on stdout and clears the global ``all_tests_successful`` flag.
    """
    global all_tests_successful
    if not all_tests_successful:
        return

    try:
        # The integers 0..999 spread over 10 partitions.
        bag = db.from_sequence(range(1000), npartitions=10)
        total = bag.sum().compute()
        average = bag.mean().compute()
        print(f"Step 4: Dask Bag operations passed (sum={total}, mean={average}).")
    except Exception as err:
        print(f"Step 4 failed: {err!s}")
        all_tests_successful = False

def test_dask_delayed_operations():
    """Step 5: build a small task graph with ``delayed`` and compute it.

    The graph is ``add(inc(10), inc(20))``. Does nothing when an earlier step
    has already failed. Any exception is reported on stdout and clears the
    global ``all_tests_successful`` flag.
    """
    global all_tests_successful
    if not all_tests_successful:
        return

    try:
        @delayed
        def inc(x):
            return x + 1

        @delayed
        def add(x, y):
            return x + y

        # Nothing is evaluated until .compute() is called on the final node.
        graph = add(inc(10), inc(20))
        outcome = graph.compute()
        print(f"Step 5: Dask Delayed operations passed (result={outcome}).")
    except Exception as err:
        print(f"Step 5 failed: {err!s}")
        all_tests_successful = False

def test_dask_distributed_operations():
    """Step 6: compute array reductions while a ``Client()`` is open.

    The client is created with default arguments and is always closed before
    the result is reported, including when a computation raises.

    Does nothing when an earlier step has already failed. Any exception is
    reported on stdout and clears the global ``all_tests_successful`` flag.
    """
    global all_tests_successful
    if not all_tests_successful:
        return

    try:
        # Context manager guarantees the client is closed even if one of the
        # computations below raises; a bare close() call would be skipped in
        # that case and leak the client.
        with Client():
            x = da.random.random((1000, 1000), chunks=(100, 100))
            x_sum = x.sum().compute()
            x_mean = x.mean().compute()
        print(f"Step 6: Dask Distributed operations passed (sum={x_sum}, mean={x_mean}).")
    except Exception as e:
        print(f"Step 6 failed: {str(e)}")
        all_tests_successful = False

def run_tests():
    """Run every step in order, then print an overall pass/fail summary.

    Each step checks the global ``all_tests_successful`` flag itself, so once
    one step fails the remaining ones return without doing anything.
    """
    steps = (
        test_dask_imports,
        test_dask_array_operations,
        test_dask_dataframe_operations,
        test_dask_bag_operations,
        test_dask_delayed_operations,
        test_dask_distributed_operations,
    )
    for step in steps:
        step()

    if not all_tests_successful:
        print("Some steps failed. Please check the messages above for details.")
        return
    print("All tests for the 'dask' package completed successfully.")

# Entry point: run all steps when this code is executed as the main module.
if __name__ == "__main__":
    run_tests()


Step 1: Importing dask modules passed.
Step 2: Dask array operations passed (sum=499618.53905399126, mean=0.4996185390539913).
Step 3: Dask DataFrame operations passed (mean_x=0.02196081431020074, max_y=0.9957366905300404).




Step 4: Dask Bag operations passed (sum=499500, mean=499.5).
Step 5: Dask Delayed operations passed (result=32).
Step 6: Dask Distributed operations passed (sum=500012.5572649956, mean=0.5000125572649956).
All tests for the 'dask' package completed successfully.
