In [2]:
import dask
import dask.bag as db

# A deliberately mixed sequence: non-empty containers next to an empty dict
# and an empty string (both falsy in Python).
nested_containers = [
    [0, 1, 2, 3],
    {},
    [6.5, 3.14],
    'Python',
    {'version': 3},
    '',
]

# Build a bag from the in-memory sequence; each list entry is one bag element.
dask_bag = db.from_sequence(nested_containers)
dask_bag

dask.bag<from_se..., npartitions=6>

In [4]:
# count() only describes the reduction; compute() runs it and returns the
# number of elements in the bag.
dask_bag.count().compute()

6

In [6]:
# Evaluate both reductions in one dask.compute() call so that the tasks they
# share are executed once, instead of triggering two independent .compute()
# runs over the same bag. The result is the tuple (any_result, all_result).
dask.compute(dask_bag.any(), dask_bag.all())

(True, False)

In [9]:
# Read the text file into a bag (the later take() calls show one line per element).
zen = db.read_text('the-zen-of-python.txt')
# Bare last expression rather than print(): the notebook displays the value
# itself, consistent with the other cells.
zen.count().compute()

21


In [10]:
# take(1) returns a tuple holding the first element of the bag.
# (A bare `type(taken)` in the middle of a cell is evaluated and thrown away;
# only the last expression of a cell is displayed.)
taken = zen.take(1)
taken

('The Zen of Python, by Tim Peters\n',)

In [11]:
# First three elements of the bag; each line keeps its trailing newline.
zen.take(3)

('The Zen of Python, by Tim Peters\n',
 '\n',
 'Beautiful is better than ugly.\n')

In [17]:
import glob

# NOTE(review): despite the name, the pattern selects 'dask_*.py' Python files
# in the current working directory, not .txt files.
# NOTE(review): glob.glob() does not guarantee any particular ordering; wrap
# the call in sorted() if a stable listing matters.
txt_files = glob.glob('dask_*.py')
txt_files

['dask_delayed.py',
 'dask_aggregating_delayed.py',
 'dask_multi_arrays.py',
 'dask_chunking_arrays2.py',
 'dask_analyzing_weather_data.py',
 'dask_timing_computations_hdi5.py',
 'dask_nyc_taxi_rides.py',
 'dask_timing_dataframes.py',
 'dask_dataframes.py']

In [19]:
# Functional approaches in plain Python (built-in map/filter), as a warm-up for dask bags

def squared(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)
# map() is lazy: this only builds an iterator over 1..6, nothing is squared yet.
squares = map(squared, range(1, 7))
squares

<map at 0x11149c160>

In [20]:
# Drain the lazy map iterator into a concrete list so the values can be shown
# and reused.
squares = [*squares]
squares

[1, 4, 9, 16, 25, 36]

In [21]:
def is_even(x):
    """Return True when ``x`` leaves no remainder on division by two."""
    remainder = x % 2
    return remainder == 0
# filter() is lazy as well; unpacking it into a list forces the evaluation.
evens = filter(is_even, range(1, 7))
[*evens]

[2, 4, 6]

In [23]:
# `squares` is the list materialised earlier, so it can be iterated again here.
# The filter object itself is a single-use iterator; list() consumes it.
even_squares = filter(is_even, squares)
list(even_squares)

[4, 16, 36]

In [24]:
# With dask bags
import dask.bag as db

# Same pipeline as the built-in map() above, expressed on a dask bag.
numbers = db.from_sequence([1, 2, 3, 4, 5, 6])
# Bag.map() is lazy: it returns a new bag describing the work, not the values.
# Note that this rebinds `squares`, which previously held a plain list.
squares = numbers.map(squared)
squares

dask.bag<map-squ..., npartitions=6>

In [25]:
# compute() executes the pending work. The computed result is a plain list,
# not a dask bag, so it has to be able to fit in memory.
result = squares.compute()
result

[1, 4, 9, 16, 25, 36]

In [26]:
# Rebuilds the same six-element bag so this cell can run on its own.
numbers = db.from_sequence([1, 2, 3, 4, 5, 6])

# Bag.filter() keeps the elements for which the predicate returns True;
# nothing is evaluated until compute() is called.
evens = numbers.filter(is_even)
evens.compute()

[2, 4, 6]

In [27]:
# Chain the two steps: square every element first, then keep the even results.
even_squares = (
    numbers
    .map(squared)
    .filter(is_even)
)
even_squares.compute()

[4, 16, 36]

In [28]:
# Re-read the text file into a bag (same source file as the earlier `zen` cell).
zen = db.read_text('the-zen-of-python.txt')
# The .str accessor applies a string method (here upper()) to every element.
uppercase = zen.str.upper()
# take(1) returns a one-element tuple with the first transformed line.
uppercase.take(1)

('THE ZEN OF PYTHON, BY TIM PETERS\n',)

In [29]:
# map can also be used, but it's less efficient
def my_upper(string):
    """Return ``string`` converted to upper case via its own ``upper()`` method."""
    shouted = string.upper()
    return shouted

# Equivalent result through a user-defined function passed to Bag.map().
my_uppercase = zen.map(my_upper)
# The first element matches the .str.upper() output shown earlier.
my_uppercase.take(1)

('THE ZEN OF PYTHON, BY TIM PETERS\n',)