### Pipeline - Branching
(multiple Transforms applied to a single PCollection)
#### 1- Individual outputs:

In [3]:
import apache_beam as beam

# Branching pipeline: a single input PCollection feeds two independent
# branches, and each branch writes its own output file.
with beam.Pipeline() as pipe:
    # Shared source: every text line is split on commas into a list of fields.
    input_collection = (
        pipe
        | "Read file" >> beam.io.ReadFromText("data/dept_data.txt")
        | "Split" >> beam.Map(lambda line: line.split(","))
    )

    # Branch 1: keep rows whose field at index 3 is 'Accounts', key each row
    # by "Accounts, <field 1>", and sum a 1 per row for every key.
    accounts_count = (
        input_collection
        | "Get all Accounts dept persons" >> beam.Filter(
            lambda fields: fields[3] == "Accounts"
        )
        | "Pair each accounts employee with 1" >> beam.Map(
            lambda fields: (f"Accounts, {fields[1]}", 1)
        )
        | "Group and sum Accounts" >> beam.CombinePerKey(sum)
        | "Write results for account" >> beam.io.WriteToText("output/Account")
    )

    # Branch 2: the same steps for rows whose field at index 3 is 'HR'.
    hr_count = (
        input_collection
        | "Get all HR dept persons" >> beam.Filter(
            lambda fields: fields[3] == "HR"
        )
        | "Pair each hr employee with 1" >> beam.Map(
            lambda fields: (f"HR, {fields[1]}", 1)
        )
        | "Group and sum HR" >> beam.CombinePerKey(sum)
        | "Write results for hr" >> beam.io.WriteToText("output/HR")
    )
    
# Visualize output: print the first 20 lines of each branch's result file
# (IPython shell escape). The file names assume each write produced exactly
# one shard, i.e. "<prefix>-00000-of-00001".
!{('head -n 20 output/Account-00000-of-00001')}

!{('head -n 20 output/HR-00000-of-00001')}



('Accounts, Marco', 31)
('Accounts, Rebekah', 31)
('Accounts, Itoe', 31)
('Accounts, Edouard', 31)
('Accounts, Kyle', 62)
('Accounts, Kumiko', 31)
('Accounts, Gaston', 31)
('Accounts, Ayumi', 30)
('HR, Beryl', 62)
('HR, Olga', 31)
('HR, Leslie', 31)
('HR, Mindy', 31)
('HR, Vicky', 31)
('HR, Richard', 31)
('HR, Kirk', 31)
('HR, Kaori', 31)
('HR, Oscar', 31)


#### 2.- Single output:

In [2]:
import apache_beam as beam

# Branching pipeline with a single sink: one input PCollection feeds two
# branches, and the branch results are merged and written to one output file.
with beam.Pipeline() as pipe:

    # Shared source: every text line is split on commas into a list of fields.
    input_collection = (
        pipe
        | "Read from text file" >> beam.io.ReadFromText('data/dept_data.txt')
        | "Split rows" >> beam.Map(lambda record: record.split(','))
    )

    # Branch 1: keep rows whose field at index 3 is 'Accounts', key each row
    # by "Accounts, <field 1>", and sum a 1 per row for every key.
    accounts_count = (
        input_collection
        | 'Get all Accounts dept persons' >> beam.Filter(lambda record: record[3] == 'Accounts')
        | 'Pair each accounts employee with 1' >> beam.Map(lambda record: ("Accounts, " + record[1], 1))
        | 'Group and sum Accounts' >> beam.CombinePerKey(sum)
    )

    # Branch 2: the same steps for rows whose field at index 3 is 'HR'.
    hr_count = (
        input_collection
        | 'Get all HR dept persons' >> beam.Filter(lambda record: record[3] == 'HR')
        | 'Pair each hr employee with 1' >> beam.Map(lambda record: ("HR, " + record[1], 1))
        | 'Group and sum HR' >> beam.CombinePerKey(sum)
    )

    # Merge both branch results into one PCollection and write it once.
    # These two steps are labeled explicitly, like every other step in the
    # pipeline, instead of relying on auto-generated transform names.
    output = (
        (accounts_count, hr_count)
        | 'Merge Accounts and HR counts' >> beam.Flatten()
        | 'Write merged results' >> beam.io.WriteToText('output/both')
    )

# Visualize output: print the first 15 lines of the merged result file
# (IPython shell escape). The file name assumes the write produced exactly
# one shard, i.e. "both-00000-of-00001".
!{('head -n 15 output/both-00000-of-00001')}

('Accounts, Marco', 31)
('Accounts, Rebekah', 31)
('Accounts, Itoe', 31)
('Accounts, Edouard', 31)
('Accounts, Kyle', 62)
('Accounts, Kumiko', 31)
('Accounts, Gaston', 31)
('Accounts, Ayumi', 30)
('HR, Beryl', 62)
('HR, Olga', 31)
('HR, Leslie', 31)
('HR, Mindy', 31)
('HR, Vicky', 31)
('HR, Richard', 31)
('HR, Kirk', 31)
