### Running a "local" batch job

In [None]:
import apache_beam as beam
import csv


# Local batch job: read the gzipped airports CSV, keep three columns, and
# write them back out as comma-separated text.
#
# The `with` block runs the pipeline and waits for it to finish when the block
# exits, so there must be no explicit `pipeline.run()` call inside it --
# calling it as well would execute the whole job a second time.
with beam.Pipeline('DirectRunner') as pipeline:

  airports = (
      pipeline
      | 'ReadAirports' >> beam.io.ReadFromText('gs://echan-dataflow/airports.csv.gz')
      # csv.reader (rather than str.split) so quoted fields that contain
      # commas stay in one piece.
      | 'ParseCsvLine' >> beam.Map(lambda line: next(csv.reader([line])))
      # Keep only columns 0, 21 and 26 of each parsed row.
      | 'SelectColumns' >> beam.Map(lambda fields: (fields[0], fields[21], fields[26]))
  )

  (
      airports
      # Re-join the three selected string fields into one output line.
      | 'FormatCsvLine' >> beam.Map(lambda airport_data: ','.join(airport_data))
      | 'WriteAirports' >> beam.io.WriteToText('gs://echan-dataflow/extracted_airports')
  )

### Launching a batch job on Google Dataflow

In [None]:
from apache_beam.options import pipeline_options
from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.runners import DataflowRunner
import google.auth
# import csv


# Build the pipeline options in code; flags=[] means no command-line flags
# are parsed into the options.
options = pipeline_options.PipelineOptions(flags=[])

# All Google Cloud settings live on the same view of `options`.
gcp_options = options.view_as(GoogleCloudOptions)
# google.auth.default() returns (credentials, project_id); only the project
# id is needed here.
_, gcp_options.project = google.auth.default()
gcp_options.job_name = 'my-first-dataflow'
gcp_options.region = 'us-west1'
gcp_options.staging_location = 'gs://echan-dataflow/staging'
gcp_options.temp_location = 'gs://echan-dataflow/temp'

# Point the job at the locally built SDK tarball that matches the installed
# Beam version.
options.view_as(pipeline_options.SetupOptions).sdk_location = (
    '/root/apache-beam-custom/packages/beam/sdks/python/dist/'
    f'apache-beam-{beam.version.__version__}0.tar.gz')


def _parse_airport_line(line):
  """Parse one CSV line and return columns 0, 21 and 26 as a tuple.

  Uses csv.reader so quoted fields containing commas are handled the same
  way as in the local (DirectRunner) job.
  """
  # Imported inside the function so the name resolves on the remote workers
  # even though the main session is not pickled (save_main_session is unset).
  import csv

  fields = next(csv.reader([line]))
  return fields[0], fields[21], fields[26]


# The pipeline is deliberately NOT built inside a `with` block: leaving a
# `with beam.Pipeline(...)` block runs the pipeline on its default runner,
# which would execute the job a second time on top of the explicit Dataflow
# submission below.
pipeline = beam.Pipeline(options=options)

airports = (
    pipeline
    | 'ReadAirports' >> beam.io.ReadFromText('gs://echan-dataflow/airports.csv.gz')
    | 'ParseAirport' >> beam.Map(_parse_airport_line)
)

(
    airports
    # Re-join the three selected string fields into one output line.
    | 'FormatCsvLine' >> beam.Map(lambda airport_data: ','.join(airport_data))
    | 'WriteAirports' >> beam.io.WriteToText('gs://echan-dataflow/extracted_airports_cloud')
)

# Submit the job to Google Dataflow exactly once.
DataflowRunner().run_pipeline(pipeline, options=options)