In [1]:
import pandas as pd

import apache_beam as beam
import apache_beam.runners.interactive.interactive_beam as ib
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
from apache_beam.options import pipeline_options
from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.io.gcp.internal.clients import bigquery
import google.auth

In [2]:
def show_data(readable_file):
    """Print one pipeline element to standard output.

    Args:
        readable_file: The element to display. Despite the name, the
            pipeline in this notebook passes one query result row at a time.

    Returns:
        None. The element is only printed.
    """
    rendered = str(readable_file)
    print(rendered)

In [8]:
def read_table_BQ(query):
    """Run ``query`` against BigQuery and print every resulting row.

    Builds a Beam pipeline from the module-level ``RUNNER`` and ``opts``
    (both are looked up when the function is called, so they must be
    defined before the first call), reads the result of ``query`` using
    standard SQL and hands each element to ``show_data``. The pipeline is
    executed when the ``with`` block exits.

    Args:
        query: BigQuery standard-SQL statement to execute.

    Returns:
        None. Rows are only printed.
    """
    # NOTE(review): beam.io.BigQuerySource is deprecated in newer Beam
    # releases in favour of beam.io.ReadFromBigQuery, which needs a GCS
    # temp location that this notebook does not configure. Left unchanged.
    bq_source = beam.io.BigQuerySource(query=query, use_standard_sql=True)

    with beam.Pipeline(RUNNER, options=opts) as p:
        (
            p
            | 'Reading bq' >> beam.io.Read(bq_source)
            # Map, not FlatMap: show_data is a one-in/one-out side-effect
            # callback that returns None. FlatMap expects an iterable and
            # would iterate over whatever show_data might return later.
            | 'show data' >> beam.Map(show_data)
        )

In [9]:
# Project-wide configuration shared by the pipeline helpers in this notebook.
bucket_name = "fk_dotz_bucket"

PROJECT = 'teste-dotz-292803'
dataset = 'dotz'
# The 'region' pipeline option expects a region name. The previous value,
# 'us-east1-b', is a zone inside the us-east1 region, not a region.
REGION = 'us-east1'

options = {
    'project': PROJECT,
    'region': REGION,
    'teardown_policy': 'TEARDOWN_ALWAYS',
    # NOTE(review): Beam's documented option is `save_main_session`;
    # confirm that this key is actually honoured.
    'no_save_main_session': True,
}

RUNNER = 'DirectRunner'  # switch to 'DataflowRunner' to run on Dataflow
# flags=[] stops PipelineOptions from parsing the notebook kernel's own
# command-line arguments (sys.argv).
opts = beam.pipeline.PipelineOptions(flags=[], **options)



In [10]:
# Unpivot the component_id_N / quantity_N column pairs (N = 1..8) of
# bill_of_materials into (tube_assembly_id, component_id, quantity) rows,
# de-duplicate them, and print the first 10 rows ordered by
# tube_assembly_id descending.
# NOTE(review): the branches for N = 2..8 drop NULL component ids, but the
# component_id_1 branch has no such filter, so rows with component_id = None
# are returned (visible in the printed output). Confirm this is intentional.
query = '''
    WITH ref as (
      SELECT * FROM `teste-dotz-292803.dotz.bill_of_materials`
    ), table_union as (
      SELECT tube_assembly_id, component_id_8 component_id, quantity_8 quantity FROM ref WHERE component_id_8 IS NOT NULL
      UNION ALL
      SELECT tube_assembly_id, component_id_7, quantity_7 FROM ref WHERE component_id_7 IS NOT NULL
      UNION ALL
      SELECT tube_assembly_id, component_id_6, quantity_6 FROM ref WHERE component_id_6 IS NOT NULL
      UNION ALL
      SELECT tube_assembly_id, component_id_5, quantity_5 FROM ref WHERE component_id_5 IS NOT NULL
      UNION ALL
      SELECT tube_assembly_id, component_id_4, quantity_4 FROM ref WHERE component_id_4 IS NOT NULL
      UNION ALL
      SELECT tube_assembly_id, component_id_3, quantity_3 FROM ref WHERE component_id_3 IS NOT NULL
      UNION ALL
      SELECT tube_assembly_id, component_id_2, quantity_2 FROM ref WHERE component_id_2 IS NOT NULL
      UNION ALL
      SELECT tube_assembly_id, component_id_1, quantity_1 FROM ref 
    )
    SELECT DISTINCT * FROM table_union  order by tube_assembly_id desc limit 10
'''
read_table_BQ(query)



{'tube_assembly_id': 'TA-21199', 'component_id': None, 'quantity': None}
{'tube_assembly_id': 'TA-21198', 'component_id': None, 'quantity': None}
{'tube_assembly_id': 'TA-21197', 'component_id': 'C-1733', 'quantity': 1.0}
{'tube_assembly_id': 'TA-21196', 'component_id': 'C-1364', 'quantity': 1.0}
{'tube_assembly_id': 'TA-21195', 'component_id': 'C-1373', 'quantity': 1.0}
{'tube_assembly_id': 'TA-21194', 'component_id': 'C-1850', 'quantity': 1.0}
{'tube_assembly_id': 'TA-21194', 'component_id': 'C-1420', 'quantity': 1.0}
{'tube_assembly_id': 'TA-21194', 'component_id': 'C-1421', 'quantity': 1.0}
{'tube_assembly_id': 'TA-21193', 'component_id': 'C-1421', 'quantity': 1.0}
{'tube_assembly_id': 'TA-21193', 'component_id': 'C-1850', 'quantity': 1.0}


In [13]:
# Print 10 distinct tube_assembly_id values from bill_of_materials.
# The query has no ORDER BY, so it does not fix which 10 ids are returned.
query = '''
     SELECT distinct tube_assembly_id FROM `teste-dotz-292803.dotz.bill_of_materials` limit 10
'''
read_table_BQ(query)



{'tube_assembly_id': 'TA-00064'}
{'tube_assembly_id': 'TA-00106'}
{'tube_assembly_id': 'TA-00019'}
{'tube_assembly_id': 'TA-00044'}
{'tube_assembly_id': 'TA-00070'}
{'tube_assembly_id': 'TA-00140'}
{'tube_assembly_id': 'TA-00137'}
{'tube_assembly_id': 'TA-00101'}
{'tube_assembly_id': 'TA-00139'}
{'tube_assembly_id': 'TA-00074'}


In [15]:
# Preview 10 rows of price_quote with three derived columns:
#   - quotation_id: ROW_NUMBER() over the whole table
#   - quote_date: the source value parsed with the '%Y-%m-%d' format
#   - bracket_pricing: 1 when the source value is 'Yes', otherwise 0
# NOTE(review): OVER() has no ORDER BY, so the quotation_id assigned to a
# given row is presumably not stable between runs. Confirm before using it
# as a key.
query = '''
    SELECT ROW_NUMBER() OVER() quotation_id, tube_assembly_id, supplier, PARSE_DATE('%Y-%m-%d', quote_date) quote_date, 
        annual_usage, min_order_quantity, 
        case when bracket_pricing = 'Yes' then
          1
          else
          0
        end bracket_pricing,
        quantity, cost 
    FROM `teste-dotz-292803.dotz.price_quote`  limit 10
'''
read_table_BQ(query)



{'quotation_id': 1, 'tube_assembly_id': 'TA-04788', 'supplier': 'S-0026', 'quote_date': '1982-09-22', 'annual_usage': 5, 'min_order_quantity': 0, 'bracket_pricing': 1, 'quantity': 5, 'cost': 6.74649321455796}
{'quotation_id': 9, 'tube_assembly_id': 'TA-00393', 'supplier': 'S-0066', 'quote_date': '1995-04-12', 'annual_usage': 0, 'min_order_quantity': 0, 'bracket_pricing': 1, 'quantity': 5, 'cost': 78.26210334792991}
{'quotation_id': 5, 'tube_assembly_id': 'TA-04788', 'supplier': 'S-0026', 'quote_date': '1982-09-22', 'annual_usage': 5, 'min_order_quantity': 0, 'bracket_pricing': 1, 'quantity': 150, 'cost': 1.1990674538039099}
{'quotation_id': 2, 'tube_assembly_id': 'TA-04788', 'supplier': 'S-0026', 'quote_date': '1982-09-22', 'annual_usage': 5, 'min_order_quantity': 0, 'bracket_pricing': 1, 'quantity': 25, 'cost': 2.1171469427953}
{'quotation_id': 8, 'tube_assembly_id': 'TA-00393', 'supplier': 'S-0066', 'quote_date': '1995-04-12', 'annual_usage': 0, 'min_order_quantity': 0, 'bracket_pric

In [16]:
# Preview 10 rows of comp_boss. The groove, unique_feature and orientation
# columns are recoded to 1 when the source value is 'Yes' and 0 otherwise
# (including NULL), and weight is rounded to 3 decimal places.
query = '''
    SELECT component_id, component_type_id, type, connection_type_id, outside_shape, base_type, height_over_tube, bolt_pattern_long,
            case when groove = 'Yes' then
              1
              else
              0
            end groove, 
            base_diameter, shoulder_diameter,  
            case when unique_feature = 'Yes' then
              1
              else
              0
            end unique_feature,  
            case when orientation = 'Yes' then
              1
              else
              0
            end orientation,
            ROUND(weight,3) weight
    FROM `teste-dotz-292803.dotz.comp_boss`  limit 10
'''
read_table_BQ(query)



{'component_id': 'C-0513', 'component_type_id': 'CP-019', 'type': None, 'connection_type_id': '9999', 'outside_shape': None, 'base_type': None, 'height_over_tube': 35.0, 'bolt_pattern_long': 77.77, 'groove': 0, 'base_diameter': None, 'shoulder_diameter': None, 'unique_feature': 0, 'orientation': 1, 'weight': 2.14}
{'component_id': 'C-0161', 'component_type_id': 'CP-020', 'type': None, 'connection_type_id': '9999', 'outside_shape': None, 'base_type': None, 'height_over_tube': 19.2, 'bolt_pattern_long': 76.2, 'groove': 0, 'base_diameter': None, 'shoulder_diameter': None, 'unique_feature': 0, 'orientation': 1, 'weight': 0.559}
{'component_id': 'C-0899', 'component_type_id': 'CP-020', 'type': None, 'connection_type_id': '9999', 'outside_shape': None, 'base_type': None, 'height_over_tube': 8.3, 'bolt_pattern_long': 76.2, 'groove': 0, 'base_diameter': None, 'shoulder_diameter': None, 'unique_feature': 0, 'orientation': 1, 'weight': 0.426}
{'component_id': 'C-0780', 'component_type_id': 'CP-0