## BigQuery Write API
In this notebook we will try out the BigQuery Storage Write API, so that we are ready to send new data to our tables.

For that, we created a .proto file, which can be found at api/proto_message.proto,

and ran the following command:

```protoc -I=. --python_out=. ./proto_message.proto```

This created a .py file, which can be found in the repo. We will now use that generated file to create and serialize the messages we send to BigQuery.

In [1]:
import datetime

In [3]:
import proto_message_pb2 as bqm

In [4]:
# Build a Job message in one step via keyword arguments.
# NOTE(review): id -1 looks like a placeholder for jobs that are not registered — confirm.
job = bqm.Job(id=-1, job="Not registered job")

In [5]:
# Build a Department message in one step via keyword arguments.
# NOTE(review): id -1 looks like a placeholder for departments that are not registered — confirm.
department = bqm.Department(id=-1, department="Not registered department")

In [6]:
# Sample Hired_employee message; the hire date is stored as a
# "YYYY-MM-DD HH:MM:SS" string rather than as a timestamp value.
hired_employee = bqm.Hired_employee(
    id=2000,
    name="Alejandro Rojas",
    datetime=datetime.datetime(2023, 2, 20, 19).strftime("%Y-%m-%d %H:%M:%S"),
    department_id=5,
    job_id=179,
)

In [14]:
 # Scratch check of the epoch-seconds form of a datetime. The datetime is
 # naive, so .timestamp() interprets it in the machine's local time zone
 # (the recorded output below corresponds to a UTC-5 clock).
 datetime.datetime(2023,2,20,19,5,12,34).timestamp()

1676937912.000034

In [9]:
# Serialize the message to bytes; the recorded output below shows the
# name and datetime strings embedded in the encoded payload.
hired_employee.SerializeToString()

b'\x08\xd0\x0f\x12\x0fAlejandro Rojas\x1a\x132023-02-20 19:00:00 \x05(\xb3\x01'

Great! We've created our protobuf messages. 

Now, following a helpful guide written by matthieucham from Stack Labs, we will use a wrapper for our BigQuery calls. We will first try the wrapper he suggests and, if it does not fit our needs, tweak it.

https://dev.to/stack-labs/13-tricks-for-the-new-bigquery-storage-write-api-in-python-296e

In [10]:
"""Wrapper around BigQuery call."""
from __future__ import annotations
from typing import Any, Iterable
import logging
from google.cloud import bigquery_storage
from google.cloud.bigquery_storage_v1 import exceptions as bqstorage_exceptions

from google.cloud.bigquery_storage_v1 import types, writer
from google.protobuf import descriptor_pb2
from google.protobuf.descriptor import Descriptor



class DefaultStreamManager:  # pragma: no cover
    """Manage appends to the ``_default`` write stream of one table.

    The underlying ``writer.AppendRowsStream`` is created on demand: either
    when the manager is entered as a context manager or on the first call to
    ``send_appendrowsrequest``. Whenever the stream is closed (on context
    exit or after a ``StreamClosedError``) the reference is dropped, so the
    next send transparently builds a fresh stream.
    """

    def __init__(
        self,
        table_path: str,
        message_protobuf_descriptor: Descriptor,
        bigquery_storage_write_client: bigquery_storage.BigQueryWriteClient,
    ):
        """Store the stream target and the schema; no stream is created yet.

        Args:
            table_path: Table resource path; ``/_default`` is appended to it
                to form the write-stream name.
            message_protobuf_descriptor: Descriptor of the protobuf message
                type whose serialized rows will be appended.
            bigquery_storage_write_client: Client used to create the stream.
        """
        self.stream_name = f"{table_path}/_default"
        self.message_protobuf_descriptor = message_protobuf_descriptor
        self.write_client = bigquery_storage_write_client
        self.append_rows_stream = None

    def _init_stream(self):
        """Create the underlying ``AppendRowsStream`` from a request template."""
        # Create a template with fields needed for the first request.
        request_template = types.AppendRowsRequest()
        # The initial request must contain the stream name.
        request_template.write_stream = self.stream_name
        # So that BigQuery knows how to parse the serialized_rows, generate a
        # protocol buffer representation of our message descriptor.
        proto_schema = types.ProtoSchema()
        proto_descriptor = descriptor_pb2.DescriptorProto()  # pylint: disable=no-member
        self.message_protobuf_descriptor.CopyToProto(proto_descriptor)
        proto_schema.proto_descriptor = proto_descriptor
        proto_data = types.AppendRowsRequest.ProtoData()
        proto_data.writer_schema = proto_schema
        request_template.proto_rows = proto_data
        # Create an AppendRowsStream using the request template created above.
        self.append_rows_stream = writer.AppendRowsStream(
            self.write_client, request_template
        )

    def _close_stream(self) -> None:
        """Close the current stream, if any, and forget it."""
        # Drop the reference first so the manager never keeps pointing at a
        # closed stream, even if close() itself raises.
        stream, self.append_rows_stream = self.append_rows_stream, None
        if stream is not None:
            # Shutdown background threads and close the streaming connection.
            stream.close()

    def send_appendrowsrequest(
        self, request: types.AppendRowsRequest
    ) -> writer.AppendRowsFuture:
        """Send ``request`` on the stream, creating the stream if needed.

        Args:
            request: The append request to send.

        Returns:
            The future returned by the stream's ``send``.

        Raises:
            StreamClosedError: Re-raised after the closed stream has been
                discarded, so that a later call starts from a fresh stream.
        """
        # Kept outside the try block: the handler below assumes a stream exists.
        if self.append_rows_stream is None:
            self._init_stream()
        try:
            return self.append_rows_stream.send(request)
        except bqstorage_exceptions.StreamClosedError:
            # the stream needs to be reinitialized
            self._close_stream()
            raise

    # Use as a context manager

    def __enter__(self) -> DefaultStreamManager:
        """Enter the context manager and return this manager."""
        # Reuse a stream that already exists instead of orphaning it.
        if self.append_rows_stream is None:
            self._init_stream()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Exit the context manager: close the stream and drop the reference."""
        self._close_stream()


class BigqueryWriteManager:
    """Write protobuf rows to one BigQuery table through its default stream."""

    def __init__(
        self,
        project_id: str,
        dataset_id: str,
        table_id: str,
        bigquery_storage_write_client: bigquery_storage.BigQueryWriteClient,
        pb2_descriptor: Descriptor,
    ):  # pragma: no cover
        """Create a BigqueryWriteManager.

        Args:
            project_id: Project that owns the target table.
            dataset_id: Dataset that contains the target table.
            table_id: Target table.
            bigquery_storage_write_client: Client used to resolve the table
                path and to open write streams.
            pb2_descriptor: Descriptor of the protobuf message type of the
                rows that will be written.
        """
        self.bigquery_storage_write_client = bigquery_storage_write_client

        self.table_path = self.bigquery_storage_write_client.table_path(
            project_id, dataset_id, table_id
        )
        self.pb2_descriptor = pb2_descriptor

    def write_rows(self, pb_rows: Iterable[Any]) -> None:
        """Append ``pb_rows`` to the table and wait for the append to finish.

        Args:
            pb_rows: Messages exposing ``SerializeToString()``. Any iterable
                is accepted, including a one-shot generator.

        Does nothing when ``pb_rows`` yields no rows.
        """
        # Serialize up front: a failure here happens before any stream is
        # created, and it lets us detect an empty batch from any iterable.
        serialized_rows = [row.SerializeToString() for row in pb_rows]
        if not serialized_rows:
            # Nothing to write: skip creating a stream and sending an append
            # request that would carry no rows.
            return

        with DefaultStreamManager(
            self.table_path, self.pb2_descriptor, self.bigquery_storage_write_client
        ) as target_stream_manager:
            # Create a batch of row data from the proto2 serialized bytes.
            proto_rows = types.ProtoRows()
            proto_rows.serialized_rows.extend(serialized_rows)
            # Create an append row request containing the rows
            request = types.AppendRowsRequest()
            proto_data = types.AppendRowsRequest.ProtoData()
            proto_data.rows = proto_rows
            request.proto_rows = proto_data

            future = target_stream_manager.send_appendrowsrequest(request)

            # Wait for the append row requests to finish.
            future.result()
