diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/client.py b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/client.py index 778351012f06b..673274876854f 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/client.py +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/client.py @@ -22,9 +22,13 @@ SOFTWARE. """ +from typing import Dict, List + import backoff -from apiclient import discovery, errors -from requests.status_codes import codes as status_codes +from googleapiclient import errors +from requests import codes as status_codes + +from .helpers import SCOPES, Helpers def error_handler(error): @@ -32,22 +36,21 @@ def error_handler(error): class GoogleSheetsClient: - @staticmethod + def __init__(self, credentials: Dict[str, str], scopes: List[str] = SCOPES): + self.client = Helpers.get_authenticated_sheets_client(credentials, scopes) + @backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler) - def get(client: discovery.Resource, **kwargs): - return client.get(**kwargs).execute() + def get(self, **kwargs): + return self.client.get(**kwargs).execute() - @staticmethod @backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler) - def create(client: discovery.Resource, **kwargs): - return client.create(**kwargs).execute() + def create(self, **kwargs): + return self.client.create(**kwargs).execute() - @staticmethod @backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler) - def get_values(client: discovery.Resource, **kwargs): - return client.values().batchGet(**kwargs).execute() + def get_values(self, **kwargs): + return self.client.values().batchGet(**kwargs).execute() - @staticmethod @backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler) - def update_values(client: discovery.Resource, **kwargs): - return client.values().batchUpdate(**kwargs).execute() + def update_values(self, **kwargs): + return self.client.values().batchUpdate(**kwargs).execute() diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py index e75c186ab4bf9..c6322f78012c0 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py @@ -48,11 +48,11 @@ def __init__(self): def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus: # Check involves verifying that the specified spreadsheet is reachable with our credentials. - client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"])) + client = GoogleSheetsClient(json.loads(config["credentials_json"])) spreadsheet_id = config["spreadsheet_id"] try: # Attempt to get first row of sheet - GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1") + client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1") except errors.HttpError as err: reason = str(err) # Give a clearer message if it's a common error like 404. @@ -64,13 +64,11 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus: return AirbyteConnectionStatus(status=Status.SUCCEEDED) def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog: - client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"])) + client = GoogleSheetsClient(json.loads(config["credentials_json"])) spreadsheet_id = config["spreadsheet_id"] try: logger.info(f"Running discovery on sheet {spreadsheet_id}") - spreadsheet_metadata = Spreadsheet.parse_obj( - GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=False) - ) + spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False)) sheet_names = [sheet.properties.title for sheet in spreadsheet_metadata.sheets] streams = [] for sheet_name in sheet_names: @@ -88,7 +86,7 @@ def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog: def read( self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any] ) -> Generator[AirbyteMessage, None, None]: - client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"])) + client = GoogleSheetsClient(json.loads(config["credentials_json"])) sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog) spreadsheet_id = config["spreadsheet_id"] @@ -106,7 +104,7 @@ def read( range = f"{sheet}!{row_cursor}:{row_cursor + ROW_BATCH_SIZE}" logger.info(f"Fetching range {range}") row_batch = SpreadsheetValues.parse_obj( - GoogleSheetsClient.get_values(client, spreadsheetId=spreadsheet_id, ranges=range, majorDimension="ROWS") + client.get_values(spreadsheetId=spreadsheet_id, ranges=range, majorDimension="ROWS") ) row_cursor += ROW_BATCH_SIZE + 1 # there should always be one range since we requested only one diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py index 70fdb74a5914e..3a00d6010708e 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py @@ -27,10 +27,9 @@ from typing import Dict, FrozenSet, Iterable, List from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog -from apiclient import discovery from google.oauth2 import service_account +from googleapiclient import discovery -from .client import GoogleSheetsClient from .models.spreadsheet import RowData, Spreadsheet SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly", "https://www.googleapis.com/auth/drive.readonly"] @@ -38,17 +37,17 @@ class Helpers(object): @staticmethod - def get_authenticated_sheets_client(credentials: Dict[str, str], scopes=SCOPES) -> discovery.Resource: + def get_authenticated_sheets_client(credentials: Dict[str, str], scopes: List[str] = SCOPES) -> discovery.Resource: creds = Helpers.get_authenticated_google_credentials(credentials, scopes) return discovery.build("sheets", "v4", credentials=creds).spreadsheets() @staticmethod - def get_authenticated_drive_client(credentials: Dict[str, str], scopes=SCOPES) -> discovery.Resource: + def get_authenticated_drive_client(credentials: Dict[str, str], scopes: List[str] = SCOPES) -> discovery.Resource: creds = Helpers.get_authenticated_google_credentials(credentials, scopes) return discovery.build("drive", "v3", credentials=creds) @staticmethod - def get_authenticated_google_credentials(credentials: Dict[str, str], scopes=SCOPES): + def get_authenticated_google_credentials(credentials: Dict[str, str], scopes: List[str] = SCOPES): return service_account.Credentials.from_service_account_info(credentials, scopes=scopes) @staticmethod @@ -87,10 +86,8 @@ def get_formatted_row_values(row_data: RowData) -> List[str]: return [value.formattedValue for value in row_data.values] @staticmethod - def get_first_row(client: discovery.Resource, spreadsheet_id: str, sheet_name: str) -> List[str]: - spreadsheet = Spreadsheet.parse_obj( - GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet_name}!1:1") - ) + def get_first_row(client, spreadsheet_id: str, sheet_name: str) -> List[str]: + spreadsheet = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet_name}!1:1")) # There is only one sheet since we are specifying the sheet in the requested ranges. returned_sheets = spreadsheet.sheets @@ -134,7 +131,7 @@ def row_data_to_record_message(sheet_name: str, cell_values: List[str], column_i @staticmethod def get_available_sheets_to_column_index_to_name( - client: discovery.Resource, spreadsheet_id: str, requested_sheets_and_columns: Dict[str, FrozenSet[str]] + client, spreadsheet_id: str, requested_sheets_and_columns: Dict[str, FrozenSet[str]] ) -> Dict[str, Dict[int, str]]: available_sheets = Helpers.get_sheets_in_spreadsheet(client, spreadsheet_id) @@ -151,8 +148,8 @@ def get_available_sheets_to_column_index_to_name( return available_sheets_to_column_index_to_name @staticmethod - def get_sheets_in_spreadsheet(client: discovery.Resource, spreadsheet_id: str): - spreadsheet_metadata = Spreadsheet.parse_obj(GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=False)) + def get_sheets_in_spreadsheet(client, spreadsheet_id: str): + spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False)) return [sheet.properties.title for sheet in spreadsheet_metadata.sheets] @staticmethod diff --git a/airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py b/airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py index 6e007fea472f2..8abc030e0b1f9 100644 --- a/airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py +++ b/airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py @@ -29,7 +29,6 @@ from typing import Dict from airbyte_protocol import ConfiguredAirbyteCatalog, ConnectorSpecification -from apiclient import discovery from base_python_test import StandardSourceTestIface from google_sheets_source.client import GoogleSheetsClient from google_sheets_source.helpers import Helpers @@ -62,7 +61,7 @@ def get_catalog(self) -> ConfiguredAirbyteCatalog: def setup(self) -> None: Path(self._get_tmp_dir()).mkdir(parents=True, exist_ok=True) - sheets_client = Helpers.get_authenticated_sheets_client(self._get_creds(), SCOPES) + sheets_client = GoogleSheetsClient(self._get_creds(), SCOPES) spreadsheet_id = self._create_spreadsheet(sheets_client) self._write_spreadsheet_id(spreadsheet_id) @@ -90,7 +89,7 @@ def _get_creds(self) -> Dict[str, str]: def _get_tmp_dir(): return "/test_root/gsheet_test" - def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str: + def _create_spreadsheet(self, sheets_client: GoogleSheetsClient) -> str: """ :return: spreadsheetId """ @@ -99,7 +98,7 @@ def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str: "sheets": [{"properties": {"title": "sheet1"}}, {"properties": {"title": "sheet2"}}], } - spreadsheet = Spreadsheet.parse_obj(GoogleSheetsClient.create(sheets_client, body=request)) + spreadsheet = Spreadsheet.parse_obj(sheets_client.create(body=request)) spreadsheet_id = spreadsheet.spreadsheetId rows = [["header1", "irrelevant", "header3", "", "ignored"]] @@ -109,13 +108,11 @@ def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str: rows.append(["", "", ""]) rows.append(["orphan1", "orphan2", "orphan3"]) - GoogleSheetsClient.update_values( - sheets_client, + sheets_client.update_values( spreadsheetId=spreadsheet_id, body={"data": {"majorDimension": "ROWS", "values": rows, "range": "sheet1"}, "valueInputOption": "RAW"}, ) - GoogleSheetsClient.update_values( - sheets_client, + sheets_client.update_values( spreadsheetId=spreadsheet_id, body={"data": {"majorDimension": "ROWS", "values": rows, "range": "sheet2"}, "valueInputOption": "RAW"}, ) diff --git a/airbyte-integrations/connectors/source-google-sheets/unit_tests/test_helpers.py b/airbyte-integrations/connectors/source-google-sheets/unit_tests/test_helpers.py index 47137b89e1bf1..88f773b1783a3 100644 --- a/airbyte-integrations/connectors/source-google-sheets/unit_tests/test_helpers.py +++ b/airbyte-integrations/connectors/source-google-sheets/unit_tests/test_helpers.py @@ -23,9 +23,10 @@ """ import unittest -from unittest.mock import Mock +from unittest.mock import Mock, patch from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream +from google_sheets_source.client import GoogleSheetsClient from google_sheets_source.helpers import Helpers from google_sheets_source.models import CellData, GridData, RowData, Sheet, SheetProperties, Spreadsheet @@ -142,8 +143,10 @@ def test_get_first_row(self): client = Mock() client.get.return_value.execute.return_value = fake_response - - actual = Helpers.get_first_row(client, spreadsheet_id, sheet) + with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None): + sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"]) + sheet_client.client = client + actual = Helpers.get_first_row(sheet_client, spreadsheet_id, sheet) self.assertEqual(expected_first_row, actual) client.get.assert_called_with(spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet}!1:1") @@ -154,8 +157,10 @@ def test_get_sheets_in_spreadsheet(self): client.get.return_value.execute.return_value = Spreadsheet( spreadsheetId=spreadsheet_id, sheets=[Sheet(properties=SheetProperties(title=t)) for t in expected_sheets] ) - - actual_sheets = Helpers.get_sheets_in_spreadsheet(client, spreadsheet_id) + with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None): + sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"]) + sheet_client.client = client + actual_sheets = Helpers.get_sheets_in_spreadsheet(sheet_client, spreadsheet_id) self.assertEqual(expected_sheets, actual_sheets) client.get.assert_called_with(spreadsheetId=spreadsheet_id, includeGridData=False) @@ -186,9 +191,11 @@ def mock_client_call(spreadsheetId, includeGridData, ranges=None): client = Mock() client.get.side_effect = mock_client_call - + with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None): + sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"]) + sheet_client.client = client actual = Helpers.get_available_sheets_to_column_index_to_name( - client, spreadsheet_id, {sheet1: frozenset(sheet1_first_row), "doesnotexist": frozenset(["1", "2"])} + sheet_client, spreadsheet_id, {sheet1: frozenset(sheet1_first_row), "doesnotexist": frozenset(["1", "2"])} ) expected = {sheet1: {0: "1", 1: "2", 2: "3", 3: "4"}}